First commit

This commit is contained in:
Iain Bradley
2025-10-07 12:55:34 +01:00
commit 4ca7f9667a
7 changed files with 501 additions and 0 deletions
+27
View File
@@ -0,0 +1,27 @@
# Environment variables (contains passwords)
.env
# Downloaded files
downloads/
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
# Virtual environment
venv/
env/
ENV/
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
+28
View File
@@ -0,0 +1,28 @@
FROM python:3.11-slim
# Disable Python's stdout/stderr buffering so the script's progress messages
# appear immediately in `docker-compose logs` (stdout is not a TTY here)
ENV PYTHONUNBUFFERED=1
# Install Firefox and dependencies
RUN apt-get update && apt-get install -y \
    firefox-esr \
    wget \
    && rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the download script
COPY download_cbz.py .
# Create downloads directory
RUN mkdir -p /app/downloads
# Set environment variables (can be overridden at runtime)
ENV EMAIL=""
ENV PASSWORD=""
ENV OUTPUT_DIR="/app/downloads"
# Run the script (exec form: starts python directly, without a wrapping shell)
CMD ["python", "download_cbz.py"]
+333
View File
@@ -0,0 +1,333 @@
import requests
from bs4 import BeautifulSoup
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.common.exceptions import TimeoutException
from webdriver_manager.firefox import GeckoDriverManager
def setup_driver(headless=True):
    """
    Build a Firefox WebDriver; the GeckoDriver binary is installed on demand.

    Args:
        headless: When true, Firefox is started with the --headless argument
            (no visible window)

    Returns:
        The Firefox WebDriver instance
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0'

    options = Options()
    if headless:
        options.add_argument('--headless')
    # Present a fixed desktop Firefox user agent to the site
    options.set_preference('general.useragent.override', user_agent)

    # webdriver-manager installs GeckoDriver; its result is handed to Service
    geckodriver = GeckoDriverManager().install()
    return webdriver.Firefox(service=Service(geckodriver), options=options)
def login_with_selenium(driver, email, password):
    """
    Log in to shop.2000ad.com using Selenium.

    The login is considered successful once the browser has navigated away
    from the sign-in page after the form was submitted.

    Args:
        driver: Selenium WebDriver instance
        email: Your email address
        password: Your password

    Returns:
        True if login successful, False otherwise
    """
    login_url = "https://shop.2000ad.com/account/sign-in"
    # URL fragments that mean "still on the login form". The sign-in URL above
    # does not contain 'login', so testing for 'login' alone can never detect
    # a failed sign-in.
    login_markers = ('sign-in', 'login')

    def left_login_page(drv):
        current = drv.current_url.lower()
        return not any(marker in current for marker in login_markers)

    print("🔐 Navigating to login page...")
    driver.get(login_url)
    try:
        # Wait for and fill in email field
        email_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, "email"))
        )
        email_field.send_keys(email)

        # Fill in password field
        password_field = driver.find_element(By.NAME, "password")
        password_field.send_keys(password)

        # Submit the form
        print("🔐 Logging in...")
        password_field.submit()

        # Poll for the post-login redirect instead of sleeping a fixed time;
        # staying on the sign-in page until the timeout means login failed.
        try:
            WebDriverWait(driver, 10).until(left_login_page)
        except TimeoutException:
            print("❌ Login failed - check your credentials")
            return False

        print("✅ Login successful!")
        return True
    except TimeoutException:
        # Raised by the wait for the email field
        print("❌ Login form not found - page may have changed")
        return False
    except Exception as e:
        print(f"❌ Login error: {e}")
        return False
def scroll_and_load_all_items(driver, downloads_url):
    """
    Open the downloads page and keep scrolling until no more items appear.

    Args:
        driver: Selenium WebDriver instance
        downloads_url: URL of the downloads page

    Returns:
        The page source (HTML) after scrolling has finished, or the source as
        it stands if no product element shows up within 10 seconds
    """
    product_locator = (By.CLASS_NAME, "product")
    read_height_js = "return document.body.scrollHeight"

    print(f"\n📄 Loading downloads page: {downloads_url}")
    driver.get(downloads_url)

    # Give the first batch of products a chance to render
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(product_locator)
        )
    except TimeoutException:
        print("⚠️ Warning: No products found on page")
        return driver.page_source

    print("📜 Scrolling to load all items...")
    previous_height = driver.execute_script(read_height_js)
    seen_items = 0
    idle_rounds = 0

    while True:
        # Track whether the last scroll brought in any new products
        visible_items = len(driver.find_elements(*product_locator))
        if visible_items == seen_items:
            idle_rounds += 1
        else:
            print(f" Loaded {visible_items} items so far...")
            seen_items = visible_items
            idle_rounds = 0

        # Jump to the bottom and give the page time to fetch more
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        height_now = driver.execute_script(read_height_js)

        # Done: page height is stable and the item count has not moved for 3 rounds
        if height_now == previous_height and idle_rounds >= 3:
            print(f"✅ Finished loading - found {seen_items} total items")
            break
        previous_height = height_now

        # Safety limit to prevent infinite loops when the height keeps changing
        if idle_rounds >= 10:
            print(f"⚠️ Stopped scrolling after no changes - found {seen_items} items")
            break

    return driver.page_source
def transfer_cookies_to_requests(driver, session):
    """
    Copy every cookie held by the Selenium browser into a requests Session.

    Each cookie is stored by name and value, scoped to its original domain.

    Args:
        driver: Selenium WebDriver instance
        session: requests.Session instance
    """
    browser_cookies = driver.get_cookies()
    for entry in browser_cookies:
        name, value = entry['name'], entry['value']
        session.cookies.set(name, value, domain=entry['domain'])
    transferred = len(browser_cookies)
    print(f"✅ Transferred {transferred} cookies to requests session")
def _cbz_filename(product_name, release_date):
    """Return the .cbz file name for a product, date-prefixed when a release date is given."""
    if release_date:
        # Format: YYYYMMDDHHMMSS -> YYYY-MM-DD
        date_str = f"{release_date[0:4]}-{release_date[4:6]}-{release_date[6:8]}"
        filename = f"{date_str} - {product_name}.cbz"
    else:
        filename = f"{product_name}.cbz"
    # Replace characters that would otherwise act as path separators
    return filename.replace('/', '-').replace('\\', '-').replace(':', '-')


def _save_cbz(session, download_url, filepath, timeout=(10, 60)):
    """Stream download_url into filepath; return False if the server answered with HTML."""
    part_path = filepath + '.part'
    # The response is used as a context manager so the connection is released
    # on every path, including the early return below.
    with session.get(download_url, stream=True, allow_redirects=True,
                     timeout=timeout) as response:
        # Check if we got HTML (login page) instead of CBZ
        content_type = response.headers.get('Content-Type', '')
        if 'text/html' in content_type:
            return False
        response.raise_for_status()

        # Write to a temporary name first: an interrupted transfer must not
        # leave a truncated .cbz that later runs would skip as "already exists".
        finished = False
        try:
            with open(part_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            finished = True
        finally:
            if not finished and os.path.exists(part_path):
                os.remove(part_path)
    os.replace(part_path, filepath)
    return True


def download_cbz_files(email, password, output_dir='downloads', headless=True):
    """
    Log in, load all downloads, and download all CBZ files.

    Files are sorted into a 'Megazine' or '2000ad' subdirectory of output_dir
    based on the product name. Files that already exist are skipped. Errors
    are reported on stdout; the function does not raise.

    Args:
        email: Your shop.2000ad.com email
        password: Your shop.2000ad.com password
        output_dir: Directory where files will be saved (default: 'downloads')
        headless: Run browser in headless mode (default: True)
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Set up Selenium
    print("🌐 Starting browser...")
    driver = setup_driver(headless=headless)
    try:
        # Log in
        if not login_with_selenium(driver, email, password):
            print("\n❌ Cannot proceed without successful login")
            return

        # Load downloads page with infinite scroll
        downloads_url = "https://shop.2000ad.com/account/downloads"
        html_content = scroll_and_load_all_items(driver, downloads_url)

        # Parse the HTML
        soup = BeautifulSoup(html_content, 'html.parser')
        products = soup.find_all('li', class_='product')
        print(f"\n📚 Found {len(products)} products to process\n")

        # Create requests session and transfer cookies
        session = requests.Session()
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://shop.2000ad.com/'
        })
        transfer_cookies_to_requests(driver, session)

        # Close browser - we don't need it anymore
        driver.quit()
        driver = None  # tells the finally block there is nothing left to close
        print("✅ Browser closed\n")

        print(f"{'='*50}")
        print("Starting downloads...\n")
        downloaded = 0
        skipped = 0
        failed = 0

        for product in products:
            # Product name and publication date drive the file name
            product_name = product.get('data-name', 'Unknown')
            release_date = product.get('data-released', '')

            # Determine subdirectory based on product name
            subdir = 'Megazine' if 'megazine' in product_name.lower() else '2000ad'
            product_output_dir = os.path.join(output_dir, subdir)
            os.makedirs(product_output_dir, exist_ok=True)

            filename = _cbz_filename(product_name, release_date)
            filepath = os.path.join(product_output_dir, filename)

            # Look at every form of this product; only CBZ download forms matter
            for form in product.find_all('form'):
                button = form.find('button', type='submit')
                if not (button and 'CBZ' in button.get_text()):
                    continue
                download_url = form.get('action')
                if not download_url:
                    continue

                # Check if file already exists
                if os.path.exists(filepath):
                    print(f"⏭️ Skipping (already exists): {subdir}/{filename}")
                    skipped += 1
                    continue

                try:
                    print(f"📥 Downloading to {subdir}/: {filename}")
                    if not _save_cbz(session, download_url, filepath):
                        print(f"⚠️ Warning: Got HTML response instead of file")
                        print(f" This might be a permission issue or the file isn't available")
                        failed += 1
                        continue

                    file_size = os.path.getsize(filepath)
                    print(f"✅ Saved to {subdir}/: {filename} ({file_size / 1024 / 1024:.2f} MB)")
                    downloaded += 1

                    # Be polite - add a small delay between downloads
                    time.sleep(1)
                except requests.exceptions.RequestException as e:
                    print(f"❌ Error downloading {filename}: {e}")
                    failed += 1
                except Exception as e:
                    print(f"❌ Error saving {filename}: {e}")
                    failed += 1

        print(f"\n{'='*50}")
        print(f"Download complete!")
        print(f"✅ Successfully downloaded: {downloaded}")
        print(f"⏭️ Skipped (already exist): {skipped}")
        print(f"❌ Failed: {failed}")
        print(f"📁 Files saved to: {os.path.abspath(output_dir)}")
    except Exception as e:
        print(f"\n❌ Fatal error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Make sure browser is closed if we bailed out before closing it
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                print(f"⚠️ Could not close browser cleanly: {e}")
if __name__ == "__main__":
    # Configuration - can be set via environment variables or directly
    email = os.environ.get('EMAIL', 'your_email@example.com')
    password = os.environ.get('PASSWORD', 'your_password')
    # An empty OUTPUT_DIR falls back to the default instead of failing in makedirs
    output_dir = os.environ.get('OUTPUT_DIR') or 'downloads'

    # Reject both the placeholder values and empty strings: a variable that is
    # set but empty bypasses the default passed to os.environ.get above.
    if email in ('', 'your_email@example.com') or password in ('', 'your_password'):
        print("⚠️ Warning: Please set EMAIL and PASSWORD environment variables")
        print(" Example: EMAIL=your@email.com PASSWORD=yourpass python download_cbz.py")
        # SystemExit rather than exit(): exit() is a helper added by the site module
        raise SystemExit(1)

    # Run the download
    # Set headless=False if you want to see the browser window
    download_cbz_files(email, password, output_dir=output_dir, headless=True)
+17
View File
@@ -0,0 +1,17 @@
version: '3.8'
services:
cbz-downloader:
build: .
container_name: 2000ad-downloader
environment:
- EMAIL=${EMAIL:-your_email@example.com}
- PASSWORD=${PASSWORD:-your_password}
- OUTPUT_DIR=/app/downloads
volumes:
- ./downloads:/app/downloads
# Optional: Mount the script for easy editing without rebuilding
- ./download_cbz.py:/app/download_cbz.py
restart: "no"
# Uncomment below to run on a schedule instead of immediately
# command: sh -c "while true; do python download_cbz.py; sleep 86400; done"
+5
View File
@@ -0,0 +1,5 @@
# Copy this file to .env and fill in your credentials
# cp .env.example .env
EMAIL=your_email@example.com
PASSWORD=your_password
+87
View File
@@ -0,0 +1,87 @@
# 2000 AD CBZ Downloader
Automatically downloads all CBZ files from your shop.2000ad.com account.
## Setup
1. **Create `.env` file with your credentials:**
```bash
cp .env.example .env
```
Then edit `.env` and add your email and password.
2. **Build the container:**
```bash
docker-compose build
```
## Usage
### Run once (download all new files):
```bash
docker-compose up
```
### Run in background:
```bash
docker-compose up -d
```
### View logs:
```bash
docker-compose logs -f
```
### Stop the container:
```bash
docker-compose down
```
## File Structure
```
.
├── docker-compose.yml
├── Dockerfile
├── requirements.txt
├── download_cbz.py
├── .env (your credentials - not committed to git)
├── .env.example (template)
└── downloads/
└── (your CBZ files will be downloaded here)
```
## Scheduled Downloads
To run automatically every day, edit `docker-compose.yml` and uncomment the `command` line:
```yaml
command: sh -c "while true; do python download_cbz.py; sleep 86400; done"
```
Then change `restart: "no"` to `restart: unless-stopped`.
This will:
- Run the downloader immediately
- Wait 24 hours (86400 seconds)
- Run again
- Repeat forever
## Troubleshooting
**If downloads fail:**
- Check your credentials in `.env`
- Run with logs visible: `docker-compose up` (without `-d`)
- Check that Firefox is working: The script will show login progress
**If you want to see the browser:**
- Edit `download_cbz.py` and change `headless=True` to `headless=False`
- Rebuild: `docker-compose build`
- You'll need X11 forwarding for this in Docker
## Security Note
The `.env` file contains your password. Make sure to:
- Add `.env` to `.gitignore` if using git
- Never commit credentials to version control
- Keep file permissions restricted: `chmod 600 .env`
+4
View File
@@ -0,0 +1,4 @@
selenium==4.15.2
beautifulsoup4==4.12.2
requests==2.31.0
webdriver-manager==4.0.1