First commit
This commit is contained in:
+27
@@ -0,0 +1,27 @@
|
|||||||
|
# Environment variables (contains passwords)
|
||||||
|
.env
|
||||||
|
|
||||||
|
# Downloaded files
|
||||||
|
downloads/
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
|
||||||
|
# Virtual environment
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
ENV/
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
+28
@@ -0,0 +1,28 @@
|
|||||||
|
FROM python:3.11-slim

# Flush Python output immediately so progress messages show up in
# `docker logs` / `docker-compose logs -f` while the script is running.
ENV PYTHONUNBUFFERED=1

# Install Firefox and dependencies
RUN apt-get update && apt-get install -y \
    firefox-esr \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the download script
COPY download_cbz.py .

# Create downloads directory
RUN mkdir -p /app/downloads

# EMAIL and PASSWORD are intentionally NOT given defaults here: they must be
# supplied at runtime (docker-compose `environment`, `docker run -e ...`).
# An empty default would be indistinguishable from a real value for the
# script's "credentials not set" check, and credentials do not belong in
# image metadata.
ENV OUTPUT_DIR="/app/downloads"

# Run the script (exec form, so Python is the container's main process and
# receives stop signals directly instead of going through `sh -c`)
CMD ["python", "download_cbz.py"]
|
||||||
@@ -0,0 +1,333 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from selenium.webdriver.firefox.options import Options
|
||||||
|
from selenium.webdriver.firefox.service import Service
|
||||||
|
from selenium.common.exceptions import TimeoutException
|
||||||
|
from webdriver_manager.firefox import GeckoDriverManager
|
||||||
|
|
||||||
|
def setup_driver(headless=True):
    """
    Build a Firefox WebDriver, letting webdriver-manager provide GeckoDriver.

    Args:
        headless: When true, pass ``--headless`` to Firefox so no window
            is shown.

    Returns:
        The ``webdriver.Firefox`` instance that was created.
    """
    options = Options()
    if headless:
        options.add_argument('--headless')

    # Override the user agent string reported by the browser.
    options.set_preference(
        'general.useragent.override',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
    )

    # GeckoDriverManager().install() returns the path of the driver binary.
    gecko_path = GeckoDriverManager().install()
    return webdriver.Firefox(service=Service(gecko_path), options=options)
|
||||||
|
|
||||||
|
def login_with_selenium(driver, email, password):
    """
    Log in to shop.2000ad.com using Selenium.

    Success is judged by the browser URL: after the form is submitted the
    function waits (up to 10 seconds) for the browser to leave the sign-in
    page.

    Args:
        driver: Selenium WebDriver instance
        email: Your email address
        password: Your password

    Returns:
        True if the browser left the sign-in page after submitting the
        form; False if the form was not found, the browser was still on
        the sign-in page when the wait expired, or a WebDriver error
        occurred.
    """
    login_url = "https://shop.2000ad.com/account/sign-in"

    # URL fragments that mean the login form is still being shown. The
    # login URL above contains "sign-in" (not "login"), so checking only
    # for "login" would report every attempt as successful.
    login_markers = ("sign-in", "login")

    def still_on_login_page(current_driver):
        url = current_driver.current_url.lower()
        return any(marker in url for marker in login_markers)

    print("🔐 Navigating to login page...")
    driver.get(login_url)

    try:
        # Wait for the email field; a timeout here means the form is missing,
        # which is reported separately from rejected credentials below.
        try:
            email_field = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.NAME, "email"))
            )
        except TimeoutException:
            print("❌ Login form not found - page may have changed")
            return False

        email_field.send_keys(email)

        # Fill in password field
        password_field = driver.find_element(By.NAME, "password")
        password_field.send_keys(password)

        # Submit the form
        print("🔐 Logging in...")
        password_field.submit()

        # Wait for the redirect away from the sign-in page instead of
        # sleeping a fixed time: returns as soon as the URL changes and
        # tolerates slow responses up to the timeout.
        try:
            WebDriverWait(driver, 10).until(
                lambda d: not still_on_login_page(d)
            )
        except TimeoutException:
            print("❌ Login failed - check your credentials")
            return False

        print("✅ Login successful!")
        return True

    except Exception as e:
        # Boundary handler: any other WebDriver problem is reported to the
        # caller as a failed login rather than crashing the run.
        print(f"❌ Login error: {e}")
        return False
|
||||||
|
|
||||||
|
def scroll_and_load_all_items(driver, downloads_url):
    """
    Open the downloads page and keep scrolling until it stops growing.

    Args:
        driver: Selenium WebDriver instance
        downloads_url: URL of the downloads page

    Returns:
        ``driver.page_source`` after scrolling has finished, or straight
        away if no element with class ``product`` appears within 10 seconds.
    """
    print(f"\n📄 Loading downloads page: {downloads_url}")
    driver.get(downloads_url)

    product_locator = (By.CLASS_NAME, "product")

    # Give the first product up to 10 seconds to show up.
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(product_locator)
        )
    except TimeoutException:
        print("⚠️ Warning: No products found on page")
        return driver.page_source

    print("📜 Scrolling to load all items...")

    previous_height = driver.execute_script("return document.body.scrollHeight")
    seen_items = 0
    idle_rounds = 0  # consecutive passes in which the product count stayed the same

    while True:
        visible_items = len(driver.find_elements(*product_locator))

        if visible_items == seen_items:
            idle_rounds += 1
        else:
            print(f" Loaded {visible_items} items so far...")
            seen_items = visible_items
            idle_rounds = 0

        # Jump to the bottom, then pause so further content can load.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        current_height = driver.execute_script("return document.body.scrollHeight")

        # Done once the page height is stable and the item count has not
        # moved for at least three passes.
        if current_height == previous_height and idle_rounds >= 3:
            print(f"✅ Finished loading - found {seen_items} total items")
            break

        previous_height = current_height

        # Hard stop: ten passes without a new item, even if the height
        # keeps changing.
        if idle_rounds >= 10:
            print(f"⚠️ Stopped scrolling after no changes - found {seen_items} items")
            break

    return driver.page_source
|
||||||
|
|
||||||
|
def transfer_cookies_to_requests(driver, session):
    """
    Copy every cookie held by the Selenium browser into a requests session.

    Each cookie's name, value and domain are passed to
    ``session.cookies.set``; a summary line is printed afterwards.

    Args:
        driver: Selenium WebDriver instance
        session: requests.Session instance
    """
    browser_cookies = driver.get_cookies()
    jar = session.cookies

    for entry in browser_cookies:
        jar.set(entry['name'], entry['value'], domain=entry['domain'])

    print(f"✅ Transferred {len(browser_cookies)} cookies to requests session")
|
||||||
|
|
||||||
|
def _format_release_date(release_date):
    """Turn a YYYYMMDDHHMMSS stamp into YYYY-MM-DD, or '' if it is unusable."""
    stamp = release_date[:8] if release_date else ''
    if len(stamp) == 8 and stamp.isdigit():
        return f"{stamp[0:4]}-{stamp[4:6]}-{stamp[6:8]}"
    return ''


def _build_filename(product_name, date_str):
    """Build the .cbz filename for a product, optionally prefixed by its date."""
    if date_str:
        filename = f"{date_str} - {product_name}.cbz"
    else:
        filename = f"{product_name}.cbz"
    # The replacement set is kept exactly as before so files saved by earlier
    # runs are still recognised as "already exists".
    return filename.replace('/', '-').replace('\\', '-').replace(':', '-')


def _save_download(session, download_url, filepath, timeout=(10, 120)):
    """
    Stream download_url to filepath; return False if the server sent HTML.

    The data is written to ``<filepath>.part`` and only renamed to
    ``filepath`` once the whole body has been received, so an interrupted
    download never leaves a truncated file that a later run would skip.

    Raises:
        requests.exceptions.RequestException: on network/HTTP errors.
        OSError: if the file cannot be written.
    """
    partial_path = f"{filepath}.part"

    # The context manager releases the streamed connection on every path,
    # including the early return below.
    with session.get(download_url, stream=True, allow_redirects=True,
                     timeout=timeout) as response:
        # An HTML body means we got a page (e.g. the login page), not a CBZ.
        if 'text/html' in response.headers.get('Content-Type', ''):
            return False

        response.raise_for_status()

        try:
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            os.replace(partial_path, filepath)
        finally:
            # Only still present if the download or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    return True


def _download_product_files(session, product, output_dir, base_url):
    """Download each CBZ form of one product; return (downloaded, skipped, failed)."""
    from urllib.parse import urljoin

    # Product name (used for logging and the filename) and release date
    product_name = product.get('data-name', 'Unknown')
    date_str = _format_release_date(product.get('data-released', ''))

    # Products with "megazine" in the name go to their own subdirectory
    subdir = 'Megazine' if 'megazine' in product_name.lower() else '2000ad'
    product_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(product_output_dir, exist_ok=True)

    downloaded = skipped = failed = 0

    for form in product.find_all('form'):
        # Only forms whose submit button mentions CBZ are downloads we want
        button = form.find('button', type='submit')
        if not (button and 'CBZ' in button.get_text()):
            continue

        action = form.get('action')
        if not action:
            continue

        # Resolve a relative form action against the page it came from;
        # absolute URLs pass through unchanged.
        download_url = urljoin(base_url, action)

        filename = _build_filename(product_name, date_str)
        filepath = os.path.join(product_output_dir, filename)

        if os.path.exists(filepath):
            print(f"⏭️ Skipping (already exists): {subdir}/{filename}")
            skipped += 1
            continue

        try:
            print(f"📥 Downloading to {subdir}/: {filename}")

            if not _save_download(session, download_url, filepath):
                print(f"⚠️ Warning: Got HTML response instead of file")
                print(f" This might be a permission issue or the file isn't available")
                failed += 1
                continue

            file_size = os.path.getsize(filepath)
            print(f"✅ Saved to {subdir}/: {filename} ({file_size / 1024 / 1024:.2f} MB)")
            downloaded += 1

            # Be polite - add a small delay between downloads
            time.sleep(1)

        except requests.exceptions.RequestException as e:
            print(f"❌ Error downloading {filename}: {e}")
            failed += 1
        except Exception as e:
            # Per-file boundary: report and move on to the next file.
            print(f"❌ Error saving {filename}: {e}")
            failed += 1

    return downloaded, skipped, failed


def download_cbz_files(email, password, output_dir='downloads', headless=True):
    """
    Log in, load all downloads, and download all CBZ files.

    The browser is used only to log in and to render the full downloads
    list; the files themselves are fetched with a requests session that
    carries the browser's cookies. Files that already exist are skipped.
    Errors are printed rather than raised.

    Args:
        email: Your shop.2000ad.com email
        password: Your shop.2000ad.com password
        output_dir: Directory where files will be saved (default: 'downloads')
        headless: Run browser in headless mode (default: True)
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Set up Selenium
    print("🌐 Starting browser...")
    driver = setup_driver(headless=headless)
    browser_open = True
    session = None

    try:
        # Log in
        if not login_with_selenium(driver, email, password):
            print("\n❌ Cannot proceed without successful login")
            return

        # Load downloads page with infinite scroll
        downloads_url = "https://shop.2000ad.com/account/downloads"
        html_content = scroll_and_load_all_items(driver, downloads_url)

        # Parse the HTML
        soup = BeautifulSoup(html_content, 'html.parser')
        products = soup.find_all('li', class_='product')

        print(f"\n📚 Found {len(products)} products to process\n")

        # Create requests session and transfer cookies
        session = requests.Session()
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://shop.2000ad.com/'
        })

        transfer_cookies_to_requests(driver, session)

        # Close browser - we don't need it anymore
        driver.quit()
        browser_open = False
        print("✅ Browser closed\n")

        print(f"{'='*50}")
        print("Starting downloads...\n")

        downloaded = 0
        skipped = 0
        failed = 0

        for product in products:
            got, passed_over, errors = _download_product_files(
                session, product, output_dir, downloads_url
            )
            downloaded += got
            skipped += passed_over
            failed += errors

        print(f"\n{'='*50}")
        print(f"Download complete!")
        print(f"✅ Successfully downloaded: {downloaded}")
        print(f"⏭️ Skipped (already exist): {skipped}")
        print(f"❌ Failed: {failed}")
        print(f"📁 Files saved to: {os.path.abspath(output_dir)}")

    except Exception as e:
        print(f"\n❌ Fatal error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        if session is not None:
            session.close()
        # Make sure browser is closed (only if the normal path did not
        # already close it)
        if browser_open:
            try:
                driver.quit()
            except Exception as e:
                print(f"⚠️ Could not close browser cleanly: {e}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Configuration - can be set via environment variables or directly.
    # `or` (rather than a .get() default) makes an EMPTY variable fall back
    # to the placeholder too, so blank credentials are rejected below
    # instead of being sent to the login form.
    email = os.environ.get('EMAIL') or 'your_email@example.com'
    password = os.environ.get('PASSWORD') or 'your_password'
    output_dir = os.environ.get('OUTPUT_DIR') or 'downloads'

    if email == 'your_email@example.com' or password == 'your_password':
        print("⚠️ Warning: Please set EMAIL and PASSWORD environment variables")
        print(" Example: EMAIL=your@email.com PASSWORD=yourpass python download_cbz.py")
        # SystemExit instead of exit(): exit() is injected by the `site`
        # module and is not guaranteed to exist in every interpreter setup.
        raise SystemExit(1)

    # Run the download
    # Set headless=False if you want to see the browser window
    download_cbz_files(email, password, output_dir=output_dir, headless=True)
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
version: '3.8'

services:
  cbz-downloader:
    build: .
    container_name: 2000ad-downloader
    environment:
      # EMAIL / PASSWORD are substituted from the environment Compose runs
      # with; the fallbacks are the placeholder values that
      # download_cbz.py refuses to run with.
      - EMAIL=${EMAIL:-your_email@example.com}
      - PASSWORD=${PASSWORD:-your_password}
      - OUTPUT_DIR=/app/downloads
    volumes:
      - ./downloads:/app/downloads
      # Optional: Mount the script for easy editing without rebuilding
      - ./download_cbz.py:/app/download_cbz.py
    restart: "no"
    # Uncomment below to run on a schedule instead of immediately.
    # `;` (not `&&`) before sleep, so a failed run still waits a day
    # instead of retrying in a tight loop.
    # command: sh -c "while true; do python download_cbz.py; sleep 86400; done"
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
# Copy this file to .env and fill in your credentials
|
||||||
|
# cp .env.example .env
|
||||||
|
|
||||||
|
EMAIL=your_email@example.com
|
||||||
|
PASSWORD=your_password
|
||||||
@@ -0,0 +1,87 @@
|
|||||||
|
# 2000 AD CBZ Downloader
|
||||||
|
|
||||||
|
Automatically downloads all CBZ files from your shop.2000ad.com account.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
1. **Create `.env` file with your credentials:**
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
Then edit `.env` and add your email and password.
|
||||||
|
|
||||||
|
2. **Build the container:**
|
||||||
|
```bash
|
||||||
|
docker-compose build
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Run once (download all new files):
|
||||||
|
```bash
|
||||||
|
docker-compose up
|
||||||
|
```
|
||||||
|
|
||||||
|
### Run in background:
|
||||||
|
```bash
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
### View logs:
|
||||||
|
```bash
|
||||||
|
docker-compose logs -f
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stop the container:
|
||||||
|
```bash
|
||||||
|
docker-compose down
|
||||||
|
```
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── docker-compose.yml
|
||||||
|
├── Dockerfile
|
||||||
|
├── requirements.txt
|
||||||
|
├── download_cbz.py
|
||||||
|
├── .env (your credentials - not committed to git)
|
||||||
|
├── .env.example (template)
|
||||||
|
└── downloads/
|
||||||
|
└── (your CBZ files will be downloaded here)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scheduled Downloads
|
||||||
|
|
||||||
|
To run automatically every day, edit `docker-compose.yml` and uncomment the `command` line:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
command: sh -c "while true; do python download_cbz.py; sleep 86400; done"
|
||||||
|
```
|
||||||
|
|
||||||
|
Then change `restart: "no"` to `restart: unless-stopped`.
|
||||||
|
|
||||||
|
This will:
|
||||||
|
- Run the downloader immediately
|
||||||
|
- Wait 24 hours (86400 seconds)
|
||||||
|
- Run again
|
||||||
|
- Repeat forever
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
**If downloads fail:**
|
||||||
|
- Check your credentials in `.env`
|
||||||
|
- Run with logs visible: `docker-compose up` (without `-d`)
|
||||||
|
- Check that Firefox is working: the script prints its login progress to the logs
|
||||||
|
|
||||||
|
**If you want to see the browser:**
|
||||||
|
- Edit `download_cbz.py` and change `headless=True` to `headless=False`
|
||||||
|
- Rebuild: `docker-compose build`
|
||||||
|
- You'll need X11 forwarding for this in Docker
|
||||||
|
|
||||||
|
## Security Note
|
||||||
|
|
||||||
|
The `.env` file contains your password. Make sure to:
|
||||||
|
- Keep `.env` listed in `.gitignore` if using git (this repository's `.gitignore` already excludes it)
|
||||||
|
- Never commit credentials to version control
|
||||||
|
- Keep file permissions restricted: `chmod 600 .env`
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
selenium==4.15.2
|
||||||
|
beautifulsoup4==4.12.2
|
||||||
|
requests==2.31.0
|
||||||
|
webdriver-manager==4.0.1
|
||||||
Reference in New Issue
Block a user