"""
Download an RLCS season from Ballchasing to S3 (or local storage).

Edit the configuration block below, then run:
    python scripts/download_season.py

Re-running this script is safe — already-downloaded replays are skipped and
any previously failed replays will be retried automatically.
"""

from impulse.collection import BallchasingClient, ReplayDownloader
from impulse.collection.storage import S3Backend, LocalBackend
from impulse.collection.database import ImpulseDB
from impulse.collection.rlcs_manager import RLCSManager
from impulse.config.collection_config import CollectionConfig

# =============================================================================
# Configuration — edit before each run
# =============================================================================

# Season to download. Available keys: '21-22', '22-23', '2024', '2025', '2026'
SEASON = '2024'

# Storage backend: 's3' or 'local'
STORAGE_TYPE = 's3'

# Path prefix applied under the storage root.
# S3: files stored at {PATH_PREFIX}/{SEASON}/{group_hierarchy}/{id}.replay
# Local: files stored at {LOCAL_BASE_DIR}/{PATH_PREFIX}/{SEASON}/{group_hierarchy}/{id}.replay
PATH_PREFIX = ['replays', 'rlcs']

# Base directory for local storage (ignored when STORAGE_TYPE = 's3')
LOCAL_BASE_DIR = '.'

# Path to SQLite database for download tracking and resume
DATABASE_PATH = './impulse.db'

# Ballchasing API rate limits
# Free tier: 1 req/sec, 200 req/hour
# Donor tier: up to 200 req/sec, no hourly cap (set RATE_LIMIT_PER_HOUR to None)
RATE_LIMIT_PER_SECOND = 1
RATE_LIMIT_PER_HOUR = 200  # Set to None to disable hourly cap (donor tier)

# Retry failed downloads after the main run.
# MAX_RETRIES attempts are made; stops early if all failures are recovered.
AUTO_RETRY = True
MAX_RETRIES = 3

# =============================================================================

# Look up the season's metadata and print a short summary before downloading.
season_info = RLCSManager.get_season_info(SEASON)
print(f"Season: {season_info['name']}")
print(f"Group ID: {season_info['group_id']}")
print(f"Estimated replays: {season_info['estimated_replay_count']:,}")
print(f"Estimated size: {season_info['estimated_size_gb']:.1f} GB")
print()

# Start from CollectionConfig.from_env(), then override the rate limits with
# the values chosen in the configuration block above.
config = CollectionConfig.from_env()
config.rate_limit_per_second = RATE_LIMIT_PER_SECOND
config.rate_limit_per_hour = RATE_LIMIT_PER_HOUR

client = BallchasingClient(config)
db = ImpulseDB(DATABASE_PATH)

# Select the storage backend; anything other than 's3' or 'local' is rejected.
if STORAGE_TYPE == 's3':
    storage = S3Backend()
elif STORAGE_TYPE == 'local':
    storage = LocalBackend(base_dir=LOCAL_BASE_DIR)
else:
    raise ValueError(f"Unknown STORAGE_TYPE: '{STORAGE_TYPE}'. Must be 's3' or 'local'.")

downloader = ReplayDownloader(client, storage, db)

# Main run: the season key is appended as the last path-prefix component.
result = downloader.download_group(
    group_id=season_info['group_id'],
    path_prefix=PATH_PREFIX + [SEASON]
)

# Optional retry passes over whatever failed in the main run.
if AUTO_RETRY and result.failed > 0:
    for attempt in range(1, MAX_RETRIES + 1):
        print(f"\nRetry {attempt}/{MAX_RETRIES}: {result.failed} failed replay(s)...")
        result = downloader.retry_failed_downloads(season_info['group_id'])
        if result.failed == 0:
            print("All replays recovered.")
            break
    else:
        # for/else: reached only when every attempt ran without hitting `break`,
        # i.e. some replays are still failing after MAX_RETRIES passes.
        print(f"Max retries ({MAX_RETRIES}) reached. {result.failed} replay(s) could not be recovered.")