Skip to content

Commit a0c79fa

Browse files
committed
Replace rlcs_downloader.py with a configurable script template
Remove the argparse CLI in favor of a simple script template with all options declared as variables at the top of the file. This better matches the actual workflow (SSH into EC2, edit one file, run it) and is easier to read and modify than remembering CLI flags. The new script consolidates download and retry logic into a single file: auto-retries failures up to MAX_RETRIES times after the main download, stopping early if all replays are recovered. Uses a unified PATH_PREFIX for both S3 and local storage so the directory structure mirrors S3 regardless of backend.
1 parent 16a2b98 commit a0c79fa

File tree

2 files changed

+87
-130
lines changed

2 files changed

+87
-130
lines changed

scripts/download_rlcs_season.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
"""
Download an RLCS season from Ballchasing to S3 (or local storage).

Edit the configuration block below, then run:
    python scripts/download_rlcs_season.py

Re-running this script is safe — already-downloaded replays are skipped and
any previously failed replays will be retried automatically.

Flow:
    1. Look up the season's metadata and print a short summary.
    2. Build the collection config from the environment and apply the rate
       limits configured below.
    3. Create the Ballchasing client, the tracking database, and the selected
       storage backend.
    4. Download the season's replay group under PATH_PREFIX + [SEASON].
    5. If AUTO_RETRY is enabled and some replays failed, retry them up to
       MAX_RETRIES times, stopping as soon as none remain failed.
"""

from impulse.collection import BallchasingClient, ReplayDownloader
from impulse.collection.storage import S3Backend, LocalBackend
from impulse.collection.database import ImpulseDB
from impulse.collection.rlcs_manager import RLCSManager
from impulse.config.collection_config import CollectionConfig

# =============================================================================
# Configuration — edit before each run
# =============================================================================

# Season to download. Available keys: '21-22', '22-23', '2024', '2025', '2026'
SEASON = '2024'

# Storage backend: 's3' or 'local'
STORAGE_TYPE = 's3'

# Path prefix applied under the storage root.
#   S3:    files stored at {PATH_PREFIX}/{SEASON}/{group_hierarchy}/{id}.replay
#   Local: files stored at {LOCAL_BASE_DIR}/{PATH_PREFIX}/{SEASON}/{group_hierarchy}/{id}.replay
# Must be a list of path components: it is concatenated with [SEASON] below.
PATH_PREFIX = ['replays', 'rlcs']

# Base directory for local storage (ignored when STORAGE_TYPE = 's3')
LOCAL_BASE_DIR = '.'

# Path to SQLite database for download tracking and resume
DATABASE_PATH = './impulse.db'

# Ballchasing API rate limits
#   Free tier:  1 req/sec, 200 req/hour
#   Donor tier: up to 200 req/sec, no hourly cap (set RATE_LIMIT_PER_HOUR to None)
RATE_LIMIT_PER_SECOND = 1
RATE_LIMIT_PER_HOUR = 200  # Set to None to disable hourly cap (donor tier)

# Retry failed downloads after the main run.
# MAX_RETRIES attempts are made; stops early if all failures are recovered.
AUTO_RETRY = True
MAX_RETRIES = 3

# =============================================================================

# Resolve the season key to its metadata and show what is about to be fetched.
season_info = RLCSManager.get_season_info(SEASON)
print(f"Season: {season_info['name']}")
print(f"Group ID: {season_info['group_id']}")
print(f"Estimated replays: {season_info['estimated_replay_count']:,}")
print(f"Estimated size: {season_info['estimated_size_gb']:.1f} GB")
print()

# Start from the environment-derived config, then override the rate limits
# with the values chosen in the configuration block above.
config = CollectionConfig.from_env()
config.rate_limit_per_second = RATE_LIMIT_PER_SECOND
config.rate_limit_per_hour = RATE_LIMIT_PER_HOUR

client = BallchasingClient(config)
db = ImpulseDB(DATABASE_PATH)

# Select the storage backend; anything other than 's3' or 'local' is a
# configuration mistake and aborts the run before any download starts.
if STORAGE_TYPE == 's3':
    storage = S3Backend()
elif STORAGE_TYPE == 'local':
    storage = LocalBackend(base_dir=LOCAL_BASE_DIR)
else:
    raise ValueError(f"Unknown STORAGE_TYPE: '{STORAGE_TYPE}'. Must be 's3' or 'local'.")

downloader = ReplayDownloader(client, storage, db)

# Main download pass. The season key is appended to the prefix so each season
# lands in its own directory under PATH_PREFIX.
result = downloader.download_group(
    group_id=season_info['group_id'],
    path_prefix=PATH_PREFIX + [SEASON]
)

if AUTO_RETRY and result.failed > 0:
    for attempt in range(1, MAX_RETRIES + 1):
        # result.failed here is the count left over from the previous pass.
        print(f"\nRetry {attempt}/{MAX_RETRIES}: {result.failed} failed replay(s)...")
        result = downloader.retry_failed_downloads(season_info['group_id'])
        if result.failed == 0:
            print("All replays recovered.")
            break
    else:
        # for/else: reached only when the loop ran out of attempts without
        # hitting the break above, i.e. some replays are still failing.
        print(f"Max retries ({MAX_RETRIES}) reached. {result.failed} replay(s) could not be recovered.")

scripts/rlcs_downloader.py

Lines changed: 0 additions & 130 deletions
This file was deleted.

0 commit comments

Comments
 (0)