Skip to content

Commit b89ac93

Browse files
committed
Fixing top n coins to download.
1 parent df3c5d7 commit b89ac93

File tree

4 files changed

+106
-18
lines changed

4 files changed

+106
-18
lines changed

docs/DATA_SOURCES.md

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,10 @@ The pipeline generates a `data_status.html` page (at `site/data_status.html`) th
192192

193193
| Card | Description |
194194
|------|-------------|
195-
| **Coins Requested** | Number requested from API (1200), with sublabel showing breakdown: "886 USD + 490 no-USD" |
196-
| **Coins Accepted** | Total coins accepted for download, with sublabel showing how many from no-USD source |
195+
| **Coins Requested** | Number requested from API (1200), also the cap on total accepted coins |
196+
| **Coins Accepted** | Total coins accepted for download (≤ requested), with sublabel showing how many from no-USD source |
197197
| **Coins Downloaded** | Coins that have downloaded price data in cache |
198-
| **Skipped / Failed** | Filtered coins (stablecoins, wrapped) + failed downloads (no BTC pair) |
198+
| **Skipped / Failed** | Filtered coins (stablecoins, wrapped) + failed downloads (no BTC pair) + capped no-USD coins |
199199
| **Total Pairs** | Sum of all quote pairs (BTC + USD) across all coins |
200200

201201
### Downloaded Coins Table
@@ -458,20 +458,22 @@ This error occurs when a coin doesn't have a direct trading pair on CryptoCompar
458458
- Coin may be too new (created after requested start date)
459459
- Check CryptoCompare directly: `https://min-api.cryptocompare.com/data/v2/histoday?fsym=ETH&tsym=BTC&limit=10`
460460

461-
### Discrepancy between requested and returned coins
461+
### Requested vs. returned coins
462462

463463
The CryptoCompare market cap API returns coins in two categories:
464464

465465
1. **Coins WITH USD data** (~886 of 1200): Have market cap, price, and volume data
466-
2. **Coins WITHOUT USD data** (~490 of 1200): Returned by API but missing USD price data
466+
2. **Coins WITHOUT USD data** (~490): Returned by API but missing USD price data
467467

468468
Lower-ranked coins (smaller market cap) are more likely to lack USD data on CryptoCompare. These coins often still have BTC trading pairs available via the `histoday` endpoint.
469469

470-
**Halvix now processes both categories:**
471-
- Filters both (removes stablecoins, wrapped, etc.)
470+
**Halvix processes both categories with a total cap:**
471+
- USD coins have priority (they have actual market cap data for ranking)
472+
- No-USD coins fill remaining slots up to the requested limit
473+
- Total accepted coins will never exceed `coins_requested`
474+
- Both are filtered (removes stablecoins, wrapped, etc.)
472475
- Marks each coin with `has_usd_data: true/false` in `coins_to_download.json`
473-
- Downloads BTC pairs for all altcoins (no change in behavior)
474-
- Shows "BTC-only" source in the data status page for coins without USD data
476+
- No-USD coins are sorted by their API position (CryptoCompare's internal ranking), so when the cap applies, the lowest-positioned no-USD coins are the ones excluded
475477

476478
The `fetch_metadata.json` file records the full breakdown:
477479
```json
@@ -480,12 +482,19 @@ The `fetch_metadata.json` file records the full breakdown:
480482
"coins_fetched": 886,
481483
"coins_no_usd_data": 490,
482484
"coins_no_usd_filtered": 7,
483-
"coins_no_usd_accepted": 483,
485+
"coins_no_usd_accepted": 345,
486+
"coins_no_usd_capped": 138,
484487
"coins_filtered": 31,
485-
"coins_accepted": 1338
488+
"coins_accepted": 1200
486489
}
487490
```
488491

492+
In this example:
493+
- 886 coins had USD data, 31 were filtered → 855 USD coins accepted
494+
- 490 coins had no USD data, 7 were filtered → 483 passed filtering
495+
- Only 345 no-USD coins were included, filling the 1200 − 855 = 345 remaining slots (the other 138 were capped to meet the 1200 limit)
496+
- Total: 855 + 345 = 1200, exactly the requested limit
497+
489498
---
490499

491500
*Last updated: 2025-12-24*

src/api/cryptocompare.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,12 @@ def __init__(
188188
headers["authorization"] = f"Apikey {api_key}"
189189
self.session.headers.update(headers)
190190

191+
# Rate limit logging: track calls for periodic status logging
192+
self._calls_since_last_log = 0
193+
self._calls_log_interval = 50 # Log rate limit status every N calls
194+
self._last_status_log_time: float | None = None
195+
self._status_log_interval = 60.0 # Or every N seconds
196+
191197
def get_rate_limit_status(self, use_cache: bool = True) -> RateLimitStatus:
192198
"""
193199
Get current rate limit status from the API.
@@ -266,6 +272,53 @@ def extract_calls(period_data: dict) -> tuple[int, int]:
266272
logger.warning("Error checking rate limit status: %s", e)
267273
return RateLimitStatus()
268274

275+
def _log_rate_limit_status_if_needed(self, status: RateLimitStatus) -> None:
276+
"""Log rate limit status periodically (every N calls or N seconds)."""
277+
self._calls_since_last_log += 1
278+
279+
should_log = False
280+
reason = ""
281+
282+
# Log every N calls
283+
if self._calls_since_last_log >= self._calls_log_interval:
284+
should_log = True
285+
reason = f"every {self._calls_log_interval} calls"
286+
287+
# Or log every N seconds
288+
elif self._last_status_log_time is not None:
289+
elapsed = time.time() - self._last_status_log_time
290+
if elapsed >= self._status_log_interval:
291+
should_log = True
292+
reason = f"every {self._status_log_interval:.0f}s"
293+
294+
# Or log on first call (no previous log time)
295+
elif self._last_status_log_time is None:
296+
should_log = True
297+
reason = "initial status"
298+
299+
if should_log:
300+
self._calls_since_last_log = 0
301+
self._last_status_log_time = time.time()
302+
303+
# Calculate totals for clearer logging
304+
total_second = status.calls_made_second + status.calls_left_second
305+
total_minute = status.calls_made_minute + status.calls_left_minute
306+
total_hour = status.calls_made_hour + status.calls_left_hour
307+
total_month = status.calls_made_month + status.calls_left_month
308+
309+
logger.info(
310+
"Rate limit status (%s): " "second %d/%d, minute %d/%d, hour %d/%d, month %d/%d",
311+
reason,
312+
status.calls_made_second,
313+
total_second,
314+
status.calls_made_minute,
315+
total_minute,
316+
status.calls_made_hour,
317+
total_hour,
318+
status.calls_made_month,
319+
total_month,
320+
)
321+
269322
def _wait_for_rate_limit(self) -> None:
270323
"""
271324
Wait if necessary to respect rate limits.
@@ -281,6 +334,10 @@ def _wait_for_rate_limit(self) -> None:
281334

282335
# Check rate limit status periodically
283336
status = self.get_rate_limit_status(use_cache=True)
337+
338+
# Log status periodically for visibility
339+
self._log_rate_limit_status_if_needed(status)
340+
284341
if status.is_near_limit:
285342
wait_time = status.recommended_wait_seconds
286343
if wait_time > 0:

src/data/fetcher.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,14 @@ class FetchResult:
5454

5555
success: bool
5656
message: str
57-
coins_requested: int = 0 # How many coins we asked the API for
57+
coins_requested: int = 0 # How many coins we asked the API for (also the cap)
5858
coins_fetched: int = 0 # How many coins had USD data and were returned
5959
coins_no_usd_data: int = 0 # How many coins were missing USD data from API
6060
coins_no_usd_filtered: int = 0 # How many no-USD coins were filtered (stablecoins, etc.)
61-
coins_no_usd_accepted: int = 0 # How many no-USD coins were accepted (BTC pairs only)
61+
coins_no_usd_accepted: int = 0 # How many no-USD coins were included (after cap)
62+
coins_no_usd_capped: int = 0 # How many no-USD coins were excluded due to cap
6263
coins_filtered: int = 0 # How many USD coins were filtered out (stablecoins, wrapped, etc.)
63-
coins_accepted: int = 0 # Total coins accepted for download (USD + no-USD)
64+
coins_accepted: int = 0 # Total coins accepted for download (USD + no-USD, capped at requested)
6465
errors: list[str] | None = None
6566

6667

@@ -213,8 +214,23 @@ def fetch_and_filter_coins(
213214
for coin in no_usd_accepted:
214215
coin["has_usd_data"] = False
215216

216-
# Combine both lists
217-
all_coins_to_download = coins_to_download + no_usd_accepted
217+
# --- Cap total coins at n ---
218+
# USD coins have priority (they have actual market cap data)
219+
# No-USD coins fill remaining slots up to the requested limit
220+
remaining_slots = max(0, n - len(coins_to_download))
221+
no_usd_included = no_usd_accepted[:remaining_slots]
222+
no_usd_capped = len(no_usd_accepted) - len(no_usd_included)
223+
224+
if no_usd_capped > 0:
225+
logger.info(
226+
"Capped no-USD coins: %d included, %d excluded to meet limit of %d",
227+
len(no_usd_included),
228+
no_usd_capped,
229+
n,
230+
)
231+
232+
# Combine both lists (respecting the cap)
233+
all_coins_to_download = coins_to_download + no_usd_included
218234

219235
# Export skipped coins for review (from USD coins only, main source)
220236
if export_skipped:
@@ -233,7 +249,8 @@ def fetch_and_filter_coins(
233249
coins_fetched=len(all_coins),
234250
coins_no_usd_data=len(coins_without_usd),
235251
coins_no_usd_filtered=no_usd_filtered,
236-
coins_no_usd_accepted=len(no_usd_accepted),
252+
coins_no_usd_accepted=len(no_usd_included), # Only those actually included
253+
coins_no_usd_capped=no_usd_capped, # How many were excluded by cap
237254
coins_filtered=usd_coins_filtered,
238255
coins_accepted=len(all_coins_to_download),
239256
)

src/main.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1751,9 +1751,13 @@ def cmd_list_coins(args: argparse.Namespace) -> int:
17511751
logger.info("")
17521752
logger.info(" Without USD data: %d (BTC pairs only)", result.coins_no_usd_data)
17531753
logger.info(" - Filtered: %d", result.coins_no_usd_filtered)
1754+
if result.coins_no_usd_capped > 0:
1755+
logger.info(" - Capped: %d (excluded to meet limit)", result.coins_no_usd_capped)
17541756
logger.info(" - Accepted: %d", result.coins_no_usd_accepted)
17551757
logger.info("")
1756-
logger.info(" Total accepted: %d coins", result.coins_accepted)
1758+
logger.info(
1759+
" Total accepted: %d coins (capped at %d)", result.coins_accepted, result.coins_requested
1760+
)
17571761

17581762
# Print filter breakdown for USD coins
17591763
summary = fetcher.get_filter_summary()
@@ -1777,6 +1781,7 @@ def cmd_list_coins(args: argparse.Namespace) -> int:
17771781
"coins_no_usd_data": result.coins_no_usd_data,
17781782
"coins_no_usd_filtered": result.coins_no_usd_filtered,
17791783
"coins_no_usd_accepted": result.coins_no_usd_accepted,
1784+
"coins_no_usd_capped": result.coins_no_usd_capped,
17801785
"coins_filtered": result.coins_filtered,
17811786
"coins_accepted": result.coins_accepted,
17821787
"timestamp": datetime.now().isoformat(),

0 commit comments

Comments
 (0)