
Commit 3eb4757

Significantly updated the data_status.html page with coins with-usd and with-btc info.
1 parent f2a63d1 commit 3eb4757

9 files changed: +294 −84 lines changed


.github/workflows/daily-update.yml

Lines changed: 11 additions & 7 deletions

@@ -78,9 +78,13 @@ jobs:
 mkdir -p main/data
 cp -r raw-data-branch/raw main/data/ 2>/dev/null || true
 cp -r raw-data-branch/cache main/data/ 2>/dev/null || true
+# Copy processed files (coins list, metadata, skipped/failed)
 mkdir -p main/data/processed
 cp raw-data-branch/processed/coins_to_download.json main/data/processed/ 2>/dev/null || true
 cp raw-data-branch/processed/download_skipped.csv main/data/processed/ 2>/dev/null || true
+cp raw-data-branch/processed/download_failed.csv main/data/processed/ 2>/dev/null || true
+cp raw-data-branch/processed/no_usd_data.csv main/data/processed/ 2>/dev/null || true
+cp raw-data-branch/processed/fetch_metadata.json main/data/processed/ 2>/dev/null || true
 echo "Restored cached data from raw-data branch"
 echo "Price files found:"
 ls -la main/data/raw/prices/ 2>/dev/null | head -20 || echo "No price files yet"
@@ -143,13 +147,13 @@ jobs:
 cp -r ../main/data/cache . 2>/dev/null || true
 echo "Copied cache data"
 fi
+# Copy processed files (coins list, metadata, skipped/failed, no-USD)
 mkdir -p processed
-if [ -f "../main/data/processed/coins_to_download.json" ]; then
-  cp ../main/data/processed/coins_to_download.json processed/ 2>/dev/null || true
-fi
-if [ -f "../main/data/processed/download_skipped.csv" ]; then
-  cp ../main/data/processed/download_skipped.csv processed/ 2>/dev/null || true
-fi
+for file in coins_to_download.json download_skipped.csv download_failed.csv no_usd_data.csv fetch_metadata.json; do
+  if [ -f "../main/data/processed/$file" ]; then
+    cp "../main/data/processed/$file" processed/ 2>/dev/null || true
+  fi
+done
 
 # Copy data_status.html (generated by list-coins/fetch-prices)
 mkdir -p site
@@ -178,7 +182,7 @@ jobs:
 echo "- \`cache/\`: Coin list cache" >> README.md
 fi
 if [ -d "processed" ]; then
-echo "- \`processed/\`: Download metadata (coins_to_download.json, download_skipped.csv)" >> README.md
+echo "- \`processed/\`: Download metadata (coins list, fetch metadata, skipped/failed)" >> README.md
 fi
 if [ -d "site" ]; then
 echo "- \`site/\`: Generated HTML pages (data_status.html)" >> README.md

.github/workflows/fetch-raw-data.yml

Lines changed: 11 additions & 9 deletions

@@ -57,10 +57,13 @@ jobs:
 mkdir -p main/data
 cp -r raw-data-branch/raw main/data/ 2>/dev/null || true
 cp -r raw-data-branch/cache main/data/ 2>/dev/null || true
-# Copy coins_to_download.json and download_skipped.csv
+# Copy processed files (coins list, metadata, skipped/failed)
 mkdir -p main/data/processed
 cp raw-data-branch/processed/coins_to_download.json main/data/processed/ 2>/dev/null || true
 cp raw-data-branch/processed/download_skipped.csv main/data/processed/ 2>/dev/null || true
+cp raw-data-branch/processed/download_failed.csv main/data/processed/ 2>/dev/null || true
+cp raw-data-branch/processed/no_usd_data.csv main/data/processed/ 2>/dev/null || true
+cp raw-data-branch/processed/fetch_metadata.json main/data/processed/ 2>/dev/null || true
 echo "Restored cached data from raw-data branch"
 echo "Price files found:"
 ls -la main/data/raw/prices/ 2>/dev/null | head -20 || echo "No price files yet"
@@ -131,14 +134,13 @@ jobs:
 cp -r ../main/data/cache . 2>/dev/null || true
 echo "Copied cache data"
 fi
-# Copy only specific processed files (coins_to_download.json, download_skipped.csv)
+# Copy processed files (coins list, metadata, skipped/failed, no-USD)
 mkdir -p processed
-if [ -f "../main/data/processed/coins_to_download.json" ]; then
-  cp ../main/data/processed/coins_to_download.json processed/ 2>/dev/null || true
-fi
-if [ -f "../main/data/processed/download_skipped.csv" ]; then
-  cp ../main/data/processed/download_skipped.csv processed/ 2>/dev/null || true
-fi
+for file in coins_to_download.json download_skipped.csv download_failed.csv no_usd_data.csv fetch_metadata.json; do
+  if [ -f "../main/data/processed/$file" ]; then
+    cp "../main/data/processed/$file" processed/ 2>/dev/null || true
+  fi
+done
 
 # Copy data_status.html (generated by list-coins/fetch-prices)
 mkdir -p site
@@ -168,7 +170,7 @@ jobs:
 echo "- \`cache/\`: Coin list cache" >> README.md
 fi
 if [ -d "processed" ]; then
-echo "- \`processed/\`: Download metadata (coins_to_download.json, download_skipped.csv)" >> README.md
+echo "- \`processed/\`: Download metadata (coins list, fetch metadata, skipped/failed)" >> README.md
 fi
 if [ -d "site" ]; then
 echo "- \`site/\`: Generated HTML pages (data_status.html)" >> README.md

docs/DATA_SOURCES.md

Lines changed: 49 additions & 14 deletions

@@ -111,24 +111,31 @@ The `CryptoCompareClient` (`src/api/cryptocompare.py`) implements:
 │ ┌──────────────────────────────────────────────────────────┐ │
 │ │ GET /data/top/mktcapfull?limit=100&page=0..11 │ │
 │ │ Requests: TOP_N_BY_MARKETCAP_TO_FETCH = 1200 coins │ │
-│ │ Returns: symbol, name, market_cap, price, volume │ │
+│ │ │ │
+│ │ Two sources of coins: │ │
+│ │ • WITH USD data: ~886 coins (have market cap, price) │ │
+│ │ • WITHOUT USD data: ~490 coins (no market cap from API) │ │
+│ │ │ │
 │ │ Output: data/processed/coins_to_download.json │ │
+│ │ data/processed/no_usd_data.csv │ │
 │ │ data/processed/fetch_metadata.json │ │
 │ └──────────────────────────────────────────────────────────┘ │
 │ │ │
 │ ▼ │
-│ Step 2: Filter Coins (Local)
+│ Step 2: Filter Coins (Local) - applies to BOTH sources
 │ ┌──────────────────────────────────────────────────────────┐ │
 │ │ Remove: wrapped, staked, bridged, stablecoins │ │
 │ │ Keep: BTC (for BTC/USD chart) │ │
+│ │ Mark coins: has_usd_data=true/false │ │
 │ │ Output: data/processed/download_skipped.csv │ │
 │ └──────────────────────────────────────────────────────────┘ │
 │ │ │
 │ ▼ │
 │ Step 3: Fetch Historical Prices (CryptoCompare) │
 │ ┌──────────────────────────────────────────────────────────┐ │
-│ │ Altcoins: GET /data/v2/histoday?fsym=ETH&tsym=BTC │ │
-│ │ BTC: GET /data/v2/histoday?fsym=BTC&tsym=USD │ │
+│ │ Altcoins (USD coins): GET ...?fsym=ETH&tsym=BTC │ │
+│ │ Altcoins (no-USD): GET ...?fsym=RYO&tsym=BTC │ │
+│ │ BTC: GET ...?fsym=BTC&tsym=USD │ │
 │ │ Pagination: Multiple requests for 4000+ days │ │
 │ │ Output: data/raw/prices/{coin}-{quote}.parquet │ │
 │ │ data/processed/download_failed.csv (if any) │ │
@@ -155,19 +162,23 @@ The pipeline generates a `data_status.html` page (at `site/data_status.html`) th
 
 | Card | Description |
 |------|-------------|
-| **Coins Requested** | Number requested from API (`TOP_N_BY_MARKETCAP_TO_FETCH = 1200`), with sublabel showing actual returned count |
-| **Coins with Price Data** | Coins that have downloaded price data in cache |
+| **Coins Requested** | Number requested from API (1200), with sublabel showing breakdown: "886 USD + 490 no-USD" |
+| **Coins Accepted** | Total coins accepted for download, with sublabel showing how many from no-USD source |
+| **Coins Downloaded** | Coins that have downloaded price data in cache |
 | **Skipped / Failed** | Filtered coins (stablecoins, wrapped) + failed downloads (no BTC pair) |
-| **Total Pairs Downloaded** | Sum of all quote pairs (BTC + USD) across all coins |
+| **Total Pairs** | Sum of all quote pairs (BTC + USD) across all coins |
 
 ### Downloaded Coins Table
 
 Lists all coins with price data, including:
 - Symbol and name (linked to CryptoCompare)
+- **Source** column: "USD" for coins with market cap data, "BTC-only" for coins discovered without USD data
 - **Quote(s)** column: Shows available pairs (BTC, USD, or both)
-- Market cap
+- Market cap (shows "N/A" for BTC-only coins)
 - Date range and days of data
 
+Coins are sorted: USD coins first (by market cap descending), then BTC-only coins.
+
 ### Skipped / Failed Table
 
 Lists all excluded coins with reasons:
@@ -183,10 +194,11 @@ Lists all excluded coins with reasons:
 
 | File | Description |
 |------|-------------|
-| `coins_to_download.json` | Coins accepted for price fetching |
-| `download_skipped.csv` | Coins filtered out (stablecoins, wrapped, etc.) |
+| `coins_to_download.json` | Coins accepted for price fetching. Each coin has `has_usd_data` field (true/false) |
+| `download_skipped.csv` | Coins filtered out (stablecoins, wrapped, etc.) from USD coins |
 | `download_failed.csv` | Coins that failed to download (no BTC pair on CryptoCompare) |
-| `fetch_metadata.json` | Metadata: coins_requested, coins_returned, timestamp |
+| `no_usd_data.csv` | Coins returned by API without USD price data (before filtering) |
+| `fetch_metadata.json` | Metadata: coins_requested, coins_fetched, coins_no_usd_data, coins_accepted, etc. |
 | `total2_index.parquet` | Calculated TOTAL2 index |
 
 ### Price Data Files
@@ -351,6 +363,7 @@ USE_YESTERDAY_AS_END_DATE = True
 COINS_TO_DOWNLOAD_JSON = PROCESSED_DIR / "coins_to_download.json"
 DOWNLOAD_SKIPPED_CSV = PROCESSED_DIR / "download_skipped.csv"
 DOWNLOAD_FAILED_CSV = PROCESSED_DIR / "download_failed.csv"
+NO_USD_DATA_CSV = PROCESSED_DIR / "no_usd_data.csv"
 FETCH_METADATA_JSON = PROCESSED_DIR / "fetch_metadata.json"
 ```
 
@@ -379,9 +392,31 @@ This error occurs when a coin doesn't have a direct trading pair on CryptoCompar
 
 ### Discrepancy between requested and returned coins
 
-- `TOP_N_BY_MARKETCAP_TO_FETCH` (default: 1200) is the number requested
-- The API may return fewer coins if some don't have USD price data
-- The `fetch_metadata.json` file records both values for transparency
+The CryptoCompare market cap API returns coins in two categories:
+
+1. **Coins WITH USD data** (~886 of 1200): Have market cap, price, and volume data
+2. **Coins WITHOUT USD data** (~490 of 1200): Returned by API but missing USD price data
+
+Lower-ranked coins (smaller market cap) are more likely to lack USD data on CryptoCompare. These coins often still have BTC trading pairs available via the `histoday` endpoint.
+
+**Halvix now processes both categories:**
+- Filters both (removes stablecoins, wrapped, etc.)
+- Marks each coin with `has_usd_data: true/false` in `coins_to_download.json`
+- Downloads BTC pairs for all altcoins (no change in behavior)
+- Shows "BTC-only" source in the data status page for coins without USD data
+
+The `fetch_metadata.json` file records the full breakdown:
+```json
+{
+  "coins_requested": 1200,
+  "coins_fetched": 886,
+  "coins_no_usd_data": 490,
+  "coins_no_usd_filtered": 7,
+  "coins_no_usd_accepted": 483,
+  "coins_filtered": 31,
+  "coins_accepted": 1338
+}
+```
 
 ---

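As a quick sanity check on the breakdown documented above, the two processed files can be reloaded and cross-checked: USD-sourced coins that survive filtering plus accepted no-USD coins should equal the accepted total (886 − 31 + 483 = 1338). This is a minimal sketch, not pipeline code; it assumes `coins_to_download.json` is a JSON array whose entries carry the `has_usd_data` field described above, and that the files live under `data/processed/` — adjust paths to your checkout.

```python
import json
from pathlib import Path

# Paths as documented above; adjust if your data/ layout differs.
PROCESSED_DIR = Path("data/processed")

metadata = json.loads((PROCESSED_DIR / "fetch_metadata.json").read_text())
coins = json.loads((PROCESSED_DIR / "coins_to_download.json").read_text())

# USD coins that survive filtering, plus accepted no-USD coins,
# should add up to the accepted total: (886 - 31) + 483 = 1338.
expected = (
    metadata["coins_fetched"]
    - metadata["coins_filtered"]
    + metadata["coins_no_usd_accepted"]
)
assert metadata["coins_accepted"] == expected, (metadata["coins_accepted"], expected)

# The same split should be visible in coins_to_download.json via has_usd_data.
with_usd = sum(1 for c in coins if c.get("has_usd_data"))
btc_only = len(coins) - with_usd
print(f"accepted={len(coins)}  with USD data={with_usd}  BTC-only={btc_only}")
```
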
docs/DEPLOYMENT.md

Lines changed: 10 additions & 6 deletions

@@ -51,9 +51,9 @@ This approach ensures:
 │ ┌─────────────────────────────────────────────┐ │
 │ │ raw-data branch (orphan) │ │
 │ │ • raw/prices/*.parquet (price data) │ │
-│ │ • cache/top_coins_1000.json (coin list) │ │
-│ │ • processed/coins_to_download.json │ │
-│ │ • processed/download_skipped.csv │ │
+│ │ • cache/top_coins_*.json (coin list cache) │ │
+│ │ • processed/ (coins list, metadata, etc.) │ │
+│ │ • site/data_status.html │ │
 │ └─────────────────────────────────────────────┘ │
 │ │ │
 │ ▼ │
@@ -101,9 +101,13 @@ Contains raw price data fetched from CryptoCompare API:
 | Path | Description |
 |------|-------------|
 | `raw/prices/*.parquet` | Daily OHLCV price data for each coin |
-| `cache/top_coins_1000.json` | Cached top 1000 coins by market cap |
-| `processed/coins_to_download.json` | List of coins selected for download |
-| `processed/download_skipped.csv` | Coins skipped during download |
+| `cache/top_coins_*.json` | Cached coin list from market cap API |
+| `processed/coins_to_download.json` | List of coins selected for download (with `has_usd_data` flag) |
+| `processed/download_skipped.csv` | Coins filtered out (stablecoins, wrapped, etc.) |
+| `processed/download_failed.csv` | Coins that failed to download (no BTC pair) |
+| `processed/no_usd_data.csv` | Coins without USD data from API (before filtering) |
+| `processed/fetch_metadata.json` | Fetch statistics (counts, timestamp) |
+| `site/data_status.html` | Generated data status page |
 
 ### `processed-data` branch (orphan)
 Contains calculated TOTAL2 index data:

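For anyone consuming the `raw-data` branch directly, a short hedged sketch of reading the layout in the table above: it assumes the branch has been checked out (or copied) into a local `raw-data-branch/` directory, and only uses the paths listed in the documentation; nothing here is part of the pipeline itself.

```python
import csv
import json
from pathlib import Path

# Assumes the raw-data branch contents are available under ./raw-data-branch,
# matching the layout described in the table above.
branch = Path("raw-data-branch")

price_files = sorted((branch / "raw" / "prices").glob("*.parquet"))
coins = json.loads((branch / "processed" / "coins_to_download.json").read_text())

with (branch / "processed" / "no_usd_data.csv").open(newline="") as fh:
    no_usd_rows = list(csv.DictReader(fh))

print(f"price files: {len(price_files)}")
print(f"coins to download: {len(coins)}")
print(f"no-USD coins: {len(no_usd_rows)}")
print(f"status page present: {(branch / 'site' / 'data_status.html').exists()}")
```
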
src/api/cryptocompare.py

Lines changed: 19 additions & 2 deletions

@@ -335,7 +335,8 @@ def get_top_coins_by_market_cap(
         self,
         n: int = 300,
         vs_currency: str = "USD",
-    ) -> list[Coin]:
+        track_no_data: bool = False,
+    ) -> list[Coin] | tuple[list[Coin], list[dict]]:
         """
         Get top N coins by market capitalization.
 
@@ -344,13 +345,17 @@
         Args:
             n: Number of top coins to fetch (default: 300)
             vs_currency: Quote currency for prices (default: "USD")
+            track_no_data: If True, also return coins without price data
 
         Returns:
-            List of Coin objects sorted by market cap rank
+            If track_no_data=False: List of Coin objects sorted by market cap rank
+            If track_no_data=True: Tuple of (coins, coins_without_data)
         """
         coins: list[Coin] = []
+        coins_without_data: list[dict] = []
         page = 0
         per_page = 100  # CryptoCompare returns 100 per page max
+        total_seen = 0
 
         while len(coins) < n:
             data = self._request(
@@ -367,10 +372,20 @@
                 break
 
             for coin_data in coin_data_list:
+                total_seen += 1
                 coin_info = coin_data.get("CoinInfo", {})
                 raw_data = coin_data.get("RAW", {}).get(vs_currency.upper(), {})
 
                 if not raw_data:
+                    # Track coins without price data
+                    if track_no_data:
+                        coins_without_data.append(
+                            {
+                                "symbol": coin_info.get("Name", ""),
+                                "name": coin_info.get("FullName", ""),
+                                "rank": total_seen,
+                            }
+                        )
                     continue
 
                 coins.append(
@@ -394,6 +409,8 @@
             if len(coin_data_list) < per_page:
                 break
 
+        if track_no_data:
+            return coins[:n], coins_without_data
         return coins[:n]
 
     def ping(self) -> bool:

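To make the new flag concrete, here is a hedged usage sketch of the updated method. The `track_no_data=True` call and the tuple return come straight from the diff above; the import paths, the no-argument client construction, and the CSV writing are illustrative assumptions only — the pipeline's own fetch script may construct the client and persist `no_usd_data.csv` differently.

```python
import csv

from src.api.cryptocompare import CryptoCompareClient  # assumed import path
from src.config import NO_USD_DATA_CSV, TOP_N_BY_MARKETCAP_TO_FETCH  # assumed location

# Client construction (API key handling, etc.) is not shown in this commit,
# so treat this as illustrative rather than the pipeline's exact code.
client = CryptoCompareClient()

coins, coins_without_data = client.get_top_coins_by_market_cap(
    n=TOP_N_BY_MARKETCAP_TO_FETCH,  # 1200 per the docs above
    track_no_data=True,
)

print(f"{len(coins)} coins with USD data, {len(coins_without_data)} without")

# Persist the no-USD list with the same fields the method collects
# (symbol, name, rank); the real pipeline may format this file differently.
with open(NO_USD_DATA_CSV, "w", newline="") as fh:
    writer = csv.DictWriter(fh, fieldnames=["symbol", "name", "rank"])
    writer.writeheader()
    writer.writerows(coins_without_data)
```
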
src/config.py

Lines changed: 2 additions & 0 deletions

@@ -447,10 +447,12 @@ def get_regression_end_date() -> date:
 # coins_to_download.json - coins that will have price data fetched
 # download_skipped.csv - coins that are skipped with reason (stablecoins, wrapped tokens, etc.)
 # download_failed.csv - coins that failed to download (no BTC pair on CryptoCompare, etc.)
+# no_usd_data.csv - coins returned by API without USD price data (silently skipped)
 # fetch_metadata.json - metadata about the fetch operation (counts, timestamp)
 COINS_TO_DOWNLOAD_JSON = PROCESSED_DIR / "coins_to_download.json"
 DOWNLOAD_SKIPPED_CSV = PROCESSED_DIR / "download_skipped.csv"
 DOWNLOAD_FAILED_CSV = PROCESSED_DIR / "download_failed.csv"
+NO_USD_DATA_CSV = PROCESSED_DIR / "no_usd_data.csv"
 FETCH_METADATA_JSON = PROCESSED_DIR / "fetch_metadata.json"
 
 # Analysis results
