1- # Fetch cryptocurrency data and store in orphan 'data' branch
1+ # Fetch cryptocurrency price data and store in orphan 'raw-data' branch
22# This workflow is triggered manually from the GitHub Actions UI
3- name : Fetch Data
3+ name : Fetch Raw Data
44
55on :
66 # Manual trigger only - this is a heavy workflow
@@ -15,54 +15,57 @@ jobs:
1515 fetch-data :
1616 runs-on : ubuntu-latest
1717 permissions :
18- contents : write # Required to push to data branch
18+ contents : write # Required to push to raw-data branch
1919
2020 steps :
2121 - name : Checkout main branch
2222 uses : actions/checkout@v4
2323 with :
2424 path : main
2525
26- - name : Checkout data branch
27- id : checkout-data
26+ - name : Checkout raw-data branch
27+ id : checkout-raw-data
2828 uses : actions/checkout@v4
2929 with :
30- ref : data
31- path : data-branch
30+ ref : raw-data
31+ path : raw-data-branch
3232 continue-on-error : true # Branch may not exist yet
3333
34- - name : Initialize data branch if missing
35- if : steps.checkout-data.outcome == 'failure'
34+ - name : Initialize raw-data branch if missing
35+ if : steps.checkout-raw-data.outcome == 'failure'
3636 run : |
37- mkdir -p data-branch
38- cd data-branch
37+ mkdir -p raw-data-branch
38+ cd raw-data-branch
3939 git init
40- git checkout --orphan data
41- echo "# Halvix Data Cache " > README.md
40+ git checkout --orphan raw-data
41+ echo "# Halvix Raw Data " > README.md
4242 echo "" >> README.md
43- echo "This branch contains cached price data (parquet files)." >> README.md
43+ echo "This branch contains raw price data (parquet files)." >> README.md
4444 echo "It is automatically updated by CI and has no history (orphan branch)." >> README.md
4545 echo "" >> README.md
4646 echo "**Do not manually edit files in this branch.**" >> README.md
4747 git add README.md
4848 git config user.name "github-actions[bot]"
4949 git config user.email "github-actions[bot]@users.noreply.github.com"
50- git commit -m "Initialize data branch"
51- echo "Initialized new orphan data branch"
50+ git commit -m "Initialize raw-data branch"
51+ echo "Initialized new orphan raw-data branch"
5252
5353 - name : Restore cached data to main workspace
5454 run : |
55- # Copy existing data from data branch to main workspace
56- if [ -d "data-branch/raw" ]; then
55+ # Copy existing data from raw-data branch to main workspace
56+ if [ -d "raw-data-branch/raw" ]; then
5757 mkdir -p main/data
58- cp -r data-branch/raw main/data/ 2>/dev/null || true
59- cp -r data-branch/processed main/data/ 2>/dev/null || true
60- cp -r data-branch/cache main/data/ 2>/dev/null || true
61- echo "Restored cached data from data branch"
58+ cp -r raw-data-branch/raw main/data/ 2>/dev/null || true
59+ cp -r raw-data-branch/cache main/data/ 2>/dev/null || true
60+ # Copy coins_to_download.json and download_skipped.csv
61+ mkdir -p main/data/processed
62+ cp raw-data-branch/processed/coins_to_download.json main/data/processed/ 2>/dev/null || true
63+ cp raw-data-branch/processed/download_skipped.csv main/data/processed/ 2>/dev/null || true
64+ echo "Restored cached data from raw-data branch"
6265 echo "Price files found:"
6366 ls -la main/data/raw/prices/ 2>/dev/null | head -20 || echo "No price files yet"
6467 else
65- echo "No existing data in data branch (first run)"
68+ echo "No existing data in raw-data branch (first run)"
6669 mkdir -p main/data/raw/prices
6770 mkdir -p main/data/processed
6871 mkdir -p main/data/cache
@@ -108,9 +111,9 @@ jobs:
108111 working-directory : main
109112 run : poetry run python -m main status
110113
111- - name : Update data branch with new data
114+ - name : Update raw-data branch with new data
112115 run : |
113- cd data-branch
116+ cd raw-data-branch
114117
115118 # Configure git
116119 git config user.name "github-actions[bot]"
@@ -119,24 +122,28 @@ jobs:
119122 # Remove old data files (but keep README and .git)
120123 find . -maxdepth 1 -type d ! -name '.' ! -name '.git' -exec rm -rf {} + 2>/dev/null || true
121124
122- # Copy new data from main workspace
125+ # Copy raw data from main workspace
123126 if [ -d "../main/data/raw" ]; then
124127 cp -r ../main/data/raw . 2>/dev/null || true
125128 echo "Copied raw data"
126129 fi
127- if [ -d "../main/data/processed" ]; then
128- cp -r ../main/data/processed . 2>/dev/null || true
129- echo "Copied processed data"
130- fi
131130 if [ -d "../main/data/cache" ]; then
132131 cp -r ../main/data/cache . 2>/dev/null || true
133132 echo "Copied cache data"
134133 fi
134+ # Copy only specific processed files (coins_to_download.json, download_skipped.csv)
135+ mkdir -p processed
136+ if [ -f "../main/data/processed/coins_to_download.json" ]; then
137+ cp ../main/data/processed/coins_to_download.json processed/ 2>/dev/null || true
138+ fi
139+ if [ -f "../main/data/processed/download_skipped.csv" ]; then
140+ cp ../main/data/processed/download_skipped.csv processed/ 2>/dev/null || true
141+ fi
135142
136143 # Update README with timestamp
137- echo "# Halvix Data Cache " > README.md
144+ echo "# Halvix Raw Data " > README.md
138145 echo "" >> README.md
139- echo "This branch contains cached price data (parquet files)." >> README.md
146+ echo "This branch contains raw price data (parquet files)." >> README.md
140147 echo "It is automatically updated by CI and has no history (orphan branch)." >> README.md
141148 echo "" >> README.md
142149 echo "**Last updated:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> README.md
@@ -150,8 +157,11 @@ jobs:
150157 count=$(ls raw/prices/*.parquet 2>/dev/null | wc -l)
151158 echo "- \`raw/prices/\`: $count coin price files" >> README.md
152159 fi
160+ if [ -d "cache" ]; then
161+ echo "- \`cache/\`: Coin list cache" >> README.md
162+ fi
153163 if [ -d "processed" ]; then
154- echo "- \`processed/\`: Processed data files (accepted coins, rejected coins, TOTAL2 index)" >> README.md
164+ echo "- \`processed/\`: Download metadata (coins_to_download.json, download_skipped.csv)" >> README.md
155165 fi
156166
157167 # Show what we're about to commit
@@ -174,20 +184,19 @@ jobs:
174184 echo ""
175185
176186 # Create a single commit (squash history by amending or resetting)
177- # Using --amend to keep only one commit in the orphan branch
178187 if git rev-parse HEAD >/dev/null 2>&1; then
179188 echo "Amending existing commit to squash history..."
180- git commit --amend -m "Update data cache - $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
189+ git commit --amend -m "Update raw data - $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
181190 else
182191 echo "Creating initial commit..."
183- git commit -m "Update data cache - $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
192+ git commit -m "Update raw data - $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
184193 fi
185194
186195 # Force push to keep orphan branch with single commit
187196 git remote set-url origin "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git" 2>/dev/null || \
188197 git remote add origin "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git" 2>/dev/null || true
189198
190- echo "Pushing to data branch..."
191- git push --force origin data
199+ echo "Pushing to raw-data branch..."
200+ git push --force origin raw-data
192201
193- echo "=== Data branch updated successfully ==="
202+ echo "=== Raw data branch updated successfully ==="
0 commit comments