Skip to content

Daily Scrape and Stats #97

Daily Scrape and Stats

Daily Scrape and Stats #97

Workflow file for this run

name: Daily Scrape and Stats

on:
  schedule:
    # Triggers every day at midnight UTC
    - cron: '0 0 * * *'
  # Allows you to run it manually from the GitHub Actions tab
  workflow_dispatch:

# 'contents: write' is required so the job can push commits back to the repo
permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Step 1: Check out the repository code
      - name: Checkout code
        uses: actions/checkout@v4

      # Step 2: Set up Python (quoted so 3.10 isn't parsed as the float 3.1)
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      # Step 3: Install required libraries
      - name: Install dependencies
        run: pip install -r requirements.txt

      # Step 4: Run the scraper (generates CSV files in 'tags/')
      - name: Run scraper
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: python scrape_tags.py

      # Step 5: Generate the JSON stats using the new CSV data
      - name: Update Statistics
        run: python compare_last_two.py --json

      # Step 6: Commit and Push tag_stats.json (always needed for GitHub Pages)
      - name: Commit and Push
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          # Configure the remote with the token for authentication
          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git
          # Always add tag_stats.json (needed for the live dashboard)
          git add tag_stats.json
          # Only commit tags/ and tags.csv if HuggingFace upload failed
          # NOTE(review): .hf_upload_status is presumably written by scrape_tags.py — confirm
          if [ -f .hf_upload_status ] && grep -q "failed" .hf_upload_status; then
            echo "HF upload failed - committing local data as backup"
            git add tags/ tags.csv
          fi
          # Commit only if there are actual changes (working tree OR index dirty)
          git diff --quiet && git diff --staged --quiet || git commit -m "Daily update: scrape and stats"
          # Push changes
          git push