Daily Scrape and Stats #97
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: runs the scraper once a day, regenerates the JSON statistics, and
# commits the results back to this repository.
name: Daily Scrape and Stats

on:
  schedule:
    # Triggers every day at midnight UTC
    - cron: '0 0 * * *'
  # Allows you to run it manually from the GitHub Actions tab
  workflow_dispatch:

# The job pushes a commit, so the workflow token needs write access to contents.
permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Step 1: Check out the repository code.
      # actions/checkout persists the workflow token in the local git config by
      # default, so the later `git push` needs no extra remote configuration.
      - name: Checkout code
        uses: actions/checkout@v4

      # Step 2: Set up Python
      # The version is quoted so YAML does not read 3.10 as the float 3.1.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      # Step 3: Install required libraries
      - name: Install dependencies
        run: pip install -r requirements.txt

      # Step 4: Run the scraper (generates CSV files in 'tags/')
      # HF_TOKEN is exposed to the script through the environment only.
      - name: Run scraper
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: python scrape_tags.py

      # Step 5: Generate the JSON stats using the new CSV data
      - name: Update Statistics
        run: python compare_last_two.py --json

      # Step 6: Commit and Push tag_stats.json (always needed for GitHub Pages)
      - name: Commit and Push
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Always add tag_stats.json (needed for the live dashboard)
          git add tag_stats.json

          # Only commit tags/ and tags.csv if HuggingFace upload failed
          if [ -f .hf_upload_status ] && grep -q "failed" .hf_upload_status; then
            echo "HF upload failed - committing local data as backup"
            git add tags/ tags.csv
          fi

          # Decide based on the index only: unstaged modifications (for example
          # scraper output that was deliberately not staged) must not trigger
          # a commit, because `git commit` with nothing staged exits non-zero
          # and would fail the step.
          if git diff --staged --quiet; then
            echo "No staged changes - nothing to commit"
          else
            git commit -m "Daily update: scrape and stats"
            git push
          fi