# Scrape Tournaments #112
# NOTE(review): this header replaces web-page navigation text ("Skip to
# content" plus repeated titles) that was not valid YAML.

# Workflow name as displayed for its runs.
name: Scrape Tournaments

# The job pushes commits back to the repository (see the "Commit and Push
# Changes" step), so the workflow token needs write access to repository
# contents.
permissions:
  contents: write
# Triggers: a daily schedule plus a manual run with configurable inputs.
# Scheduled runs carry no inputs; the "Run Scraper" step substitutes its own
# fallbacks for any input that arrives empty.
on:
  schedule:
    # Run at 08:00 UTC every day
    - cron: "0 8 * * *"
  workflow_dispatch:
    inputs:
      # Which source(s) to scrape; forwarded to the scraper as --platform.
      platform:
        description: "Platform to scrape"
        required: true
        default: "longshanks+rollbetter"
        type: choice
        options:
          - longshanks+rollbetter
          - longshanks
          - rollbetter
          - listfortress
          - all
      # Free-form range expression; forwarded to the scraper as --time-range.
      time_range:
        description: |
          Time range to scrape.
          Single day: today | yesterday | YYYY-MM-DD
          Range keyword: last 3 days | last week | last month | last 3 months | last 6 months | last year | all time
          Explicit range: YYYY-MM-DD:YYYY-MM-DD | YYYY-MM-DD:today | YYYY-MM-DD:yesterday | yesterday:today
          Legacy: positive integer (days back from today, treated as last N days)
        required: true
        default: "yesterday"
        type: string
      # Selects which database secret is exported as DATABASE_URL; "none"
      # switches the job to the SQLite-only script and skips the commit step.
      environment:
        description: "Target database environment (use none for SQLite artifact only)"
        required: true
        default: "prod"
        type: choice
        options:
          - prod
          - dev
          - none
      # Controls the artifact upload step.
      upload_sqlite_artifact:
        description: "Upload scraped SQLite database as an artifact"
        required: true
        default: true
        type: boolean
      # When true, the scraper is invoked with --overwrite.
      overwrite:
        description: "Overwrite existing tournaments in the database"
        required: false
        type: boolean
        default: false
jobs:
  # Runs the tournament scraper, uploads the resulting SQLite file as an
  # artifact, and commits refreshed data files back to the repository.
  scrape:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: "pip"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Install Playwright browsers
        run: |
          playwright install chromium --with-deps

      - name: Run Scraper
        env:
          PYTHONPATH: .
          # Inputs and secrets are handed to the script as environment
          # variables instead of being expanded into the script text, so their
          # contents can never be interpreted as shell syntax.
          SCRAPE_PLATFORM: ${{ inputs.platform }}
          SCRAPE_TIME_RANGE: ${{ inputs.time_range }}
          SCRAPE_ENVIRONMENT: ${{ inputs.environment }}
          SCRAPE_OVERWRITE: ${{ inputs.overwrite }}
          PROD_DATABASE_URL: ${{ secrets.PROD_DATABASE_URL }}
          DEV_DATABASE_URL: ${{ secrets.DEV_DATABASE_URL }}
        run: |
          # Use inputs if available, otherwise default to
          # longshanks+rollbetter / yesterday / prod (scheduled runs have no inputs).
          PLATFORM="${SCRAPE_PLATFORM:-longshanks+rollbetter}"
          TIME_RANGE="${SCRAPE_TIME_RANGE:-yesterday}"
          DB_ENV="${SCRAPE_ENVIRONMENT:-prod}"

          # Export only the URL of the selected environment; for "none" no
          # DATABASE_URL is set at all.
          case "$DB_ENV" in
            prod) export DATABASE_URL="$PROD_DATABASE_URL" ;;
            dev) export DATABASE_URL="$DEV_DATABASE_URL" ;;
          esac
          # Keep the unselected secret out of the scraper's environment.
          unset PROD_DATABASE_URL DEV_DATABASE_URL

          # Optional flags are collected in an array so an empty set expands
          # to no arguments without relying on unquoted word splitting.
          EXTRA_ARGS=()
          if [ "$SCRAPE_OVERWRITE" = "true" ]; then
            EXTRA_ARGS+=(--overwrite)
          fi

          if [ "$DB_ENV" = "none" ]; then
            python -m backend.scripts.scrape_tournaments_sqlite \
              --sqlite-path scraped_tournaments.db \
              --platform "$PLATFORM" \
              --time-range "$TIME_RANGE" "${EXTRA_ARGS[@]}"
          else
            python -m backend.scripts.scrape_tournaments \
              --platform "$PLATFORM" \
              --time-range "$TIME_RANGE" \
              --sqlite-output scraped_tournaments.db "${EXTRA_ARGS[@]}"
          fi

      # always() lets the upload run even when an earlier step failed, as long
      # as the database file exists and the manual input did not disable it.
      - name: Upload Scraped Tournaments Artifact
        if: always() && hashFiles('scraped_tournaments.db') != '' && github.event.inputs.upload_sqlite_artifact != 'false'
        uses: actions/upload-artifact@v4
        with:
          name: scraped-tournaments-${{ github.run_id }}
          path: scraped_tournaments.db
          retention-days: 30

      # Skipped for SQLite-only runs (environment "none"). Scheduled runs have
      # no inputs, so the condition holds and they do commit.
      - name: Commit and Push Changes
        if: inputs.environment != 'none'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add backend/data/geocoding_cache.json
          if [ -f main.db ]; then git add main.db; fi
          # Nothing staged means nothing to publish; stop successfully.
          if git diff --cached --quiet; then
            echo "No changes to commit."
            exit 0
          fi
          # A failing commit or push now fails the step instead of being hidden.
          git commit -m "Update tournament data and geocoding cache"
          git push