# Scheduled Scrape
#
# Workflow file for run "Scheduled Scrape #270".

name: Scheduled Scrape

on:
  schedule:
    # Run every 6 hours (at minute 0).
    - cron: '0 */6 * * *'
  # Manual trigger with an optional comma-separated `routes` input.
  workflow_dispatch:
    inputs:
      routes:
        description: 'Comma-separated routes (e.g., JFK-LAX,IST-JFK)'
        required: false
        default: ''
jobs:
  # Single job: set up the CLI, scrape each route, then print a DB summary.
  scrape:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version a string instead of a float.
          # NOTE(review): Python 3.8 is end-of-life; confirm it is still
          # the intended interpreter.
          python-version: '3.8'

      - name: Install dependencies
        # Editable install of this repository; presumably this is what
        # provides the `flight-cli` command used below (verify).
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Initialize database
        run: |
          flight-cli db init

      - name: Run scheduled routes
        env:
          # Manual `routes` input, passed via the environment rather than
          # interpolated into the script. Empty on scheduled runs, which
          # makes the script fall back to its default list.
          ROUTES: ${{ github.event.inputs.routes }}
        run: |
          # Popular routes to scrape when no override was supplied.
          ROUTES=${ROUTES:-"JFK-LAX,JFK-IST,JFK-CDG,SFO-TYO,LAX-LHR"}

          # Target date is 7 days from now, computed once so every route
          # uses the same date. BSD `date` syntax first, GNU as fallback.
          target_date=$(date -v+7d +%Y-%m-%d 2>/dev/null || date -d "+7 days" +%Y-%m-%d)

          for route in $(echo "$ROUTES" | tr ',' ' '); do
            # Require ORIGIN-DEST; without a dash `cut` would return the
            # whole token for both fields.
            case "$route" in
              ?*-?*) ;;
              *)
                echo "::warning::Skipping malformed route '$route' (expected ORIGIN-DEST)"
                continue
                ;;
            esac

            origin=$(echo "$route" | cut -d'-' -f1)
            dest=$(echo "$route" | cut -d'-' -f2)
            echo "Scraping $origin -> $dest for $target_date"

            # Best effort: one failing route must not abort the others,
            # but surface the failure as a warning annotation.
            flight-cli scrape "$origin" "$dest" -d "$target_date" \
              || echo "::warning::Scrape failed for $origin -> $dest on $target_date"
          done

      - name: Report results
        run: |
          echo "Scrape completed"
          flight-cli db info
          # History output is optional; tolerate a failing history command.
          flight-cli db history --days 1 || echo "No history yet"