# PR Scraper Cron #2157
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: PR Scraper Cron

# Periodically drives the PR scraper backend through its admin HTTP endpoints:
# wake the server, fetch PR info, fetch reviews, fetch CI checks, then clean up
# merged/closed PRs.
#
# Required secrets:
#   API_URL     - base URL of the backend (no trailing slash)
#   ADMIN_TOKEN - token accepted by the /api/v1/admin/* endpoints

on:
  schedule:
    # Run every 12 minutes during business hours (Mon-Fri, 6am-6pm PT).
    # GitHub evaluates cron expressions in UTC. PT is UTC-8 (winter) / UTC-7 (summer), so
    # 6am-6pm PT = 14:00-02:00 UTC (winter) / 13:00-01:00 UTC (summer).
    # Using 13:00-01:59 UTC to cover 6am-6pm PT year-round.
    # The 00:00-01:59 UTC hours fall on the *next* UTC calendar day relative to PT,
    # so that window needs Tue-Sat (2-6) in UTC to mean Mon-Fri evening in PT.
    # 12 min is max safe frequency: ~800 API calls/run × 5 runs/hour = 4,000 (limit: 5,000/hour)
    - cron: '*/12 13-23 * * 1-5'
    - cron: '*/12 0-1 * * 2-6'
  workflow_dispatch: # Allow manual trigger

jobs:
  scrape:
    runs-on: ubuntu-latest
    # Effective upper bound for a run: the per-request --max-time values below
    # can add up to far more than 20 minutes in the worst case.
    timeout-minutes: 20
    env:
      # Space-separated list of repositories to scrape; iterated with word splitting.
      REPOS: vets-api vets-api-mockdata platform-atlas
      OWNER: department-of-veterans-affairs
    steps:
      # Step 0: Wake up the Render server (free tier sleeps after inactivity)
      - name: Wake up server
        env:
          API_URL: ${{ secrets.API_URL }}
        run: |
          echo "Pinging server to wake it up..."
          for i in 1 2 3 4 5; do
            # curl prints "000" for %{http_code} when no HTTP response was received
            # (timeout, DNS failure, refused connection). "|| true" keeps the
            # step's "bash -e" from aborting on curl's non-zero exit status.
            status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 "${API_URL}/api/v1/reviews/version" 2>/dev/null) || true
            if [ "$status" = "200" ]; then
              echo "Server is up (attempt $i)"
              break
            fi
            if [ -z "$status" ] || [ "$status" = "000" ]; then
              status="timeout/no response"
            fi
            echo "Attempt $i: server not ready (status: $status), waiting 15s..."
            sleep 15
          done
          # Final check (best effort: never fails the job, only reports)
          response=$(curl -s --max-time 60 "${API_URL}/api/v1/reviews/version") || true
          if [ -z "$response" ]; then
            echo "Warning: Server may not be fully awake yet, proceeding anyway"
          else
            echo "Server responded: $response"
          fi

      # Step 1: Fetch basic PR info for all repos (low memory)
      # Fails the job only when the primary repo (vets-api) could not be scraped.
      - name: Step 1/4 - Fetch PR info (lite mode)
        env:
          API_URL: ${{ secrets.API_URL }}
          ADMIN_TOKEN: ${{ secrets.ADMIN_TOKEN }}
        run: |
          step_failed=false
          # shellcheck disable=SC2086 -- REPOS is a space-separated list; splitting is intended
          for repo in $REPOS; do
            echo "Step 1/4: Fetching basic PR info for $repo at $(date -u '+%Y-%m-%d %H:%M:%S UTC')..."
            # Capture curl's exit status instead of discarding it: curl still emits
            # the -w trailer ("\n000") when the transfer fails, so an empty-output
            # check alone cannot detect a timeout.
            curl_status=0
            response=$(curl -s -w "\n%{http_code}" -X POST \
              "${API_URL}/api/v1/admin/manual_scraper_run?token=${ADMIN_TOKEN}&repository_name=${repo}&repository_owner=${OWNER}&lite_mode=true" \
              -H "Content-Type: application/json" \
              --max-time 300) || curl_status=$?
            # The last line is the status code written by -w; everything before it is the body.
            http_code=${response##*$'\n'}
            body=${response%$'\n'*}
            echo "Response ($repo): $body"
            echo "HTTP Status ($repo): $http_code"
            repo_failed=false
            if [ "$curl_status" -ne 0 ] || ! [[ "$http_code" =~ ^[1-5][0-9][0-9]$ ]]; then
              echo "Error: Step 1 timed out for $repo (curl exit code: $curl_status)"
              repo_failed=true
            elif [ "$http_code" -ge 400 ]; then
              echo "Error: Step 1 failed for $repo with status $http_code"
              repo_failed=true
            else
              echo "Step 1/4 completed for $repo"
            fi
            # Only the primary repo is allowed to fail the whole step.
            if [ "$repo_failed" = true ] && [ "$repo" = "vets-api" ]; then
              step_failed=true
            fi
          done
          if [ "$step_failed" = true ]; then
            echo "Error: Step 1 failed for primary repo (vets-api)"
            exit 1
          fi
          echo "Step 1/4 completed for all repos"

      # Step 2: Fetch reviews for all repos (medium memory)
      # Best effort: failures are logged as warnings and never fail the job.
      - name: Step 2/4 - Fetch reviews
        env:
          API_URL: ${{ secrets.API_URL }}
          ADMIN_TOKEN: ${{ secrets.ADMIN_TOKEN }}
        run: |
          # shellcheck disable=SC2086 -- REPOS is a space-separated list; splitting is intended
          for repo in $REPOS; do
            echo "Step 2/4: Fetching reviews for $repo at $(date -u '+%Y-%m-%d %H:%M:%S UTC')..."
            # See Step 1: curl's exit status is the reliable signal for a timeout,
            # because the -w trailer makes the captured output non-empty even on failure.
            curl_status=0
            response=$(curl -s -w "\n%{http_code}" -X POST \
              "${API_URL}/api/v1/admin/fetch_reviews?token=${ADMIN_TOKEN}&repository_name=${repo}&repository_owner=${OWNER}" \
              -H "Content-Type: application/json" \
              --max-time 540) || curl_status=$?
            http_code=${response##*$'\n'}
            body=${response%$'\n'*}
            echo "Response ($repo): $body"
            echo "HTTP Status ($repo): $http_code"
            if [ "$curl_status" -ne 0 ] || ! [[ "$http_code" =~ ^[1-5][0-9][0-9]$ ]]; then
              echo "Warning: Step 2 timed out or failed for $repo (curl exit code: $curl_status, continuing anyway)"
            elif [ "$http_code" -ge 400 ]; then
              echo "Warning: Step 2 failed for $repo with status $http_code (continuing anyway)"
            else
              echo "Step 2/4 completed for $repo"
            fi
          done
          echo "Step 2/4 completed for all repos"

      # Step 3: Fetch CI checks for all repos (medium memory)
      # Best effort: failures are logged as warnings and never fail the job.
      - name: Step 3/4 - Fetch CI checks
        env:
          API_URL: ${{ secrets.API_URL }}
          ADMIN_TOKEN: ${{ secrets.ADMIN_TOKEN }}
        run: |
          # shellcheck disable=SC2086 -- REPOS is a space-separated list; splitting is intended
          for repo in $REPOS; do
            echo "Step 3/4: Fetching CI checks for $repo at $(date -u '+%Y-%m-%d %H:%M:%S UTC')..."
            # See Step 1: curl's exit status is the reliable signal for a timeout,
            # because the -w trailer makes the captured output non-empty even on failure.
            curl_status=0
            response=$(curl -s -w "\n%{http_code}" -X POST \
              "${API_URL}/api/v1/admin/fetch_ci_checks?token=${ADMIN_TOKEN}&repository_name=${repo}&repository_owner=${OWNER}" \
              -H "Content-Type: application/json" \
              --max-time 480) || curl_status=$?
            http_code=${response##*$'\n'}
            body=${response%$'\n'*}
            echo "Response ($repo): $body"
            echo "HTTP Status ($repo): $http_code"
            if [ "$curl_status" -ne 0 ] || ! [[ "$http_code" =~ ^[1-5][0-9][0-9]$ ]]; then
              echo "Warning: Step 3 timed out or failed for $repo (curl exit code: $curl_status, continuing anyway)"
            elif [ "$http_code" -ge 400 ]; then
              echo "Warning: Step 3 failed for $repo with status $http_code (continuing anyway)"
            else
              echo "Step 3/4 completed for $repo"
            fi
          done
          echo "Step 3/4 completed for all repos"

      # Step 4: Cleanup merged/closed PRs
      # Best effort: failures are logged as warnings and never fail the job.
      - name: Step 4/4 - Cleanup merged/closed PRs
        env:
          API_URL: ${{ secrets.API_URL }}
          ADMIN_TOKEN: ${{ secrets.ADMIN_TOKEN }}
        run: |
          echo "Step 4/4: Cleaning up merged/closed PRs at $(date -u '+%Y-%m-%d %H:%M:%S UTC')..."
          # curl's exit status is the reliable signal for a timeout, because the
          # -w trailer makes the captured output non-empty even on failure.
          curl_status=0
          response=$(curl -s -w "\n%{http_code}" -X POST \
            "${API_URL}/api/v1/admin/cleanup_merged_prs?token=${ADMIN_TOKEN}" \
            -H "Content-Type: application/json" \
            --max-time 300) || curl_status=$?
          http_code=${response##*$'\n'}
          body=${response%$'\n'*}
          echo "Response: $body"
          echo "HTTP Status: $http_code"
          if [ "$curl_status" -ne 0 ] || ! [[ "$http_code" =~ ^[1-5][0-9][0-9]$ ]]; then
            echo "Warning: Step 4 timed out or failed (curl exit code: $curl_status, continuing anyway)"
          elif [ "$http_code" -ge 400 ]; then
            echo "Warning: Step 4 failed with status $http_code (continuing anyway)"
          else
            echo "Step 4/4 completed successfully"
          fi