Add: Provenance-based fabrication prevention checklist #34
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity, triggers and shared environment for the D-AI-LY pipeline.
name: The D-AI-LY - Automated Pipeline

on:
  # Daily scheduled run at 13:00 UTC. GitHub evaluates cron expressions in UTC,
  # so this is 8am Eastern during standard time and 9am during daylight time.
  schedule:
    - cron: "0 13 * * *"

  # Manual trigger from the Actions tab or the API.
  workflow_dispatch:
    inputs:
      # When set, discovery is skipped and this table is used directly.
      table:
        description: "Specific table number (optional)"
        type: string
        required: false
      # When true, only the discovery step runs; fetch, upload and issue creation are skipped.
      dry_run:
        description: "Dry run (discovery only)"
        type: boolean
        required: false
        default: false

env:
  # NOTE(review): NODE_VERSION is not referenced by any step visible in this workflow.
  NODE_VERSION: "20"
  # Consumed by the "Setup R" step.
  R_VERSION: "4.3"
jobs:
  # Chooses the table to report on (manual input, otherwise topic discovery) and,
  # unless this is a dry run, fetches its data and uploads it as the `cansim-data`
  # artifact. Exposes `selected_table` and `has_update` to downstream jobs.
  discover-and-fetch:
    runs-on: ubuntu-latest
    outputs:
      selected_table: ${{ steps.discover.outputs.table }}
      has_update: ${{ steps.discover.outputs.has_update }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ env.R_VERSION }}

      - name: Install R dependencies
        run: |
          install.packages(c("cansim", "dplyr", "tidyr", "jsonlite"), repos = "https://cloud.r-project.org")
        shell: Rscript {0}

      - name: Run discovery
        id: discover
        env:
          # The manual input is handed over as an environment variable rather than
          # expanded into the script text, so a crafted value (quotes, `$(...)`,
          # `;`) cannot alter or inject shell commands.
          INPUT_TABLE: ${{ github.event.inputs.table }}
        run: |
          mkdir -p output

          if [ -n "$INPUT_TABLE" ]; then
            # A table was requested explicitly: skip discovery entirely.
            echo "Using specified table: $INPUT_TABLE"
            echo "table=$INPUT_TABLE" >> "$GITHUB_OUTPUT"
            echo "has_update=true" >> "$GITHUB_OUTPUT"
          else
            echo "Running topic discovery..."
            Rscript r-tools/discover_topics.R --configured --json --output=output

            if [ -f output/discovery_results.json ]; then
              # Prints the recommended table number, or an empty line when the
              # results carry no recommendation. Kept on one line so the Python
              # source cannot pick up indentation from the surrounding script.
              TABLE=$(python3 -c "import json; rec = json.load(open('output/discovery_results.json')).get('recommendation'); print(rec['table_number'] if rec else '')")

              if [ -n "$TABLE" ]; then
                echo "table=$TABLE" >> "$GITHUB_OUTPUT"
                echo "has_update=true" >> "$GITHUB_OUTPUT"
                echo "Recommended table: $TABLE"
              else
                echo "has_update=false" >> "$GITHUB_OUTPUT"
                echo "No newsworthy updates found"
              fi
            else
              # Discovery produced no results file: treat as "nothing to report".
              echo "has_update=false" >> "$GITHUB_OUTPUT"
            fi
          fi

      - name: Fetch table data
        if: steps.discover.outputs.has_update == 'true' && github.event.inputs.dry_run != 'true'
        env:
          # Same reasoning as above: the table id may originate from user input.
          TABLE: ${{ steps.discover.outputs.table }}
        run: |
          echo "Fetching data for table: $TABLE"
          Rscript r-tools/fetch_table.R "$TABLE" output

      - name: Upload data artifact
        if: steps.discover.outputs.has_update == 'true' && github.event.inputs.dry_run != 'true'
        uses: actions/upload-artifact@v4
        with:
          name: cansim-data
          path: output/
          retention-days: 7
# Job: create-issue
  # Opens a GitHub issue asking a maintainer to generate the article locally.
  # Runs only when discover-and-fetch reported an update and this is not a dry run.
  create-issue:
    needs: discover-and-fetch
    if: needs.discover-and-fetch.outputs.has_update == 'true' && github.event.inputs.dry_run != 'true'
    runs-on: ubuntu-latest
    # Explicit least-privilege token scopes: checkout needs to read the repo and
    # the final step needs to create an issue. Without `issues: write` the call
    # fails with 403 wherever the default GITHUB_TOKEN is read-only.
    permissions:
      contents: read
      issues: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Download data artifact
        uses: actions/download-artifact@v4
        with:
          name: cansim-data
          path: output/

      - name: Get table info
        id: table_info
        env:
          # Passed via the environment so the value is never parsed as shell code.
          TABLE: ${{ needs.discover-and-fetch.outputs.selected_table }}
        run: |
          # Extract the title from the discovery results when they are present.
          TITLE=""
          if [ -f output/discovery_results.json ]; then
            TITLE=$(python3 -c "import json; rec = json.load(open('output/discovery_results.json')).get('recommendation'); print(rec.get('title', 'Unknown') if rec else '')")
          fi

          echo "table=$TABLE" >> "$GITHUB_OUTPUT"
          # Fall back to "Unknown" both when the results file is missing and when
          # it exists but holds no recommendation (previously an empty title).
          echo "title=${TITLE:-Unknown}" >> "$GITHUB_OUTPUT"

      - name: Create GitHub Issue for manual generation
        uses: actions/github-script@v7
        env:
          TABLE: ${{ steps.table_info.outputs.table }}
          TITLE: ${{ steps.table_info.outputs.title }}
        with:
          script: |
            // Read the table id and title from the environment. Expanding them
            // into quoted JS literals would break on any apostrophe in a dataset
            // title and would let such a value inject code into this script.
            const table = process.env.TABLE;
            const title = process.env.TITLE;
            const date = new Date().toISOString().split('T')[0];
            const issueTitle = `[D-AI-LY] Generate article for ${table} - ${date}`;
            // The body lines sit at the script's base indentation so the rendered
            // Markdown carries no leading whitespace.
            const issueBody = `## The D-AI-LY - Article Generation Required
            **Date:** ${date}
            **Table:** ${table}
            **Title:** ${title}
            ### Data Ready
            The discovery and data fetch steps completed successfully. The data artifact is attached to the workflow run.
            ### To Generate the Article
            Run Claude Code locally with:
            \`\`\`bash
            # Navigate to the project
            cd ~/Projects/the-daily
            # Pull latest changes (includes data files)
            git pull
            # Run the generator
            claude "/the-daily-generator ${table}"
            # Or run the full pipeline
            ./automation/run_pipeline.sh --table=${table}
            \`\`\`
            ### Why Manual?
            This workflow creates an issue instead of generating automatically because:
            - Article generation uses Claude Code via Max subscription (no API costs)
            - Generation runs locally on your machine
            ### Workflow Run
            [View workflow run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})

            ---
            *This issue was automatically created by the D-AI-LY pipeline.*
            `;
            // The blank line before `---` keeps it a horizontal rule; directly under
            // the link line it would turn that line into a setext heading.
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: issueTitle,
              body: issueBody,
              labels: ['d-ai-ly', 'automation']
            });
# Job: notify
  # Writes a run summary to the workflow's step summary. `if: always()` makes it
  # run even when an upstream job failed or was skipped.
  notify:
    needs: [discover-and-fetch, create-issue]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Summary
        env:
          # Upstream values are passed as environment variables: the selected
          # table can come from manual input and must not be expanded into the
          # script text.
          SELECTED_TABLE: ${{ needs.discover-and-fetch.outputs.selected_table || 'None' }}
          HAS_UPDATE: ${{ needs.discover-and-fetch.outputs.has_update }}
          ISSUE_RESULT: ${{ needs.create-issue.result }}
        run: |
          {
            echo "## D-AI-LY Pipeline Summary"
            echo ""
            # -u forces UTC so the "UTC" label is correct regardless of runner TZ.
            echo "- **Date**: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
            echo "- **Selected Table**: $SELECTED_TABLE"
            echo "- **Has Update**: $HAS_UPDATE"
            echo ""
            if [ "$ISSUE_RESULT" = "success" ]; then
              echo "✅ Data fetched and issue created for manual generation."
              echo ""
              echo "Check GitHub Issues for generation instructions."
            elif [ "$HAS_UPDATE" = "false" ]; then
              echo "ℹ️ No newsworthy updates found today."
            else
              # Covers failures as well as skips (e.g. a dry run with an update).
              echo "⚠️ Pipeline completed with status: $ISSUE_RESULT"
            fi
          } >> "$GITHUB_STEP_SUMMARY"