Skip to content

Docs: Add 2 new failure modes to data-workflow.md #33

Docs: Add 2 new failure modes to data-workflow.md

Docs: Add 2 new failure modes to data-workflow.md #33

Workflow file for this run

name: The D-AI-LY - Automated Pipeline

# Triggers: one fixed daily run, plus an on-demand run that accepts inputs.
on:
  schedule:
    # 13:00 UTC every day, i.e. 8am Eastern Standard Time
    # (9am Eastern while daylight saving time is in effect).
    - cron: "0 13 * * *"
  workflow_dispatch:
    inputs:
      # When set, the discovery step uses this table instead of running
      # topic discovery.
      table:
        description: "Specific table number (optional)"
        required: false
        type: string
      # When true, the fetch, upload and issue-creation stages are skipped.
      dry_run:
        description: "Dry run (discovery only)"
        required: false
        type: boolean
        default: false
# Workflow-level tool versions, quoted so YAML keeps them as strings.
# R_VERSION is read by the "Setup R" step.
# NOTE(review): NODE_VERSION is not referenced by any job visible here -
# confirm whether it is still needed.
env:
  NODE_VERSION: "20"
  R_VERSION: "4.3"
jobs:
  # Stage 1: pick a table (manual input or topic discovery), fetch its data
  # and publish the output/ directory as the "cansim-data" artifact.
  discover-and-fetch:
    runs-on: ubuntu-latest
    outputs:
      selected_table: ${{ steps.discover.outputs.table }}
      has_update: ${{ steps.discover.outputs.has_update }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ env.R_VERSION }}

      - name: Install R dependencies
        run: |
          install.packages(c("cansim", "dplyr", "tidyr", "jsonlite"), repos = "https://cloud.r-project.org")
        shell: Rscript {0}

      - name: Run discovery
        id: discover
        env:
          # Passed through the environment rather than interpolated into the
          # script so the input value can never be parsed as shell code.
          INPUT_TABLE: ${{ github.event.inputs.table }}
        run: |
          mkdir -p output
          if [ -n "$INPUT_TABLE" ]; then
            echo "Using specified table: $INPUT_TABLE"
            echo "table=$INPUT_TABLE" >> "$GITHUB_OUTPUT"
            echo "has_update=true" >> "$GITHUB_OUTPUT"
          else
            echo "Running topic discovery..."
            Rscript r-tools/discover_topics.R --configured --json --output=output
            if [ -f output/discovery_results.json ]; then
              # jq replaces the former inline multi-line Python: Python lines
              # dedented below this block scalar ended the scalar early and
              # made the whole workflow file invalid YAML.
              # Prints nothing when there is no recommendation.
              TABLE=$(jq -r '.recommendation.table_number? // empty' output/discovery_results.json)
              if [ -n "$TABLE" ]; then
                echo "table=$TABLE" >> "$GITHUB_OUTPUT"
                echo "has_update=true" >> "$GITHUB_OUTPUT"
                echo "Recommended table: $TABLE"
              else
                echo "has_update=false" >> "$GITHUB_OUTPUT"
                echo "No newsworthy updates found"
              fi
            else
              echo "has_update=false" >> "$GITHUB_OUTPUT"
            fi
          fi

      - name: Fetch table data
        if: steps.discover.outputs.has_update == 'true' && github.event.inputs.dry_run != 'true'
        env:
          TABLE: ${{ steps.discover.outputs.table }}
        run: |
          echo "Fetching data for table: $TABLE"
          Rscript r-tools/fetch_table.R "$TABLE" output

      - name: Upload data artifact
        if: steps.discover.outputs.has_update == 'true' && github.event.inputs.dry_run != 'true'
        uses: actions/upload-artifact@v4
        with:
          name: cansim-data
          path: output/
          retention-days: 7

  # Stage 2: open a GitHub issue asking a human to generate the article.
  # Runs only when stage 1 reported an update and this is not a dry run.
  create-issue:
    needs: discover-and-fetch
    if: needs.discover-and-fetch.outputs.has_update == 'true' && github.event.inputs.dry_run != 'true'
    runs-on: ubuntu-latest
    # issues.create needs write access to issues; with a read-only default
    # GITHUB_TOKEN the call is rejected unless this is granted explicitly.
    permissions:
      contents: read
      issues: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Download data artifact
        uses: actions/download-artifact@v4
        with:
          name: cansim-data
          path: output/

      - name: Get table info
        id: table_info
        env:
          TABLE: ${{ needs.discover-and-fetch.outputs.selected_table }}
        run: |
          # Extract title from discovery results; the file is absent when the
          # table was supplied manually, in which case the title is "Unknown".
          if [ -f output/discovery_results.json ]; then
            TITLE=$(jq -r '.recommendation.title? // "Unknown"' output/discovery_results.json)
          else
            TITLE="Unknown"
          fi
          echo "table=$TABLE" >> "$GITHUB_OUTPUT"
          echo "title=$TITLE" >> "$GITHUB_OUTPUT"

      - name: Create GitHub Issue for manual generation
        uses: actions/github-script@v7
        env:
          # Read via process.env instead of being pasted into the script:
          # a title containing a quote would otherwise break the JavaScript.
          TABLE: ${{ steps.table_info.outputs.table }}
          TITLE: ${{ steps.table_info.outputs.title }}
        with:
          script: |
            const table = process.env.TABLE;
            const title = process.env.TITLE;
            const date = new Date().toISOString().split('T')[0];
            const issueTitle = `[D-AI-LY] Generate article for ${table} - ${date}`;
            // Built line by line so the Markdown starts at column 0 no matter
            // how this script is indented inside the workflow file.
            const issueBody = [
              '## The D-AI-LY - Article Generation Required',
              `**Date:** ${date}`,
              `**Table:** ${table}`,
              `**Title:** ${title}`,
              '### Data Ready',
              'The discovery and data fetch steps completed successfully. The data artifact is attached to the workflow run.',
              '### To Generate the Article',
              'Run Claude Code locally with:',
              '```bash',
              '# Navigate to the project',
              'cd ~/Projects/the-daily',
              '# Pull latest changes (includes data files)',
              'git pull',
              '# Run the generator',
              `claude "/the-daily-generator ${table}"`,
              '# Or run the full pipeline',
              `./automation/run_pipeline.sh --table=${table}`,
              '```',
              '### Why Manual?',
              'This workflow creates an issue instead of generating automatically because:',
              '- Article generation uses Claude Code via Max subscription (no API costs)',
              '- Generation runs locally on your machine',
              '### Workflow Run',
              '[View workflow run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})',
              // Blank line: "---" directly under a text line is a setext
              // heading underline in Markdown, not a horizontal rule.
              '',
              '---',
              '*This issue was automatically created by the D-AI-LY pipeline.*',
              ''
            ].join('\n');
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: issueTitle,
              body: issueBody,
              labels: ['d-ai-ly', 'automation']
            });

  # Stage 3: always write a run summary, whatever the earlier stages did.
  notify:
    needs: [discover-and-fetch, create-issue]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Summary
        env:
          SELECTED_TABLE: ${{ needs.discover-and-fetch.outputs.selected_table || 'None' }}
          HAS_UPDATE: ${{ needs.discover-and-fetch.outputs.has_update }}
          ISSUE_RESULT: ${{ needs.create-issue.result }}
        run: |
          {
            echo "## D-AI-LY Pipeline Summary"
            echo ""
            # -u so the timestamp really is UTC, matching its label.
            echo "- **Date**: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
            echo "- **Selected Table**: $SELECTED_TABLE"
            echo "- **Has Update**: $HAS_UPDATE"
            echo ""
            if [ "$ISSUE_RESULT" == "success" ]; then
              echo "✅ Data fetched and issue created for manual generation."
              echo ""
              echo "Check GitHub Issues for generation instructions."
            elif [ "$HAS_UPDATE" == "false" ]; then
              echo "ℹ️ No newsworthy updates found today."
            else
              echo "⚠️ Pipeline completed with status: $ISSUE_RESULT"
            fi
          } >> "$GITHUB_STEP_SUMMARY"