-
Notifications
You must be signed in to change notification settings - Fork 5.5k
245 lines (211 loc) · 9.24 KB
/
Copy pathnotebook-tests.yml
File metadata and controls
245 lines (211 loc) · 9.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
name: Notebook Tests
on:
pull_request:
paths:
- '**/*.ipynb'
- 'tests/notebook_tests/**'
- 'pyproject.toml'
- 'uv.lock'
push:
branches: [main]
paths:
- '**/*.ipynb'
- 'tests/notebook_tests/**'
permissions:
contents: read
pull-requests: write
id-token: write # Anthropic Workload Identity Federation
jobs:
test-notebooks:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # Need full history for diff
- name: Install uv
uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4.2.0 (sha-pinned)
with:
enable-cache: true
cache-dependency-glob: "uv.lock"
- name: Set up Python 3.11
run: uv python install 3.11
- name: Install dependencies
run: uv sync --frozen --all-extras
- name: Get changed notebooks
id: changed-notebooks
env:
EVENT_NAME: ${{ github.event_name }}
BASE_REF: ${{ github.base_ref }}
run: |
if [ "$EVENT_NAME" = "pull_request" ]; then
# For PRs, get notebooks changed compared to base branch
git fetch origin "$BASE_REF"
CHANGED_NOTEBOOKS=$(git diff --name-only "origin/$BASE_REF"...HEAD | grep '\.ipynb$' || echo "")
else
# For push to main, get notebooks changed in the push
CHANGED_NOTEBOOKS=$(git diff --name-only HEAD~1 HEAD | grep '\.ipynb$' || echo "")
fi
if [ -z "$CHANGED_NOTEBOOKS" ]; then
echo "No notebooks changed"
echo "has_notebooks=false" >> $GITHUB_OUTPUT
echo "" > changed_notebooks.txt
else
echo "Changed notebooks:"
echo "$CHANGED_NOTEBOOKS"
echo "$CHANGED_NOTEBOOKS" > changed_notebooks.txt
echo "has_notebooks=true" >> $GITHUB_OUTPUT
# Count notebooks
NOTEBOOK_COUNT=$(echo "$CHANGED_NOTEBOOKS" | wc -l | tr -d ' ')
echo "notebook_count=$NOTEBOOK_COUNT" >> $GITHUB_OUTPUT
fi
- name: Run notebook structure tests
id: structure-tests
if: steps.changed-notebooks.outputs.has_notebooks == 'true'
run: |
echo "## Notebook Structure Tests" >> $GITHUB_STEP_SUMMARY
FAILED_NOTEBOOKS=""
PASSED_COUNT=0
FAILED_COUNT=0
while IFS= read -r notebook; do
if [ -z "$notebook" ]; then
continue
fi
echo "Testing: $notebook"
# Run pytest on this specific notebook
if uv run pytest tests/notebook_tests/test_notebooks.py \
-v --tb=short \
-m "not slow" \
--notebook "$notebook" \
2>&1 | tee "test_output_$(echo "$notebook" | tr '/' '_').txt"; then
echo "✅ $notebook" >> $GITHUB_STEP_SUMMARY
PASSED_COUNT=$((PASSED_COUNT + 1))
else
echo "❌ $notebook" >> $GITHUB_STEP_SUMMARY
FAILED_NOTEBOOKS="$FAILED_NOTEBOOKS$notebook\n"
FAILED_COUNT=$((FAILED_COUNT + 1))
fi
done < changed_notebooks.txt
echo "" >> $GITHUB_STEP_SUMMARY
echo "**Results:** $PASSED_COUNT passed, $FAILED_COUNT failed" >> $GITHUB_STEP_SUMMARY
# Set outputs
echo "passed_count=$PASSED_COUNT" >> $GITHUB_OUTPUT
echo "failed_count=$FAILED_COUNT" >> $GITHUB_OUTPUT
if [ "$FAILED_COUNT" -gt 0 ]; then
echo "has_failures=true" >> $GITHUB_OUTPUT
echo -e "$FAILED_NOTEBOOKS" > failed_notebooks.txt
else
echo "has_failures=false" >> $GITHUB_OUTPUT
fi
continue-on-error: true
- name: Collect test results
if: steps.changed-notebooks.outputs.has_notebooks == 'true'
run: |
# Combine all test outputs
cat test_output_*.txt > all_test_output.txt 2>/dev/null || echo "No test output files"
- name: Post test results to PR
if: |
github.event_name == 'pull_request' &&
steps.changed-notebooks.outputs.has_notebooks == 'true' &&
steps.structure-tests.outputs.has_failures == 'true'
uses: anthropics/claude-code-action@bbfaf8e1ffe3e688f7ab65ceee78de241e24a238 # v1.0.132 (>=v1.0.130 for WIF inputs)
with:
# Anthropic auth via Workload Identity Federation — the action
# exchanges this job's GitHub OIDC token (id-token: write above)
# for a short-lived access token instead of a static API key.
anthropic_federation_rule_id: fdrl_01SqmTwzmEE547mtaYN1mqHL
anthropic_organization_id: 1ec12c5c-6542-4da8-bf2f-c15919aef01c
anthropic_service_account_id: svac_01BHcCBa1UWFvNrHMqJjuaUZ
github_token: ${{ secrets.GITHUB_TOKEN }}
prompt: |
The notebook tests found issues in the changed notebooks.
Test results: ${{ steps.structure-tests.outputs.passed_count }} passed, ${{ steps.structure-tests.outputs.failed_count }} failed
Here is the test output:
```
$(cat all_test_output.txt | head -200)
```
Create a helpful PR comment that:
- Lists which notebooks failed and why
- Groups similar issues (e.g., "cells not executed", "execution order issues")
- Explains how to fix common issues:
- "Cells not executed": Run all cells from top to bottom before committing
- "Execution order issues": Restart kernel and run all cells sequentially
- "Deprecated models": Update to current model versions (claude-sonnet-4-6, etc.)
- "Hardcoded API keys": Use os.environ.get("ANTHROPIC_API_KEY") instead
- Mentions they can test locally with: `make test-notebooks NOTEBOOK=path/to/notebook.ipynb`
- Uses friendly, constructive language
Post using: gh pr comment $PR_NUMBER --body "your comment"
claude_args: |
--allowedTools "Bash(gh pr comment:*),Bash(cat:*),Read"
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
# TODO: this step still reads the static ANTHROPIC_API_KEY secret. The
# claude-code-action step above uses Workload Identity Federation; this
# direct-API step needs a separate inline OIDC mint+exchange (or the
# anthropic SDK's WIF env-var trio). Gracefully skips when the secret
# is absent (the `[ -z "$ANTHROPIC_API_KEY" ]` guard below).
- name: Run notebook execution tests (maintainers only)
id: execution-tests
if: |
steps.changed-notebooks.outputs.has_notebooks == 'true' &&
(github.event_name == 'push' ||
github.event.pull_request.author_association == 'MEMBER' ||
github.event.pull_request.author_association == 'OWNER')
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
echo "## Notebook Execution Tests" >> $GITHUB_STEP_SUMMARY
# Only run if API key is available
if [ -z "$ANTHROPIC_API_KEY" ]; then
echo "⚠️ Skipping execution tests - no API key available" >> $GITHUB_STEP_SUMMARY
exit 0
fi
mkdir -p execution_outputs
EXEC_FAILED=0
while IFS= read -r notebook; do
if [ -z "$notebook" ]; then
continue
fi
echo "Executing: $notebook"
# Run execution test with timeout
if timeout 300 uv run pytest tests/notebook_tests/test_notebooks.py \
-v --tb=long \
--execute-notebooks \
--notebook-timeout 240 \
--notebook "$notebook" \
-k "test_notebook_executes_successfully" \
2>&1 | tee "execution_outputs/$(echo "$notebook" | tr '/' '_').txt"; then
echo "✅ Executed: $notebook" >> $GITHUB_STEP_SUMMARY
else
echo "❌ Failed: $notebook" >> $GITHUB_STEP_SUMMARY
EXEC_FAILED=$((EXEC_FAILED + 1))
fi
done < changed_notebooks.txt
if [ "$EXEC_FAILED" -gt 0 ]; then
echo "exec_failures=$EXEC_FAILED" >> $GITHUB_OUTPUT
fi
continue-on-error: true
- name: Upload test artifacts
if: always() && steps.changed-notebooks.outputs.has_notebooks == 'true'
uses: actions/upload-artifact@v4
with:
name: notebook-test-results
path: |
test_output_*.txt
all_test_output.txt
failed_notebooks.txt
execution_outputs/
retention-days: 7
if-no-files-found: ignore
- name: Final status check
if: steps.changed-notebooks.outputs.has_notebooks == 'true'
run: |
if [ "${{ steps.structure-tests.outputs.has_failures }}" = "true" ]; then
echo "❌ Some notebook tests failed. Please fix the issues above."
exit 1
fi
echo "✅ All notebook tests passed!"
- name: No notebooks changed
if: steps.changed-notebooks.outputs.has_notebooks == 'false'
run: |
echo "✅ No notebooks were changed in this PR/push"
echo "No notebooks to test" >> $GITHUB_STEP_SUMMARY