-
Notifications
You must be signed in to change notification settings - Fork 1
60 lines (47 loc) · 1.69 KB
/
daily_google_evals.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Daily evaluation run for the Google (Gemini) model set.
#
# Triggered on a nightly schedule and on demand. The single job installs the
# Node/Bun and Python/PDM toolchains, exports the model list, and then invokes
# `braintrust eval` on runner/eval_convex_coding.py.
name: Daily Convex Evaluations (Google Models)

on:
  schedule:
    # Run at midnight UTC daily
    - cron: '0 0 * * *'
  # Allow manual triggering
  workflow_dispatch:

jobs:
  run-evals:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'

      # NOTE(review): the package spec below is not a valid npm argument. It
      # looks like a pinned `bun@<version>` that was rewritten by an
      # email-obfuscation filter; the intended version cannot be recovered
      # from this file alone. Restore the pinned version before merging.
      - name: Install Bun
        run: npm install -g [email protected]

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # Downloads the PDM installer script and pipes it straight to python3.
      - name: Install PDM
        run: |
          curl -sSL https://pdm.fming.dev/install-pdm.py | python3 -

      - name: Install Python dependencies
        run: pdm install

      - name: Install Bun dependencies
        run: bun install

      # Scratch directory handed to the eval step via OUTPUT_TEMPDIR.
      - name: Create temp directory
        run: mkdir -p /tmp/convex-evals

      # Only runs on main: exports the Convex endpoint/token to later steps
      # through $GITHUB_ENV. The secrets are passed in via `env:` and expanded
      # by the shell instead of being templated into the script text, so a
      # value containing quotes or `$` cannot alter the command being run.
      - name: Set main-branch-only environment variables
        if: github.ref == 'refs/heads/main'
        env:
          CONVEX_EVAL_ENDPOINT: ${{ secrets.CONVEX_EVAL_ENDPOINT }}
          CONVEX_AUTH_TOKEN: ${{ secrets.CONVEX_AUTH_TOKEN }}
        run: |
          echo "CONVEX_EVAL_ENDPOINT=$CONVEX_EVAL_ENDPOINT" >> "$GITHUB_ENV"
          echo "CONVEX_AUTH_TOKEN=$CONVEX_AUTH_TOKEN" >> "$GITHUB_ENV"

      # Comma-separated model list exported as MODELS for subsequent steps.
      # Presumably read by the eval runner - confirm against
      # runner/eval_convex_coding.py.
      - name: Set Google models
        run: |
          echo "MODELS=gemini-2.0-flash-lite,gemini-2.0-flash,gemini-1.5-flash,gemini-2.5-pro-exp-03-25" >> "$GITHUB_ENV"

      # All provider API keys are supplied here, not only GOOGLE_API_KEY.
      - name: Run evaluations
        env:
          ENVIRONMENT: ci
          OUTPUT_TEMPDIR: /tmp/convex-evals
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
        run: pdm run braintrust eval runner/eval_convex_coding.py