# Workflow file for run "trigger eval" (#6)

# LangSmith Eval Pipeline
#
# Runs on pull requests targeting main and on manual dispatch. The jobs below
# run the evaluation and, for pull requests, post a report comment.
name: LangSmith Eval Pipeline

on:
  pull_request:
    branches: [main]
  # Manual runs may override the pass/fail threshold.
  workflow_dispatch:
    inputs:
      accuracy_threshold:
        description: "Minimum accuracy score (1-10)"
        required: false
        default: "7"

# Default token permissions for jobs that do not declare their own.
permissions:
  contents: read
  # Required to create or update the report comment on the pull request.
  pull-requests: write

# Workflow-level environment: exported to every step of every job.
env:
  LANGSMITH_API_KEY: ${{ secrets.LANGSMITH_API_KEY }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  # Falls back to '7' when no dispatch input is present (e.g. pull_request
  # runs, which carry no inputs).
  ACCURACY_THRESHOLD: ${{ github.event.inputs.accuracy_threshold || '7' }}
jobs:
  # -----------------------------------------------------------------------
  # Job 1: Run the evaluation — this is the quality gate
  # -----------------------------------------------------------------------
  evaluate:
    name: Run Evaluation
    runs-on: ubuntu-latest
    # Least privilege: this job only checks out and reads the repository.
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Set up Python
        run: uv python install

      - name: Install dependencies
        run: uv sync

      # ACCURACY_THRESHOLD may originate from a workflow_dispatch input, so it
      # is read as a quoted shell variable (exported by the workflow-level
      # env) rather than spliced into the script text via an inline
      # expression. This prevents shell injection and word splitting.
      - name: Run evaluation
        run: uv run python evals/run_eval.py --threshold "$ACCURACY_THRESHOLD"

      # Upload the config artifact so the report job can read it.
      # always(): upload even when the evaluation step above failed.
      - name: Upload eval config
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-configs
          path: evaluation_config__*.json

  # -----------------------------------------------------------------------
  # Job 2: Generate report and post as PR comment
  # -----------------------------------------------------------------------
  report:
    name: Eval Report
    runs-on: ubuntu-latest
    needs: evaluate
    # always(): still report when the evaluate job failed. Restricted to
    # pull_request events because the last step comments on the PR.
    if: always() && github.event_name == 'pull_request'
    # Only this job needs write access to pull requests (for the comment).
    permissions:
      contents: read
      pull-requests: write
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Set up Python
        run: uv python install

      - name: Install dependencies
        run: uv sync

      # Fetches the artifact uploaded by the evaluate job.
      - name: Download eval configs
        uses: actions/download-artifact@v4
        with:
          name: eval-configs

      # NOTE(review): presumably writes eval_report.md, which the next step
      # reads — confirm against evals/report_eval.py.
      - name: Generate report
        run: uv run python evals/report_eval.py

      - name: Post PR comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const report = fs.readFileSync('eval_report.md', 'utf8');
            const repoRef = {
              owner: context.repo.owner,
              repo: context.repo.repo,
            };

            // Find and update existing comment, or create a new one.
            // paginate() walks every page of comments; a single
            // listComments call returns only the first page, so on a busy
            // PR the earlier report would be missed and a duplicate posted.
            const comments = await github.paginate(
              github.rest.issues.listComments,
              {
                ...repoRef,
                issue_number: context.issue.number,
                per_page: 100,
              }
            );

            // Only reuse a bot-authored comment so a human comment that
            // merely quotes the heading is never overwritten. Optional
            // chaining guards against comments without a body or user.
            const botComment = comments.find(c =>
              c.user?.type === 'Bot' &&
              c.body?.includes('## Evaluation Report')
            );

            if (botComment) {
              await github.rest.issues.updateComment({
                ...repoRef,
                comment_id: botComment.id,
                body: report,
              });
            } else {
              await github.rest.issues.createComment({
                ...repoRef,
                issue_number: context.issue.number,
                body: report,
              });
            }