build(deps): Bump urllib3 from 2.6.3 to 2.7.0 #11
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow configuration: triggers, token permissions, and the environment
# shared by every job defined below.
name: LangSmith Eval Pipeline

on:
  # Run automatically for pull requests that target main.
  pull_request:
    branches:
      - main
  # Allow manual runs, optionally overriding the accuracy threshold.
  workflow_dispatch:
    inputs:
      accuracy_threshold:
        description: 'Minimum accuracy score (1-10)'
        required: false
        default: '7'

permissions:
  contents: read
  # Write access lets the workflow create or update a comment on the PR.
  pull-requests: write

env:
  LANGSMITH_API_KEY: ${{ secrets.LANGSMITH_API_KEY }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  # The dispatch input is empty on pull_request events, so fall back to '7'.
  ACCURACY_THRESHOLD: ${{ github.event.inputs.accuracy_threshold || '7' }}

jobs:
# -----------------------------------------------------------------------
# Job 1: Run the evaluation — this is the quality gate
# -----------------------------------------------------------------------
  evaluate:
    name: Run Evaluation
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Set up Python
        run: uv python install

      - name: Install dependencies
        run: uv sync

      # ACCURACY_THRESHOLD is set in the workflow-level env, so it is already
      # exported to the shell. Reading it as a quoted shell variable (instead
      # of expanding ${{ ... }} into the script text) keeps a manually
      # dispatched input from being interpreted as shell syntax.
      - name: Run evaluation
        run: uv run python evals/run_eval.py --threshold "$ACCURACY_THRESHOLD"

      # Upload the config artifact so the report job can read it.
      # always() keeps this step running even when the evaluation step fails.
      - name: Upload eval config
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-configs
          path: evaluation_config__*.json
# -----------------------------------------------------------------------
# Job 2: Generate report and post as PR comment
# -----------------------------------------------------------------------
  report:
    name: Eval Report
    runs-on: ubuntu-latest
    needs: evaluate
    # always() lets the report run even when the evaluate job failed; the
    # event check restricts it to runs that have a pull request to comment on.
    if: always() && github.event_name == 'pull_request'
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Set up Python
        run: uv python install

      - name: Install dependencies
        run: uv sync

      # Fetch the files uploaded by the evaluate job under the same artifact name.
      - name: Download eval configs
        uses: actions/download-artifact@v4
        with:
          name: eval-configs

      # NOTE(review): presumably this script writes eval_report.md, which the
      # next step reads — confirm against evals/report_eval.py.
      - name: Generate report
        run: uv run python evals/report_eval.py

      - name: Post PR comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');

            // Heading used to recognise a previously posted report.
            // NOTE(review): assumes eval_report.md contains this heading;
            // otherwise every run creates a new comment — confirm.
            const REPORT_MARKER = '## Evaluation Report';

            const report = fs.readFileSync('eval_report.md', 'utf8');
            const { owner, repo } = context.repo;
            const issue_number = context.issue.number;

            // listComments returns a single page (30 items by default), so walk
            // every page; otherwise an existing report on a busy PR is missed
            // and a duplicate comment is posted on each run.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner,
              repo,
              issue_number,
              per_page: 100,
            });

            // Find and update the existing comment, or create a new one.
            // `body` is optional in the API response, hence the `?.`.
            const existing = comments.find((comment) =>
              comment.body?.includes(REPORT_MARKER)
            );

            if (existing) {
              await github.rest.issues.updateComment({
                owner,
                repo,
                comment_id: existing.id,
                body: report,
              });
            } else {
              await github.rest.issues.createComment({
                owner,
                repo,
                issue_number,
                body: report,
              });
            }