Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions .github/workflows/eval-e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Weekly end-to-end lifecycle evaluation. Builds azd from source, signs in to
# Azure with OIDC, runs the Waza lifecycle tasks, then always uploads the
# reports and deletes the Azure resource groups tagged for this run.
name: "Eval: E2E Lifecycle"

on:
  schedule:
    # 6am UTC Monday
    - cron: "0 6 * * 1"
  workflow_dispatch:

permissions:
  # Lets azure/login request an OIDC token.
  id-token: write
  contents: read

jobs:
  e2e-lifecycle:
    runs-on: ubuntu-latest
    env:
      # Scoped to this workflow run; the cleanup step keys off the same name.
      AZURE_ENV_NAME: eval-e2e-${{ github.run_id }}
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: "cli/azd/go.mod"

      - uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Build azd
        working-directory: cli/azd
        run: go build -o ./azd .

      # Put the directory holding the freshly built binary on PATH for later steps.
      - name: Add azd to PATH
        run: echo "${{ github.workspace }}/cli/azd" >> "$GITHUB_PATH"

      - name: Azure Login (OIDC)
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

      - name: Install Waza CLI
        run: npm install -g waza

      - name: Install eval dependencies
        working-directory: cli/azd/test/eval
        run: npm ci

      # continue-on-error keeps a failing evaluation from failing the job.
      - name: Run lifecycle evaluations
        working-directory: cli/azd/test/eval
        continue-on-error: true
        env:
          COPILOT_CLI_TOKEN: ${{ secrets.COPILOT_CLI_TOKEN }}
          AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
        run: waza run --executor copilot-sdk --filter "tasks/lifecycle/"

      - name: Upload E2E results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: e2e-results-${{ github.run_id }}
          path: cli/azd/test/eval/reports/
          retention-days: 30

      # `azd down` only works from inside the azd project directory, which is
      # not known here, so look the resource groups up by the azd-env-name tag
      # that azd-compatible templates apply. AZURE_ENV_NAME comes from the
      # job-level env. Errors are left visible so leaked resources get noticed.
      # NOTE(review): unlike `azd down --purge`, this does not purge
      # soft-deleted resources (e.g. Key Vault) - add a purge step if the
      # lifecycle tasks create any.
      - name: Cleanup Azure resources
        if: always()
        run: |
          groups=$(az group list --tag "azd-env-name=${AZURE_ENV_NAME}" --query "[].name" --output tsv)
          if [ -z "$groups" ]; then
            echo "No resource groups tagged azd-env-name=${AZURE_ENV_NAME}; nothing to delete"
            exit 0
          fi
          failed=0
          while IFS= read -r group; do
            echo "Deleting resource group ${group}"
            # --no-wait: start the deletion without holding the runner until it finishes.
            az group delete --name "$group" --yes --no-wait || failed=1
          done <<< "$groups"
          exit "$failed"
62 changes: 62 additions & 0 deletions .github/workflows/eval-report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Weekly aggregation. Downloads the artifacts of the most recent successful
# Waza and E2E evaluation runs and re-uploads them as a single bundle.
name: "Eval: Weekly Report"

on:
  schedule:
    # 8am UTC Monday, two hours after the E2E workflow is scheduled to start
    - cron: "0 8 * * 1"
  workflow_dispatch:

permissions:
  contents: read
  # Needed to list workflow runs and download their artifacts.
  actions: read

jobs:
  generate-report:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Install eval dependencies
        working-directory: cli/azd/test/eval
        run: npm ci

      # The API is asked for the newest successful run directly
      # (status=success&per_page=1); filtering client-side would only look at
      # the first page of runs. stderr is left visible so API or download
      # failures can be diagnosed from the log.
      - name: Download recent Waza artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          WORKFLOW_FILE: eval-waza.yml
          DEST_DIR: cli/azd/test/eval/reports/waza
        run: |
          mkdir -p "$DEST_DIR"
          RUN_ID=$(gh api "repos/${GITHUB_REPOSITORY}/actions/workflows/${WORKFLOW_FILE}/runs?status=success&per_page=1" \
            --jq '.workflow_runs[0].id // empty')
          if [ -n "$RUN_ID" ]; then
            # Best effort: a missing or expired artifact must not fail the report.
            gh run download "$RUN_ID" -D "$DEST_DIR" || echo "No waza artifacts found"
          else
            echo "No successful waza runs found, skipping"
          fi

      # Same lookup as above, for the E2E lifecycle workflow.
      - name: Download recent E2E artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          WORKFLOW_FILE: eval-e2e.yml
          DEST_DIR: cli/azd/test/eval/reports/e2e
        run: |
          mkdir -p "$DEST_DIR"
          RUN_ID=$(gh api "repos/${GITHUB_REPOSITORY}/actions/workflows/${WORKFLOW_FILE}/runs?status=success&per_page=1" \
            --jq '.workflow_runs[0].id // empty')
          if [ -n "$RUN_ID" ]; then
            # Best effort: a missing or expired artifact must not fail the report.
            gh run download "$RUN_ID" -D "$DEST_DIR" || echo "No e2e artifacts found"
          else
            echo "No successful e2e runs found, skipping"
          fi

      # TODO: Implement report generation script (scripts/generate-report.ts)
      # that diffs Waza result JSON files and produces regression-issues.json.
      # Once implemented, add a step to create GitHub issues from regressions.

      - name: Upload aggregated artifacts
        uses: actions/upload-artifact@v4
        with:
          name: eval-weekly-report-${{ github.run_id }}
          path: cli/azd/test/eval/reports/
          retention-days: 90
51 changes: 51 additions & 0 deletions .github/workflows/eval-unit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Pull-request gate for the eval harness: builds azd, runs the eval unit
# tests, validates the Waza task definitions, and uploads the reports.
name: "Eval: Unit Tests"

on:
  pull_request:
    # Only run when the eval harness or the MCP/command code listed here changes.
    paths:
      - "cli/azd/test/eval/**"
      - "cli/azd/internal/mcp/**"
      - "cli/azd/cmd/mcp.go"
      - "cli/azd/cmd/root.go"

permissions:
  contents: read

jobs:
  unit-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Go toolchain version is taken from the azd module file.
      - uses: actions/setup-go@v5
        with:
          go-version-file: "cli/azd/go.mod"

      - uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Build azd
        run: go build -o ./azd .
        working-directory: cli/azd

      - name: Install eval dependencies
        run: npm ci
        working-directory: cli/azd/test/eval

      - name: Run unit tests
        run: npm run test:unit -- --ci
        working-directory: cli/azd/test/eval

      # Advisory only: continue-on-error means a validation failure does not
      # fail the job.
      # NOTE(review): this job never installs the Waza CLI (the scheduled eval
      # workflows run `npm install -g waza`) - confirm `waza:validate` can
      # resolve it from the local npm dependencies.
      - name: Validate Waza task YAML
        continue-on-error: true
        run: npm run waza:validate
        working-directory: cli/azd/test/eval

      # always(): publish reports even when an earlier step failed.
      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-unit-results
          path: cli/azd/test/eval/reports/
          retention-days: 30
53 changes: 53 additions & 0 deletions .github/workflows/eval-waza.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Scheduled Waza evaluation: builds azd from source, runs the Waza tasks with
# the copilot-sdk executor, and uploads the resulting reports.
name: "Eval: Waza Runs"

on:
  schedule:
    # 5am, 12pm, 8pm UTC, Tuesday through Saturday
    - cron: "0 5,12,20 * * 2-6"
  workflow_dispatch:

permissions:
  contents: read

jobs:
  waza-run:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Go toolchain version is taken from the azd module file.
      - uses: actions/setup-go@v5
        with:
          go-version-file: "cli/azd/go.mod"

      - uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Build azd
        run: go build -o ./azd .
        working-directory: cli/azd

      # Put the directory holding the freshly built binary on PATH for later steps.
      - name: Add azd to PATH
        run: echo "${{ github.workspace }}/cli/azd" >> "$GITHUB_PATH"

      - name: Install Waza CLI
        run: npm install -g waza

      - name: Install eval dependencies
        run: npm ci
        working-directory: cli/azd/test/eval

      # continue-on-error keeps a failing evaluation from failing the job.
      - name: Run Waza evaluations
        continue-on-error: true
        env:
          COPILOT_CLI_TOKEN: ${{ secrets.COPILOT_CLI_TOKEN }}
        run: waza run --executor copilot-sdk
        working-directory: cli/azd/test/eval

      # always(): publish reports even when an earlier step failed.
      - name: Upload Waza results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: waza-results-${{ github.run_id }}
          path: cli/azd/test/eval/reports/
          retention-days: 30
22 changes: 22 additions & 0 deletions cli/azd/.vscode/cspell.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,28 @@ overrides:
# Per-file spell-check allowlists: each entry adds `words` that are accepted
# only in files matching its `filename` pattern.
- filename: extensions/azure.ai.models/internal/cmd/custom_create.go
words:
- Qwen
# Eval harness documentation.
- filename: test/eval/README.md
words:
- Waza
- waza
- urlopen
# Python graders for the eval harness.
- filename: "test/eval/graders/*.py"
words:
- Waza
- waza
- hdrs
- mysite
- mydb
# Waza task definitions. These entries look like truncated word stems;
# presumably they appear verbatim in the task files - confirm before "fixing".
- filename: "test/eval/tasks/**/*.yaml"
words:
- authenticat
- idempoten
# Human-style input tests. NOTE(review): entries such as `provison` look like
# deliberate misspellings used as test input - confirm before correcting.
- filename: "test/eval/tests/human/*.test.ts"
words:
- compdef
- badcfg
- provison
- notacommand
# Paths excluded from spell checking.
ignorePaths:
- "**/*_test.go"
- "**/mock*.go"
Expand Down
6 changes: 6 additions & 0 deletions cli/azd/test/eval/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Installed npm packages.
node_modules/
# NOTE(review): presumably compiled/build output - confirm.
dist/
# Report files (JSON, Markdown, JUnit XML) written into reports/.
reports/*.json
reports/*.md
reports/junit.xml
# Never ignore the placeholder file, so reports/ can stay tracked while empty.
!reports/.gitkeep
Loading
Loading