# Add test data generator for evaluation (#116)
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# CI pipeline: per-package test jobs, generated-data freshness checks,
# a retrieval-quality benchmark gate, and a cross-package integration
# smoke test.
name: CI

# Runs on pushes to main and on pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

# One active run per workflow and ref; a newer run cancels the one that is
# still in progress for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Per-package jobs: each one calls the local reusable workflow
  # _test-package.yml and passes the package directory to test.
  types:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/types

  mcp-client:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/mcp-client

  mcp-server:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/mcp-server

  agentic-mcp-server:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/agentic-mcp-server

  sdk:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/sdk
      # NOTE(review): presumably sibling packages that must be installed
      # before the sdk tests run; confirm against the inputs declared in
      # _test-package.yml.
      extra-deps: packages/types packages/mcp-client

  # Lints and tests the repository-level scripts, then fails when checked-in
  # generated data no longer matches what the generator / SDK models produce.
  docs-freshness:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      # pytest is installed here as well; the next step relies on it.
      - name: Lint scripts and tests
        run: |
          pip install ruff pytest
          ruff check scripts/ tests/
      - name: Run generator tests
        run: python -m pytest tests/ -v --tb=short
      # Regenerate in place, then compare against the committed copy below.
      - name: Regenerate sdk_docs.json
        run: python scripts/generate_sdk_docs.py
      # git diff --exit-code exits non-zero when the regenerated file differs
      # from the committed one, which is reported as a workflow error.
      - name: Check for sdk_docs drift
        run: |
          if ! git diff --exit-code packages/mcp-server/src/agent_builder_mcp/knowledge/data/sdk_docs.json; then
            echo "::error::sdk_docs.json is stale. Run 'python scripts/generate_sdk_docs.py' and commit the result."
            exit 1
          fi
      # The mcp-server knowledge data must be exact copies of the SDK's
      # botocore service models; each error message prints the cp command
      # that restores the copy.
      - name: Check botocore model sync
        run: |
          diff packages/sdk/src/agent_builder_sdk/botocore_models/transformagenticservice/2018-05-10/service-2.json \
            packages/mcp-server/src/agent_builder_mcp/knowledge/data/agentic_api.json \
            || { echo "::error::agentic_api.json is out of sync. Run: cp packages/sdk/src/agent_builder_sdk/botocore_models/transformagenticservice/2018-05-10/service-2.json packages/mcp-server/src/agent_builder_mcp/knowledge/data/agentic_api.json"; exit 1; }
          diff packages/sdk/src/agent_builder_sdk/botocore_models/atxagentregistryexternal/2022-07-26/service-2.json \
            packages/mcp-server/src/agent_builder_mcp/knowledge/data/registry_api.json \
            || { echo "::error::registry_api.json is out of sync. Run: cp packages/sdk/src/agent_builder_sdk/botocore_models/atxagentregistryexternal/2022-07-26/service-2.json packages/mcp-server/src/agent_builder_mcp/knowledge/data/registry_api.json"; exit 1; }

  # Retrieval benchmark gate; starts only after the mcp-server job succeeds.
  bench:
    runs-on: ubuntu-latest
    needs: [mcp-server]
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install mcp-server
        run: pip install -e packages/mcp-server
      - name: Run retrieval benchmark
        run: |
          python packages/mcp-server/bench/eval_retrieval.py \
            --output packages/mcp-server/bench/results/ci.json
      # Reads overall.recall and overall.mrr from the benchmark output and
      # fails the job when recall < 0.70 or mrr < 0.60.
      - name: Check quality threshold
        run: |
          python -c "
          import json, sys
          from pathlib import Path
          r = json.loads(Path('packages/mcp-server/bench/results/ci.json').read_text())
          recall = r['overall']['recall']
          mrr = r['overall']['mrr']
          print(f'Recall@5={recall:.3f} MRR={mrr:.3f}')
          if recall < 0.70:
              print(f'::error::Recall@5 dropped below 0.70 threshold ({recall:.3f})')
              sys.exit(1)
          if mrr < 0.60:
              print(f'::error::MRR dropped below 0.60 threshold ({mrr:.3f})')
              sys.exit(1)
          print('PASS: Both thresholds met')
          "

  # Smoke test across all packages; waits for every per-package job to pass.
  integration:
    runs-on: ubuntu-latest
    needs: [types, mcp-client, mcp-server, agentic-mcp-server, sdk]
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      # Editable installs of every package into one environment.
      - name: Install all packages
        run: |
          pip install -e packages/types
          pip install -e packages/mcp-client
          pip install -e packages/mcp-server
          pip install -e packages/agentic-mcp-server
          pip install -e packages/sdk
      # Each import runs in its own interpreter so a failure names the package.
      - name: Verify cross-package imports
        run: |
          python -c "import agent_builder_types; print('types OK')"
          python -c "import agent_builder_mcp_client; print('mcp-client OK')"
          python -c "import agent_builder_mcp; print('mcp-server OK')"
          python -c "import agent_builder_agentic_mcp; print('agentic-mcp OK')"
          python -c "import agent_builder_sdk; print('sdk OK')"
      # Console scripts and the module entrypoint must at least print help.
      - name: Verify CLI entrypoints
        run: |
          agent-builder-mcp --help
          agent-builder-agentic-mcp --help
          python -m agent_builder_agentic_mcp --help
          agent-builder-sdk --help