Skip to content

add test data generator for evaluation #116

add test data generator for evaluation

add test data generator for evaluation #116

Workflow file for this run

# CI workflow: triggered by pushes to main and by pull requests targeting main.
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Least privilege: the workflow token is limited to reading repository contents.
permissions:
  contents: read

# At most one active run per workflow + ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # --- Per-package jobs -----------------------------------------------------
  # Each job calls the shared reusable workflow and differs only in the
  # inputs it passes.
  types:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/types

  mcp-client:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/mcp-client

  mcp-server:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/mcp-server

  agentic-mcp-server:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/agentic-mcp-server

  sdk:
    uses: ./.github/workflows/_test-package.yml
    with:
      package-path: packages/sdk
      # NOTE(review): presumably sibling packages the reusable workflow
      # installs before testing sdk; confirm against _test-package.yml.
      extra-deps: packages/types packages/mcp-client

  # --- Generated-artifact checks --------------------------------------------
  # Lints and tests the generator scripts, then fails if the committed
  # sdk_docs.json or the copied botocore models differ from their sources.
  docs-freshness:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          # This job never pushes or re-fetches, so do not keep the token
          # around after checkout.
          persist-credentials: false
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Lint scripts and tests
        # pytest is installed here too; the next step relies on it.
        run: |
          pip install ruff pytest
          ruff check scripts/ tests/
      - name: Run generator tests
        run: python -m pytest tests/ -v --tb=short
      - name: Regenerate sdk_docs.json
        run: python scripts/generate_sdk_docs.py
      - name: Check for sdk_docs drift
        # The previous step rewrote the file in place; any diff against the
        # committed version means the committed copy is out of date.
        run: |
          if ! git diff --exit-code packages/mcp-server/src/agent_builder_mcp/knowledge/data/sdk_docs.json; then
            echo "::error::sdk_docs.json is stale. Run 'python scripts/generate_sdk_docs.py' and commit the result."
            exit 1
          fi
      - name: Check botocore model sync
        # Each knowledge-data JSON must be identical to the service model in
        # the sdk package that the error message says to copy it from.
        run: |
          diff packages/sdk/src/agent_builder_sdk/botocore_models/transformagenticservice/2018-05-10/service-2.json \
            packages/mcp-server/src/agent_builder_mcp/knowledge/data/agentic_api.json \
            || { echo "::error::agentic_api.json is out of sync. Run: cp packages/sdk/src/agent_builder_sdk/botocore_models/transformagenticservice/2018-05-10/service-2.json packages/mcp-server/src/agent_builder_mcp/knowledge/data/agentic_api.json"; exit 1; }
          diff packages/sdk/src/agent_builder_sdk/botocore_models/atxagentregistryexternal/2022-07-26/service-2.json \
            packages/mcp-server/src/agent_builder_mcp/knowledge/data/registry_api.json \
            || { echo "::error::registry_api.json is out of sync. Run: cp packages/sdk/src/agent_builder_sdk/botocore_models/atxagentregistryexternal/2022-07-26/service-2.json packages/mcp-server/src/agent_builder_mcp/knowledge/data/registry_api.json"; exit 1; }

  # --- Retrieval quality gate -----------------------------------------------
  # Runs the retrieval benchmark once the mcp-server job has passed and fails
  # the build when recall or MRR falls below the thresholds checked below.
  bench:
    runs-on: ubuntu-latest
    needs: [mcp-server]
    steps:
      - uses: actions/checkout@v6
        with:
          # This job never pushes or re-fetches, so do not keep the token
          # around after checkout.
          persist-credentials: false
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install mcp-server
        run: pip install -e packages/mcp-server
      - name: Run retrieval benchmark
        run: |
          # Guard for a checkout that lacks the results directory; harmless
          # when the directory already exists.
          mkdir -p packages/mcp-server/bench/results
          python packages/mcp-server/bench/eval_retrieval.py \
            --output packages/mcp-server/bench/results/ci.json
      - name: Check quality threshold
        # The script is fed through a quoted heredoc so the shell performs no
        # expansion or quote processing on the Python source.
        run: |
          python - <<'PY'
          import json, sys
          from pathlib import Path
          r = json.loads(Path('packages/mcp-server/bench/results/ci.json').read_text())
          recall = r['overall']['recall']
          mrr = r['overall']['mrr']
          print(f'Recall@5={recall:.3f} MRR={mrr:.3f}')
          if recall < 0.70:
              print(f'::error::Recall@5 dropped below 0.70 threshold ({recall:.3f})')
              sys.exit(1)
          if mrr < 0.60:
              print(f'::error::MRR dropped below 0.60 threshold ({mrr:.3f})')
              sys.exit(1)
          print('PASS: Both thresholds met')
          PY

  # --- Cross-package smoke test ---------------------------------------------
  # After every per-package job passes, installs all packages into one
  # environment and checks that each imports and each CLI answers --help.
  integration:
    runs-on: ubuntu-latest
    needs: [types, mcp-client, mcp-server, agentic-mcp-server, sdk]
    steps:
      - uses: actions/checkout@v6
        with:
          # This job never pushes or re-fetches, so do not keep the token
          # around after checkout.
          persist-credentials: false
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install all packages
        run: |
          pip install -e packages/types
          pip install -e packages/mcp-client
          pip install -e packages/mcp-server
          pip install -e packages/agentic-mcp-server
          pip install -e packages/sdk
      - name: Verify cross-package imports
        run: |
          python -c "import agent_builder_types; print('types OK')"
          python -c "import agent_builder_mcp_client; print('mcp-client OK')"
          python -c "import agent_builder_mcp; print('mcp-server OK')"
          python -c "import agent_builder_agentic_mcp; print('agentic-mcp OK')"
          python -c "import agent_builder_sdk; print('sdk OK')"
      - name: Verify CLI entrypoints
        run: |
          agent-builder-mcp --help
          agent-builder-agentic-mcp --help
          python -m agent_builder_agentic_mcp --help
          agent-builder-sdk --help