# feat: add functional test framework for agent pipelines (#21)

# Functional test workflow. It runs on pushes to main and on pull requests
# targeting main, but only when files under eval/ or internal/scaffold/
# change. It can also be started manually.
name: Functional Tests

on:
  push:
    branches:
      - main
    paths:
      - 'eval/**'
      - 'internal/scaffold/**'
  pull_request:
    branches:
      - main
    paths:
      - 'eval/**'
      - 'internal/scaffold/**'
  # No inputs are declared; this only enables manual runs.
  workflow_dispatch:
# Token scopes granted to every job in this workflow.
permissions:
  # Read-only access to repository contents (needed for checkout).
  contents: read
  # Allows the job to request an OIDC token; the "Authenticate to GCP" step
  # passes a workload identity provider, which relies on that token.
  id-token: write
# At most one active run per git ref: starting a new run for the same ref
# cancels the one already in progress.
concurrency:
  group: "functional-tests-${{ github.ref }}"
  cancel-in-progress: true
jobs:
  # Single job: sets up Go, Python, uv, yq, the OpenShell CLI and rootless
  # Podman, builds fullsend, authenticates to GCP, runs
  # `make functional-tests`, then scrubs and uploads the results.
  functional-tests:
    runs-on: ubuntu-latest
    environment: functional-tests
    timeout-minutes: 45
    steps:
      - uses: actions/checkout@v6.0.2
        with:
          # The agent-eval-harness install step below installs from a
          # submodule path, so submodules must be checked out too.
          submodules: true

      - uses: actions/setup-go@v5
        with:
          # Take the Go version from go.mod instead of pinning it here.
          go-version-file: go.mod

      - uses: actions/setup-python@v6.2.0
        with:
          # Quoted so YAML keeps it a string rather than a float.
          python-version: "3.12"

      - name: Install uv
        uses: astral-sh/setup-uv@v7.6.0

      - name: Install agent-eval-harness
        # Installs from the git submodule checked out above (submodules: true)
        run: uv pip install --system -e 'eval/.agent-eval-harness[anthropic]'

      - name: Install yq
        # Pinned release binary; -f makes curl fail on HTTP errors instead of
        # saving an error page as the executable.
        run: |
          curl -sSfL "https://github.com/mikefarah/yq/releases/download/v4.47.1/yq_linux_amd64" -o /usr/local/bin/yq
          chmod +x /usr/local/bin/yq

      - name: Configure git identity
        run: |
          git config --global user.name "fullsend-eval[bot]"
          git config --global user.email "fullsend-eval[bot]@users.noreply.github.com"

      - name: Build fullsend
        run: make go-build

      - name: Add bin to PATH
        # Uses the runner-provided GITHUB_WORKSPACE variable rather than
        # inlining a ${{ }} expression into the script text. Lines appended
        # to GITHUB_PATH take effect for the steps that follow.
        run: echo "$GITHUB_WORKSPACE/bin" >> "$GITHUB_PATH"

      - name: Install OpenShell CLI
        run: .github/scripts/install-openshell.sh

      - name: Configure OpenShell gateway
        # Writes a one-line gateway.env. Paths are quoted so a HOME containing
        # whitespace cannot split the arguments, and the heredoc delimiter is
        # quoted so the body is written verbatim.
        run: |
          mkdir -p "$HOME/.config/openshell/"
          cat > "$HOME/.config/openshell/gateway.env" << 'EOF'
          OPENSHELL_BIND_ADDRESS=0.0.0.0
          EOF

      - name: Install Podman
        run: |
          sudo apt-get update
          sudo apt-get install -y podman

      - name: Configure rootless Podman
        # Adds a subordinate UID/GID range for the current user only when
        # /etc/subuid has no entry for it, then migrates Podman's state and
        # starts the per-user Podman socket.
        run: |
          whoami_user="$(whoami)"
          grep -q "^${whoami_user}:" /etc/subuid || sudo usermod --add-subuids 100000-165535 --add-subgids 100000-165535 "${whoami_user}"
          podman system migrate
          systemctl --user start podman.socket

      - name: Install validation dependencies
        run: pip install --quiet "jsonschema>=4.18.0"

      - name: Authenticate to GCP
        uses: google-github-actions/auth@v2
        with:
          workload_identity_provider: ${{ secrets.E2E_GCP_WIF_PROVIDER }}
          service_account: ${{ secrets.E2E_GCP_SERVICE_ACCOUNT }}

      - name: Prepare sandbox credentials
        # Saves the current GOOGLE_APPLICATION_CREDENTIALS value under a
        # separate name for later steps before running the preparation script.
        run: |
          echo "HOST_GOOGLE_APPLICATION_CREDENTIALS=$GOOGLE_APPLICATION_CREDENTIALS" >> "$GITHUB_ENV"
          bash internal/scaffold/fullsend-repo/scripts/prepare-sandbox-credentials.sh

      - name: Run functional tests
        env:
          EVAL_ORG: ${{ vars.EVAL_ORG }}
          GH_TOKEN: ${{ secrets.EVAL_GH_TOKEN }}
          ANTHROPIC_VERTEX_PROJECT_ID: ${{ vars.EVALS_VERTEX_PROJECT_ID }}
          GOOGLE_CLOUD_PROJECT: ${{ secrets.E2E_GCP_PROJECT_ID }}
          CLOUD_ML_REGION: ${{ vars.EVALS_GCP_REGION }}
          # Credentials path recorded by the "Prepare sandbox credentials" step.
          EVALS_HOST_CREDENTIALS: ${{ env.HOST_GOOGLE_APPLICATION_CREDENTIALS }}
        run: make functional-tests

      - name: Scrub secrets from eval results
        # always(): runs even if an earlier step failed. Deletes .eval-env,
        # fixture-state.json and *.jsonl files from both result directories.
        # find errors (e.g. a directory that does not exist) are ignored so
        # this step never fails the job.
        if: always()
        run: |
          for dir in eval/runs/ /tmp/agent-eval/; do
            find "$dir" -name '.eval-env' -delete 2>/dev/null || true
            find "$dir" -name 'fixture-state.json' -delete 2>/dev/null || true
            find "$dir" -name '*.jsonl' -delete 2>/dev/null || true
          done

      - name: Upload eval results
        # always(): results are uploaded for failed runs too. The negated
        # patterns exclude the same file names the scrub step deletes, as a
        # second safeguard.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-results
          path: |
            eval/runs/
            !eval/runs/**/.eval-env
            !eval/runs/**/fixture-state.json
            !eval/runs/**/*.jsonl
          retention-days: 30