# Add refined evaluation prompts and LiteLLM caching #29
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# CI workflow: builds the agent image, runs the A2A conformance tests against
# a live container, and (on push events only) publishes the image to the
# container registry named in REGISTRY.
name: Test and Publish Docker Image

on:
  push:
    branches: [main, master]
    tags: ['v*']
  pull_request:
    branches: [main, master]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # Build the image locally, start it, wait for the agent card endpoint to
  # answer, then run the pytest conformance suite against it.
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Set up Python
        run: uv python install 3.11

      - name: Install dependencies
        run: uv sync --extra test

      - name: Build Docker image for testing
        run: docker build -t bracegreen-test .

      # Resolves the data repository URL and exposes it as the step output
      # `url`. Secrets are handed to the script through `env` instead of
      # being expanded into the script text, so their contents are never
      # parsed as shell code.
      - name: Prepare authenticated data repo URL
        id: data_repo
        env:
          DATA_REPO_URL: ${{ secrets.DATA_REPO_URL }}
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          if [ -n "$DATA_REPO_URL" ]; then
            # If repo URL contains a token, use it as-is
            # Otherwise, if we have a token, inject it for private repos
            if [[ "$DATA_REPO_URL" == *"@"* ]]; then
              echo "url=$DATA_REPO_URL" >> "$GITHUB_OUTPUT"
            elif [ -n "$REPO_TOKEN" ]; then
              # Inject token into HTTPS URL
              AUTH_URL=$(echo "$DATA_REPO_URL" | sed "s|https://|https://${REPO_TOKEN}@|")
              echo "url=$AUTH_URL" >> "$GITHUB_OUTPUT"
            else
              echo "url=$DATA_REPO_URL" >> "$GITHUB_OUTPUT"
            fi
          else
            # Default public repo
            echo "url=https://github.com/LSX-UniWue/brace-ctf-data.git" >> "$GITHUB_OUTPUT"
          fi

      # Values are placed in the step environment and forwarded with
      # `-e NAME` (no value), which makes docker copy the variable from the
      # calling environment. This keeps secrets out of the script text and
      # off the docker command line.
      - name: Start green agent container
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
          DATA_REPO_URL: ${{ steps.data_repo.outputs.url }}
          DATA_BRANCH: ${{ secrets.DATA_BRANCH || 'master' }}
        run: |
          echo "Starting green agent container..."
          docker run -d --name green-agent -p 9001:9001 \
            -e OPENAI_API_KEY \
            -e OPENAI_BASE_URL \
            -e DATA_REPO_URL \
            -e DATA_BRANCH \
            bracegreen-test
          echo "Container started. Checking status:"
          docker ps -a | grep green-agent
          echo ""
          echo "Initial container logs:"
          sleep 3
          docker logs green-agent || true

      # Polls the agent card endpoint up to 30 times with a 2 second pause
      # between attempts, and aborts early if the container has exited.
      - name: Wait for agent to be ready
        run: |
          echo "Waiting for agent to be ready (checking /.well-known/agent-card.json)..."
          MAX_ATTEMPTS=30
          ATTEMPT=0
          while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
            ATTEMPT=$((ATTEMPT + 1))
            # Check if container is still running
            if ! docker ps | grep -q green-agent; then
              echo "✗ Container stopped unexpectedly!"
              echo "Container logs:"
              docker logs green-agent
              exit 1
            fi
            # Try to connect to agent card endpoint
            if curl -f -s http://localhost:9001/.well-known/agent-card.json > /dev/null 2>&1; then
              echo "✓ Agent is ready after $ATTEMPT attempts!"
              echo ""
              echo "Agent card response:"
              curl -s http://localhost:9001/.well-known/agent-card.json | head -30
              exit 0
            fi
            echo "Attempt $ATTEMPT/$MAX_ATTEMPTS: Agent not ready yet..."
            # Every fifth attempt, surface recent logs to aid debugging
            if [ $((ATTEMPT % 5)) -eq 0 ]; then
              echo "Recent container logs:"
              docker logs --tail 20 green-agent
            fi
            sleep 2
          done
          echo "✗ Timeout: Agent did not become ready after $MAX_ATTEMPTS attempts"
          echo ""
          echo "Container status:"
          docker ps -a | grep green-agent
          echo ""
          echo "Full container logs:"
          docker logs green-agent
          exit 1

      - name: Run A2A conformance tests
        run: |
          echo "Running A2A conformance tests..."
          echo "Agent URL: http://localhost:9001"
          uv run pytest tests/test_agent.py -v --tb=short --agent-url http://localhost:9001

      # Best effort: the container may not exist if an earlier step (for
      # example the image build) is what failed.
      - name: Show agent logs on failure
        if: failure()
        run: docker logs green-agent || true

      # Best effort cleanup so this always() step does not fail itself when
      # the container was never created.
      - name: Stop containers
        if: always()
        run: |
          docker stop green-agent || true
          docker rm -f green-agent || true

  # Builds and pushes the image; runs only for push events and only after
  # the test job has succeeded.
  build-and-publish:
    needs: test
    runs-on: ubuntu-latest
    if: github.event_name == 'push'
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # The pull_request guard is always true here because of the job-level
      # `if`; it is kept as a safeguard should that condition be relaxed.
      - name: Log in to Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}