Skip to content

feat: add visor-agent-dev skill for AI coding assistants (#562) #2119

feat: add visor-agent-dev skill for AI coding assistants (#562)

feat: add visor-agent-dev skill for AI coding assistants (#562) #2119

name: 🤖 AI Battle Test

Check failure on line 1 in .github/workflows/ai-battle-test.yml

View workflow run for this annotation

GitHub Actions / .github/workflows/ai-battle-test.yml

Invalid workflow file

(Line: 186, Col: 13): Unrecognized named-value: 'secrets'. Located at position 44 within expression: matrix.ai_provider.api_key_secret != '' && secrets[matrix.ai_provider.api_key_secret] != '', (Line: 278, Col: 13): Unrecognized named-value: 'secrets'. Located at position 47 within expression: matrix.ai_provider.name == 'Google Gemini' && secrets.GOOGLE_API_KEY != ''
# Workflow triggers:
# - pull_request (opened/synchronize/edited) for automatic battle-testing of PRs
# - issue_comment so '/ai-test' comments on PRs can re-trigger the suite
# - workflow_dispatch for manual runs with a selectable scenario
on:
  pull_request:
    types: [opened, synchronize, edited]
  issue_comment:
    types: [created]
  workflow_dispatch:
    inputs:
      test_scenario:
        description: 'Test scenario to run'
        required: false
        default: 'all'
        type: choice
        options:
          - 'all'
          - 'incremental_analysis'
          - 'xml_formatting'
          - 'comment_management'
          - 'provider_types'

# Cancel previous in-progress runs of this workflow on the same ref (branch/PR)
concurrency:
  group: ai-battle-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Write access is needed to post review comments, issue comments, and checks
permissions:
  contents: read
  pull-requests: write
  issues: write
  checks: write
jobs:
  # Runs the full battle-test suite once per AI provider in the matrix.
  ai-battle-test:
    name: AI Battle Test — ${{ matrix.ai_provider.name }} — ${{ github.event_name }}
    runs-on: ubuntu-latest
    # Run for PRs and manual dispatches; for issue comments, only on PR
    # comments containing the '/ai-test' trigger phrase.
    if: >
      github.event_name == 'pull_request' ||
      github.event_name == 'workflow_dispatch' ||
      (
        github.event_name == 'issue_comment' &&
        github.event.issue.pull_request &&
        contains(github.event.comment.body, '/ai-test')
      )
    strategy:
      fail-fast: false
      matrix:
        ai_provider:
          - name: 'Google Gemini'
            api_key_secret: 'GOOGLE_API_KEY'
            model: 'gemini-2.0-flash-exp'
            env_var: 'GOOGLE_API_KEY'
          - name: 'OpenAI GPT-4'
            api_key_secret: 'OPENAI_API_KEY'
            model: 'gpt-4o-mini'
            env_var: 'OPENAI_API_KEY'
          - name: 'Anthropic Claude'
            api_key_secret: 'ANTHROPIC_API_KEY'
            model: 'claude-3-haiku-20240307'
            env_var: 'ANTHROPIC_API_KEY'
          - name: 'Mock AI (Fallback)'
            api_key_secret: ''
            model: 'mock'
            env_var: ''
    steps:
      - name: 🛒 Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          # For PR events, checkout the PR head
          ref: ${{ github.event_name == 'pull_request' && github.head_ref || github.ref }}
      - name: 🔧 Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
      - name: 📦 Install dependencies
        run: npm ci
      - name: 🏗️ Build Visor
        run: npm run build
      - name: 🧪 Test 1 - XML Formatted PR Analysis
        id: xml_test
        uses: ./
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          auto-review: true
          add-reactions: 'false'
        env:
          # Multiple API keys for fallback - similar to probe.yml approach
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }} # Aider expects GEMINI_API_KEY
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # Custom API URLs for quota management
          GOOGLE_API_URL: ${{ secrets.GOOGLE_API_URL }}
          ANTHROPIC_API_URL: ${{ secrets.ANTHROPIC_API_URL }}
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_API_URL }}
          OPENAI_API_URL: ${{ secrets.OPENAI_API_URL }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          # Model and provider configuration
          MODEL_NAME: ${{ matrix.ai_provider.model }}
          FORCE_PROVIDER: ${{ secrets.FORCE_PROVIDER }}
          # GitHub context
          GITHUB_CONTEXT: ${{ toJson(github) }}
        continue-on-error: true
      - name: 📊 Validate XML Test Results
        if: steps.xml_test.outcome == 'success'
        run: |
          echo "✅ XML Formatted Analysis completed successfully"
          echo "Provider: ${{ matrix.ai_provider.name }}"
          echo "Total Issues: ${{ steps.xml_test.outputs.total-issues }}"
          echo "Critical Issues: ${{ steps.xml_test.outputs.critical-issues }}"
          echo "PR Action: ${{ steps.xml_test.outputs.pr-action }}"
          echo "Incremental Analysis: ${{ steps.xml_test.outputs.incremental-analysis }}"
          # Validate presence of total-issues for non-mock providers
          if [ -z "${{ steps.xml_test.outputs.total-issues }}" ] && [ "${{ matrix.ai_provider.name }}" != "Mock AI (Fallback)" ]; then
            echo "❌ Missing total-issues output"
            exit 1
          fi
      - name: 🧪 Test 2 - Incremental Analysis (Simulate New Commit)
        if: github.event_name == 'pull_request' && github.event.action == 'synchronize'
        id: incremental_test
        uses: ./
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          auto-review: true
          add-reactions: 'false'
        env:
          # Multiple API keys for fallback
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # Custom API URLs
          GOOGLE_API_URL: ${{ secrets.GOOGLE_API_URL }}
          ANTHROPIC_API_URL: ${{ secrets.ANTHROPIC_API_URL }}
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_API_URL }}
          OPENAI_API_URL: ${{ secrets.OPENAI_API_URL }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          # Model configuration
          MODEL_NAME: ${{ matrix.ai_provider.model }}
          FORCE_PROVIDER: ${{ secrets.FORCE_PROVIDER }}
          GITHUB_CONTEXT: ${{ toJson(github) }}
        continue-on-error: true
      - name: 📊 Validate Incremental Test Results
        if: steps.incremental_test.outcome == 'success'
        run: |
          echo "✅ Incremental Analysis completed successfully"
          echo "Incremental Analysis Flag: ${{ steps.incremental_test.outputs.incremental-analysis }}"
          # For synchronize events, incremental-analysis should be true
          if [ "${{ github.event.action }}" == "synchronize" ] && [ "${{ steps.incremental_test.outputs.incremental-analysis }}" != "true" ]; then
            echo "❌ Expected incremental analysis to be true for synchronize event"
            exit 1
          fi
      - name: 🧪 Test 3 - Comment Management (Check for Updates)
        if: github.event_name == 'pull_request' && steps.xml_test.outcome == 'success'
        run: |
          echo "🔍 Testing comment management..."
          # Use GitHub CLI to check if comments were updated rather than duplicated
          COMMENT_COUNT=$(gh pr view ${{ github.event.number }} --json comments --jq '.comments | map(select(.body | contains("Visor Code Review"))) | length')
          echo "Found $COMMENT_COUNT Visor review comments"
          # Should have at most 1 review comment per PR (smart updating)
          if [ "$COMMENT_COUNT" -gt 1 ]; then
            echo "⚠️ Warning: Found $COMMENT_COUNT review comments. Smart updating may not be working properly."
          else
            echo "✅ Comment management working correctly"
          fi
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: 🧪 Test 4 - Different Check Types
        # FIX: the `secrets` context is not available in step-level `if`
        # expressions (GitHub rejects the workflow with
        # "Unrecognized named-value: 'secrets'"). The `if` now checks only
        # the matrix field; the secret-presence check is done in the shell
        # via PROVIDER_API_KEY, which is resolved in `env` where `secrets`
        # IS available.
        if: matrix.ai_provider.api_key_secret != ''
        continue-on-error: true # Allow test to continue if quota issues occur
        run: |
          # Skip gracefully when the provider's secret is not configured
          if [ -z "$PROVIDER_API_KEY" ]; then
            echo "⏭️ Skipping: secret '${{ matrix.ai_provider.api_key_secret }}' is not configured"
            exit 0
          fi
          chmod +x ./dist/index.js
          echo "🔍 Testing different check provider types..."
          # Test all checks (which includes AI-powered review)
          echo "Testing all checks with AI provider..."
          ./dist/index.js --cli --check all --output json > ai_result.json
          # Validate AI result
          if ! jq -e '.summary.overallScore' ai_result.json > /dev/null; then
            echo "❌ AI provider test failed"
            exit 1
          fi
          echo "✅ AI provider test passed"
          echo "AI Result:"
          jq '.summary' ai_result.json
        env:
          # Dynamic lookup of this provider's secret; empty when unset
          PROVIDER_API_KEY: ${{ matrix.ai_provider.api_key_secret != '' && secrets[matrix.ai_provider.api_key_secret] || '' }}
          # Set the appropriate API key based on provider
          GOOGLE_API_KEY: ${{ matrix.ai_provider.env_var == 'GOOGLE_API_KEY' && secrets.GOOGLE_API_KEY || '' }}
          GEMINI_API_KEY: ${{ matrix.ai_provider.env_var == 'GOOGLE_API_KEY' && secrets.GOOGLE_API_KEY || '' }}
          OPENAI_API_KEY: ${{ matrix.ai_provider.env_var == 'OPENAI_API_KEY' && secrets.OPENAI_API_KEY || '' }}
          ANTHROPIC_API_KEY: ${{ matrix.ai_provider.env_var == 'ANTHROPIC_API_KEY' && secrets.ANTHROPIC_API_KEY || '' }}
          # Custom API URLs
          GOOGLE_API_URL: ${{ secrets.GOOGLE_API_URL }}
          ANTHROPIC_API_URL: ${{ secrets.ANTHROPIC_API_URL }}
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_API_URL }}
          OPENAI_API_URL: ${{ secrets.OPENAI_API_URL }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          # Model configuration
          MODEL_NAME: ${{ matrix.ai_provider.model }}
          FORCE_PROVIDER: ${{ secrets.FORCE_PROVIDER }}
      - name: 🧪 Test 5 - Output Format Validation
        run: |
          chmod +x ./dist/index.js
          echo "🔍 Testing output formats..."
          # Test table output (should not truncate messages)
          echo "Testing table output formatting..."
          ./dist/index.js --cli --check all --output table > table_output.txt
          # Check that table contains expected sections
          if grep -q "Analysis Summary" table_output.txt && grep -q "Issues" table_output.txt; then
            echo "✅ Table output format test passed"
          else
            echo "❌ Table output format test failed"
            cat table_output.txt
            exit 1
          fi
          # Test JSON output
          echo "Testing JSON output..."
          ./dist/index.js --cli --check all --output json > json_output.json
          if jq -e '.summary' json_output.json > /dev/null; then
            echo "✅ JSON output format test passed"
          else
            echo "❌ JSON output format test failed"
            cat json_output.json
            exit 1
          fi
      - name: 🧪 Test 6 - Error Handling and Fallbacks
        run: |
          chmod +x ./dist/index.js
          echo "🔍 Testing error handling..."
          # Test with invalid API key (should gracefully handle)
          echo "Testing invalid API key handling..."
          # Clear all API keys and set an invalid one
          unset GOOGLE_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY
          export GOOGLE_API_KEY="invalid-key-test"
          ./dist/index.js --cli --check ai --output json > error_test.json || {
            echo "✅ Error handling working - invalid API key properly rejected"
          }
          # Test with no API key (should use fallback if available)
          echo "Testing fallback behavior..."
          unset GOOGLE_API_KEY ANTHROPIC_API_KEY OPENAI_API_KEY GEMINI_API_KEY LLM_BASE_URL
          ./dist/index.js --cli --check all --output json > fallback_test.json
          if jq -e '.summary' fallback_test.json > /dev/null; then
            echo "✅ Fallback behavior test passed"
          else
            echo "❌ Fallback behavior test failed"
            exit 1
          fi
      - name: 📈 Performance Benchmark
        # FIX: `secrets` cannot be referenced in a step-level `if`
        # ("Unrecognized named-value: 'secrets'"); the GOOGLE_API_KEY
        # presence check moved into the shell, using the step env below.
        if: matrix.ai_provider.name == 'Google Gemini'
        continue-on-error: true # Allow benchmark to continue if quota issues occur
        run: |
          # Skip gracefully when the Google key is not configured
          if [ -z "$GOOGLE_API_KEY" ]; then
            echo "⏭️ Skipping benchmark: GOOGLE_API_KEY is not configured"
            exit 0
          fi
          chmod +x ./dist/index.js
          echo "🚀 Running performance benchmark..."
          start_time=$(date +%s%3N)
          ./dist/index.js --cli --check all --output json > benchmark_result.json
          end_time=$(date +%s%3N)
          execution_time=$((end_time - start_time))
          echo "Execution time: ${execution_time}ms"
          # Check if execution completed within reasonable time (60 seconds)
          if [ $execution_time -lt 60000 ]; then
            echo "✅ Performance benchmark passed (${execution_time}ms)"
          else
            echo "⚠️ Performance benchmark slow (${execution_time}ms)"
          fi
        env:
          # Multiple API keys with fallback strategy
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # Custom API URLs for better quota management
          GOOGLE_API_URL: ${{ secrets.GOOGLE_API_URL }}
          ANTHROPIC_API_URL: ${{ secrets.ANTHROPIC_API_URL }}
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_API_URL }}
          OPENAI_API_URL: ${{ secrets.OPENAI_API_URL }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          # Model configuration
          MODEL_NAME: ${{ matrix.ai_provider.model }}
          FORCE_PROVIDER: ${{ secrets.FORCE_PROVIDER }}
      - name: 📋 Test Results Summary
        if: always()
        run: |
          echo "## 🧪 AI Battle Test Results Summary"
          echo "**Provider:** ${{ matrix.ai_provider.name }}"
          echo "**Event:** ${{ github.event_name }}${{ github.event.action && format(' ({0})', github.event.action) || '' }}"
          echo "**Model:** ${{ matrix.ai_provider.model }}"
          echo ""
          echo "### Test Results:"
          echo "- XML Formatted Analysis: ${{ steps.xml_test.outcome || 'skipped' }}"
          echo "- Incremental Analysis: ${{ steps.incremental_test.outcome || 'skipped' }}"
          echo "- Comment Management: ${{ github.event_name == 'pull_request' && 'tested' || 'skipped' }}"
          echo "- Provider Types: ${{ matrix.ai_provider.api_key_secret != '' && 'tested' || 'skipped' }}"
          echo "- Output Formats: tested"
          echo "- Error Handling: tested"
          echo "- Performance: ${{ matrix.ai_provider.name == 'Google Gemini' && 'tested' || 'skipped' }}"
          echo ""
          if [ "${{ steps.xml_test.outputs.review-score }}" ]; then
            echo "### Metrics:"
            echo "- Review Score: ${{ steps.xml_test.outputs.review-score }}/100"
            echo "- Total Issues: ${{ steps.xml_test.outputs.total-issues }}"
            echo "- Auto Review: ${{ steps.xml_test.outputs.auto-review-completed }}"
            echo "- PR Action: ${{ steps.xml_test.outputs.pr-action }}"
            echo "- Incremental: ${{ steps.xml_test.outputs.incremental-analysis }}"
          fi
      - name: 📤 Upload Test Artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ai-battle-test-results-${{ matrix.ai_provider.name }}-${{ github.event_name }}
          path: |
            *.json
            *.txt
            *.log
          retention-days: 7
  # Aggregates the matrix results into a single summary job.
  battle-test-summary:
    name: Battle Test Summary
    needs: ai-battle-test
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: 📊 Generate Summary Report
        run: |
          echo "# 🤖 AI Battle Test Complete"
          echo ""
          echo "**Event:** ${{ github.event_name }}"
          echo "**Trigger:** ${{ github.event.action || 'manual' }}"
          echo "**Status:** ${{ needs.ai-battle-test.result }}"
          echo ""
          echo "## Test Matrix Results"
          echo "All AI providers and scenarios have been tested."
          echo ""
          echo "Check individual job logs and artifacts for detailed results."