Skip to content

feat: add visor-agent-dev skill for AI coding assistants (#562) #2119

feat: add visor-agent-dev skill for AI coding assistants (#562)

feat: add visor-agent-dev skill for AI coding assistants (#562) #2119

name: 🤖 AI Battle Test

Check failure on line 1 in .github/workflows/ai-battle-test.yml

View workflow run for this annotation

GitHub Actions / .github/workflows/ai-battle-test.yml

Invalid workflow file

(Line: 186, Col: 13): Unrecognized named-value: 'secrets'. Located at position 44 within expression: matrix.ai_provider.api_key_secret != '' && secrets[matrix.ai_provider.api_key_secret] != '', (Line: 278, Col: 13): Unrecognized named-value: 'secrets'. Located at position 47 within expression: matrix.ai_provider.name == 'Google Gemini' && secrets.GOOGLE_API_KEY != ''
# Workflow triggers:
# - pull_request (opened/synchronize/edited) for automatic battle-testing of PRs
# - issue_comment so '/ai-test' comments on PRs can re-trigger the suite
# - workflow_dispatch for manual runs with a selectable scenario
on:
  pull_request:
    types: [opened, synchronize, edited]
  issue_comment:
    types: [created]
  workflow_dispatch:
    inputs:
      test_scenario:
        description: 'Test scenario to run'
        required: false
        default: 'all'
        type: choice
        options:
          - 'all'
          - 'incremental_analysis'
          - 'xml_formatting'
          - 'comment_management'
          - 'provider_types'

# Cancel previous in-progress runs of this workflow on the same ref (branch/PR)
concurrency:
  group: ai-battle-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Write access is needed to post review comments, issue comments, and checks
permissions:
  contents: read
  pull-requests: write
  issues: write
  checks: write
jobs:
  # Runs the full battle-test suite once per AI provider in the matrix.
  ai-battle-test:
    name: AI Battle Test — ${{ matrix.ai_provider.name }} — ${{ github.event_name }}
    runs-on: ubuntu-latest
    # Run for PRs and manual dispatches; for issue comments, only on PR
    # comments containing the '/ai-test' trigger phrase.
    if: >
      github.event_name == 'pull_request' ||
      github.event_name == 'workflow_dispatch' ||
      (
        github.event_name == 'issue_comment' &&
        github.event.issue.pull_request &&
        contains(github.event.comment.body, '/ai-test')
      )
    strategy:
      fail-fast: false
      matrix:
        ai_provider:
          - name: 'Google Gemini'
            api_key_secret: 'GOOGLE_API_KEY'
            model: 'gemini-2.0-flash-exp'
            env_var: 'GOOGLE_API_KEY'
          - name: 'OpenAI GPT-4'
            api_key_secret: 'OPENAI_API_KEY'
            model: 'gpt-4o-mini'
            env_var: 'OPENAI_API_KEY'
          - name: 'Anthropic Claude'
            api_key_secret: 'ANTHROPIC_API_KEY'
            model: 'claude-3-haiku-20240307'
            env_var: 'ANTHROPIC_API_KEY'
          - name: 'Mock AI (Fallback)'
            api_key_secret: ''
            model: 'mock'
            env_var: ''
    steps:
      - name: 🛒 Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          # For PR events, checkout the PR head
          ref: ${{ github.event_name == 'pull_request' && github.head_ref || github.ref }}
      - name: 🔧 Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
      - name: 📦 Install dependencies
        run: npm ci
      - name: 🏗️ Build Visor
        run: npm run build
      - name: 🧪 Test 1 - XML Formatted PR Analysis
        id: xml_test
        uses: ./
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          auto-review: true
          add-reactions: 'false'
        env:
          # Multiple API keys for fallback - similar to probe.yml approach
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }} # Aider expects GEMINI_API_KEY
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # Custom API URLs for quota management
          GOOGLE_API_URL: ${{ secrets.GOOGLE_API_URL }}
          ANTHROPIC_API_URL: ${{ secrets.ANTHROPIC_API_URL }}
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_API_URL }}
          OPENAI_API_URL: ${{ secrets.OPENAI_API_URL }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          # Model and provider configuration
          MODEL_NAME: ${{ matrix.ai_provider.model }}
          FORCE_PROVIDER: ${{ secrets.FORCE_PROVIDER }}
          # GitHub context
          GITHUB_CONTEXT: ${{ toJson(github) }}
        continue-on-error: true
      - name: 📊 Validate XML Test Results
        if: steps.xml_test.outcome == 'success'
        run: |
          echo "✅ XML Formatted Analysis completed successfully"
          echo "Provider: ${{ matrix.ai_provider.name }}"
          echo "Total Issues: ${{ steps.xml_test.outputs.total-issues }}"
          echo "Critical Issues: ${{ steps.xml_test.outputs.critical-issues }}"
          echo "PR Action: ${{ steps.xml_test.outputs.pr-action }}"
          echo "Incremental Analysis: ${{ steps.xml_test.outputs.incremental-analysis }}"
          # Validate presence of total-issues for non-mock providers
          if [ -z "${{ steps.xml_test.outputs.total-issues }}" ] && [ "${{ matrix.ai_provider.name }}" != "Mock AI (Fallback)" ]; then
            echo "❌ Missing total-issues output"
            exit 1
          fi
      - name: 🧪 Test 2 - Incremental Analysis (Simulate New Commit)
        if: github.event_name == 'pull_request' && github.event.action == 'synchronize'
        id: incremental_test
        uses: ./
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          auto-review: true
          add-reactions: 'false'
        env:
          # Multiple API keys for fallback
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # Custom API URLs
          GOOGLE_API_URL: ${{ secrets.GOOGLE_API_URL }}
          ANTHROPIC_API_URL: ${{ secrets.ANTHROPIC_API_URL }}
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_API_URL }}
          OPENAI_API_URL: ${{ secrets.OPENAI_API_URL }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          # Model configuration
          MODEL_NAME: ${{ matrix.ai_provider.model }}
          FORCE_PROVIDER: ${{ secrets.FORCE_PROVIDER }}
          GITHUB_CONTEXT: ${{ toJson(github) }}
        continue-on-error: true
      - name: 📊 Validate Incremental Test Results
        if: steps.incremental_test.outcome == 'success'
        run: |
          echo "✅ Incremental Analysis completed successfully"
          echo "Incremental Analysis Flag: ${{ steps.incremental_test.outputs.incremental-analysis }}"
          # For synchronize events, incremental-analysis should be true
          if [ "${{ github.event.action }}" == "synchronize" ] && [ "${{ steps.incremental_test.outputs.incremental-analysis }}" != "true" ]; then
            echo "❌ Expected incremental analysis to be true for synchronize event"
            exit 1
          fi
      - name: 🧪 Test 3 - Comment Management (Check for Updates)
        if: github.event_name == 'pull_request' && steps.xml_test.outcome == 'success'
        run: |
          echo "🔍 Testing comment management..."
          # Use GitHub CLI to check if comments were updated rather than duplicated
          COMMENT_COUNT=$(gh pr view ${{ github.event.number }} --json comments --jq '.comments | map(select(.body | contains("Visor Code Review"))) | length')
          echo "Found $COMMENT_COUNT Visor review comments"
          # Should have at most 1 review comment per PR (smart updating)
          if [ "$COMMENT_COUNT" -gt 1 ]; then
            echo "⚠️ Warning: Found $COMMENT_COUNT review comments. Smart updating may not be working properly."
          else
            echo "✅ Comment management working correctly"
          fi
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: 🧪 Test 4 - Different Check Types
        # FIX: the `secrets` context is not available in step-level `if`
        # expressions (GitHub rejects the workflow with
        # "Unrecognized named-value: 'secrets'"). The `if` now checks only
        # the matrix field; the secret-presence check is done in the shell
        # via PROVIDER_API_KEY, which is resolved in `env` where `secrets`
        # IS available.
        if: matrix.ai_provider.api_key_secret != ''
        continue-on-error: true # Allow test to continue if quota issues occur
        run: |
          # Skip gracefully when the provider's secret is not configured
          if [ -z "$PROVIDER_API_KEY" ]; then
            echo "⏭️ Skipping: secret '${{ matrix.ai_provider.api_key_secret }}' is not configured"
            exit 0
          fi
          chmod +x ./dist/index.js
          echo "🔍 Testing different check provider types..."
          # Test all checks (which includes AI-powered review)
          echo "Testing all checks with AI provider..."
          ./dist/index.js --cli --check all --output json > ai_result.json
          # Validate AI result
          if ! jq -e '.summary.overallScore' ai_result.json > /dev/null; then
            echo "❌ AI provider test failed"
            exit 1
          fi
          echo "✅ AI provider test passed"
          echo "AI Result:"
          jq '.summary' ai_result.json
        env:
          # Dynamic lookup of this provider's secret; empty when unset
          PROVIDER_API_KEY: ${{ matrix.ai_provider.api_key_secret != '' && secrets[matrix.ai_provider.api_key_secret] || '' }}
          # Set the appropriate API key based on provider
          GOOGLE_API_KEY: ${{ matrix.ai_provider.env_var == 'GOOGLE_API_KEY' && secrets.GOOGLE_API_KEY || '' }}
          GEMINI_API_KEY: ${{ matrix.ai_provider.env_var == 'GOOGLE_API_KEY' && secrets.GOOGLE_API_KEY || '' }}
          OPENAI_API_KEY: ${{ matrix.ai_provider.env_var == 'OPENAI_API_KEY' && secrets.OPENAI_API_KEY || '' }}
          ANTHROPIC_API_KEY: ${{ matrix.ai_provider.env_var == 'ANTHROPIC_API_KEY' && secrets.ANTHROPIC_API_KEY || '' }}
          # Custom API URLs
          GOOGLE_API_URL: ${{ secrets.GOOGLE_API_URL }}
          ANTHROPIC_API_URL: ${{ secrets.ANTHROPIC_API_URL }}
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_API_URL }}
          OPENAI_API_URL: ${{ secrets.OPENAI_API_URL }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          # Model configuration
          MODEL_NAME: ${{ matrix.ai_provider.model }}
          FORCE_PROVIDER: ${{ secrets.FORCE_PROVIDER }}
      - name: 🧪 Test 5 - Output Format Validation
        run: |
          chmod +x ./dist/index.js
          echo "🔍 Testing output formats..."
          # Test table output (should not truncate messages)
          echo "Testing table output formatting..."
          ./dist/index.js --cli --check all --output table > table_output.txt
          # Check that table contains expected sections
          if grep -q "Analysis Summary" table_output.txt && grep -q "Issues" table_output.txt; then
            echo "✅ Table output format test passed"
          else
            echo "❌ Table output format test failed"
            cat table_output.txt
            exit 1
          fi
          # Test JSON output
          echo "Testing JSON output..."
          ./dist/index.js --cli --check all --output json > json_output.json
          if jq -e '.summary' json_output.json > /dev/null; then
            echo "✅ JSON output format test passed"
          else
            echo "❌ JSON output format test failed"
            cat json_output.json
            exit 1
          fi
      - name: 🧪 Test 6 - Error Handling and Fallbacks
        run: |
          chmod +x ./dist/index.js
          echo "🔍 Testing error handling..."
          # Test with invalid API key (should gracefully handle)
          echo "Testing invalid API key handling..."
          # Clear all API keys and set an invalid one
          unset GOOGLE_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY
          export GOOGLE_API_KEY="invalid-key-test"
          ./dist/index.js --cli --check ai --output json > error_test.json || {
            echo "✅ Error handling working - invalid API key properly rejected"
          }
          # Test with no API key (should use fallback if available)
          echo "Testing fallback behavior..."
          unset GOOGLE_API_KEY ANTHROPIC_API_KEY OPENAI_API_KEY GEMINI_API_KEY LLM_BASE_URL
          ./dist/index.js --cli --check all --output json > fallback_test.json
          if jq -e '.summary' fallback_test.json > /dev/null; then
            echo "✅ Fallback behavior test passed"
          else
            echo "❌ Fallback behavior test failed"
            exit 1
          fi
      - name: 📈 Performance Benchmark
        # FIX: `secrets` cannot be referenced in a step-level `if`
        # ("Unrecognized named-value: 'secrets'"); the GOOGLE_API_KEY
        # presence check moved into the shell, using the step env below.
        if: matrix.ai_provider.name == 'Google Gemini'
        continue-on-error: true # Allow benchmark to continue if quota issues occur
        run: |
          # Skip gracefully when the Google key is not configured
          if [ -z "$GOOGLE_API_KEY" ]; then
            echo "⏭️ Skipping benchmark: GOOGLE_API_KEY is not configured"
            exit 0
          fi
          chmod +x ./dist/index.js
          echo "🚀 Running performance benchmark..."
          start_time=$(date +%s%3N)
          ./dist/index.js --cli --check all --output json > benchmark_result.json
          end_time=$(date +%s%3N)
          execution_time=$((end_time - start_time))
          echo "Execution time: ${execution_time}ms"
          # Check if execution completed within reasonable time (60 seconds)
          if [ $execution_time -lt 60000 ]; then
            echo "✅ Performance benchmark passed (${execution_time}ms)"
          else
            echo "⚠️ Performance benchmark slow (${execution_time}ms)"
          fi
        env:
          # Multiple API keys with fallback strategy
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # Custom API URLs for better quota management
          GOOGLE_API_URL: ${{ secrets.GOOGLE_API_URL }}
          ANTHROPIC_API_URL: ${{ secrets.ANTHROPIC_API_URL }}
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_API_URL }}
          OPENAI_API_URL: ${{ secrets.OPENAI_API_URL }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          # Model configuration
          MODEL_NAME: ${{ matrix.ai_provider.model }}
          FORCE_PROVIDER: ${{ secrets.FORCE_PROVIDER }}
      - name: 📋 Test Results Summary
        if: always()
        run: |
          echo "## 🧪 AI Battle Test Results Summary"
          echo "**Provider:** ${{ matrix.ai_provider.name }}"
          echo "**Event:** ${{ github.event_name }}${{ github.event.action && format(' ({0})', github.event.action) || '' }}"
          echo "**Model:** ${{ matrix.ai_provider.model }}"
          echo ""
          echo "### Test Results:"
          echo "- XML Formatted Analysis: ${{ steps.xml_test.outcome || 'skipped' }}"
          echo "- Incremental Analysis: ${{ steps.incremental_test.outcome || 'skipped' }}"
          echo "- Comment Management: ${{ github.event_name == 'pull_request' && 'tested' || 'skipped' }}"
          echo "- Provider Types: ${{ matrix.ai_provider.api_key_secret != '' && 'tested' || 'skipped' }}"
          echo "- Output Formats: tested"
          echo "- Error Handling: tested"
          echo "- Performance: ${{ matrix.ai_provider.name == 'Google Gemini' && 'tested' || 'skipped' }}"
          echo ""
          if [ "${{ steps.xml_test.outputs.review-score }}" ]; then
            echo "### Metrics:"
            echo "- Review Score: ${{ steps.xml_test.outputs.review-score }}/100"
            echo "- Total Issues: ${{ steps.xml_test.outputs.total-issues }}"
            echo "- Auto Review: ${{ steps.xml_test.outputs.auto-review-completed }}"
            echo "- PR Action: ${{ steps.xml_test.outputs.pr-action }}"
            echo "- Incremental: ${{ steps.xml_test.outputs.incremental-analysis }}"
          fi
      - name: 📤 Upload Test Artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ai-battle-test-results-${{ matrix.ai_provider.name }}-${{ github.event_name }}
          path: |
            *.json
            *.txt
            *.log
          retention-days: 7
  # Aggregates the matrix results into a single summary job.
  battle-test-summary:
    name: Battle Test Summary
    needs: ai-battle-test
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: 📊 Generate Summary Report
        run: |
          echo "# 🤖 AI Battle Test Complete"
          echo ""
          echo "**Event:** ${{ github.event_name }}"
          echo "**Trigger:** ${{ github.event.action || 'manual' }}"
          echo "**Status:** ${{ needs.ai-battle-test.result }}"
          echo ""
          echo "## Test Matrix Results"
          echo "All AI providers and scenarios have been tested."
          echo ""
          echo "Check individual job logs and artifacts for detailed results."