ci: add agent unit test workflow with auto-discovery and reporting (#103)

tarun-etikala · claude · web-flow · commit 237a0b5e2a64 · 2026-05-15T14:37:41.000-04:00
Add a new agent-tests.yml workflow that runs unit tests on PRs and
pushes to main. Key features:

- Auto-discovers agents with tests (tests/test_*.py) — no workflow
  edits needed when new agents are added
- On PRs, runs only changed agents' tests via git diff filtering
- On push to main, runs all agents (full regression)
- Produces a consolidated "Agent Test Results" check via
  mikepenz/action-junit-report with inline failure annotations
- All actions pinned by commit SHA to Node.js 24-based releases
- Test result artifacts retained for 1 day only

Also fixes broken unit tests across 3 agents (react_agent,
agentic_rag, openai_responses_agent) and standardizes all 9 agent
Makefiles to exclude integration/behavioral tests from make test,
with $(PYTEST_ARGS) support for CI to inject --junitxml.

Ref: RHAIENG-4065

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/.github/workflows/agent-tests.yml b/.github/workflows/agent-tests.yml
@@ -0,0 +1,95 @@
+name: Agent Tests
+
+on:
+  push:
+    branches: [main]
+    paths: ['agents/**']
+  pull_request:
+    branches: [main]
+    paths: ['agents/**']
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  checks: write  # lets the report step publish the "Agent Test Results" check
+
+concurrency:
+  group: agent-tests-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Unit Tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Checkout
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd  # v5.0.1
+        with:
+          fetch-depth: 0  # full history so the PR diff below can resolve the base branch
+
+      - name: Setup Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78  # v7.6.0
+
+      - name: Run tests
+        id: run-tests
+        run: |
+          # Discover agent dirs (first three path components) that contain tests/test_*.py
+          discovered=$(find agents/*/*/tests -maxdepth 1 -name 'test_*.py' -type f 2>/dev/null \
+            | cut -d'/' -f1-3 | sort -u)
+
+          # On PRs, keep only changed agents; event data comes from default env vars, not inline expressions
+          if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
+            base_ref="origin/${GITHUB_BASE_REF}"
+            changed_files=$(git diff --name-only "${base_ref}...HEAD")
+            filtered=""
+            while IFS= read -r agent_dir; do
+              [ -z "$agent_dir" ] && continue
+              if echo "$changed_files" | grep -q "^${agent_dir}/"; then
+                filtered="${filtered}${filtered:+$'\n'}${agent_dir}"
+              fi
+            done <<< "$discovered"
+            discovered="$filtered"
+          fi
+
+          count=$(echo "$discovered" | grep -c '[^[:space:]]' || true)
+          if [ "$count" -eq 0 ]; then
+            echo "No testable agents found (or none changed). Skipping."
+            exit 0
+          fi
+
+          echo "Running tests for ${count} agents"
+          mkdir -p test-results
+          failed=0
+
+          while IFS= read -r agent_dir; do
+            [ -z "$agent_dir" ] && continue
+            name=$(echo "$agent_dir" | sed 's|agents/||; s|/|-|g')
+            echo ""
+            echo "::group::${name}"
+            echo "=== Testing ${agent_dir} ==="
+
+            if make -C "$agent_dir" test PYTEST_ARGS="--junitxml=$(pwd)/test-results/${name}.xml -v --tb=short"; then
+              echo "✓ ${name} passed"
+            else
+              echo "✗ ${name} failed"
+              failed=1
+            fi
+            echo "::endgroup::"
+          done <<< "$discovered"
+
+          exit "$failed"
+
+      - name: Publish test report
+        if: always()
+        uses: mikepenz/action-junit-report@bccf2e31636835cf0874589931c4116687171386  # v6.4.0
+        with:
+          report_paths: test-results/*.xml
+          check_name: Agent Test Results
+          include_passed: true
+          annotate_only: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository }}
diff --git a/agents/autogen/mcp_agent/Makefile b/agents/autogen/mcp_agent/Makefile
@@ -201,5 +201,5 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove agent deployment from cluster (does NOT remove MCP server)
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
diff --git a/agents/crewai/websearch_agent/Makefile b/agents/crewai/websearch_agent/Makefile
@@ -167,8 +167,8 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove deployment from cluster
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/ --ignore=tests/integration
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
 
 test-integration: ## Run integration deployment test
 	PYTHONPATH=$$(git rev-parse --show-toplevel)/tests \
diff --git a/agents/google/adk/Makefile b/agents/google/adk/Makefile
@@ -156,5 +156,5 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove deployment from cluster
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
diff --git a/agents/langgraph/agentic_rag/Makefile b/agents/langgraph/agentic_rag/Makefile
@@ -182,5 +182,5 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove deployment from cluster
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
diff --git a/agents/langgraph/agentic_rag/tests/test_tools.py b/agents/langgraph/agentic_rag/tests/test_tools.py
@@ -4,7 +4,6 @@
 
 import pytest
 import src.agentic_rag.tools as tools_module
-from dotenv import load_dotenv
 
 # Add parent directory to path
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
@@ -177,19 +176,16 @@ def test_get_retriever_components_initialization(mock_get_env, mock_client_class
     tools_module._client_cache = None
     tools_module._vector_store_id_cache = None
 
-    # Mock environment variable
-    mock_get_env.return_value = "http://localhost:8321"
+    # Mock environment variables: BASE_URL, VECTOR_STORE_ID, API_KEY
+    def getenv_side_effect(key):
+        return {
+            "BASE_URL": "http://localhost:8321",
+            "VECTOR_STORE_ID": "test-vector-store-123",
+            "API_KEY": "test-key",
+        }.get(key)
 
-    # Mock client and vector store list
-    mock_client = Mock()
-    mock_vector_store = Mock()
-    mock_vector_store.id = "test-vector-store-123"
-
-    mock_list_response = Mock()
-    mock_list_response.data = [mock_vector_store]
-
-    mock_client.vector_stores.list.return_value = mock_list_response
-    mock_client_class.return_value = mock_client
+    mock_get_env.side_effect = getenv_side_effect
+    mock_client_class.return_value = Mock()
 
     # Call function
     result = get_retriever_components()
@@ -198,7 +194,9 @@ def test_get_retriever_components_initialization(mock_get_env, mock_client_class
     assert "client" in result
     assert "vector_store_id" in result
     assert result["vector_store_id"] == "test-vector-store-123"
-    mock_client_class.assert_called_once_with(base_url="http://localhost:8321")
+    mock_client_class.assert_called_once_with(
+        base_url="http://localhost:8321", api_key="test-key"
+    )
 
 
 @patch("src.agentic_rag.tools.LlamaStackClient")
@@ -220,62 +218,51 @@ def test_get_retriever_components_caching(mock_get_env, mock_client_class):
 
 
 @patch("src.agentic_rag.tools.LlamaStackClient")
-def test_get_retriever_components_with_base_url(mock_client_class):
+@patch("src.agentic_rag.tools.getenv")
+def test_get_retriever_components_with_base_url(mock_get_env, mock_client_class):
     """Test that base_url parameter is used when provided."""
     # Reset cache
     tools_module._client_cache = None
     tools_module._vector_store_id_cache = None
 
-    # Mock client and vector store list
-    mock_client = Mock()
-    mock_vector_store = Mock()
-    mock_vector_store.id = "test-id"
-
-    mock_list_response = Mock()
-    mock_list_response.data = [mock_vector_store]
+    def getenv_side_effect(key):
+        return {
+            "VECTOR_STORE_ID": "test-id",
+            "API_KEY": "test-key",
+        }.get(key)
 
-    mock_client.vector_stores.list.return_value = mock_list_response
-    mock_client_class.return_value = mock_client
+    mock_get_env.side_effect = getenv_side_effect
+    mock_client_class.return_value = Mock()
 
     # Call with explicit base_url
     result = get_retriever_components(base_url="http://custom:9999")
 
-    # Should use provided base_url
-    mock_client_class.assert_called_once_with(base_url="http://custom:9999")
+    # Client should get the explicit base_url plus API_KEY from getenv; NOTE(review): /v1 stripping is not exercised here
+    mock_client_class.assert_called_once_with(
+        base_url="http://custom:9999", api_key="test-key"
+    )
     assert result["vector_store_id"] == "test-id"
 
 
-@patch("src.agentic_rag.tools.LlamaStackClient")
 @patch("src.agentic_rag.tools.getenv")
-def test_get_retriever_components_no_vector_store(mock_get_env, mock_client_class):
-    """Test error handling when no vector store is found."""
+def test_get_retriever_components_no_vector_store(mock_get_env):
+    """Test error handling when VECTOR_STORE_ID env var is not set."""
     # Reset cache
     tools_module._client_cache = None
     tools_module._vector_store_id_cache = None
 
-    mock_get_env.return_value = "http://localhost:8321"
-
-    # Mock client with empty vector store list
-    mock_client = Mock()
-    mock_list_response = Mock()
-    mock_list_response.data = []  # No vector stores
+    def getenv_side_effect(key):
+        return {"BASE_URL": "http://localhost:8321"}.get(key)
 
-    mock_client.vector_stores.list.return_value = mock_list_response
-    mock_client_class.return_value = mock_client
+    mock_get_env.side_effect = getenv_side_effect
 
-    # Should raise RuntimeError
+    # Should raise RuntimeError when VECTOR_STORE_ID is missing
     with pytest.raises(RuntimeError) as exc_info:
         get_retriever_components()
 
-    assert "No vector store found" in str(exc_info.value)
+    assert "VECTOR_STORE_ID" in str(exc_info.value)
     assert "load_documents.py" in str(exc_info.value)
 
 
-def test_get_retriever_components():
-    load_dotenv(verbose=True)
-    base_url = os.getenv("BASE_URL")
-    get_retriever_components(base_url)
-
-
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/agents/langgraph/human_in_the_loop/Makefile b/agents/langgraph/human_in_the_loop/Makefile
@@ -156,8 +156,8 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove deployment from cluster
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/ --ignore=tests/integration
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
 
 test-integration: ## Run integration deployment test
 	PYTHONPATH=$$(git rev-parse --show-toplevel)/tests \
diff --git a/agents/langgraph/react_agent/Makefile b/agents/langgraph/react_agent/Makefile
@@ -167,8 +167,8 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove deployment from cluster
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/ --ignore=tests/integration
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
 
 test-integration: ## Run integration deployment test
 	PYTHONPATH=$$(git rev-parse --show-toplevel)/tests \
diff --git a/agents/langgraph/react_agent/tests/test_tools.py b/agents/langgraph/react_agent/tests/test_tools.py
@@ -54,7 +54,7 @@ def test_dummy_web_search_return_format():
     # Should be a string, not a list
     assert isinstance(result, str)
     assert "FINAL ANSWER:" in result
-    assert "best company" in result.lower()
+    assert "RedHat" in result
 
 
 def test_dummy_web_search_with_empty_query():
diff --git a/agents/langgraph/react_with_database_memory/Makefile b/agents/langgraph/react_with_database_memory/Makefile
@@ -176,5 +176,5 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove deployment from cluster
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
diff --git a/agents/llamaindex/websearch_agent/Makefile b/agents/llamaindex/websearch_agent/Makefile
@@ -167,5 +167,5 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove deployment from cluster
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
diff --git a/agents/vanilla_python/openai_responses_agent/Makefile b/agents/vanilla_python/openai_responses_agent/Makefile
@@ -128,5 +128,5 @@ dry-run: _check-env ## Render Helm templates without deploying
 undeploy: ## Remove deployment from cluster
 	helm uninstall $(AGENT_NAME)
 
-test: ## Run tests
-	uv run --extra dev python -m pytest tests/
+test: ## Run unit tests
+	uv run --extra dev python -m pytest tests/ --ignore=tests/integration --ignore=tests/behavioral $(PYTEST_ARGS)
diff --git a/agents/vanilla_python/openai_responses_agent/tests/test_health.py b/agents/vanilla_python/openai_responses_agent/tests/test_health.py
@@ -1,17 +1,17 @@
-from unittest.mock import patch
-
 import pytest
 from fastapi.testclient import TestClient
 
 
 @pytest.fixture
-def client():
-    """Create a test client with a mocked agent."""
-    with patch("main.get_agent_closure") as mock_closure:
-        mock_closure.return_value = lambda: None
-        from main import app
+def client(monkeypatch):
+    """Create a test client with the agent global set to a mock factory."""
+    import main
 
-        yield TestClient(app)
+    # Patch via monkeypatch so main.get_agent is restored at teardown even
+    # when the TestClient context raises while exiting.
+    monkeypatch.setattr(main, "get_agent", lambda: None)
+    with TestClient(main.app, raise_server_exceptions=False) as c:
+        yield c
 
 
 def test_health_endpoint(client):