From 3ed17bb5a414dabbe72c9fe6ee8f3f28a18a15a5 Mon Sep 17 00:00:00 2001
From: adonheis <adonheis@redhat.com>
Date: Tue, 19 May 2026 15:17:08 -0400
Subject: [PATCH] refactor: extract shared load_golden() into
 evals/harness/fixtures.py

The load_golden() helper was copy-pasted identically across 5 agent
behavioral-test modules (four conftest.py files plus react_agent's
test_tool_usage.py, where it is named _load_golden()). Extract it into
a shared module with an explicit fixtures_dir parameter; each consumer
keeps a thin wrapper that preserves the existing zero-arg call
signature, so no call sites need changes.

Closes RHAIENG-5096

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../mcp_agent/tests/behavioral/conftest.py    | 12 ++++-----
 .../tests/behavioral/conftest.py              | 11 +++-----
 .../agentic_rag/tests/behavioral/conftest.py  | 12 ++++-----
 .../tests/behavioral/test_tool_usage.py       | 12 +++------
 .../tests/behavioral/conftest.py              | 11 +++-----
 docs/adding-behavioral-tests.md               | 14 +++++++++++
 evals/harness/fixtures.py                     | 25 +++++++++++++++++++
 7 files changed, 61 insertions(+), 36 deletions(-)
 create mode 100644 evals/harness/fixtures.py

diff --git a/agents/autogen/mcp_agent/tests/behavioral/conftest.py b/agents/autogen/mcp_agent/tests/behavioral/conftest.py
index 3185e6de..6c203d88 100644
--- a/agents/autogen/mcp_agent/tests/behavioral/conftest.py
+++ b/agents/autogen/mcp_agent/tests/behavioral/conftest.py
@@ -13,6 +13,7 @@
 import httpx
 import pytest
 import yaml
+from harness.fixtures import load_golden as _load_golden_from
 from harness.runner import TaskConfig, TaskResult, run_task
 
 try:
@@ -56,15 +57,12 @@ def eval_config() -> dict[str, Any]:
         return yaml.safe_load(f)
 
 
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+
+
 def load_golden(category: str | None = None) -> list[dict[str, Any]]:
     """Load golden queries from the fixtures directory, optionally filtering by category."""
-    path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
-    with open(path, encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-    queries = data.get("queries", [])
-    if category:
-        queries = [q for q in queries if q.get("category") == category]
-    return queries
+    return _load_golden_from(FIXTURES_DIR, category)
 
 
 @pytest.fixture
diff --git a/agents/crewai/websearch_agent/tests/behavioral/conftest.py b/agents/crewai/websearch_agent/tests/behavioral/conftest.py
index 232626df..e7056d9a 100644
--- a/agents/crewai/websearch_agent/tests/behavioral/conftest.py
+++ b/agents/crewai/websearch_agent/tests/behavioral/conftest.py
@@ -13,6 +13,7 @@
 import httpx
 import pytest
 import yaml
+from harness.fixtures import load_golden as _load_golden_from
 from harness.runner import TaskConfig, TaskResult, run_task
 
 try:
@@ -58,16 +59,12 @@ def eval_config() -> dict[str, Any]:
 
 SEARCH_EVIDENCE = ["openshift ai"]
 
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+
 
 def load_golden(category: str | None = None) -> list[dict[str, Any]]:
     """Load golden queries from the fixtures directory, optionally filtering by category."""
-    path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
-    with open(path, encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-    queries = data.get("queries", [])
-    if category:
-        queries = [q for q in queries if q.get("category") == category]
-    return queries
+    return _load_golden_from(FIXTURES_DIR, category)
 
 
 @pytest.fixture
diff --git a/agents/langgraph/agentic_rag/tests/behavioral/conftest.py b/agents/langgraph/agentic_rag/tests/behavioral/conftest.py
index da39741e..f4ebd27a 100644
--- a/agents/langgraph/agentic_rag/tests/behavioral/conftest.py
+++ b/agents/langgraph/agentic_rag/tests/behavioral/conftest.py
@@ -13,6 +13,7 @@
 import httpx
 import pytest
 import yaml
+from harness.fixtures import load_golden as _load_golden_from
 from harness.runner import TaskConfig, TaskResult, run_task
 
 try:
@@ -48,15 +49,12 @@ def _find_repo_root() -> Path:
     )
 
 
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+
+
 def load_golden(category: str | None = None) -> list[dict[str, Any]]:
     """Load golden queries from the fixtures directory, optionally filtering by category."""
-    path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
-    with open(path, encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-    queries = data.get("queries", [])
-    if category:
-        queries = [q for q in queries if q.get("category") == category]
-    return queries
+    return _load_golden_from(FIXTURES_DIR, category)
 
 
 @pytest.fixture
diff --git a/agents/langgraph/react_agent/tests/behavioral/test_tool_usage.py b/agents/langgraph/react_agent/tests/behavioral/test_tool_usage.py
index a8d5025e..2d26d003 100644
--- a/agents/langgraph/react_agent/tests/behavioral/test_tool_usage.py
+++ b/agents/langgraph/react_agent/tests/behavioral/test_tool_usage.py
@@ -20,7 +20,7 @@
 from typing import Any
 
 import pytest
-import yaml
+from harness.fixtures import load_golden as _load_golden_from
 from harness.scorers.tool_sequence import (
     score_hallucinated_tools,
     score_tool_call_validity,
@@ -29,16 +29,12 @@
 
 pytestmark = pytest.mark.langgraph_react
 
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+
 
 def _load_golden(category: str | None = None) -> list[dict[str, Any]]:
     """Load golden queries, optionally filtering by category."""
-    path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
-    with open(path, encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-    queries = data.get("queries", [])
-    if category:
-        queries = [q for q in queries if q.get("category") == category]
-    return queries
+    return _load_golden_from(FIXTURES_DIR, category)
 
 
 def _factual_queries() -> list[dict[str, Any]]:
diff --git a/agents/vanilla_python/openai_responses_agent/tests/behavioral/conftest.py b/agents/vanilla_python/openai_responses_agent/tests/behavioral/conftest.py
index 4fda1997..f78976d4 100644
--- a/agents/vanilla_python/openai_responses_agent/tests/behavioral/conftest.py
+++ b/agents/vanilla_python/openai_responses_agent/tests/behavioral/conftest.py
@@ -13,6 +13,7 @@
 import httpx
 import pytest
 import yaml
+from harness.fixtures import load_golden as _load_golden_from
 from harness.runner import TaskConfig, TaskResult, run_task
 
 try:
@@ -59,16 +60,12 @@ def eval_config() -> dict[str, Any]:
 PRICE_EVIDENCE = ["price", "cost", "$", "dollar"]
 REVIEW_EVIDENCE = ["review", "rating", "star", "recommend"]
 
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+
 
 def load_golden(category: str | None = None) -> list[dict[str, Any]]:
     """Load golden queries from the fixtures directory, optionally filtering by category."""
-    path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
-    with open(path, encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-    queries = data.get("queries", [])
-    if category:
-        queries = [q for q in queries if q.get("category") == category]
-    return queries
+    return _load_golden_from(FIXTURES_DIR, category)
 
 
 @pytest.fixture
diff --git a/docs/adding-behavioral-tests.md b/docs/adding-behavioral-tests.md
index 1e06e3bd..947de1b8 100644
--- a/docs/adding-behavioral-tests.md
+++ b/docs/adding-behavioral-tests.md
@@ -38,6 +38,20 @@ The conftest defines fixtures specific to your agent. Because agent tests live u
 - `agent_thresholds` — pulls from the shared `eval_config` fixture
 - `run_eval` — overrides the root fixture to add MLflow trace enrichment
 
+**`load_golden()` helper:** Import the shared loader from `harness.fixtures` and create a thin wrapper that binds `fixtures_dir` to `Path(__file__).parent / "fixtures"`:
+
+```python
+from pathlib import Path
+from typing import Any
+
+from harness.fixtures import load_golden as _load_golden_from
+
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+
+def load_golden(category: str | None = None) -> list[dict[str, Any]]:
+    return _load_golden_from(FIXTURES_DIR, category)
+```
+
 See existing agent implementations for working examples:
 
 - `agents/langgraph/react_agent/tests/behavioral/conftest.py`
diff --git a/evals/harness/fixtures.py b/evals/harness/fixtures.py
new file mode 100644
index 00000000..5d9d7c13
--- /dev/null
+++ b/evals/harness/fixtures.py
@@ -0,0 +1,25 @@
+"""Golden-query loader for behavioral tests."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+
+def load_golden(
+    fixtures_dir: Path | str,
+    category: str | None = None,
+) -> list[dict[str, Any]]:
+    """Load golden queries from *fixtures_dir*/golden_queries.yaml.
+
+    Expected YAML shape: ``{"queries": [{"category": str, "query": str, ...}]}``
+    An empty file or a null ``queries`` key yields ``[]``; a truthy *category* filters.
+    """
+    with open(Path(fixtures_dir) / "golden_queries.yaml", encoding="utf-8") as f:
+        data = yaml.safe_load(f) or {}
+    queries = data.get("queries") or []  # "queries:" with no value parses as None
+    if category:
+        queries = [q for q in queries if q.get("category") == category]
+    return queries