Skip to content

Commit 3ed17bb

Browse files
refactor: extract shared load_golden() into evals/harness/fixtures.py
The load_golden() helper was copy-pasted identically across 5 agent test files. Extract it into a shared module with an explicit fixtures_dir parameter; each consumer keeps a thin 2-line wrapper that preserves the existing zero-arg call signature, so no test files need changes.

Closes RHAIENG-5096

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7fb3ca4 commit 3ed17bb

7 files changed

Lines changed: 61 additions & 36 deletions

File tree

agents/autogen/mcp_agent/tests/behavioral/conftest.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import httpx
1414
import pytest
1515
import yaml
16+
from harness.fixtures import load_golden as _load_golden_from
1617
from harness.runner import TaskConfig, TaskResult, run_task
1718

1819
try:
@@ -56,15 +57,12 @@ def eval_config() -> dict[str, Any]:
5657
return yaml.safe_load(f)
5758

5859

60+
FIXTURES_DIR = Path(__file__).parent / "fixtures"
61+
62+
5963
def load_golden(category: str | None = None) -> list[dict[str, Any]]:
6064
"""Load golden queries from the fixtures directory, optionally filtering by category."""
61-
path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
62-
with open(path, encoding="utf-8") as f:
63-
data = yaml.safe_load(f)
64-
queries = data.get("queries", [])
65-
if category:
66-
queries = [q for q in queries if q.get("category") == category]
67-
return queries
65+
return _load_golden_from(FIXTURES_DIR, category)
6866

6967

7068
@pytest.fixture

agents/crewai/websearch_agent/tests/behavioral/conftest.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import httpx
1414
import pytest
1515
import yaml
16+
from harness.fixtures import load_golden as _load_golden_from
1617
from harness.runner import TaskConfig, TaskResult, run_task
1718

1819
try:
@@ -58,16 +59,12 @@ def eval_config() -> dict[str, Any]:
5859

5960
SEARCH_EVIDENCE = ["openshift ai"]
6061

62+
FIXTURES_DIR = Path(__file__).parent / "fixtures"
63+
6164

6265
def load_golden(category: str | None = None) -> list[dict[str, Any]]:
6366
"""Load golden queries from the fixtures directory, optionally filtering by category."""
64-
path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
65-
with open(path, encoding="utf-8") as f:
66-
data = yaml.safe_load(f)
67-
queries = data.get("queries", [])
68-
if category:
69-
queries = [q for q in queries if q.get("category") == category]
70-
return queries
67+
return _load_golden_from(FIXTURES_DIR, category)
7168

7269

7370
@pytest.fixture

agents/langgraph/agentic_rag/tests/behavioral/conftest.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import httpx
1414
import pytest
1515
import yaml
16+
from harness.fixtures import load_golden as _load_golden_from
1617
from harness.runner import TaskConfig, TaskResult, run_task
1718

1819
try:
@@ -48,15 +49,12 @@ def _find_repo_root() -> Path:
4849
)
4950

5051

52+
FIXTURES_DIR = Path(__file__).parent / "fixtures"
53+
54+
5155
def load_golden(category: str | None = None) -> list[dict[str, Any]]:
5256
"""Load golden queries from the fixtures directory, optionally filtering by category."""
53-
path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
54-
with open(path, encoding="utf-8") as f:
55-
data = yaml.safe_load(f)
56-
queries = data.get("queries", [])
57-
if category:
58-
queries = [q for q in queries if q.get("category") == category]
59-
return queries
57+
return _load_golden_from(FIXTURES_DIR, category)
6058

6159

6260
@pytest.fixture

agents/langgraph/react_agent/tests/behavioral/test_tool_usage.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from typing import Any
2121

2222
import pytest
23-
import yaml
23+
from harness.fixtures import load_golden as _load_golden_from
2424
from harness.scorers.tool_sequence import (
2525
score_hallucinated_tools,
2626
score_tool_call_validity,
@@ -29,16 +29,12 @@
2929

3030
pytestmark = pytest.mark.langgraph_react
3131

32+
FIXTURES_DIR = Path(__file__).parent / "fixtures"
33+
3234

3335
def _load_golden(category: str | None = None) -> list[dict[str, Any]]:
3436
"""Load golden queries, optionally filtering by category."""
35-
path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
36-
with open(path, encoding="utf-8") as f:
37-
data = yaml.safe_load(f)
38-
queries = data.get("queries", [])
39-
if category:
40-
queries = [q for q in queries if q.get("category") == category]
41-
return queries
37+
return _load_golden_from(FIXTURES_DIR, category)
4238

4339

4440
def _factual_queries() -> list[dict[str, Any]]:

agents/vanilla_python/openai_responses_agent/tests/behavioral/conftest.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import httpx
1414
import pytest
1515
import yaml
16+
from harness.fixtures import load_golden as _load_golden_from
1617
from harness.runner import TaskConfig, TaskResult, run_task
1718

1819
try:
@@ -59,16 +60,12 @@ def eval_config() -> dict[str, Any]:
5960
PRICE_EVIDENCE = ["price", "cost", "$", "dollar"]
6061
REVIEW_EVIDENCE = ["review", "rating", "star", "recommend"]
6162

63+
FIXTURES_DIR = Path(__file__).parent / "fixtures"
64+
6265

6366
def load_golden(category: str | None = None) -> list[dict[str, Any]]:
6467
"""Load golden queries from the fixtures directory, optionally filtering by category."""
65-
path = Path(__file__).parent / "fixtures" / "golden_queries.yaml"
66-
with open(path, encoding="utf-8") as f:
67-
data = yaml.safe_load(f)
68-
queries = data.get("queries", [])
69-
if category:
70-
queries = [q for q in queries if q.get("category") == category]
71-
return queries
68+
return _load_golden_from(FIXTURES_DIR, category)
7269

7370

7471
@pytest.fixture

docs/adding-behavioral-tests.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,20 @@ The conftest defines fixtures specific to your agent. Because agent tests live u
3838
- `agent_thresholds` — pulls from the shared `eval_config` fixture
3939
- `run_eval` — overrides the root fixture to add MLflow trace enrichment
4040

41+
**`load_golden()` helper:** Import the shared loader from `harness.fixtures` and create a thin wrapper that binds `fixtures_dir` to `Path(__file__).parent / "fixtures"`:
42+
43+
```python
44+
from pathlib import Path
45+
from typing import Any
46+
47+
from harness.fixtures import load_golden as _load_golden_from
48+
49+
FIXTURES_DIR = Path(__file__).parent / "fixtures"
50+
51+
def load_golden(category: str | None = None) -> list[dict[str, Any]]:
52+
return _load_golden_from(FIXTURES_DIR, category)
53+
```
54+
4155
See existing agent implementations for working examples:
4256

4357
- `agents/langgraph/react_agent/tests/behavioral/conftest.py`

evals/harness/fixtures.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""Golden-query loader for behavioral tests."""
2+
3+
from __future__ import annotations
4+
5+
from pathlib import Path
6+
from typing import Any
7+
8+
import yaml
9+
10+
11+
def load_golden(
    fixtures_dir: Path | str,
    category: str | None = None,
) -> list[dict[str, Any]]:
    """Load golden queries from *fixtures_dir*/golden_queries.yaml.

    Expected YAML shape: ``{"queries": [{"category": str, "query": str, ...}]}``

    Args:
        fixtures_dir: Directory that contains ``golden_queries.yaml``.
        category: When truthy, keep only the queries whose ``category``
            value equals it. ``None`` (the default) or an empty string
            returns every query.

    Returns:
        The list of query mappings. The list is empty when the document is
        empty, has no ``queries`` key, or has a ``queries`` key whose value
        is null/empty.

    Raises:
        FileNotFoundError: If ``golden_queries.yaml`` does not exist in
            *fixtures_dir*.
    """
    path = Path(fixtures_dir) / "golden_queries.yaml"
    with open(path, encoding="utf-8") as f:
        # safe_load yields None for an empty document; normalise to a mapping.
        data = yaml.safe_load(f) or {}
    # ``.get("queries", [])`` would still hand back None for a bare
    # ``queries:`` key, so fall back on falsiness rather than on absence.
    queries = data.get("queries") or []
    if category:
        queries = [q for q in queries if q.get("category") == category]
    return queries

0 commit comments

Comments
 (0)