Skip to content

Commit 0cd910d

Browse files
committed
Add triage agent test suite
Can be executed by `make run-triage-agent-e2e-tests`. Currently none of the tests passes reliably, but each of them provides insight into the portion of the process that is not working.
1 parent 0c65fd7 commit 0cd910d

File tree

10 files changed

+178
-16
lines changed

10 files changed

+178
-16
lines changed

Containerfile.c10s

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ RUN dnf -y install --allowerasing \
3838
sed \
3939
gawk \
4040
rsync \
41+
python3-tabulate \
4142
&& dnf clean all
4243

4344
RUN pip3 install --no-cache-dir \
@@ -46,7 +47,9 @@ RUN pip3 install --no-cache-dir \
4647
openinference-instrumentation-beeai \
4748
arize-phoenix-otel \
4849
redis \
49-
specfile
50+
specfile \
51+
pytest \
52+
pytest-asyncio
5053

5154
# Create user
5255
RUN useradd -m -G wheel beeai

Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,11 @@ run-triage-agent-standalone:
2727
-e MOCK_JIRA=$(MOCK_JIRA) \
2828
triage-agent
2929

30-
31-
30+
.PHONY: run-triage-agent-e2e-tests
31+
run-triage-agent-e2e-tests:
32+
$(COMPOSE_AGENTS) run --rm \
33+
-e MOCK_JIRA="true" \
34+
triage-agent-e2e-tests
3235

3336
.PHONY: run-rebase-agent-c9s-standalone
3437
run-rebase-agent-c9s-standalone:

agents/metrics_middleware.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from datetime import datetime
2+
3+
4+
from beeai_framework.context import (
5+
RunContextStartEvent,
6+
RunContextFinishEvent,
7+
RunMiddlewareProtocol,
8+
RunContext
9+
)
10+
from beeai_framework.emitter import EmitterOptions, EventMeta
11+
from beeai_framework.emitter.utils import create_internal_event_matcher
12+
13+
14+
class MetricsMiddleware(RunMiddlewareProtocol):
    """Run middleware that records when a run starts and when it finishes.

    After being bound to a run context, the instance stores the timestamps of
    the "start" and "finish" events and exposes the elapsed time through
    ``duration`` and ``get_metrics``.
    """

    def __init__(self):
        # Both timestamps stay None until the matching event has been seen.
        self.start_time: datetime | None = None
        self.end_time: datetime | None = None
        # Initialised here but not updated anywhere in this class.
        self.tool_calls: int = 0

    def bind(self, ctx: RunContext) -> None:
        """Subscribe the timing callbacks to the "start" and "finish" events
        matched for ``ctx.instance`` on the context's emitter."""
        ctx.emitter.on(
            create_internal_event_matcher("start", ctx.instance),
            self._on_run_context_start,
            EmitterOptions(is_blocking=True, priority=1),
        )
        ctx.emitter.on(
            create_internal_event_matcher("finish", ctx.instance),
            self._on_run_context_finish,
            EmitterOptions(is_blocking=True, priority=1),
        )

    async def _on_run_context_start(self, event: RunContextStartEvent, meta: EventMeta):
        """Remember the moment the run started."""
        self.start_time = datetime.now()

    async def _on_run_context_finish(self, event: RunContextFinishEvent, meta: EventMeta):
        """Remember the moment the run finished."""
        self.end_time = datetime.now()

    @property
    def duration(self) -> float:
        """Elapsed seconds between start and finish.

        Returns ``0.0`` while either timestamp is still missing.
        """
        if self.start_time is None or self.end_time is None:
            # Return a float so the declared return type always holds.
            return 0.0
        return (self.end_time - self.start_time).total_seconds()

    def get_metrics(self) -> dict:
        """Return the collected metrics as a dict with a single "duration" key."""
        return {"duration": self.duration}

agents/tasks.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99

1010
from common.models import LogOutputSchema, CachedMRMetadata
1111
from common.utils import is_cs_branch
12-
from constants import BRANCH_PREFIX, JIRA_COMMENT_TEMPLATE
13-
from utils import check_subprocess, run_subprocess, run_tool, mcp_tools
14-
from tools.specfile import UpdateReleaseTool
12+
from agents.constants import BRANCH_PREFIX, JIRA_COMMENT_TEMPLATE
13+
from agents.utils import check_subprocess, run_subprocess, run_tool, mcp_tools
14+
from agents.tools.specfile import UpdateReleaseTool
1515

1616
logger = logging.getLogger(__name__)
1717

agents/tests/e2e/conftest.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import pytest
2+
3+
@pytest.hookimpl(wrapper=True)
def pytest_terminal_summary(terminalreporter: pytest.TerminalReporter, exitstatus, config: pytest.Config):
    """Append a "Metrics" section after the other terminal summary output.

    The text is read from ``config.stash`` under the "metrics" key; nothing is
    printed when the key is absent or the stored value is empty.
    """
    # Let the other implementations of this hook run first and keep their
    # result so it can be handed back unchanged, as a new-style wrapper should.
    result = yield

    metrics = config.stash.get("metrics", None)
    if metrics:
        terminalreporter.write_sep("=", "Metrics")
        terminalreporter.write_line(metrics, flush=True)

    return result

agents/tests/e2e/test_triage.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from tabulate import tabulate
2+
import pytest
3+
import os
4+
5+
from agents.triage_agent import run_workflow, TriageState
6+
from agents.observability import setup_observability
7+
from common.models import TriageOutputSchema, Resolution, BackportData
8+
9+
10+
class TriageAgentTestCase:
    """One end-to-end scenario for the triage agent.

    Holds the value passed to ``run_workflow`` as the Jira issue, the triage
    output the test compares against, and the metrics of the finished run.
    """

    def __init__(self, input, expected_output):
        self.input = input
        self.expected_output = expected_output
        # Set by the test once the workflow has returned; None until then.
        self.metrics: dict | None = None

    async def run(self) -> TriageState:
        """Run the triage workflow for ``self.input`` with dry-run disabled
        and return the state it produces."""
        return await run_workflow(self.input, dry_run=False)
18+
19+
20+
# Scenarios exercised by the parametrized test below; each pairs a Jira issue
# key with the backport resolution the comparison is made against.
test_cases = [
    TriageAgentTestCase(
        input="RHEL-15216",
        expected_output=TriageOutputSchema(
            resolution=Resolution.BACKPORT,
            data=BackportData(
                package="dnsmasq",
                patch_urls=[
                    "http://thekelleys.org.uk/gitweb/?p=dnsmasq.git;a=patch;h=dd33e98da09c487a58b6cb6693b8628c0b234a3b",
                ],
                justification="not-implemented",
                jira_issue="RHEL-15216",
                cve_id=None,
                fix_version="rhel-8.10",
            ),
        ),
    ),
    TriageAgentTestCase(
        input="RHEL-112546",
        expected_output=TriageOutputSchema(
            resolution=Resolution.BACKPORT,
            data=BackportData(
                package="libtiff",
                patch_urls=[
                    "https://gitlab.com/libtiff/libtiff/-/commit/d1c0719e004fbb223c571d286c73911569d4dbb6.patch",
                ],
                justification="not-implemented",
                jira_issue="RHEL-112546",
                cve_id="CVE-2025-9900",
                fix_version="rhel-9.6.z",
            ),
        ),
    ),
    TriageAgentTestCase(
        input="RHEL-61943",
        expected_output=TriageOutputSchema(
            resolution=Resolution.BACKPORT,
            data=BackportData(
                package="dnsmasq",
                patch_urls=[
                    "http://thekelleys.org.uk/gitweb/?p=dnsmasq.git;a=patch;h=eb1fe15ca80b6bc43cd6bfdf309ec6c590aff811",
                ],
                justification="not-implemented",
                jira_issue="RHEL-61943",
                cve_id=None,
                fix_version="rhel-8.10.z",
            ),
        ),
    ),
    TriageAgentTestCase(
        input="RHEL-29712",
        expected_output=TriageOutputSchema(
            resolution=Resolution.BACKPORT,
            data=BackportData(
                package="bind",
                patch_urls=[
                    "https://gitlab.isc.org/isc-projects/bind9/-/commit/7e2f50c36958f8c98d54e6d131f088a4837ce269",
                ],
                justification="not-implemented",
                jira_issue="RHEL-29712",
                cve_id=None,
                fix_version="rhel-8.10.z",
            ),
        ),
    ),
]
58+
59+
60+
@pytest.fixture(scope="session", autouse=True)
def observability_fixture():
    """Set up observability once per test session.

    The collector endpoint is read from the ``COLLECTOR_ENDPOINT`` environment
    variable; a missing variable raises ``KeyError``.
    """
    collector_endpoint = os.environ["COLLECTOR_ENDPOINT"]
    return setup_observability(collector_endpoint)
63+
64+
65+
@pytest.fixture(scope="session", autouse=True)
def mydata(request):
    """Publish a per-issue metrics table once the session's tests have run.

    On teardown the metrics stored on every entry of ``test_cases`` are
    rendered with ``tabulate`` and saved in ``request.config.stash`` under the
    "metrics" key.
    """
    yield

    collected_metrics = []
    for test_case in test_cases:
        # metrics is still None when the test never received a finished state
        # (for example the workflow raised); show a placeholder instead of
        # failing the teardown and losing the table for the other cases.
        values = list(test_case.metrics.values()) if test_case.metrics else ["n/a"]
        collected_metrics.append([test_case.input, *values])

    request.config.stash["metrics"] = tabulate(collected_metrics, ["Issue", "Time"])
70+
71+
72+
@pytest.mark.asyncio
@pytest.mark.parametrize("test_case", test_cases)
async def test_triage_agent(test_case: TriageAgentTestCase):
    """Run the triage workflow for one case and compare it with the expectation.

    The resolution and the package, patch URLs, Jira issue, CVE id and fix
    version of the result data must match; the justification is not compared.
    """
    final_state = await test_case.run()
    # Store the metrics before asserting so they are kept even if a check fails.
    test_case.metrics = final_state.metrics

    actual = final_state.triage_result
    expected = test_case.expected_output

    assert actual.resolution == expected.resolution
    assert actual.data.package == expected.data.package
    assert actual.data.patch_urls == expected.data.patch_urls
    assert actual.data.jira_issue == expected.data.jira_issue
    assert actual.data.cve_id == expected.data.cve_id
    assert actual.data.fix_version == expected.data.fix_version

agents/tools/commands.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from beeai_framework.emitter import Emitter
99
from beeai_framework.tools import JSONToolOutput, Tool, ToolError, ToolRunOptions
1010

11-
from utils import run_subprocess
11+
from agents.utils import run_subprocess
1212

1313
TIMEOUT = 10 * 60 # seconds
1414
ELLIPSIZED_LINES = 200

agents/tools/specfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from common.constants import BREWHUB_URL
1717
from common.validators import NonEmptyString
18-
from utils import get_absolute_path
18+
from agents.utils import get_absolute_path
1919

2020

2121
class GetPackageInfoToolInput(BaseModel):

agents/triage_agent.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
from beeai_framework.workflows import Workflow
2222
from beeai_framework.utils.strings import to_json
2323

24-
import tasks
24+
import agents.tasks as tasks
25+
from agents.metrics_middleware import MetricsMiddleware
2526
from common.config import load_rhel_config
2627
from common.models import (
2728
Task,
@@ -35,12 +36,12 @@
3536
)
3637
from common.utils import redis_client, fix_await
3738
from common.constants import JiraLabels, RedisQueues
38-
from observability import setup_observability
39-
from tools.commands import RunShellCommandTool
40-
from tools.patch_validator import PatchValidatorTool
41-
from tools.version_mapper import VersionMapperTool
42-
from tools.upstream_search import UpstreamSearchTool
43-
from utils import get_agent_execution_config, get_chat_model, get_tool_call_checker_config, mcp_tools, run_tool
39+
from agents.observability import setup_observability
40+
from agents.tools.commands import RunShellCommandTool
41+
from agents.tools.patch_validator import PatchValidatorTool
42+
from agents.tools.version_mapper import VersionMapperTool
43+
from agents.tools.upstream_search import UpstreamSearchTool
44+
from agents.utils import get_agent_execution_config, get_chat_model, get_tool_call_checker_config, mcp_tools, run_tool
4445

4546
logger = logging.getLogger(__name__)
4647

@@ -291,9 +292,11 @@ class TriageState(BaseModel):
291292
cve_eligibility_result: CVEEligibilityResult | None = Field(default=None)
292293
triage_result: OutputSchema | None = Field(default=None)
293294
target_branch: str | None = Field(default=None)
295+
metrics: dict | None = Field(default=None)
294296

295297

296298
async def run_workflow(jira_issue, dry_run):
299+
current_metrics_middleware = MetricsMiddleware()
297300
async with mcp_tools(os.getenv("MCP_GATEWAY_URL")) as gateway_tools:
298301
triage_agent = RequirementAgent(
299302
name="TriageAgent",
@@ -317,7 +320,7 @@ async def run_workflow(jira_issue, dry_run):
317320
ConditionalRequirement(PatchValidatorTool, only_after="get_jira_details"),
318321
ConditionalRequirement("set_jira_fields", only_after="get_jira_details"),
319322
],
320-
middlewares=[GlobalTrajectoryMiddleware(pretty=True)],
323+
middlewares=[current_metrics_middleware, GlobalTrajectoryMiddleware(pretty=True)],
321324
role="Red Hat Enterprise Linux developer",
322325
instructions=[
323326
"Use the `think` tool to reason through complex decisions and document your approach.",
@@ -497,6 +500,7 @@ async def comment_in_jira(state):
497500
workflow.add_step("comment_in_jira", comment_in_jira)
498501

499502
response = await workflow.run(TriageState(jira_issue=jira_issue))
503+
response.state.metrics = current_metrics_middleware.get_metrics()
500504
return response.state
501505

502506

compose.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,15 @@ services:
135135
command: ["python", "agents/triage_agent.py"]
136136
profiles: ["agents"]
137137

138+
triage-agent-e2e-tests:
139+
<<: *beeai-agent-c10s
140+
environment:
141+
<<: *beeai-env
142+
# the asyncio_default_test_loop_scope option below works around a litellm issue:
143+
# https://github.com/BerriAI/litellm/issues/14521
144+
command: ["pytest", "agents/tests/e2e/test_triage.py", "-o", "asyncio_default_test_loop_scope=session"]
145+
profiles: ["agents"]
146+
138147
backport-agent-c9s:
139148
<<: *beeai-agent-c9s
140149
command: ["python", "agents/backport_agent.py"]

0 commit comments

Comments
 (0)