Skip to content

Commit 3c59405

Browse files
authored
Fix bugs to support bedrock (#132)
1 parent 9a011a1 commit 3c59405

File tree

6 files changed

+93 -5 lines changed

6 files changed

+93 -5 lines changed

.vscode/launch.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
"python": "./.venv/bin/python",
99
"module": "rogue",
1010
"args": [
11+
"--debug",
1112
"server",
1213
"--host",
1314
"0.0.0.0",
@@ -57,7 +58,7 @@
5758
"--protocol",
5859
"mcp",
5960
"--judge-llm",
60-
"openai/o4-mini",
61+
"bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0",
6162
"--workdir",
6263
"./examples/tshirt_store_agent/.rogue"
6364
],

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
0.2.2
1+
0.2.3

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ requires-python = ">=3.10"
77
dependencies = [
88
"a2a-sdk==0.2.10",
99
"backoff>=2.2.1",
10+
"boto3>=1.40.69",
1011
"click>=8.0.0",
1112
"datasets==3.6.0",
1213
"fastapi>=0.121.0",

rogue/evaluator_agent/base_evaluator_agent.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import json
12
from abc import ABC, abstractmethod
23
from types import TracebackType
34
from typing import TYPE_CHECKING, Any, Callable, Optional, Self, Type
@@ -365,7 +366,6 @@ def _log_evaluation(
365366
context_id: str,
366367
evaluation_passed: bool,
367368
reason: str,
368-
scenario_type: Optional[str],
369369
**kwargs,
370370
) -> None:
371371
"""
@@ -395,7 +395,19 @@ def _log_evaluation(
395395
"context_id": context_id,
396396
},
397397
)
398-
scenario_dict = {"scenario": scenario}
398+
399+
try:
400+
scenario_dict = json.loads(scenario)
401+
except json.JSONDecodeError:
402+
logger.warning(
403+
"⚠️ Failed to parse scenario dict as JSON - recovering",
404+
extra={
405+
"scenario": scenario,
406+
"context_id": context_id,
407+
},
408+
)
409+
scenario_dict = {"scenario": scenario}
410+
return
399411
elif isinstance(scenario, dict):
400412
scenario_dict = scenario
401413
else:
@@ -423,7 +435,10 @@ def _log_evaluation(
423435
),
424436
"evaluation_passed (from agent)": evaluation_passed,
425437
"reason (from agent)": reason,
426-
"scenario_type": scenario_type,
438+
"scenario_type": scenario_dict.get(
439+
"scenario_type",
440+
ScenarioType.POLICY.value,
441+
),
427442
"expected_outcome": scenario_dict.get(
428443
"expected_outcome",
429444
"None",

rogue/evaluator_agent/mcp/mcp_evaluator_agent.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,16 @@ async def _send_message_to_evaluated_agent(
102102
context_id: str,
103103
message: str,
104104
) -> dict[str, str]:
105+
"""
106+
Sends a message to the evaluated agent and returns the agent's response.
107+
:param message: the text to send to the other agent.
108+
:param context_id: The context ID of the conversation.
109+
Each conversation has a unique context_id. All messages in the conversation
110+
have the same context_id.
111+
:return: A dictionary containing the response from the evaluated agent.
112+
- "response": the response string. If there is no response
113+
from the other agent, the string is empty.
114+
"""
105115
logger.info(
106116
"🔗 Making MCP call to evaluated agent",
107117
extra={

0 commit comments

Comments
 (0)