Fix bugs to support bedrock (#132)

yuval-qf · web-flow · commit 3c59405e2dd4 · 2025-11-10T16:05:56.000+02:00
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -8,6 +8,7 @@
             "python": "./.venv/bin/python",
             "module": "rogue",
             "args": [
+                "--debug",
                 "server",
                 "--host",
                 "0.0.0.0",
@@ -57,7 +58,7 @@
                 "--protocol",
                 "mcp",
                 "--judge-llm",
-                "openai/o4-mini",
+                "bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0",
                 "--workdir",
                 "./examples/tshirt_store_agent/.rogue"
             ],
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.2.2
+0.2.3
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,6 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
     "a2a-sdk==0.2.10",
     "backoff>=2.2.1",
+    "boto3>=1.40.69",
     "click>=8.0.0",
     "datasets==3.6.0",
     "fastapi>=0.121.0",
diff --git a/rogue/evaluator_agent/base_evaluator_agent.py b/rogue/evaluator_agent/base_evaluator_agent.py
@@ -1,3 +1,4 @@
+import json
 from abc import ABC, abstractmethod
 from types import TracebackType
 from typing import TYPE_CHECKING, Any, Callable, Optional, Self, Type
@@ -365,7 +366,6 @@ def _log_evaluation(
         context_id: str,
         evaluation_passed: bool,
         reason: str,
-        scenario_type: Optional[str],
         **kwargs,
     ) -> None:
         """
@@ -395,7 +395,19 @@ def _log_evaluation(
                     "context_id": context_id,
                 },
             )
-            scenario_dict = {"scenario": scenario}
+
+            try:
+                scenario_dict = json.loads(scenario)
+            except json.JSONDecodeError:
+                logger.warning(
+                    "⚠️ Failed to parse scenario dict as JSON - recovering",
+                    extra={
+                        "scenario": scenario,
+                        "context_id": context_id,
+                    },
+                )
+                scenario_dict = {"scenario": scenario}
+                return
         elif isinstance(scenario, dict):
             scenario_dict = scenario
         else:
@@ -423,7 +435,10 @@ def _log_evaluation(
                 ),
                 "evaluation_passed (from agent)": evaluation_passed,
                 "reason (from agent)": reason,
-                "scenario_type": scenario_type,
+                "scenario_type": scenario_dict.get(
+                    "scenario_type",
+                    ScenarioType.POLICY.value,
+                ),
                 "expected_outcome": scenario_dict.get(
                     "expected_outcome",
                     "None",
diff --git a/rogue/evaluator_agent/mcp/mcp_evaluator_agent.py b/rogue/evaluator_agent/mcp/mcp_evaluator_agent.py
@@ -102,6 +102,16 @@ async def _send_message_to_evaluated_agent(
         context_id: str,
         message: str,
     ) -> dict[str, str]:
+        """
+        Sends a message to the evaluated agent and returns the agent's response.
+        :param message: The text to send to the evaluated agent.
+        :param context_id: The context ID of the conversation.
+            Each conversation has a unique context_id. All messages in the conversation
+            have the same context_id.
+        :return: A dictionary containing the response from the evaluated agent.
+            - "response": The response string. If there is no response
+                from the evaluated agent, the string is empty.
+        """
         logger.info(
             "🔗 Making MCP call to evaluated agent",
             extra={
diff --git a/uv.lock b/uv.lock