fix: retrieve multiple text contentBlocks in messageContent

poshinchen · poshinchen · commit ee634214cd02 · 2026-02-11T10:48:55.000-05:00
diff --git a/src/strands_evals/evaluators/conciseness_evaluator.py b/src/strands_evals/evaluators/conciseness_evaluator.py
@@ -7,7 +7,7 @@
 from typing_extensions import Union
 
 from ..types.evaluation import EvaluationData, EvaluationOutput, InputT, OutputT
-from ..types.trace import EvaluationLevel, TextContent, ToolExecution, TraceLevelInput
+from ..types.trace import EvaluationLevel, ToolExecution, TraceLevelInput
 from .evaluator import Evaluator
 from .prompt_templates.conciseness import get_template
 
@@ -101,9 +101,7 @@ def _extract_user_prompt(self, parsed_input: TraceLevelInput) -> str:
 
         last_msg = parsed_input.session_history[-1]
         if not isinstance(last_msg, list) and self._has_text_content(last_msg):
-            first_content = last_msg.content[0]
-            if isinstance(first_content, TextContent):
-                return first_content.text
+            return self._extract_text_content(last_msg)
 
         return ""
 
@@ -124,9 +122,8 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
                 if isinstance(msg, list) and msg and isinstance(msg[0], ToolExecution):
                     continue  # Skip tool execution lists
                 if not isinstance(msg, list) and self._has_text_content(msg):
-                    first_content = msg.content[0]
-                    if isinstance(first_content, TextContent):
-                        history_lines.append(f"{msg.role.value.capitalize()}: {first_content.text}")
+                    text = self._extract_text_content(msg)
+                    history_lines.append(f"{msg.role.value.capitalize()}: {text}")
             history_str = "\n".join(history_lines)
             parts.append(f"# Previous turns:\n{history_str}")
 
diff --git a/src/strands_evals/evaluators/evaluator.py b/src/strands_evals/evaluators/evaluator.py
@@ -131,14 +131,37 @@ def _has_text_content(self, msg: UserMessage | AssistantMessage) -> TypeGuard[Us
             msg: Message object to check (UserMessage or AssistantMessage)
 
         Returns:
-            True if msg has content attribute with at least one item that is TextContent
+            True if msg has content attribute with at least one TextContent block.
+            Note: TextContent may not be at index 0 due to tool calls or other content types.
         """
-        return (
-            hasattr(msg, "content")
-            and bool(msg.content)
-            and len(msg.content) > 0
-            and isinstance(msg.content[0], TextContent)
-        )
+        if not hasattr(msg, "content") or not msg.content:
+            return False
+
+        # Check if ANY content block is TextContent, not just the first
+        return any(isinstance(content_block, TextContent) for content_block in msg.content)
+
+    def _extract_text_content(self, msg: UserMessage | AssistantMessage) -> str:
+        """Extract and concatenate text from all TextContent blocks in a message.
+
+        Args:
+            msg: Message object containing content blocks
+
+        Returns:
+            Concatenated text from all TextContent blocks, or empty string if none found.
+            Multiple text blocks are joined with a space.
+            Note: Iterates through all content blocks since TextContent may not be first.
+        """
+        if not hasattr(msg, "content") or not msg.content:
+            return ""
+
+        # Collect all TextContent blocks - there could be multiple
+        text_blocks = []
+        for content_block in msg.content:
+            if isinstance(content_block, TextContent):
+                text_blocks.append(content_block.text)
+
+        # Join multiple text blocks with space
+        return " ".join(text_blocks) if text_blocks else ""
 
     @classmethod
     def get_type_name(cls) -> str:
diff --git a/src/strands_evals/evaluators/faithfulness_evaluator.py b/src/strands_evals/evaluators/faithfulness_evaluator.py
@@ -108,7 +108,7 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
                         history_lines.append(f"Action: {tool_exec.tool_call.name}({tool_exec.tool_call.arguments})")
                         history_lines.append(f"Tool: {tool_exec.tool_result.content}")
                 else:
-                    text = msg.content[0].text if msg.content and hasattr(msg.content[0], "text") else ""
+                    text = self._extract_text_content(msg) if self._has_text_content(msg) else ""
                     history_lines.append(f"{msg.role.value.capitalize()}: {text}")
             history_str = "\n".join(history_lines)
             parts.append(f"# Conversation History:\n{history_str}")
diff --git a/src/strands_evals/evaluators/harmfulness_evaluator.py b/src/strands_evals/evaluators/harmfulness_evaluator.py
@@ -5,7 +5,7 @@
 from strands import Agent
 
 from ..types.evaluation import EvaluationData, EvaluationOutput, InputT, OutputT
-from ..types.trace import EvaluationLevel, TextContent, ToolExecution, TraceLevelInput
+from ..types.trace import EvaluationLevel, ToolExecution, TraceLevelInput
 from .evaluator import Evaluator
 from .prompt_templates.harmfulness import get_template
 
@@ -101,9 +101,7 @@ def _extract_user_prompt(self, parsed_input: TraceLevelInput) -> str:
 
         last_msg = parsed_input.session_history[-1]
         if not isinstance(last_msg, list) and self._has_text_content(last_msg):
-            first_content = last_msg.content[0]
-            if isinstance(first_content, TextContent):
-                return first_content.text
+            return self._extract_text_content(last_msg)
 
         return ""
 
@@ -124,9 +122,8 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
                 if isinstance(msg, list) and msg and isinstance(msg[0], ToolExecution):
                     continue  # Skip tool execution lists
                 if not isinstance(msg, list) and self._has_text_content(msg):
-                    first_content = msg.content[0]
-                    if isinstance(first_content, TextContent):
-                        history_lines.append(f"{msg.role.value.capitalize()}: {first_content.text}")
+                    text = self._extract_text_content(msg)
+                    history_lines.append(f"{msg.role.value.capitalize()}: {text}")
             history_str = "\n".join(history_lines)
             parts.append(f"# Previous turns:\n{history_str}")
 
diff --git a/src/strands_evals/evaluators/helpfulness_evaluator.py b/src/strands_evals/evaluators/helpfulness_evaluator.py
@@ -7,7 +7,7 @@
 from typing_extensions import Union
 
 from ..types.evaluation import EvaluationData, EvaluationOutput, InputT, OutputT
-from ..types.trace import EvaluationLevel, TextContent, ToolExecution, TraceLevelInput
+from ..types.trace import EvaluationLevel, ToolExecution, TraceLevelInput
 from .evaluator import Evaluator
 from .prompt_templates.helpfulness import get_template
 
@@ -115,9 +115,7 @@ def _extract_user_prompt(self, parsed_input: TraceLevelInput) -> str:
 
         last_msg = parsed_input.session_history[-1]
         if not isinstance(last_msg, list) and self._has_text_content(last_msg):
-            first_content = last_msg.content[0]
-            if isinstance(first_content, TextContent):
-                return first_content.text
+            return self._extract_text_content(last_msg)
 
         return ""
 
@@ -138,9 +136,8 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
                 if isinstance(msg, list) and msg and isinstance(msg[0], ToolExecution):
                     continue  # Skip tool execution lists
                 if not isinstance(msg, list) and self._has_text_content(msg):
-                    first_content = msg.content[0]
-                    if isinstance(first_content, TextContent):
-                        history_lines.append(f"{msg.role.value.capitalize()}: {first_content.text}")
+                    text = self._extract_text_content(msg)
+                    history_lines.append(f"{msg.role.value.capitalize()}: {text}")
             history_str = "\n".join(history_lines)
             parts.append(f"# Previous turns:\n{history_str}")
 
diff --git a/src/strands_evals/evaluators/response_relevance_evaluator.py b/src/strands_evals/evaluators/response_relevance_evaluator.py
@@ -8,7 +8,7 @@
 from typing_extensions import Union
 
 from ..types.evaluation import EvaluationData, EvaluationOutput, InputT, OutputT
-from ..types.trace import EvaluationLevel, TextContent, ToolExecution, TraceLevelInput
+from ..types.trace import EvaluationLevel, ToolExecution, TraceLevelInput
 from .evaluator import Evaluator
 from .prompt_templates.response_relevance import get_template
 
@@ -106,9 +106,7 @@ def _extract_user_prompt(self, parsed_input: TraceLevelInput) -> str:
 
         last_msg = parsed_input.session_history[-1]
         if not isinstance(last_msg, list) and self._has_text_content(last_msg):
-            first_content = last_msg.content[0]
-            if isinstance(first_content, TextContent):
-                return first_content.text
+            return self._extract_text_content(last_msg)
 
         return ""
 
@@ -129,9 +127,8 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
                 if isinstance(msg, list) and msg and isinstance(msg[0], ToolExecution):
                     continue  # Skip tool execution lists
                 if not isinstance(msg, list) and self._has_text_content(msg):
-                    first_content = msg.content[0]
-                    if isinstance(first_content, TextContent):
-                        history_lines.append(f"{msg.role.value.capitalize()}: {first_content.text}")
+                    text = self._extract_text_content(msg)
+                    history_lines.append(f"{msg.role.value.capitalize()}: {text}")
             history_str = "\n".join(history_lines)
             parts.append(f"# Previous turns:\n{history_str}")
 
diff --git a/src/strands_evals/evaluators/tool_parameter_accuracy_evaluator.py b/src/strands_evals/evaluators/tool_parameter_accuracy_evaluator.py
@@ -107,7 +107,7 @@ def _format_prompt(self, tool_input: ToolLevelInput) -> str:
                         history_lines.append(f"Action: {tool_exec.tool_call.name}({tool_exec.tool_call.arguments})")
                         history_lines.append(f"Tool: {tool_exec.tool_result.content}")
                 else:
-                    text = msg.content[0].text if msg.content and hasattr(msg.content[0], "text") else ""
+                    text = self._extract_text_content(msg) if self._has_text_content(msg) else ""
                     history_lines.append(f"{msg.role.value.capitalize()}: {text}")
             history_str = "\n".join(history_lines)
             parts.append(f"## Previous conversation history\n{history_str}")
diff --git a/src/strands_evals/evaluators/tool_selection_accuracy_evaluator.py b/src/strands_evals/evaluators/tool_selection_accuracy_evaluator.py
@@ -107,7 +107,7 @@ def _format_prompt(self, tool_input: ToolLevelInput) -> str:
                         history_lines.append(f"Action: {tool_exec.tool_call.name}({tool_exec.tool_call.arguments})")
                         history_lines.append(f"Tool: {tool_exec.tool_result.content}")
                 else:
-                    text = msg.content[0].text if msg.content and hasattr(msg.content[0], "text") else ""
+                    text = self._extract_text_content(msg) if self._has_text_content(msg) else ""
                     history_lines.append(f"{msg.role.value.capitalize()}: {text}")
             history_str = "\n".join(history_lines)
             parts.append(f"## Previous conversation history\n{history_str}")
diff --git a/src/strands_evals/extractors/tools_use_extractor.py b/src/strands_evals/extractors/tools_use_extractor.py
@@ -43,13 +43,24 @@ def extract_agent_tools_used_from_messages(agent_messages):
                             if next_message.get("role") == "user":
                                 content = next_message.get("content")
                                 if content:
-                                    tool_result_dict = content[0].get("toolResult")
+                                    # Find toolResult in content blocks - may not be at index 0
+                                    tool_result_dict = None
+                                    for content_block in content:
+                                        if "toolResult" in content_block:
+                                            tool_result_dict = content_block.get("toolResult")
+                                            break
+
                                     if tool_result_dict and tool_result_dict.get("toolUseId") == tool_id:
                                         tool_result_content = tool_result_dict.get("content", [])
-                                        if len(tool_result_content) > 0:
-                                            tool_result = tool_result_content[0].get("text")
-                                            is_error = tool_result_dict.get("status") == "error"
-                                            break
+                                        # Find first text in tool result content - may not be at index 0
+                                        tool_result = None
+                                        if tool_result_content:
+                                            for result_item in tool_result_content:
+                                                if isinstance(result_item, dict) and "text" in result_item:
+                                                    tool_result = result_item.get("text")
+                                                    break
+                                        is_error = tool_result_dict.get("status") == "error"
+                                        break
 
                         tools_used.append(
                             {"name": tool_name, "input": tool_input, "tool_result": tool_result, "is_error": is_error}
diff --git a/tests/strands_evals/evaluators/test_evaluator.py b/tests/strands_evals/evaluators/test_evaluator.py
@@ -149,3 +149,155 @@ def test_to_dict_with_none_model():
     assert evaluator_dict["rubric"] == "test rubric"
     assert "model" not in evaluator_dict
     assert evaluator_dict["model_id"] == DEFAULT_BEDROCK_MODEL_ID
+
+
+
+# Tests for _has_text_content and _extract_text_content helper methods
+
+
+def test_has_text_content_with_single_text_at_start():
+    """Test _has_text_content with TextContent at index 0"""
+    from strands_evals.types.trace import AssistantMessage, TextContent
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(content=[TextContent(text="Hello")])
+
+    assert evaluator._has_text_content(msg) is True
+
+
+def test_has_text_content_with_text_after_tool_call():
+    """Test _has_text_content with TextContent not at index 0"""
+    from strands_evals.types.trace import AssistantMessage, TextContent, ToolCallContent
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(
+        content=[
+            ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1"),
+            TextContent(text="Let me calculate that"),
+        ]
+    )
+
+    assert evaluator._has_text_content(msg) is True
+
+
+def test_has_text_content_with_no_text():
+    """Test _has_text_content with no TextContent blocks"""
+    from strands_evals.types.trace import AssistantMessage, ToolCallContent
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(content=[ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1")])
+
+    assert evaluator._has_text_content(msg) is False
+
+
+def test_has_text_content_with_multiple_text_blocks():
+    """Test _has_text_content with multiple TextContent blocks"""
+    from strands_evals.types.trace import AssistantMessage, TextContent
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(content=[TextContent(text="Hello"), TextContent(text="World")])
+
+    assert evaluator._has_text_content(msg) is True
+
+
+def test_has_text_content_with_empty_content():
+    """Test _has_text_content with empty content list"""
+    from strands_evals.types.trace import AssistantMessage
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(content=[])
+
+    assert evaluator._has_text_content(msg) is False
+
+
+def test_extract_text_content_single_block():
+    """Test _extract_text_content with single TextContent block"""
+    from strands_evals.types.trace import UserMessage, TextContent
+
+    evaluator = SimpleEvaluator()
+    msg = UserMessage(content=[TextContent(text="Hello world")])
+
+    result = evaluator._extract_text_content(msg)
+    assert result == "Hello world"
+
+
+def test_extract_text_content_multiple_blocks():
+    """Test _extract_text_content with multiple TextContent blocks"""
+    from strands_evals.types.trace import AssistantMessage, TextContent
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(content=[TextContent(text="Hello"), TextContent(text="world")])
+
+    result = evaluator._extract_text_content(msg)
+    assert result == "Hello world"
+
+
+def test_extract_text_content_text_not_at_start():
+    """Test _extract_text_content with TextContent not at index 0"""
+    from strands_evals.types.trace import AssistantMessage, TextContent, ToolCallContent
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(
+        content=[
+            ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1"),
+            TextContent(text="Let me calculate"),
+        ]
+    )
+
+    result = evaluator._extract_text_content(msg)
+    assert result == "Let me calculate"
+
+
+def test_extract_text_content_mixed_content():
+    """Test _extract_text_content with mixed content types"""
+    from strands_evals.types.trace import AssistantMessage, TextContent, ToolCallContent
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(
+        content=[
+            TextContent(text="First text"),
+            ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1"),
+            TextContent(text="Second text"),
+        ]
+    )
+
+    result = evaluator._extract_text_content(msg)
+    assert result == "First text Second text"
+
+
+def test_extract_text_content_no_text():
+    """Test _extract_text_content with no TextContent blocks"""
+    from strands_evals.types.trace import AssistantMessage, ToolCallContent
+
+    evaluator = SimpleEvaluator()
+    msg = AssistantMessage(content=[ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1")])
+
+    result = evaluator._extract_text_content(msg)
+    assert result == ""
+
+
+def test_extract_text_content_empty_content():
+    """Test _extract_text_content with empty content list"""
+    from strands_evals.types.trace import UserMessage
+
+    evaluator = SimpleEvaluator()
+    msg = UserMessage(content=[])
+
+    result = evaluator._extract_text_content(msg)
+    assert result == ""
+
+
+def test_extract_text_content_user_message_with_tool_result():
+    """Test _extract_text_content with UserMessage containing tool results and text"""
+    from strands_evals.types.trace import UserMessage, TextContent, ToolResultContent
+
+    evaluator = SimpleEvaluator()
+    msg = UserMessage(
+        content=[
+            ToolResultContent(content="Result: 42", tool_call_id="t1"),
+            TextContent(text="Here's the result"),
+        ]
+    )
+
+    result = evaluator._extract_text_content(msg)
+    assert result == "Here's the result"