Skip to content

Commit ee63421

Browse files
committed
fix: retrieve multiple text contentBlocks in messageContent
1 parent 8618b04 commit ee63421

File tree

10 files changed

+217
-43
lines changed

10 files changed

+217
-43
lines changed

src/strands_evals/evaluators/conciseness_evaluator.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing_extensions import Union
88

99
from ..types.evaluation import EvaluationData, EvaluationOutput, InputT, OutputT
10-
from ..types.trace import EvaluationLevel, TextContent, ToolExecution, TraceLevelInput
10+
from ..types.trace import EvaluationLevel, ToolExecution, TraceLevelInput
1111
from .evaluator import Evaluator
1212
from .prompt_templates.conciseness import get_template
1313

@@ -101,9 +101,7 @@ def _extract_user_prompt(self, parsed_input: TraceLevelInput) -> str:
101101

102102
last_msg = parsed_input.session_history[-1]
103103
if not isinstance(last_msg, list) and self._has_text_content(last_msg):
104-
first_content = last_msg.content[0]
105-
if isinstance(first_content, TextContent):
106-
return first_content.text
104+
return self._extract_text_content(last_msg)
107105

108106
return ""
109107

@@ -124,9 +122,8 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
124122
if isinstance(msg, list) and msg and isinstance(msg[0], ToolExecution):
125123
continue # Skip tool execution lists
126124
if not isinstance(msg, list) and self._has_text_content(msg):
127-
first_content = msg.content[0]
128-
if isinstance(first_content, TextContent):
129-
history_lines.append(f"{msg.role.value.capitalize()}: {first_content.text}")
125+
text = self._extract_text_content(msg)
126+
history_lines.append(f"{msg.role.value.capitalize()}: {text}")
130127
history_str = "\n".join(history_lines)
131128
parts.append(f"# Previous turns:\n{history_str}")
132129

src/strands_evals/evaluators/evaluator.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,14 +131,37 @@ def _has_text_content(self, msg: UserMessage | AssistantMessage) -> TypeGuard[Us
131131
msg: Message object to check (UserMessage or AssistantMessage)
132132
133133
Returns:
134-
True if msg has content attribute with at least one item that is TextContent
134+
True if msg has content attribute with at least one TextContent block.
135+
Note: TextContent may not be at index 0 due to tool calls or other content types.
135136
"""
136-
return (
137-
hasattr(msg, "content")
138-
and bool(msg.content)
139-
and len(msg.content) > 0
140-
and isinstance(msg.content[0], TextContent)
141-
)
137+
if not hasattr(msg, "content") or not msg.content:
138+
return False
139+
140+
# Check if ANY content block is TextContent, not just the first
141+
return any(isinstance(content_block, TextContent) for content_block in msg.content)
142+
143+
def _extract_text_content(self, msg: UserMessage | AssistantMessage) -> str:
144+
"""Extract and concatenate text from all TextContent blocks in a message.
145+
146+
Args:
147+
msg: Message object containing content blocks
148+
149+
Returns:
150+
Concatenated text from all TextContent blocks, or empty string if none found.
151+
Multiple text blocks are joined with a space.
152+
Note: Iterates through all content blocks since TextContent may not be first.
153+
"""
154+
if not hasattr(msg, "content") or not msg.content:
155+
return ""
156+
157+
# Collect all TextContent blocks - there could be multiple
158+
text_blocks = []
159+
for content_block in msg.content:
160+
if isinstance(content_block, TextContent):
161+
text_blocks.append(content_block.text)
162+
163+
# Join multiple text blocks with space
164+
return " ".join(text_blocks) if text_blocks else ""
142165

143166
@classmethod
144167
def get_type_name(cls) -> str:

src/strands_evals/evaluators/faithfulness_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
108108
history_lines.append(f"Action: {tool_exec.tool_call.name}({tool_exec.tool_call.arguments})")
109109
history_lines.append(f"Tool: {tool_exec.tool_result.content}")
110110
else:
111-
text = msg.content[0].text if msg.content and hasattr(msg.content[0], "text") else ""
111+
text = self._extract_text_content(msg) if self._has_text_content(msg) else ""
112112
history_lines.append(f"{msg.role.value.capitalize()}: {text}")
113113
history_str = "\n".join(history_lines)
114114
parts.append(f"# Conversation History:\n{history_str}")

src/strands_evals/evaluators/harmfulness_evaluator.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from strands import Agent
66

77
from ..types.evaluation import EvaluationData, EvaluationOutput, InputT, OutputT
8-
from ..types.trace import EvaluationLevel, TextContent, ToolExecution, TraceLevelInput
8+
from ..types.trace import EvaluationLevel, ToolExecution, TraceLevelInput
99
from .evaluator import Evaluator
1010
from .prompt_templates.harmfulness import get_template
1111

@@ -101,9 +101,7 @@ def _extract_user_prompt(self, parsed_input: TraceLevelInput) -> str:
101101

102102
last_msg = parsed_input.session_history[-1]
103103
if not isinstance(last_msg, list) and self._has_text_content(last_msg):
104-
first_content = last_msg.content[0]
105-
if isinstance(first_content, TextContent):
106-
return first_content.text
104+
return self._extract_text_content(last_msg)
107105

108106
return ""
109107

@@ -124,9 +122,8 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
124122
if isinstance(msg, list) and msg and isinstance(msg[0], ToolExecution):
125123
continue # Skip tool execution lists
126124
if not isinstance(msg, list) and self._has_text_content(msg):
127-
first_content = msg.content[0]
128-
if isinstance(first_content, TextContent):
129-
history_lines.append(f"{msg.role.value.capitalize()}: {first_content.text}")
125+
text = self._extract_text_content(msg)
126+
history_lines.append(f"{msg.role.value.capitalize()}: {text}")
130127
history_str = "\n".join(history_lines)
131128
parts.append(f"# Previous turns:\n{history_str}")
132129

src/strands_evals/evaluators/helpfulness_evaluator.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing_extensions import Union
88

99
from ..types.evaluation import EvaluationData, EvaluationOutput, InputT, OutputT
10-
from ..types.trace import EvaluationLevel, TextContent, ToolExecution, TraceLevelInput
10+
from ..types.trace import EvaluationLevel, ToolExecution, TraceLevelInput
1111
from .evaluator import Evaluator
1212
from .prompt_templates.helpfulness import get_template
1313

@@ -115,9 +115,7 @@ def _extract_user_prompt(self, parsed_input: TraceLevelInput) -> str:
115115

116116
last_msg = parsed_input.session_history[-1]
117117
if not isinstance(last_msg, list) and self._has_text_content(last_msg):
118-
first_content = last_msg.content[0]
119-
if isinstance(first_content, TextContent):
120-
return first_content.text
118+
return self._extract_text_content(last_msg)
121119

122120
return ""
123121

@@ -138,9 +136,8 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
138136
if isinstance(msg, list) and msg and isinstance(msg[0], ToolExecution):
139137
continue # Skip tool execution lists
140138
if not isinstance(msg, list) and self._has_text_content(msg):
141-
first_content = msg.content[0]
142-
if isinstance(first_content, TextContent):
143-
history_lines.append(f"{msg.role.value.capitalize()}: {first_content.text}")
139+
text = self._extract_text_content(msg)
140+
history_lines.append(f"{msg.role.value.capitalize()}: {text}")
144141
history_str = "\n".join(history_lines)
145142
parts.append(f"# Previous turns:\n{history_str}")
146143

src/strands_evals/evaluators/response_relevance_evaluator.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from typing_extensions import Union
99

1010
from ..types.evaluation import EvaluationData, EvaluationOutput, InputT, OutputT
11-
from ..types.trace import EvaluationLevel, TextContent, ToolExecution, TraceLevelInput
11+
from ..types.trace import EvaluationLevel, ToolExecution, TraceLevelInput
1212
from .evaluator import Evaluator
1313
from .prompt_templates.response_relevance import get_template
1414

@@ -106,9 +106,7 @@ def _extract_user_prompt(self, parsed_input: TraceLevelInput) -> str:
106106

107107
last_msg = parsed_input.session_history[-1]
108108
if not isinstance(last_msg, list) and self._has_text_content(last_msg):
109-
first_content = last_msg.content[0]
110-
if isinstance(first_content, TextContent):
111-
return first_content.text
109+
return self._extract_text_content(last_msg)
112110

113111
return ""
114112

@@ -129,9 +127,8 @@ def _format_prompt(self, parsed_input: TraceLevelInput) -> str:
129127
if isinstance(msg, list) and msg and isinstance(msg[0], ToolExecution):
130128
continue # Skip tool execution lists
131129
if not isinstance(msg, list) and self._has_text_content(msg):
132-
first_content = msg.content[0]
133-
if isinstance(first_content, TextContent):
134-
history_lines.append(f"{msg.role.value.capitalize()}: {first_content.text}")
130+
text = self._extract_text_content(msg)
131+
history_lines.append(f"{msg.role.value.capitalize()}: {text}")
135132
history_str = "\n".join(history_lines)
136133
parts.append(f"# Previous turns:\n{history_str}")
137134

src/strands_evals/evaluators/tool_parameter_accuracy_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def _format_prompt(self, tool_input: ToolLevelInput) -> str:
107107
history_lines.append(f"Action: {tool_exec.tool_call.name}({tool_exec.tool_call.arguments})")
108108
history_lines.append(f"Tool: {tool_exec.tool_result.content}")
109109
else:
110-
text = msg.content[0].text if msg.content and hasattr(msg.content[0], "text") else ""
110+
text = self._extract_text_content(msg) if self._has_text_content(msg) else ""
111111
history_lines.append(f"{msg.role.value.capitalize()}: {text}")
112112
history_str = "\n".join(history_lines)
113113
parts.append(f"## Previous conversation history\n{history_str}")

src/strands_evals/evaluators/tool_selection_accuracy_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def _format_prompt(self, tool_input: ToolLevelInput) -> str:
107107
history_lines.append(f"Action: {tool_exec.tool_call.name}({tool_exec.tool_call.arguments})")
108108
history_lines.append(f"Tool: {tool_exec.tool_result.content}")
109109
else:
110-
text = msg.content[0].text if msg.content and hasattr(msg.content[0], "text") else ""
110+
text = self._extract_text_content(msg) if self._has_text_content(msg) else ""
111111
history_lines.append(f"{msg.role.value.capitalize()}: {text}")
112112
history_str = "\n".join(history_lines)
113113
parts.append(f"## Previous conversation history\n{history_str}")

src/strands_evals/extractors/tools_use_extractor.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,24 @@ def extract_agent_tools_used_from_messages(agent_messages):
4343
if next_message.get("role") == "user":
4444
content = next_message.get("content")
4545
if content:
46-
tool_result_dict = content[0].get("toolResult")
46+
# Find toolResult in content blocks - may not be at index 0
47+
tool_result_dict = None
48+
for content_block in content:
49+
if "toolResult" in content_block:
50+
tool_result_dict = content_block.get("toolResult")
51+
break
52+
4753
if tool_result_dict and tool_result_dict.get("toolUseId") == tool_id:
4854
tool_result_content = tool_result_dict.get("content", [])
49-
if len(tool_result_content) > 0:
50-
tool_result = tool_result_content[0].get("text")
51-
is_error = tool_result_dict.get("status") == "error"
52-
break
55+
# Find first text in tool result content - may not be at index 0
56+
tool_result = None
57+
if tool_result_content:
58+
for result_item in tool_result_content:
59+
if isinstance(result_item, dict) and "text" in result_item:
60+
tool_result = result_item.get("text")
61+
break
62+
is_error = tool_result_dict.get("status") == "error"
63+
break
5364

5465
tools_used.append(
5566
{"name": tool_name, "input": tool_input, "tool_result": tool_result, "is_error": is_error}

tests/strands_evals/evaluators/test_evaluator.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,155 @@ def test_to_dict_with_none_model():
149149
assert evaluator_dict["rubric"] == "test rubric"
150150
assert "model" not in evaluator_dict
151151
assert evaluator_dict["model_id"] == DEFAULT_BEDROCK_MODEL_ID
152+
153+
154+
155+
# Tests for _has_text_content and _extract_text_content helper methods
156+
157+
158+
def test_has_text_content_with_single_text_at_start():
159+
"""Test _has_text_content with TextContent at index 0"""
160+
from strands_evals.types.trace import AssistantMessage, TextContent
161+
162+
evaluator = SimpleEvaluator()
163+
msg = AssistantMessage(content=[TextContent(text="Hello")])
164+
165+
assert evaluator._has_text_content(msg) is True
166+
167+
168+
def test_has_text_content_with_text_after_tool_call():
169+
"""Test _has_text_content with TextContent not at index 0"""
170+
from strands_evals.types.trace import AssistantMessage, TextContent, ToolCallContent
171+
172+
evaluator = SimpleEvaluator()
173+
msg = AssistantMessage(
174+
content=[
175+
ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1"),
176+
TextContent(text="Let me calculate that"),
177+
]
178+
)
179+
180+
assert evaluator._has_text_content(msg) is True
181+
182+
183+
def test_has_text_content_with_no_text():
184+
"""Test _has_text_content with no TextContent blocks"""
185+
from strands_evals.types.trace import AssistantMessage, ToolCallContent
186+
187+
evaluator = SimpleEvaluator()
188+
msg = AssistantMessage(content=[ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1")])
189+
190+
assert evaluator._has_text_content(msg) is False
191+
192+
193+
def test_has_text_content_with_multiple_text_blocks():
194+
"""Test _has_text_content with multiple TextContent blocks"""
195+
from strands_evals.types.trace import AssistantMessage, TextContent
196+
197+
evaluator = SimpleEvaluator()
198+
msg = AssistantMessage(content=[TextContent(text="Hello"), TextContent(text="World")])
199+
200+
assert evaluator._has_text_content(msg) is True
201+
202+
203+
def test_has_text_content_with_empty_content():
204+
"""Test _has_text_content with empty content list"""
205+
from strands_evals.types.trace import AssistantMessage
206+
207+
evaluator = SimpleEvaluator()
208+
msg = AssistantMessage(content=[])
209+
210+
assert evaluator._has_text_content(msg) is False
211+
212+
213+
def test_extract_text_content_single_block():
214+
"""Test _extract_text_content with single TextContent block"""
215+
from strands_evals.types.trace import UserMessage, TextContent
216+
217+
evaluator = SimpleEvaluator()
218+
msg = UserMessage(content=[TextContent(text="Hello world")])
219+
220+
result = evaluator._extract_text_content(msg)
221+
assert result == "Hello world"
222+
223+
224+
def test_extract_text_content_multiple_blocks():
225+
"""Test _extract_text_content with multiple TextContent blocks"""
226+
from strands_evals.types.trace import AssistantMessage, TextContent
227+
228+
evaluator = SimpleEvaluator()
229+
msg = AssistantMessage(content=[TextContent(text="Hello"), TextContent(text="world")])
230+
231+
result = evaluator._extract_text_content(msg)
232+
assert result == "Hello world"
233+
234+
235+
def test_extract_text_content_text_not_at_start():
236+
"""Test _extract_text_content with TextContent not at index 0"""
237+
from strands_evals.types.trace import AssistantMessage, TextContent, ToolCallContent
238+
239+
evaluator = SimpleEvaluator()
240+
msg = AssistantMessage(
241+
content=[
242+
ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1"),
243+
TextContent(text="Let me calculate"),
244+
]
245+
)
246+
247+
result = evaluator._extract_text_content(msg)
248+
assert result == "Let me calculate"
249+
250+
251+
def test_extract_text_content_mixed_content():
252+
"""Test _extract_text_content with mixed content types"""
253+
from strands_evals.types.trace import AssistantMessage, TextContent, ToolCallContent
254+
255+
evaluator = SimpleEvaluator()
256+
msg = AssistantMessage(
257+
content=[
258+
TextContent(text="First text"),
259+
ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1"),
260+
TextContent(text="Second text"),
261+
]
262+
)
263+
264+
result = evaluator._extract_text_content(msg)
265+
assert result == "First text Second text"
266+
267+
268+
def test_extract_text_content_no_text():
269+
"""Test _extract_text_content with no TextContent blocks"""
270+
from strands_evals.types.trace import AssistantMessage, ToolCallContent
271+
272+
evaluator = SimpleEvaluator()
273+
msg = AssistantMessage(content=[ToolCallContent(name="calculator", arguments={"x": 1}, tool_call_id="t1")])
274+
275+
result = evaluator._extract_text_content(msg)
276+
assert result == ""
277+
278+
279+
def test_extract_text_content_empty_content():
280+
"""Test _extract_text_content with empty content list"""
281+
from strands_evals.types.trace import UserMessage
282+
283+
evaluator = SimpleEvaluator()
284+
msg = UserMessage(content=[])
285+
286+
result = evaluator._extract_text_content(msg)
287+
assert result == ""
288+
289+
290+
def test_extract_text_content_user_message_with_tool_result():
291+
"""Test _extract_text_content with UserMessage containing tool results and text"""
292+
from strands_evals.types.trace import UserMessage, TextContent, ToolResultContent
293+
294+
evaluator = SimpleEvaluator()
295+
msg = UserMessage(
296+
content=[
297+
ToolResultContent(content="Result: 42", tool_call_id="t1"),
298+
TextContent(text="Here's the result"),
299+
]
300+
)
301+
302+
result = evaluator._extract_text_content(msg)
303+
assert result == "Here's the result"

0 commit comments

Comments
 (0)