Skip to content

Commit a717486

Browse files
committed
feat(conventions): added otel v1.37 conventions mapping
1 parent 9d032f9 commit a717486

File tree

4 files changed

+608
-26
lines changed

4 files changed

+608
-26
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Converters for transforming telemetry data to Session format."""
22

33
from .session_mapper import SessionMapper
4-
from .strands_in_memory_session_mapper import StrandsInMemorySessionMapper
4+
from .strands_in_memory_session_mapper import GenAIConventionVersion, StrandsInMemorySessionMapper
55

6-
__all__ = ["SessionMapper", "StrandsInMemorySessionMapper"]
6+
__all__ = ["GenAIConventionVersion", "SessionMapper", "StrandsInMemorySessionMapper"]

src/strands_evals/mappers/strands_in_memory_session_mapper.py

Lines changed: 256 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33
from collections import defaultdict
44
from datetime import datetime, timezone
5+
from enum import Enum
56
from typing import Any
67

78
from opentelemetry.sdk.trace import ReadableSpan
@@ -27,10 +28,52 @@
2728
logger = logging.getLogger(__name__)
2829

2930

31+
class GenAIConventionVersion(Enum):
    """Which GenAI semantic-convention layout the session mapper should read.

    Member values are modelled on the options of OpenTelemetry's
    OTEL_SEMCONV_STABILITY_OPT_IN environment variable.

    Attributes:
        LEGACY: Conventions of v1.36.0 or earlier. Spans carry the
            gen_ai.system attribute and messages arrive as separate events
            (gen_ai.user.message, gen_ai.choice, etc.).
        LATEST_EXPERIMENTAL: Conventions of v1.37+. Spans carry the
            gen_ai.provider.name attribute and messages arrive in unified
            gen_ai.client.inference.operation.details events. Matches OTEL's
            "gen_ai_latest_experimental" stability option.
        AUTO: Decide between the two above by checking span attributes for
            gen_ai.provider.name.
    """

    LEGACY = "legacy"
    LATEST_EXPERIMENTAL = "gen_ai_latest_experimental"
    AUTO = "auto"
50+
51+
3052
class StrandsInMemorySessionMapper(SessionMapper):
31-
"""Maps OpenTelemetry in-memory spans to Session format for evaluation."""
53+
"""Maps OpenTelemetry in-memory spans to Session format for evaluation.
54+
55+
Supports both legacy and latest GenAI semantic conventions:
56+
- Latest (v1.37+): gen_ai.provider.name with unified gen_ai.client.inference.operation.details events
57+
- Legacy: gen_ai.system with separate message events (gen_ai.user.message, gen_ai.choice, etc.)
58+
59+
The mapper can auto-detect the convention version or accept an explicit setting. Defaults to AUTO; auto-detection falls back to Legacy when the first span has no gen_ai.provider.name attribute.
60+
"""
61+
62+
def __init__(self, convention_version: GenAIConventionVersion = GenAIConventionVersion.AUTO):
    """Create a mapper bound to a GenAI semantic convention version.

    Args:
        convention_version: Convention layout used when reading spans.
            When omitted, AUTO is stored and the version is detected
            from span attributes.
    """
    self.convention_version = convention_version
3270

3371
def map_to_session(self, otel_spans: list[ReadableSpan], session_id: str) -> Session:
72+
# Auto-detect convention version if set to AUTO
73+
if otel_spans and self.convention_version == GenAIConventionVersion.AUTO:
74+
detected_version = self._detect_convention_version(otel_spans[0])
75+
self.convention_version = detected_version
76+
3477
traces_by_id = defaultdict(list)
3578
for span in otel_spans:
3679
trace_id = format(span.context.trace_id, "032x")
@@ -44,6 +87,26 @@ def map_to_session(self, otel_spans: list[ReadableSpan], session_id: str) -> Ses
4487

4588
return Session(traces=traces, session_id=session_id)
4689

90+
def _detect_convention_version(self, span: ReadableSpan) -> GenAIConventionVersion:
    """Infer the GenAI semantic convention version from a single span.

    Args:
        span: Span whose attributes are inspected.

    Returns:
        GenAIConventionVersion.LATEST_EXPERIMENTAL when the span has a
        gen_ai.provider.name attribute; GenAIConventionVersion.LEGACY when
        the attribute is missing or the span has no attributes.
    """
    # Guard first: without the marker attribute the span is treated as legacy.
    if not span.attributes or "gen_ai.provider.name" not in span.attributes:
        return GenAIConventionVersion.LEGACY

    return GenAIConventionVersion.LATEST_EXPERIMENTAL
101+
102+
def _use_latest_conventions(self) -> bool:
    """Tell whether spans should be read with the latest conventions.

    Returns:
        True only when the configured version is LATEST_EXPERIMENTAL.
        Any other value (LEGACY, or AUTO that has not been resolved)
        yields False.
    """
    latest = GenAIConventionVersion.LATEST_EXPERIMENTAL
    return self.convention_version == latest
109+
47110
def _convert_trace(self, trace_id: str, otel_spans: list[ReadableSpan], session_id: str) -> Trace:
48111
converted_spans: list[InferenceSpan | ToolExecutionSpan | AgentInvocationSpan] = []
49112

@@ -126,6 +189,16 @@ def _process_tool_results(self, content_list: list[dict[str, Any]]) -> list[Text
126189

127190
def _convert_inference_span(self, span: ReadableSpan, session_id: str) -> InferenceSpan:
    """Build an InferenceSpan from an OTEL span.

    Args:
        span: Source span whose events hold the messages.
        session_id: Session identifier forwarded to the span-info builder.

    Returns:
        InferenceSpan with the span info, the extracted messages and empty
        metadata.
    """
    span_info = self._create_span_info(span, session_id)

    # Pick the extractor that matches the active convention version.
    extract_messages = (
        self._extract_messages_from_inference_details
        if self._use_latest_conventions()
        else self._extract_messages_from_events
    )

    return InferenceSpan(span_info=span_info, messages=extract_messages(span), metadata={})
199+
200+
def _extract_messages_from_events(self, span: ReadableSpan) -> list[UserMessage | AssistantMessage]:
201+
"""Extract messages from legacy event format (gen_ai.user.message, etc.)."""
129202
messages: list[UserMessage | AssistantMessage] = []
130203

131204
for event in span.events:
@@ -156,29 +229,160 @@ def _convert_inference_span(self, span: ReadableSpan, session_id: str) -> Infere
156229
except Exception as e:
157230
logger.warning(f"Failed to process event {event.name}: {e}")
158231

159-
return InferenceSpan(span_info=span_info, messages=messages, metadata={})
232+
return messages
233+
234+
def _extract_messages_from_inference_details(self, span: ReadableSpan) -> list[UserMessage | AssistantMessage]:
    """Collect messages from gen_ai.client.inference.operation.details events.

    Args:
        span: Span whose events are scanned.

    Returns:
        Converted messages in event order; within an event, input messages
        come before output messages. Messages that fail conversion are
        omitted. An event that raises while being processed is logged and
        skipped from the point of failure.
    """
    collected: list[UserMessage | AssistantMessage] = []
    # Inputs are handled before outputs, matching the order they are read per event.
    message_keys = ("gen_ai.input.messages", "gen_ai.output.messages")

    for event in span.events:
        try:
            if event.name != "gen_ai.client.inference.operation.details":
                continue

            attrs = event.attributes
            if not attrs:
                continue

            for key in message_keys:
                if key not in attrs:
                    continue
                for raw_message in self._parse_json_attr(attrs, key):
                    converted = self._convert_inference_messages(raw_message)
                    if converted:
                        collected.append(converted)
        except Exception as e:
            logger.warning(f"Failed to process inference details event: {e}")

    return collected
263+
264+
def _convert_inference_messages(self, otel_msg: dict[str, Any]) -> UserMessage | AssistantMessage | None:
    """Convert one OTEL message (``role`` + ``parts``) to an internal message.

    Assistant messages keep their ``text`` and ``tool_call`` parts. Every
    other role is mapped to a UserMessage built from ``text`` and
    ``tool_call_response`` parts, so tool results are represented as
    UserMessage content.

    Args:
        otel_msg: Message in OTEL format with 'role' and 'parts' fields.

    Returns:
        UserMessage or AssistantMessage, or None when the message yields no
        supported content or conversion fails.
    """
    try:
        role = otel_msg.get("role", "")
        # A present-but-null "parts" value is treated like a missing one.
        raw_parts = otel_msg.get("parts") or []
        # Drop malformed entries so one bad part does not discard the valid parts next to it.
        parts = [part for part in raw_parts if isinstance(part, dict)]
        if len(parts) != len(raw_parts):
            logger.warning("Skipping %d malformed part(s) in OTEL message", len(raw_parts) - len(parts))

        if role == "assistant":
            assistant_content: list[TextContent | ToolCallContent] = []

            for part in parts:
                part_type = part.get("type", "")

                if part_type == "text":
                    assistant_content.append(TextContent(text=part.get("content", "")))
                elif part_type == "tool_call":
                    assistant_content.append(
                        ToolCallContent(
                            name=part.get("name", ""),
                            arguments=part.get("arguments", {}),
                            tool_call_id=part.get("id"),
                        )
                    )
            return AssistantMessage(content=assistant_content) if assistant_content else None

        # Tool messages are represented as UserMessage with ToolResultContent.
        # NOTE(review): any non-assistant role (e.g. "system") also lands here — confirm this is intended.
        content: list[TextContent | ToolResultContent] = []

        for part in parts:
            part_type = part.get("type", "")

            if part_type == "text":
                content.append(TextContent(text=part.get("content", "")))
            elif part_type == "tool_call_response":
                response = part.get("response", [])
                response_text = ""

                # TODO: only the first element of a list response is used; multiple tool results are not merged.
                if isinstance(response, list) and response:
                    first = response[0]
                    response_text = first.get("text", "") if isinstance(first, dict) else str(first)
                elif isinstance(response, str):
                    response_text = response

                content.append(
                    ToolResultContent(
                        content=response_text,
                        tool_call_id=part.get("id"),
                    )
                )
        return UserMessage(content=content) if content else None

    except Exception as e:
        # Best-effort conversion: a broken message is reported and skipped, never raised.
        logger.warning(f"Failed to convert OTEL message: {e}")
        return None
160329

161330
def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> ToolExecutionSpan:
162331
span_info = self._create_span_info(span, session_id)
163332
attrs = span.attributes or {}
164333

165334
tool_name = str(attrs.get("gen_ai.tool.name", ""))
166335
tool_call_id = str(attrs.get("gen_ai.tool.call.id", ""))
167-
tool_status = attrs.get("tool.status", "")
336+
tool_status = attrs.get("gen_ai.tool.status", attrs.get("tool.status", ""))
168337
tool_error = None if tool_status == "success" else (str(tool_status) if tool_status else None)
169338

170339
tool_arguments = {}
171340
tool_result_content = ""
172341

173-
for event in span.events:
174-
try:
175-
if event.name == "gen_ai.tool.message":
176-
tool_arguments = self._parse_json_attr(event.attributes, "content", "{}")
177-
elif event.name == "gen_ai.choice":
178-
message_list = self._parse_json_attr(event.attributes, "message")
179-
tool_result_content = message_list[0].get("text", "") if message_list else ""
180-
except Exception as e:
181-
logger.warning(f"Failed to process tool event {event.name}: {e}")
342+
if self._use_latest_conventions():
343+
# Extract from gen_ai.client.inference.operation.details events
344+
for event in span.events:
345+
try:
346+
if event.name == "gen_ai.client.inference.operation.details":
347+
event_attributes = event.attributes
348+
if not event_attributes:
349+
continue
350+
if "gen_ai.input.messages" in event_attributes:
351+
input_messages = self._parse_json_attr(event_attributes, "gen_ai.input.messages")
352+
if input_messages and input_messages[0].get("parts"):
353+
part = input_messages[0]["parts"][0]
354+
if part.get("type") == "tool_call":
355+
tool_arguments = part.get("arguments", {})
356+
357+
if "gen_ai.output.messages" in event_attributes:
358+
output_messages = self._parse_json_attr(event_attributes, "gen_ai.output.messages")
359+
if output_messages and output_messages[0].get("parts"):
360+
part = output_messages[0]["parts"][0]
361+
if part.get("type") == "tool_call_response":
362+
response = part.get("response", [])
363+
if isinstance(response, list) and response:
364+
tool_result_content = (
365+
response[0].get("text", "")
366+
if isinstance(response[0], dict)
367+
else str(response[0])
368+
)
369+
elif isinstance(response, str):
370+
tool_result_content = response
371+
except Exception as e:
372+
logger.warning(f"Failed to process tool event {event.name}: {e}")
373+
else:
374+
for event in span.events:
375+
try:
376+
event_attributes = event.attributes
377+
if not event_attributes:
378+
continue
379+
if event.name == "gen_ai.tool.message":
380+
tool_arguments = self._parse_json_attr(event_attributes, "content", "{}")
381+
elif event.name == "gen_ai.choice":
382+
message_list = self._parse_json_attr(event_attributes, "message")
383+
tool_result_content = message_list[0].get("text", "") if message_list else ""
384+
except Exception as e:
385+
logger.warning(f"Failed to process tool event {event.name}: {e}")
182386

183387
tool_call = ToolCall(name=tool_name, arguments=tool_arguments, tool_call_id=tool_call_id)
184388
tool_result = ToolResult(content=tool_result_content, error=tool_error, tool_call_id=tool_call_id)
@@ -198,16 +402,46 @@ def _convert_agent_invocation_span(self, span: ReadableSpan, session_id: str) ->
198402
except Exception as e:
199403
logger.warning(f"Failed to parse available tools: {e}")
200404

201-
for event in span.events:
202-
try:
203-
if event.name == "gen_ai.user.message":
204-
content_list = self._parse_json_attr(event.attributes, "content")
205-
user_prompt = content_list[0].get("text", "") if content_list else ""
206-
elif event.name == "gen_ai.choice":
207-
msg = event.attributes.get("message", "") if event.attributes else ""
208-
agent_response = str(msg)
209-
except Exception as e:
210-
logger.warning(f"Failed to process agent event {event.name}: {e}")
405+
if self._use_latest_conventions():
406+
for event in span.events:
407+
try:
408+
if event.name == "gen_ai.client.inference.operation.details":
409+
event_attributes = event.attributes
410+
if not event_attributes:
411+
continue
412+
if "gen_ai.input.messages" in event_attributes:
413+
input_messages = self._parse_json_attr(event_attributes, "gen_ai.input.messages")
414+
if input_messages and input_messages[0].get("parts"):
415+
parts = input_messages[0]["parts"]
416+
for part in parts:
417+
if part.get("type") == "text":
418+
user_prompt = part.get("content", "")
419+
break
420+
421+
if "gen_ai.output.messages" in event_attributes:
422+
output_messages = self._parse_json_attr(event_attributes, "gen_ai.output.messages")
423+
if output_messages and output_messages[0].get("parts"):
424+
parts = output_messages[0]["parts"]
425+
for part in parts:
426+
if part.get("type") == "text":
427+
agent_response = part.get("content", "")
428+
break
429+
except Exception as e:
430+
logger.warning(f"Failed to process agent event {event.name}: {e}")
431+
else:
432+
for event in span.events:
433+
try:
434+
event_attributes = event.attributes
435+
if not event_attributes:
436+
continue
437+
if event.name == "gen_ai.user.message":
438+
content_list = self._parse_json_attr(event_attributes, "content")
439+
user_prompt = content_list[0].get("text", "") if content_list else ""
440+
elif event.name == "gen_ai.choice":
441+
msg = event_attributes.get("message", "") if event_attributes else ""
442+
agent_response = str(msg)
443+
except Exception as e:
444+
logger.warning(f"Failed to process agent event {event.name}: {e}")
211445

212446
return AgentInvocationSpan(
213447
span_info=span_info,

src/strands_evals/types/trace.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from enum import Enum
99

1010
from pydantic import BaseModel, field_serializer
11-
from typing_extensions import TypeAlias, Union
11+
from typing_extensions import Mapping, Sequence, TypeAlias, Union
1212

1313

1414
class Role(str, Enum):
@@ -191,3 +191,10 @@ class EvaluatorResult(BaseModel):
191191

192192
class EvaluationResponse(BaseModel):
193193
evaluator_results: list[EvaluatorResult]
194+
195+
196+
AttributeValue = Mapping[
197+
str, str | bool | int | float | Sequence[str] | Sequence[bool] | Sequence[int] | Sequence[float]
198+
]
199+
200+
Attributes = Mapping[str, AttributeValue] | None

0 commit comments

Comments
 (0)