Skip to content

Commit 05973fd

Browse files
add support for traces exported from Tempo
1 parent 0cfce1c commit 05973fd

24 files changed

Lines changed: 947 additions & 167 deletions

docs/streaming.md

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,18 @@ See [examples/README.md](../examples/README.md) for details on supported instrum
6666

6767
### OTLP/JSON Support
6868

69-
Native OpenTelemetry format — no conversion to Jaeger needed:
69+
Native OpenTelemetry format. The CLI auto-detects Jaeger vs OTLP from
70+
file contents, so `.json` and `.jsonl` exports from Tempo, Jaeger, or
71+
the OTel collector all work without a `--format` flag:
7072

7173
```bash
72-
# Load OTLP files directly
73-
agentevals run trace.otlp.json --format otlp-json --eval-set eval.json
74+
# Load any trace file directly; format is auto-detected
75+
agentevals run trace.otlp.json --eval-set eval.json
7476
```
7577

78+
Pass `--format otlp-json` (or `jaeger-json`) only as an override when
79+
auto-detection fails on a non-standard export.
80+
7681
### Real-time Span Streaming
7782

7883
The `AgentEvalsStreamingProcessor` is an OTel `SpanProcessor` that streams spans over WebSocket as they complete:
@@ -311,6 +316,7 @@ This installs `opentelemetry-sdk>=1.20.0`. Agent code also needs `websockets` fo
311316
## Compatibility
312317

313318
All existing workflows continue to work:
314-
- Jaeger JSON files still supported: `agentevals run trace.json --eval-set ...`
315-
- OTLP/JSON files: `agentevals run trace.otlp.json --format otlp-json --eval-set ...`
316-
- Web UI upload flow unchanged
319+
- Trace files (Jaeger or OTLP, including Tempo exports) are auto-detected by
320+
content: `agentevals run trace.json --eval-set ...`
321+
- Pass `--format` only to override detection on non-standard exports.
322+
- Web UI upload flow unchanged.

samples/tempo_export_with_batches.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/agentevals/api/routes.py

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@
2727
)
2828
from ..converter import convert_traces
2929
from ..extraction import get_extractor
30+
from ..loader import load_traces
3031
from ..loader.otlp import OtlpJsonLoader
3132
from ..runner import (
3233
RunResult,
33-
get_loader,
3434
load_eval_set,
3535
load_eval_set_from_dict,
3636
run_evaluation,
@@ -331,17 +331,10 @@ def _serialize_invocation(inv) -> dict[str, Any]:
331331
return _camel_keys(inv_dict)
332332

333333

334-
def _get_format_for_file(path: str, explicit_format: str) -> str:
335-
"""Return the loader format for a single file, auto-detecting from extension."""
336-
if explicit_format:
337-
return explicit_format
338-
return "otlp-json" if path.lower().endswith(".jsonl") else "jaeger-json"
339-
340-
341334
@router.post("/convert", response_model=StandardResponse[ConvertTracesData])
342335
async def convert_trace_files(
343336
trace_files: list[UploadFile] = File(...),
344-
trace_format: str = Form(""),
337+
trace_format: str | None = Form(None),
345338
):
346339
"""Convert trace files to invocations and metadata without running evaluation."""
347340
temp_dir = tempfile.mkdtemp()
@@ -380,10 +373,8 @@ async def convert_trace_files(
380373
trace_to_filename: dict[str, str] = {}
381374
load_warnings: list[str] = []
382375
for path, original in saved_files:
383-
fmt = _get_format_for_file(path, trace_format)
384-
loader = get_loader(fmt)
385376
try:
386-
traces = loader.load(path)
377+
traces = load_traces(path, format=trace_format or None)
387378
for t in traces:
388379
trace_to_filename[t.trace_id] = original
389380
all_traces.extend(traces)
@@ -496,12 +487,6 @@ async def evaluate_traces(
496487
)
497488

498489
trace_format = config_dict.get("trace_format")
499-
if not trace_format:
500-
first_file = trace_paths[0]
501-
if first_file.endswith(".jsonl"):
502-
trace_format = "otlp-json"
503-
else:
504-
trace_format = "jaeger-json"
505490

506491
eval_set_path = None
507492
if eval_set_file and eval_set_file.filename:
@@ -612,12 +597,6 @@ async def event_generator():
612597
return
613598

614599
trace_format = config_dict.get("trace_format")
615-
if not trace_format:
616-
first_file = trace_paths[0]
617-
if first_file.endswith(".jsonl"):
618-
trace_format = "otlp-json"
619-
else:
620-
trace_format = "jaeger-json"
621600

622601
eval_set_path = None
623602
if eval_set_file and eval_set_file.filename:
@@ -663,10 +642,9 @@ async def event_generator():
663642
trajectory_match_type=config_dict.get("trajectoryMatchType"),
664643
)
665644

666-
loader = get_loader(eval_config.trace_format)
667645
for trace_file_path in trace_paths:
668646
try:
669-
traces = loader.load(trace_file_path)
647+
traces = load_traces(trace_file_path, format=eval_config.trace_format)
670648
for trace in traces:
671649
extractor = get_extractor(trace)
672650
perf_metrics = _camel_keys(extract_performance_metrics(trace, extractor))

src/agentevals/cli.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,9 @@ def main(verbose: int) -> None:
9393
"--format",
9494
"-f",
9595
"trace_format",
96-
default="jaeger-json",
97-
help="Trace file format.",
96+
default=None,
97+
type=click.Choice(["jaeger-json", "otlp-json"], case_sensitive=False),
98+
help="Override the trace file format. Auto-detected from file contents when omitted.",
9899
)
99100
@click.option(
100101
"--judge-model",
@@ -134,7 +135,7 @@ def run(
134135
trace_files: tuple[str, ...],
135136
eval_set: str | None,
136137
metric: tuple[str, ...] | None,
137-
trace_format: str,
138+
trace_format: str | None,
138139
judge_model: str | None,
139140
threshold: float | None,
140141
trajectory_match_type: str | None,

src/agentevals/config.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,16 +160,19 @@ def _validate_trajectory_match_type(cls, v: str | None) -> str | None:
160160
class EvalRunConfig(EvalParams):
161161
"""Full configuration for file-based evaluation runs."""
162162

163-
trace_files: list[str] = Field(description="Paths to trace files (Jaeger JSON or OTLP JSON).")
163+
trace_files: list[str] = Field(description="Paths to trace files (Jaeger or OTLP JSON, .json or .jsonl).")
164164

165165
eval_set_file: str | None = Field(
166166
default=None,
167167
description="Path to a golden eval set JSON file (ADK EvalSet format).",
168168
)
169169

170-
trace_format: str = Field(
171-
default="jaeger-json",
172-
description="Format of the trace files (jaeger-json or otlp-json).",
170+
trace_format: str | None = Field(
171+
default=None,
172+
description=(
173+
"Optional explicit trace format override ('jaeger-json' or 'otlp-json'). "
174+
"Leave unset to auto-detect from file contents."
175+
),
173176
)
174177

175178
output_format: str = Field(

src/agentevals/converter.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,16 @@
2424
extract_tool_result_from_span,
2525
extract_user_text_from_attrs,
2626
get_extractor,
27+
has_adk_descendant,
28+
is_adk_scope,
2729
parse_json,
2830
)
2931
from .loader.base import Span, Trace
3032
from .trace_attrs import (
3133
ADK_INVOCATION_ID,
3234
ADK_LLM_REQUEST,
3335
ADK_LLM_RESPONSE,
34-
ADK_SCOPE_VALUE,
3536
OTEL_GENAI_AGENT_NAME,
36-
OTEL_SCOPE,
3737
)
3838

3939
logger = logging.getLogger(__name__)
@@ -101,13 +101,26 @@ def convert_traces(traces: list[Trace]) -> list[ConversionResult]:
101101

102102

103103
def _find_adk_spans(trace: Trace, operation: str) -> list[Span]:
104-
"""Find spans with ``otel.scope.name == "gcp.vertex.agent"`` matching an operation prefix."""
104+
"""Find ADK-instrumented spans matching an operation prefix.
105+
106+
Detection delegates to ``is_adk_scope``, which accepts either the OTel
107+
scope marker, the ``gen_ai.system`` semconv attribute, or any
108+
``gcp.vertex.agent.*`` custom attribute. The fallbacks matter for
109+
Tempo-exported traces where scope info gets lost during compaction.
110+
111+
For ``invoke_agent`` we additionally accept spans whose subtree is ADK
112+
instrumented even when the parent itself lost its markers — Tempo's
113+
compactor can strip scope info on the parent while children retain
114+
their ``gcp.vertex.agent.*`` attributes.
115+
"""
105116
matches = []
106117
for span in trace.all_spans:
107-
if span.get_tag(OTEL_SCOPE) != ADK_SCOPE_VALUE:
118+
if not span.operation_name.startswith(operation):
119+
continue
120+
if is_adk_scope(span):
121+
matches.append(span)
108122
continue
109-
# operationName is e.g. "invoke_agent helm_agent" or "call_llm"
110-
if span.operation_name.startswith(operation):
123+
if operation == "invoke_agent" and has_adk_descendant(span):
111124
matches.append(span)
112125
matches.sort(key=lambda s: s.start_time)
113126
return matches

src/agentevals/eval_config_loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def merge_configs(file_config: EvalRunConfig, cli_config: EvalRunConfig) -> Eval
140140
merged.threshold = cli_config.threshold
141141
if cli_config.trajectory_match_type is not None:
142142
merged.trajectory_match_type = cli_config.trajectory_match_type
143-
if cli_config.trace_format != "jaeger-json":
143+
if cli_config.trace_format is not None:
144144
merged.trace_format = cli_config.trace_format
145145
if cli_config.output_format != "table":
146146
merged.output_format = cli_config.output_format

src/agentevals/extraction.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,8 +344,52 @@ def extract_tool_result_from_span(span: Span) -> dict[str, Any] | None:
344344
# ---------------------------------------------------------------------------
345345

346346

347+
_ADK_ATTR_MARKERS = (
348+
ADK_LLM_REQUEST,
349+
ADK_LLM_RESPONSE,
350+
ADK_TOOL_CALL_ARGS,
351+
ADK_TOOL_RESPONSE,
352+
"gcp.vertex.agent.invocation_id",
353+
"gcp.vertex.agent.session_id",
354+
"gcp.vertex.agent.event_id",
355+
)
356+
357+
358+
def has_adk_descendant(span: Span) -> bool:
359+
"""Return True if any descendant of ``span`` is ADK-instrumented.
360+
361+
Used to recover ADK invocation parents when round-tripped exports
362+
(Tempo, etc.) drop scope info on the parent but retain ADK custom
363+
attributes on the LLM/tool child spans.
364+
"""
365+
for child in span.children:
366+
if is_adk_scope(child):
367+
return True
368+
if has_adk_descendant(child):
369+
return True
370+
return False
371+
372+
347373
def is_adk_scope(span: Span) -> bool:
348-
return span.get_tag(OTEL_SCOPE) == ADK_SCOPE_VALUE
374+
"""Return True for spans emitted by Google ADK instrumentation.
375+
376+
Recognized signals (any one is sufficient):
377+
1. ``otel.scope.name == "gcp.vertex.agent"`` — the canonical OTel scope.
378+
2. ``gen_ai.system == "gcp.vertex.agent"`` — the per-span semconv marker.
379+
Tempo's compactor sometimes drops/merges scope info, so this fallback
380+
is required for round-tripped Tempo exports.
381+
3. Any ``gcp.vertex.agent.*`` custom attribute (llm_request, llm_response,
382+
tool_call_args, tool_response, invocation_id, session_id, event_id).
383+
These are unambiguous ADK markers.
384+
"""
385+
if span.get_tag(OTEL_SCOPE) == ADK_SCOPE_VALUE:
386+
return True
387+
if span.get_tag(OTEL_GENAI_SYSTEM) == ADK_SCOPE_VALUE:
388+
return True
389+
for marker in _ADK_ATTR_MARKERS:
390+
if span.get_tag(marker) is not None:
391+
return True
392+
return False
349393

350394

351395
def is_llm_span(span: Span) -> bool:
@@ -423,7 +467,12 @@ def format_name(self) -> str:
423467
return "adk"
424468

425469
def find_invocation_spans(self, trace: Trace) -> list[Span]:
426-
matches = [s for s in trace.all_spans if is_adk_scope(s) and s.operation_name.startswith("invoke_agent")]
470+
matches: list[Span] = []
471+
for s in trace.all_spans:
472+
if not s.operation_name.startswith("invoke_agent"):
473+
continue
474+
if is_adk_scope(s) or has_adk_descendant(s):
475+
matches.append(s)
427476
matches.sort(key=lambda s: s.start_time)
428477
return matches
429478

src/agentevals/loader/__init__.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,29 @@
1-
"""Trace loader implementations."""
1+
"""Trace loader implementations.
22
3+
Most callers should use :func:`load_traces` from
4+
:mod:`agentevals.loader.auto`, which auto-detects the on-disk format
5+
(Jaeger or OTLP, including Tempo's ``batches`` / wrapper variants) and
6+
dispatches to the right underlying loader.
7+
"""
8+
9+
from .auto import (
10+
JAEGER_JSON,
11+
OTLP_JSON,
12+
detect_format,
13+
get_loader_for_format,
14+
load_traces,
15+
)
316
from .base import TraceLoader
417
from .jaeger import JaegerJsonLoader
518
from .otlp import OtlpJsonLoader
619

7-
__all__ = ["JaegerJsonLoader", "OtlpJsonLoader", "TraceLoader"]
20+
__all__ = [
21+
"JAEGER_JSON",
22+
"OTLP_JSON",
23+
"JaegerJsonLoader",
24+
"OtlpJsonLoader",
25+
"TraceLoader",
26+
"detect_format",
27+
"get_loader_for_format",
28+
"load_traces",
29+
]

0 commit comments

Comments
 (0)