Skip to content

Commit a5feb00

Browse files
feat: export eval scores and behavioral metrics to Langfuse and OTLP backends (#76)
Adds `agent-strace export --scores --backend langfuse|otlp`:

Langfuse path (`--backend langfuse`):
- Sessions exported as Langfuse Traces via /api/public/ingestion
- Tool call/result pairs exported as Spans (type=SPAN)
- LLM request/response pairs exported as Generations with token counts
- eval.json judge scores exported as Langfuse Scores attached to trace

OTLP metrics path (`--backend otlp`):
- Behavioral metrics exported as OTLP gauge metrics to /v1/metrics
- Metrics: cost_usd, error_rate, retry_rate, blast_radius, duration_s, tool_calls, eval.score (one per judge)
- Compatible with Datadog, Honeycomb, Grafana, New Relic

No new dependencies. All HTTP calls use urllib.request. Credentials via env vars (LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, OTEL_EXPORTER_OTLP_ENDPOINT) or CLI flags.

- langfuse_export.py: LangfuseConfig, OtlpMetricsConfig, EvalScore, export_session_to_langfuse(), export_metrics_to_otlp(), cmd_export_scores()
- cli.py: --scores, --metrics, --backend, --since, --langfuse-* and --otlp-* flags added to export subcommand; routes to cmd_export_scores when any of these flags are set
- 31 new tests covering config, score loading, trace/observation/score building, gauge building, metrics extraction, and mocked HTTP export

Closes #71

Co-authored-by: Ona <no-reply@ona.com>
1 parent 5e611d4 commit a5feb00

3 files changed

Lines changed: 991 additions & 1 deletion

File tree

src/agent_trace/cli.py

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
from .a2a import cmd_a2a_tree
2626
from .annotate import cmd_annotate
2727
from .drift import cmd_drift
28+
from .langfuse_export import cmd_export_scores
2829
from .oncall import cmd_oncall
2930
from .optimize import cmd_optimize
3031
from .freshness import cmd_freshness
@@ -224,6 +225,10 @@ def cmd_inspect(args: argparse.Namespace) -> int:
224225

225226

226227
def cmd_export(args: argparse.Namespace) -> int:
228+
# Route to Langfuse/OTLP export when --scores, --metrics, or --backend is set
229+
if getattr(args, "scores", False) or getattr(args, "metrics", False) or getattr(args, "backend", None):
230+
return cmd_export_scores(args)
231+
227232
"""Export a session to JSON, CSV, or OTLP."""
228233
store = TraceStore(args.trace_dir)
229234

@@ -461,11 +466,30 @@ def build_parser() -> argparse.ArgumentParser:
461466

462467
# export
463468
p_export = sub.add_parser("export", help="export a session")
464-
p_export.add_argument("session_id", help="session ID or prefix")
469+
p_export.add_argument("session_id", nargs="?", help="session ID or prefix")
465470
p_export.add_argument("--format", choices=["json", "csv", "ndjson", "otlp"], default="json")
466471
p_export.add_argument("--endpoint", help="OTLP collector URL (e.g. http://localhost:4318)")
467472
p_export.add_argument("--header", action="append", help="HTTP header for OTLP (e.g. 'x-honeycomb-team: KEY')")
468473
p_export.add_argument("--service-name", default="agent-trace", help="OTel service name (default: agent-trace)")
474+
# Langfuse / OTLP metrics flags
475+
p_export.add_argument("--scores", action="store_true",
476+
help="include eval scores in export")
477+
p_export.add_argument("--metrics", action="store_true",
478+
help="export behavioral metrics as OTLP gauges")
479+
p_export.add_argument("--backend", choices=["langfuse", "otlp"],
480+
help="export backend: langfuse or otlp")
481+
p_export.add_argument("--since", metavar="Nd",
482+
help="export sessions from the last N days (e.g. 7d)")
483+
p_export.add_argument("--langfuse-public-key", dest="langfuse_public_key", metavar="KEY",
484+
help="Langfuse public key (overrides LANGFUSE_PUBLIC_KEY)")
485+
p_export.add_argument("--langfuse-secret-key", dest="langfuse_secret_key", metavar="KEY",
486+
help="Langfuse secret key (overrides LANGFUSE_SECRET_KEY)")
487+
p_export.add_argument("--langfuse-host", dest="langfuse_host", metavar="URL",
488+
help="Langfuse host (default: https://cloud.langfuse.com)")
489+
p_export.add_argument("--otlp-endpoint", dest="otlp_endpoint", metavar="URL",
490+
help="OTLP metrics endpoint (overrides OTEL_EXPORTER_OTLP_ENDPOINT)")
491+
p_export.add_argument("--otlp-headers", dest="otlp_headers", metavar="HEADERS",
492+
help="OTLP headers as key=value,key=value")
469493

470494
# stats
471495
p_stats = sub.add_parser("stats", help="show session statistics")

0 commit comments

Comments
 (0)