-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathagent.py
More file actions
2559 lines (2216 loc) · 108 KB
/
agent.py
File metadata and controls
2559 lines (2216 loc) · 108 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import argparse
import asyncio
import atexit
import base64
from collections import OrderedDict
from collections import defaultdict
from dataclasses import dataclass
from dataclasses import field
import json
import logging
import os
from pathlib import Path
import platform
import pprint
import re
import socket
import sys
import threading
from typing import Any
from typing import Awaitable
from typing import Callable
from typing import DefaultDict
from typing import Dict
from typing import List
from typing import Literal
from typing import Mapping
from typing import Optional
from typing import Set
from typing import Tuple
from typing import cast
from urllib.parse import urlparse
from urllib.parse import urlunparse
from aiohttp import ClientResponse
from aiohttp import ClientSession
from aiohttp import web
from aiohttp.web import HTTPException
from aiohttp.web import Request
from aiohttp.web import middleware
from grpc import aio as grpc_aio
from msgpack.exceptions import ExtraData as MsgPackExtraDataException
from multidict import CIMultiDict
from opentelemetry.proto.collector.logs.v1.logs_service_pb2 import ExportLogsServiceResponse
from opentelemetry.proto.collector.logs.v1.logs_service_pb2_grpc import add_LogsServiceServicer_to_server
from opentelemetry.proto.collector.metrics.v1.metrics_service_pb2 import ExportMetricsServiceResponse
from opentelemetry.proto.collector.metrics.v1.metrics_service_pb2_grpc import add_MetricsServiceServicer_to_server
from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import ExportTraceServiceResponse
from opentelemetry.proto.collector.trace.v1.trace_service_pb2_grpc import add_TraceServiceServicer_to_server
from . import _get_version
from . import trace_snapshot
from . import tracestats_snapshot
from .apmtelemetry import TelemetryEvent
from .apmtelemetry import v2_decode_request as v2_apmtelemetry_decode_request
from .checks import CheckTrace
from .checks import Checks
from .checks import start_trace
from .claude_hooks import ClaudeHooksAPI
from .claude_hooks import write_claude_code_hooks
from .claude_link_tracker import ClaudeLinkTracker
from .claude_proxy import ClaudeProxyAPI
from .integration import Integration
from .llmobs_event_platform import LLMObsEventPlatformAPI
from .logs import LOGS_ENDPOINT
from .logs import OTLPLogsGRPCServicer
from .logs import decode_logs_request
from .metrics import METRICS_ENDPOINT
from .metrics import OTLPMetricsGRPCServicer
from .metrics import decode_metrics_request
from .remoteconfig import RemoteConfigServer
from .trace import Span
from .trace import Trace
from .trace import TraceMap
from .trace import decode_v1 as trace_decode_v1
from .trace import decode_v04 as trace_decode_v04
from .trace import decode_v05 as trace_decode_v05
from .trace import decode_v07 as trace_decode_v07
from .trace import pprint_trace
from .trace import v04TracePayload
from .trace_checks import CheckMetaEventsIsValidJSON
from .trace_checks import CheckMetaTracerVersionHeader
from .trace_checks import CheckTraceContentLength
from .trace_checks import CheckTraceCountHeader
from .trace_checks import CheckTraceDDService
from .trace_checks import CheckTracePeerService
from .trace_checks import CheckTraceStallAsync
from .tracerflare import TracerFlareEvent
from .tracerflare import v1_decode as v1_tracerflare_decode
from .traces_otlp import OTLPTracesGRPCServicer
from .traces_otlp import TRACES_ENDPOINT
from .traces_otlp import decode_traces_request
from .tracestats import decode_v06 as tracestats_decode_v06
from .tracestats import v06StatsPayload
from .vcr_proxy import proxy_request
# Default ports
# NOTE(review): judging by the names, these are the listening ports for the APM (trace) endpoints and
# for the OTLP HTTP / OTLP gRPC receivers respectively — confirm against the code that binds them.
DEFAULT_APM_PORT = 8126
DEFAULT_OTLP_HTTP_PORT = 4318
DEFAULT_OTLP_GRPC_PORT = 4317
class NoSuchSessionException(Exception):
    """Raised when no requests can be found for a requested session token."""
# Signature of an aiohttp request handler as consumed by the middlewares in this module.
_Handler = Callable[[Request], Awaitable[web.Response]]

# Module-level logger.
log = logging.getLogger(__name__)
def _parse_csv(s: str) -> List[str]:
"""Return the values of a csv string.
>>> _parse_csv("a,b,c")
['a', 'b', 'c']
>>> _parse_csv(" a, b ,c ")
['a', 'b', 'c']
>>> _parse_csv(" a,b,c ")
['a', 'b', 'c']
>>> _parse_csv(" a,")
['a']
>>> _parse_csv("a, ")
['a']
"""
return [s.strip() for s in s.split(",") if s.strip() != ""]
def _parse_map(s: str) -> Dict[str, str]:
"""Return the values of a csv string.
>>> _parse_map("a:b,b:c,c:d")
{'a': 'b', 'b': 'c', 'c': 'd'}
"""
return dict([s.strip().split(":", 1) for s in s.split(",") if s.strip()])
def _session_token(request: Request) -> Optional[str]:
token: Optional[str]
if "X-Datadog-Test-Session-Token" in request.headers:
token = request.headers["X-Datadog-Test-Session-Token"]
elif "test_session_token" in request.url.query:
token = request.url.query.get("test_session_token")
else:
token = None
return token
async def _vcr_proxy_cassette_prefix(request: Request) -> Optional[str]:
try:
request_body: Dict[str, str] = await request.json()
requested_test_name = request_body.get("test_name")
return requested_test_name
except (json.JSONDecodeError, UnicodeDecodeError):
return None
@middleware
async def session_token_middleware(request: Request, handler: _Handler) -> web.Response:
    """Attach the request's session token to the request before handling it.

    The token is looked up with ``_session_token`` and stored under the
    ``"session_token"`` key of the request (``None`` when absent).
    """
    request["session_token"] = _session_token(request)
    response = await handler(request)
    return response
@middleware
async def handle_exception_middleware(request: Request, handler: _Handler) -> web.Response:
    """Convert unexpected handler exceptions into HTTP 400 responses.

    ``HTTPException``s raised by the handler pass through untouched; any other
    exception becomes ``HTTPBadRequest`` whose reason is the exception text.
    """
    try:
        return await handler(request)
    except HTTPException:
        # Already an HTTP response/exception chosen by the handler.
        raise
    except Exception as exc:
        raise web.HTTPBadRequest(reason=str(exc))
async def _forward_request(
    request_data: bytes, headers: Mapping[str, str], full_agent_url: str
) -> Tuple[ClientResponse, str]:
    """POST ``request_data`` to ``full_agent_url`` and return the response with its body as text.

    Returns:
        A ``(response, body)`` tuple. ``body`` is the response text for
        ``text/plain`` responses, the re-serialized JSON for JSON responses, and
        ``""`` when the response is empty or cannot be decoded as JSON.

    Raises:
        AssertionError: if the agent answers with a status other than 200.
    """
    async with ClientSession() as session:
        async with session.post(
            full_agent_url,
            headers=headers,
            data=request_data,
        ) as resp:
            if resp.status != 200:
                # Raised explicitly (not via ``assert``) so the check is not stripped under ``python -O``.
                raise AssertionError(f"Request to agent unsuccessful, received [{resp.status}] response.")

            if "text/plain" in resp.content_type:
                response_data = await resp.text()
                log.info("Response %r from agent:", response_data)
                return resp, response_data

            raw_response_data = await resp.read()
            if len(raw_response_data) == 0:
                log.info("Received empty response: %r from agent.", raw_response_data)
                return resp, ""

            try:
                # json.loads accepts bytes directly, so no separate decode step is needed.
                response_data = json.dumps(json.loads(raw_response_data))
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # Non-JSON (possibly binary) payloads are forwarded as an empty body instead of failing.
                log.warning("Error decoding response data: %s, data=%r", str(e), raw_response_data)
                log.warning("Original Request: %r", request_data)
                response_data = ""
            log.info("Response %r from agent:", response_data)
            return resp, response_data
async def _prepare_and_send_request(data: bytes, request: Request, headers: Mapping[str, str]) -> web.Response:
    """Forward ``data`` to the configured agent and mirror the agent's response.

    The outgoing headers are the incoming ``headers`` minus ``Host`` and
    ``Transfer-Encoding``, with ``Content-Type`` defaulting to
    ``application/msgpack``. The target URL is the app's ``agent_url`` plus the
    path of the incoming request.
    """
    dropped = ["content-type", "host", "transfer-encoding"]
    outgoing = {"Content-Type": headers.get("Content-Type", "application/msgpack")}
    for name, value in headers.items():
        if name.lower() not in dropped:
            outgoing[name] = value
    headers = outgoing

    full_agent_url = request.app["agent_url"] + request.path
    log.info("Forwarding request to agent at %r", full_agent_url)
    log.debug(f"Using headers: {headers}")

    client_response, body = await _forward_request(data, headers, full_agent_url)
    return web.Response(
        status=client_response.status,
        headers=client_response.headers,
        body=body,
    )
def update_trace_agent_port(url, new_port):
    """Return ``url`` with its port replaced by ``new_port``.

    Only the port of the network location is touched: userinfo, host, path,
    params, query and fragment are preserved. A URL that has a host but no
    explicit port gets ``new_port`` appended. A URL without a network location
    (for example one missing its scheme) is returned as re-assembled by
    ``urlunparse`` with no port added.
    """
    parsed_url = urlparse(url)
    if not parsed_url.netloc:
        return urlunparse(parsed_url)

    # Split off "user:password@" so a password that happens to contain the
    # old port number is never rewritten.
    userinfo, at, hostport = parsed_url.netloc.rpartition("@")
    if parsed_url.port is not None:
        # Cut at the last colon; this also keeps bracketed IPv6 hosts intact.
        host = hostport[: hostport.rfind(":")]
    else:
        host = hostport.rstrip(":")
    new_netloc = f"{userinfo}{at}{host}:{new_port}"
    return urlunparse(parsed_url._replace(netloc=new_netloc))
def default_value_trace_check_results_by_check():
    """Build an empty per-check results mapping whose missing entries start as zeroed summaries."""
    per_check_results = defaultdict(default_value_trace_results_summary)
    return per_check_results
def default_value_trace_failures():
    """Return a new, empty list (used as a ``defaultdict`` factory for trace failures)."""
    return list()
def default_value_trace_results_summary():
    """Return a fresh summary dict with the passed/failed/skipped counters all set to zero."""
    counters = ("Passed_Checks", "Failed_Checks", "Skipped_Checks")
    return dict.fromkeys(counters, 0)
async def _is_valid_api_key_and_site_combination(dd_api_key: str, dd_site: str) -> bool:
    """Ask the site's ``/api/v1/validate`` endpoint whether the API key is valid.

    Returns ``False`` on a 403 response; otherwise returns the ``valid`` field
    of the JSON response body (``False`` when the field is missing).
    """
    validate_url = f"https://api.{dd_site}/api/v1/validate"
    auth_headers = {"DD-API-KEY": dd_api_key}
    async with ClientSession() as session:
        async with session.get(validate_url, headers=auth_headers) as resp:
            if resp.status == 403:
                return False
            payload = cast(dict[str, bool], await resp.json())
            return payload.get("valid", False)
class MockQuery:
    """Dict-like stand-in for a request's query parameters.

    It is always created empty, because requests processed from a named pipe
    carry no query string.
    """

    def __init__(self):
        self._data = dict()

    def get(self, key, default=None):
        """Return the value stored for ``key``, or ``default`` when absent."""
        return self._data.get(key, default)

    def __getitem__(self, key):
        """Return the value stored for ``key``; raises ``KeyError`` when absent."""
        return self._data[key]

    def __contains__(self, key):
        """Report whether ``key`` is present."""
        return key in self._data
class MockURL:
    """Stand-in for a request URL used for named pipe processing.

    Exposes the request ``path`` and an empty ``query`` mapping.
    """

    def __init__(self, path: str):
        self.query = MockQuery()
        self.path = path
class MockRequest:
    """Mock Request object for named pipe processing.

    Provides the subset of the request interface visible here: ``method``,
    ``path``, ``headers``, ``url``, ``content_type``, ``app``, an awaitable
    ``read()`` and dict-style per-request storage (``req[key]``,
    ``req[key] = value``, ``key in req``, ``req.get(key)``).
    """

    def __init__(
        self, method: str, path: str, headers: Dict[str, str], body: bytes, agent: "Agent", app: web.Application
    ):
        # NOTE(review): ``agent`` is accepted but not stored or used here.
        self.method = method
        self.path = path
        self.headers = headers
        self._body = body
        self._data: Dict[str, Any] = {}
        self.url = MockURL(path)
        self.content_type = headers.get("Content-Type", "application/msgpack")
        self.app = app

    async def read(self) -> bytes:
        """Mock read() method that returns the body data."""
        return self._body

    def __getitem__(self, key):
        # Unlike a real mapping, a missing key yields None instead of KeyError.
        return self._data.get(key)

    def __setitem__(self, key, value):
        self._data[key] = value

    def __contains__(self, key):
        # Without this, ``key in request`` falls back to the legacy sequence
        # protocol (``__getitem__(0)``, ``__getitem__(1)``, ...). Because
        # ``__getitem__`` never raises, that fallback would never terminate.
        return key in self._data

    def get(self, key, default=None):
        """Return the stored value for ``key``, or ``default`` when absent."""
        return self._data.get(key, default)
@dataclass
class _AgentSession:
    """Maintain Agent state across requests."""

    # Sample rates as floats; judging by the name they are keyed by a
    # service/env identifier — TODO confirm the key format with the code that populates it.
    sample_rate_by_service_env: Dict[str, float] = field(default_factory=dict)
class Agent:
def __init__(self) -> None:
    """
    Try to only store the requests sent to the agent. There are many representations
    of data but typically information is lost while transforming the data so it is best
    to keep the original and compute transformation when needed.
    """
    # Every stored request, in arrival order (see ``_store_request``).
    self._requests: List[Request] = []
    self._rc_server = RemoteConfigServer()

    # Trace check bookkeeping, both keyed by session token.
    self._trace_failures: Dict[str, List[Tuple[CheckTrace, str]]] = defaultdict(default_value_trace_failures)
    self._trace_check_results_by_check: Dict[str, Dict[str, Dict[str, int]]] = defaultdict(
        default_value_trace_check_results_by_check
    )

    self._forward_endpoints: List[str] = [
        "/v0.4/traces",
        "/v0.5/traces",
        "/v0.7/traces",
        "/v1.0/traces",
        "/v0.6/stats",
        "/v0.7/config",
        "/info",
        "/telemetry/proxy/api/v2/apmtelemetry",
        "/v0.1/pipeline_stats",
        "/tracer_flare/v1",
        "/evp_proxy/v2/api/v2/llmobs",
        "/evp_proxy/v2/api/intake/llm-obs/v1/eval-metric",
        "/evp_proxy/v2/api/intake/llm-obs/v2/eval-metric",
        "/evp_proxy/v2/api/v2/exposures",
        "/evp_proxy/v4/api/v2/errorsintake",
        "/evp_proxy/v4/api/v2/llmobs",
    ]

    # Note that sessions are not cleared at any point since we don't know
    # definitively when a session is over.
    # ``_AgentSession()`` starts with an empty ``sample_rate_by_service_env`` mapping.
    self._sessions: DefaultDict[Optional[str], _AgentSession] = defaultdict(_AgentSession)
    self.vcr_cassette_prefix: Optional[str] = None
async def traces(self) -> TraceMap:
    """Return every span stored by the agent, grouped by trace id.

    Stored requests are walked from the most recently received to the oldest;
    trace ids appear in the order they are first encountered during that walk.
    Spans from separate trace chunks with the same trace id are aggregated
    into one list.
    """
    grouped: TraceMap = OrderedDict()
    for stored_request in reversed(self._requests):
        for chunk in await self._traces_from_request(stored_request):
            for span in chunk:
                grouped.setdefault(span["trace_id"], []).append(span)
    return grouped
async def clear_trace_check_failures(self, request: Request) -> web.Response:
    """Clear the recorded trace check failures and results.

    With the query parameter ``clear_all=true`` the data for every session
    token is dropped; otherwise only the data recorded for the request's
    session token is removed. Clearing a token that has nothing recorded is
    a no-op.
    """
    token = request["session_token"]
    clear_all = "clear_all" in request.query and request.query["clear_all"].lower() == "true"
    if clear_all:
        trace_failures = [failure for failures in self._trace_failures.values() for failure in failures]
        self._trace_failures = defaultdict(default_value_trace_failures)
        self._trace_check_results_by_check = defaultdict(default_value_trace_check_results_by_check)
    else:
        # ``pop`` with a default: a token may have no failures and/or no check
        # results recorded, and ``del`` on the missing key would raise KeyError.
        trace_failures = self._trace_failures.pop(token, [])
        self._trace_check_results_by_check.pop(token, None)
    log.info(f"Clearing {len(trace_failures)} Trace Check Failures for Token {token}, clear_all={clear_all}")
    log.info(trace_failures)
    return web.HTTPOk()
async def get_trace_check_failures(self, request: Request) -> web.Response:
    """Report the recorded Trace Check failures.

    Query parameters:
        return_all: ``true`` to report failures for every session token
            instead of only the request's token.
        use_json: ``true`` to report failures as JSON instead of plain text.

    Returns ``HTTPOk`` when there are no failures; otherwise raises
    ``HTTPBadRequest`` carrying the failure report.
    """
    token = request["session_token"]
    query = request.query

    if "return_all" in query and query["return_all"].lower() == "true":
        # Flatten the failures of all tokens into one list.
        trace_check_failures = [failure for failures in self._trace_failures.values() for failure in failures]
        n_failures = len(trace_check_failures)
        log.info(f"{n_failures} Trace Failures Occurred in Total")
    else:
        trace_check_failures = self._trace_failures.get(token, [])
        n_failures = len(trace_check_failures)
        log.info(f"{n_failures} Trace Failures Occurred for Token {token}")

    if n_failures <= 0:
        return web.HTTPOk()

    if "use_json" in query and query["use_json"].lower() == "true":
        results: Dict[str, List[str]] = {}
        for check_trace, _failure_message in trace_check_failures:
            results = check_trace.get_failures_by_check(results)
        raise web.HTTPBadRequest(body=json.dumps(results), content_type="application/json")

    # Default: plain-text report made of the header plus every failure message.
    header = f"APM Test Agent Validation failed with {n_failures} Trace Check failures.\n"
    msg = header + "".join(failure_message for _check_trace, failure_message in trace_check_failures)
    raise web.HTTPBadRequest(text=msg)
async def get_trace_check_summary(self, request: Request) -> web.Response:
    """Return the passed/failed/skipped counters per check as JSON.

    With the query parameter ``return_all=true`` the counters of every session
    token are summed per check; otherwise only the results recorded for the
    request's session token are returned (an empty object when there are none).
    """
    token = request["session_token"]
    return_all = "return_all" in request.query and request.query["return_all"].lower() == "true"

    summary: Dict[str, Dict[str, int]]
    if return_all:
        summary = defaultdict(default_value_trace_results_summary)
        for results_for_token in self._trace_check_results_by_check.values():
            for check_name, check_results in results_for_token.items():
                totals = summary[check_name]
                for counter in ("Passed_Checks", "Failed_Checks", "Skipped_Checks"):
                    totals[counter] += check_results[counter]
    else:
        summary = self._trace_check_results_by_check.get(token, {})

    return web.HTTPOk(body=json.dumps(summary), content_type="application/json")
async def apmtelemetry(self) -> List[TelemetryEvent]:
    """Return the decoded telemetry events stored by the agent.

    Stored requests are walked from the most recently received to the oldest,
    and only requests routed to ``handle_v2_apmtelemetry`` are decoded.
    """
    return [
        await v2_apmtelemetry_decode_request(stored, await stored.read())
        for stored in reversed(self._requests)
        if stored.match_info.handler == self.handle_v2_apmtelemetry
    ]
async def _trace_by_trace_id(self, trace_id: int) -> Trace:
return (await self.traces())[trace_id]
async def _apmtelemetry_by_runtime_id(self, runtime_id: str) -> List[TelemetryEvent]:
return [event for event in await self.apmtelemetry() if event["runtime_id"] == runtime_id]
async def _store_request(self, request: Request) -> None:
"""Store the request object so that it can be queried later."""
# Store the request data on the request object to avoid concurrent read()s of the data which can
# result in: RuntimeError: readany() called while another coroutine is already waiting for incoming data
# See: https://github.com/DataDog/dd-apm-test-agent/pull/101 for more info
request["_testagent_data"] = await request.read()
self._requests.append(request)
def _request_data(self, request: Request) -> bytes:
"""Return the data from the request.
Note *only* use this method for requests stored with `_store_request()`.
"""
return cast(bytes, request["_testagent_data"])
def _requests_by_session(self, token: Optional[str]) -> List[Request]:
    """Return the latest requests sent with the given token.

    Stored requests are scanned from the most recently received backwards:

    - Requests whose session token equals ``token`` are collected.
    - Requests carrying no token at all are collected separately; they are
      discarded again whenever the start of a *different* session is passed.
    - The scan stops at the first session-start request when ``token`` is
      ``None``, or at the session-start request carrying ``token``.

    If no session-start request is found for the token then all requests
    made with the token are returned.

    The result is ordered by scan position, i.e. most recently received
    request first.

    Raises:
        NoSuchSessionException: if ``token`` is not ``None``, no session-start
            request carries it and no stored request was sent with it.
    """
    # Go backwards in the requests received gathering requests until
    # the /session-start request for the token is found.
    # Note that this may not return all associated traces, because some
    # may be generated before the session-start call
    session_reqs: List[Tuple[int, Request]] = []
    sessionless_reqs: List[Tuple[int, Request]] = []
    # With no token there is no specific session to look for, so the lookup always counts as matched.
    matched = token is None
    for i, req in enumerate(reversed(self._requests)):
        if req.match_info.handler == self.handle_session_start:
            if token is None:
                # If no token is specified, then we match the latest session
                break
            elif _session_token(req) == token:
                # If a token is specified and it matches, we've hit the start of our session
                matched = True
                break
            elif _session_token(req) != token:
                # If a token is specified and it doesn't match, we've hit the start of a different session
                # So we reset the list of requests
                sessionless_reqs = []
                continue
        if _session_token(req) == token:
            session_reqs.append((i, req))
        elif _session_token(req) is None:
            sessionless_reqs.append((i, req))
    if not matched and not session_reqs:
        raise NoSuchSessionException(f"No session found for token '{token}'")
    # ``i`` is the position in the reversed scan, so sorting by it interleaves
    # both lists newest-first.
    return [x[1] for x in sorted(session_reqs + sessionless_reqs, key=lambda x: x[0])]
async def _traces_from_request(self, req: Request) -> List[List[Span]]:
"""Return the trace from a trace request."""
if req.match_info.handler == self.handle_v04_traces:
return self._decode_v04_traces(req)
elif req.match_info.handler == self.handle_v05_traces:
return self._decode_v05_traces(req)
elif req.match_info.handler == self.handle_v07_traces:
return self._decode_v07_traces(req)
elif req.match_info.handler == self.handle_v1_traces:
return self._decode_v1_traces(req)
return []
async def _traces_by_session(self, token: Optional[str]) -> List[Trace]:
"""Return the traces that belong to the given session token.
If token is None or if the token was used to manually start a session
with /session-start then return all traces that were sent since the last
/session-start request was made.
Spans are aggregated by trace_id (no ordering is performed).
"""
tracemap: TraceMap = OrderedDict()
for req in self._requests_by_session(token):
traces = await self._traces_from_request(req)
for trace in traces:
for span in trace:
trace_id = span["trace_id"]
if trace_id not in tracemap:
tracemap[trace_id] = []
tracemap[trace_id].append(span)
return list(tracemap.values())
async def _apmtelemetry_by_session(self, token: Optional[str]) -> List[TelemetryEvent]:
    """Return the telemetry events that belong to the given session token.

    Only the requests returned by ``_requests_by_session(token)`` that were
    routed to ``handle_v2_apmtelemetry`` are decoded, in that order.
    """
    # TODO: Sort the events?
    return [
        await v2_apmtelemetry_decode_request(req, await req.read())
        for req in self._requests_by_session(token)
        if req.match_info.handler == self.handle_v2_apmtelemetry
    ]
async def _tracerflares_by_session(self, token: Optional[str]) -> List[TracerFlareEvent]:
    """Return the tracer-flare events that belong to the given session token.

    Only the requests returned by ``_requests_by_session(token)`` that were
    routed to ``handle_v1_tracer_flare`` are decoded, in that order.
    """
    return [
        await v1_tracerflare_decode(req, await req.read())
        for req in self._requests_by_session(token)
        if req.match_info.handler == self.handle_v1_tracer_flare
    ]
async def _tracestats_by_session(self, token: Optional[str]) -> List[v06StatsPayload]:
stats: List[v06StatsPayload] = []
for req in self._requests_by_session(token):
if req.match_info.handler == self.handle_v06_tracestats:
s = self._decode_v06_tracestats(req)
stats.append(s)
return stats
async def _logs_by_session(self, token: Optional[str]) -> List[Dict[str, Any]]:
"""Return the logs that belong to the given session token.
If token is None or if the token was used to manually start a session
with /session-start then return all logs that were sent since the last
/session-start request was made.
"""
logs: List[Dict[str, Any]] = []
for req in self._requests_by_session(token):
if req.match_info.handler == self.handle_v1_logs:
logs_data = self._decode_v1_logs(req)
logs.append(logs_data)
return logs
async def _metrics_by_session(self, token: Optional[str]) -> List[Dict[str, Any]]:
"""Return the metrics that belong to the given session token.
If token is None or if the token was used to manually start a session
with /session-start then return all metrics that were sent since the last
/session-start request was made.
"""
metrics: List[Dict[str, Any]] = []
for req in self._requests_by_session(token):
if req.match_info.handler == self.handle_v1_metrics:
metrics_data = self._decode_v1_metrics(req)
metrics.append(metrics_data)
return metrics
async def _traces_otlp_by_session(self, token: Optional[str]) -> List[Dict[str, Any]]:
"""Return the OTLP traces that belong to the given session token.
If token is None or if the token was used to manually start a session
with /session-start then return all OTLP traces that were sent since the last
/session-start request was made.
"""
traces: List[Dict[str, Any]] = []
for req in self._requests_by_session(token):
if req.match_info.handler == self.handle_v1_traces_otlp:
traces_data = self._decode_v1_traces_otlp(req)
traces.append(traces_data)
return traces
async def _integration_requests_by_session(
    self,
    token: Optional[str],
    include_sent_integrations: Optional[bool] = False,
) -> List[Request]:
    """Get all requests with an associated tested Integration.

    Two kinds of requests qualify:

    - requests routed to ``handle_put_tested_integrations``, whose JSON body
      describes the integration;
    - requests that carry ``DD_INTEGRATION`` and ``DD_INTEGRATION_VERSION``
      in their ``"_dd_trace_env_variables"`` entry.

    The first time a qualifying request is seen, an ``Integration`` plus the
    tracer version/language are cached on the request itself and the request
    is returned. A request that already has an ``"integration"`` entry is only
    returned when ``include_sent_integrations`` is true.

    With ``token=None`` every stored request is considered; otherwise only
    those returned by ``_requests_by_session(token)``.
    """
    integration_requests: List[Request] = []
    requests = self._requests if token is None else self._requests_by_session(token)
    for req in requests:
        # see if the request was to update with a newly tested integration
        if req.match_info.handler == self.handle_put_tested_integrations:
            if "integration" not in req:
                data = json.loads(await req.read())
                integration_name = data.get("integration_name", None)
                integration_version = data.get("integration_version", None)
                req["integration"] = Integration(
                    integration_name=integration_name,
                    integration_version=integration_version,
                    # The dependency name falls back to the integration name.
                    dependency_name=data.get("dependency_name", integration_name),
                )
                req["tracer_version"] = data.get("tracer_version", None)
                req["tracer_language"] = data.get("tracer_language", None)
                integration_requests.append(req)
            elif include_sent_integrations:
                integration_requests.append(req)
        # check if integration data was provided in the trace request instead
        elif (
            "_dd_trace_env_variables" in req
            and "DD_INTEGRATION" in req["_dd_trace_env_variables"]
            and "DD_INTEGRATION_VERSION" in req["_dd_trace_env_variables"]
        ):
            integration_name = req["_dd_trace_env_variables"]["DD_INTEGRATION"]
            integration_version = req["_dd_trace_env_variables"]["DD_INTEGRATION_VERSION"]
            if "integration" not in req:
                req["integration"] = Integration(
                    integration_name=integration_name,
                    integration_version=integration_version,
                    dependency_name=req["_dd_trace_env_variables"].get("DD_DEPENDENCY_NAME", integration_name),
                )
                # Tracer version: the dd-client-library-version header wins over datadog-meta-tracer-version.
                if req.headers.get("dd-client-library-version", None):
                    req["tracer_version"] = req.headers.get("dd-client-library-version")
                elif req.headers.get("datadog-meta-tracer-version", None):
                    req["tracer_version"] = req.headers.get("datadog-meta-tracer-version")
                # Tracer language: the dd-client-library-language header wins over datadog-meta-lang.
                if req.headers.get("dd-client-library-language", None):
                    req["tracer_language"] = req.headers.get("dd-client-library-language")
                elif req.headers.get("datadog-meta-lang", None):
                    req["tracer_language"] = req.headers.get("datadog-meta-lang")
                integration_requests.append(req)
            elif include_sent_integrations:
                integration_requests.append(req)
    return integration_requests
def _decode_v04_traces(self, request: Request) -> v04TracePayload:
    """Decode the stored body of a v0.4 trace request.

    The request's content type and the app's ``suppress_trace_parse_errors``
    setting are passed through to the decoder.
    """
    return trace_decode_v04(
        request.content_type,
        self._request_data(request),
        request.app["suppress_trace_parse_errors"],
    )
def _decode_v05_traces(self, request: Request) -> v04TracePayload:
    """Decode the stored body of a v0.5 trace request."""
    return trace_decode_v05(self._request_data(request))
def _decode_v07_traces(self, request: Request) -> v04TracePayload:
    """Decode the stored body of a v0.7 trace request."""
    return trace_decode_v07(self._request_data(request))
def _decode_v1_traces(self, request: Request) -> v04TracePayload:
    """Decode the stored body of a v1 trace request."""
    return trace_decode_v1(self._request_data(request))
def _decode_v06_tracestats(self, request: Request) -> v06StatsPayload:
    """Decode the stored body of a v0.6 stats request."""
    return tracestats_decode_v06(self._request_data(request))
def _decode_v1_logs(self, request: Request) -> Dict[str, Any]:
    """Decode the stored body of a ``/v1/logs`` request.

    The ``Content-Type`` header (lower-cased and trimmed) is passed to the
    decoder. Any decoding error is re-raised as ``HTTPBadRequest`` carrying
    the error text.
    """
    body = self._request_data(request)
    media_type = request.headers.get("Content-Type", "").lower().strip()
    try:
        decoded = decode_logs_request(body, media_type)
    except Exception as e:
        raise web.HTTPBadRequest(text=str(e))
    return decoded
def _decode_v1_metrics(self, request: Request) -> Dict[str, Any]:
    """Decode the stored body of a ``/v1/metrics`` request.

    The ``Content-Type`` header (lower-cased and trimmed) is passed to the
    decoder. Any decoding error is re-raised as ``HTTPBadRequest`` carrying
    the error text.
    """
    body = self._request_data(request)
    media_type = request.headers.get("Content-Type", "").lower().strip()
    try:
        decoded = decode_metrics_request(body, media_type)
    except Exception as e:
        raise web.HTTPBadRequest(text=str(e))
    return decoded
def _decode_v1_traces_otlp(self, request: Request) -> Dict[str, Any]:
    """Decode the stored body of an OTLP ``/v1/traces`` request.

    The ``Content-Type`` header (lower-cased and trimmed) is passed to the
    decoder. Any decoding error is re-raised as ``HTTPBadRequest`` carrying
    the error text.
    """
    body = self._request_data(request)
    media_type = request.headers.get("Content-Type", "").lower().strip()
    try:
        decoded = decode_traces_request(body, media_type)
    except Exception as e:
        raise web.HTTPBadRequest(text=str(e))
    return decoded
async def handle_v04_traces(self, request: Request) -> web.Response:
    """Handle a v0.4 trace request by delegating to ``_handle_traces``."""
    response = await self._handle_traces(request, version="v0.4")
    return response
async def handle_v05_traces(self, request: Request) -> web.Response:
    """Handle a v0.5 trace request by delegating to ``_handle_traces``."""
    response = await self._handle_traces(request, version="v0.5")
    return response
async def handle_v07_traces(self, request: Request) -> web.Response:
    """Handle a v0.7 trace request by delegating to ``_handle_traces``."""
    response = await self._handle_traces(request, version="v0.7")
    return response
async def handle_v1_traces(self, request: Request) -> web.Response:
    """Handle a v1 trace request by delegating to ``_handle_traces``."""
    response = await self._handle_traces(request, version="v1")
    return response
async def handle_v06_tracestats(self, request: Request) -> web.Response:
    """Handle a ``/v0.6/stats`` request.

    The payload is decoded (so malformed payloads fail here), the number of
    entries under its ``"Stats"`` key is logged, and ``HTTPOk`` is returned.
    """
    stats = self._decode_v06_tracestats(request)
    nstats = len(stats["Stats"])
    log.info(
        "received /v0.6/stats payload with %r stats bucket%s",
        nstats,
        # Plural for every count except exactly one: "0 stats buckets", "1 stats bucket".
        "" if nstats == 1 else "s",
    )
    return web.HTTPOk()
async def handle_v01_pipelinestats(self, request: Request) -> web.Response:
    """Handle a ``/v0.1/pipeline_stats`` request: log its arrival and return ``HTTPOk``.

    The payload itself is not decoded or inspected here.
    """
    log.info("received /v0.1/pipeline_stats payload")
    return web.HTTPOk()
async def handle_v1_logs(self, request: Request) -> web.Response:
    """Handle a ``/v1/logs`` request.

    Decodes the payload, logs how many resource logs and log records it
    contains, and answers with a serialized, empty
    ``ExportLogsServiceResponse`` (status 200, ``application/x-protobuf``).
    """
    logs_data = self._decode_v1_logs(request)
    resource_logs = logs_data.get("resource_logs", [])

    total_log_records = 0
    for resource_log in resource_logs:
        for scope_log in resource_log.get("scope_logs", []):
            total_log_records += len(scope_log.get("log_records", []))

    log.info(
        "received /v1/logs payload with %r resource log(s) containing %r log record(s)",
        len(resource_logs),
        total_log_records,
    )
    payload = ExportLogsServiceResponse().SerializeToString()
    return web.Response(body=payload, status=200, content_type="application/x-protobuf")
async def handle_v1_metrics(self, request: Request) -> web.Response:
    """Handle a ``/v1/metrics`` request.

    Decodes the payload, logs how many resource metrics and metrics it
    contains, and answers with a serialized, empty
    ``ExportMetricsServiceResponse`` (status 200, ``application/x-protobuf``).
    """
    metrics_data = self._decode_v1_metrics(request)
    resource_metrics = metrics_data.get("resource_metrics", [])

    total_metrics = 0
    for resource_metric in resource_metrics:
        for scope_metric in resource_metric.get("scope_metrics", []):
            total_metrics += len(scope_metric.get("metrics", []))

    log.info(
        "received /v1/metrics payload with %r resource metric(s) containing %r metric(s)",
        len(resource_metrics),
        total_metrics,
    )
    payload = ExportMetricsServiceResponse().SerializeToString()
    return web.Response(body=payload, status=200, content_type="application/x-protobuf")
async def handle_v1_traces_otlp(self, request: Request) -> web.Response:
    """Accept a /v1/traces payload, log its size, and reply with an empty export response.

    The payload is decoded with ``_decode_v1_traces_otlp``. The number of
    resource spans and the total number of spans across all scope spans are
    logged. The reply is a serialized ``ExportTraceServiceResponse`` with a
    protobuf content type.
    """
    payload = self._decode_v1_traces_otlp(request)
    resource_spans = payload.get("resource_spans", [])

    # Count every span nested under resource_spans -> scope_spans.
    span_count = 0
    for resource_span in resource_spans:
        for scope_span in resource_span.get("scope_spans", []):
            span_count += len(scope_span.get("spans", []))

    log.info(
        "received /v1/traces payload with %r resource span(s) containing %r span(s)",
        len(resource_spans),
        span_count,
    )
    serialized = ExportTraceServiceResponse().SerializeToString()
    return web.Response(body=serialized, status=200, content_type="application/x-protobuf")
async def handle_v07_remoteconfig(self, request: Request) -> web.Response:
    """Emulate the Remote Config endpoint: /v0.7/config.

    Returns, as JSON, the config response the remote config server holds for
    the request's session token.
    """
    session_token = _session_token(request)
    config_response = await self._rc_server.get_config_response(session_token)
    return web.json_response(config_response)
async def handle_v07_remoteconfig_create(self, request: Request) -> web.Response:
    """Configure the response payload of /v0.7/config.

    The request body is parsed as JSON and handed to the remote config
    server's ``create_config_response`` for the request's session token.
    Replies with an empty 202.
    """
    body = await request.read()
    session_token = _session_token(request)
    payload = json.loads(body)
    self._rc_server.create_config_response(session_token, payload)
    return web.HTTPAccepted()
async def handle_v07_remoteconfig_path_create(self, request: Request) -> web.Response:
    """Build a Remote Config payload from a target file path and its content.

    Remote Config payloads are quite complex, so this endpoint accepts a JSON
    body with just a ``"path"`` and a ``"msg"`` and lets the remote config
    server build the payload for the request's session token. Replies with an
    empty 202.
    """
    body = json.loads(await request.read())
    target_path, message = body["path"], body["msg"]
    session_token = _session_token(request)
    self._rc_server.create_config_path_response(session_token, target_path, message)
    return web.HTTPAccepted()
async def handle_v07_remoteconfig_put(self, request: Request) -> web.Response:
    """Update the response payload of /v0.7/config.

    The request body is parsed as JSON and handed to the remote config
    server's ``update_config_response`` for the request's session token.
    Replies with an empty 202.
    """
    body = await request.read()
    session_token = _session_token(request)
    payload = json.loads(body)
    self._rc_server.update_config_response(session_token, payload)
    return web.HTTPAccepted()
async def handle_v2_apmtelemetry(self, request: Request) -> web.Response:
    """Decode an APM telemetry request and reply with an empty 200.

    The decoded result is discarded; decoding is only run so that any error
    it raises surfaces to the client.
    """
    request_data = self._request_data(request)
    await v2_apmtelemetry_decode_request(request, request_data)
    # TODO: Validation
    # TODO: Snapshots
    return web.HTTPOk()
async def handle_v1_tracer_flare(self, request: Request) -> web.Response:
    """Validate a tracer flare request.

    Returns an empty 200 when the decoded flare contains every required
    field.

    Raises:
        web.HTTPBadRequest: if decoding reported an ``"error"`` entry, or if
            any required field is absent. The message is also logged.
    """
    flare: TracerFlareEvent = await v1_tracerflare_decode(request, self._request_data(request))

    if "error" in flare:
        msg = f"Error while parsing flare request: {flare['error']}"
        log.error(msg)
        raise web.HTTPBadRequest(text=msg)

    required_fields = ("source", "case_id", "email", "hostname", "flare_file")
    missing_fields = [field for field in required_fields if field not in flare]
    if missing_fields:
        msg = f"Flare request is missing {','.join(missing_fields)}"
        log.error(msg)
        raise web.HTTPBadRequest(text=msg)

    return web.HTTPOk()
async def handle_evp_proxy_v2_api_v2_llmobs(self, request: Request) -> web.Response:
    """Forward an LLM Observability payload received on the v2 EVP proxy route.

    Forwarding is skipped when the app's ``disable_llmobs_data_forwarding``
    flag is set. Otherwise the destination is chosen as follows:

    * if ``agent_url`` is configured, the payload goes to that agent's
      ``/evp_proxy/v2/api/v2/llmobs`` route;
    * else, if both ``dd_api_key`` and ``dd_site`` are set, it goes directly
      to ``https://llmobs-intake.<dd_site>/api/v2/llmobs`` with the API key
      added as the ``DD-API-KEY`` header;
    * else an error is logged and nothing is forwarded.

    Forwarding is best-effort: a non-OK upstream status or a transport error
    is logged as a warning and never fails the incoming request. An empty
    200 is returned in every case.
    """
    if request.app["disable_llmobs_data_forwarding"]:
        return web.HTTPOk()
    dd_site = request.app["dd_site"]
    dd_api_key = request.app["dd_api_key"]
    agent_url = request.app["agent_url"]
    headers = request.headers.copy()
    if agent_url:
        url = f"{agent_url}/evp_proxy/v2/api/v2/llmobs"  # use configured agent URL if provided
    elif dd_api_key is None:
        log.error("No DD_API_KEY set to forward LLM Observability events to Datadog. Skipping forwarding.")
        return web.HTTPOk()
    elif not dd_site:
        log.error("No DD_SITE set to forward LLM Observability events to Datadog. Skipping forwarding.")
        return web.HTTPOk()
    else:
        url = f"https://llmobs-intake.{dd_site}/api/v2/llmobs"
        headers["DD-API-KEY"] = dd_api_key
    # Read the body outside the try block so that errors reading the incoming
    # request are not mistaken for forwarding failures.
    data = await request.read()
    try:
        async with ClientSession() as session:
            async with session.post(url, headers=headers, data=data) as resp:
                if not resp.ok:
                    log.warning(
                        f"Failed to forward LLM Observability events to Datadog: {resp.status} {await resp.text()}"
                    )
                else:
                    log.info(f"Forwarded LLM Observability events to Datadog: {resp.status} {await resp.text()}")
    except Exception as e:
        # An unreachable upstream (connection refused, DNS failure, timeout, ...)
        # must not turn into a 500 for the client that sent the events.
        log.warning("Error forwarding LLM Observability events to Datadog: %s", e)
    return web.HTTPOk()
async def handle_evp_proxy_v2_api_v2_llmobs_update(self, request: Request) -> web.Response:
    """Forward span updates to DD backend, then update local stored spans.

    Forwarding is best-effort: it is skipped when disabled or when no
    destination can be determined, and any failure is only logged. The local
    update runs when the app has an ``llmobs_event_platform_api`` entry. An
    empty 200 is always returned.
    """
    payload = await request.read()
    payload_content_type = request.content_type or ""
    app = request.app

    if not app["disable_llmobs_data_forwarding"]:
        dd_site = app["dd_site"]
        dd_api_key = app["dd_api_key"]
        agent_url = app["agent_url"]
        forward_headers = dict(request.headers)

        # Prefer a configured agent; otherwise go straight to the intake when
        # both an API key and a site are available. An empty target means
        # there is nowhere to forward to.
        target = ""
        if agent_url:
            target = f"{agent_url}/evp_proxy/v2/api/v2/llmobs"
        elif dd_api_key and dd_site:
            target = f"https://llmobs-intake.{dd_site}/api/v2/llmobs"
            forward_headers["DD-API-KEY"] = dd_api_key

        if target:
            try:
                async with ClientSession() as session, session.post(
                    target, headers=forward_headers, data=payload
                ) as resp:
                    if resp.ok:
                        log.info("Forwarded LLMObs span update: %s", resp.status)
                    else:
                        log.warning("Failed to forward LLMObs update: %s %s", resp.status, await resp.text())
            except Exception as e:
                log.warning("Error forwarding LLMObs span update: %s", e)

    # Apply the update to the locally stored spans, if a store is registered.
    local_api = app.get("llmobs_event_platform_api")
    if local_api:
        local_api.update_spans(payload, payload_content_type)
    return web.HTTPOk()
async def handle_evp_proxy_v2_llmobs_eval_metric(self, request: Request) -> web.Response:
    """Acknowledge the request with an empty 200 without inspecting it."""
    ok_response = web.HTTPOk()
    return ok_response
async def handle_evp_proxy_v2_api_v2_exposures(self, request: Request) -> web.Response:
    """Acknowledge the request with an empty 200 without inspecting it."""
    ok_response = web.HTTPOk()
    return ok_response
async def handle_evp_proxy_v4_api_v2_errorsintake(self, request: Request) -> web.Response:
    """Acknowledge the request with an empty 200 without inspecting it."""
    ok_response = web.HTTPOk()
    return ok_response
async def handle_evp_proxy_v4_api_v2_llmobs(self, request: Request) -> web.Response:
    """Forward an LLM Observability payload received on the v4 EVP proxy route.

    Forwarding is skipped when the app's ``disable_llmobs_data_forwarding``
    flag is set. Otherwise the destination is chosen as follows:

    * if ``agent_url`` is configured, the payload goes to that agent's
      ``/evp_proxy/v4/api/v2/llmobs`` route;
    * else, if both ``dd_api_key`` and ``dd_site`` are set, it goes directly
      to ``https://llmobs-intake.<dd_site>/api/v2/llmobs`` with the API key
      added as the ``DD-API-KEY`` header;
    * else an error is logged and nothing is forwarded.

    Forwarding is best-effort: a non-OK upstream status or a transport error
    is logged as a warning and never fails the incoming request. An empty
    200 is returned in every case.
    """
    if request.app["disable_llmobs_data_forwarding"]:
        return web.HTTPOk()
    dd_site = request.app["dd_site"]
    dd_api_key = request.app["dd_api_key"]
    agent_url = request.app["agent_url"]
    headers = request.headers.copy()
    if agent_url:
        url = f"{agent_url}/evp_proxy/v4/api/v2/llmobs"
    elif dd_api_key is None:
        log.error("No DD_API_KEY set to forward LLM Observability events to Datadog. Skipping forwarding.")
        return web.HTTPOk()
    elif not dd_site:
        log.error("No DD_SITE set to forward LLM Observability events to Datadog. Skipping forwarding.")
        return web.HTTPOk()
    else:
        url = f"https://llmobs-intake.{dd_site}/api/v2/llmobs"
        headers["DD-API-KEY"] = dd_api_key
    # Read the body outside the try block so that errors reading the incoming
    # request are not mistaken for forwarding failures.
    data = await request.read()
    try:
        async with ClientSession() as session:
            async with session.post(url, headers=headers, data=data) as resp:
                if not resp.ok:
                    log.warning(
                        f"Failed to forward LLM Observability events to Datadog: {resp.status} {await resp.text()}"
                    )
                else:
                    log.info(f"Forwarded LLM Observability events to Datadog: {resp.status} {await resp.text()}")
    except Exception as e:
        # An unreachable upstream (connection refused, DNS failure, timeout, ...)
        # must not turn into a 500 for the client that sent the events.
        log.warning("Error forwarding LLM Observability events to Datadog: %s", e)
    return web.HTTPOk()
async def handle_evp_proxy_v4_api_v2_llmobs_update(self, request: Request) -> web.Response:
"""Forward span updates to DD backend, then update local stored spans."""
data = await request.read()
content_type = request.content_type or ""
if not request.app["disable_llmobs_data_forwarding"]:
dd_site = request.app["dd_site"]
dd_api_key = request.app["dd_api_key"]
agent_url = request.app["agent_url"]
headers = dict(request.headers)
if agent_url:
url = f"{agent_url}/evp_proxy/v4/api/v2/llmobs"
elif dd_api_key and dd_site:
url = f"https://llmobs-intake.{dd_site}/api/v2/llmobs"
headers["DD-API-KEY"] = dd_api_key