Skip to content

Commit bb04ef4

Browse files
Abdkhan14Abdullah Khan
and
Abdullah Khan
authored
feat(tracing-without-performance): Returned orphan errors with trace … (#54103)
For the ticket: https://getsentry.atlassian.net/browse/PERF-2052. - We want to add new rows to trace views for traces that contain orphan errors. Currently the trace endpoint ignores orphan errors. ![tracing_without_perf (1)](https://github.com/getsentry/sentry/assets/60121741/d84f5de0-14f6-40cc-b834-f0941f41bdf2) - This PR aims to append orphan errors to the response of the trace endpoint and should work for both scenarios: - - Only errors in trace (Scenario 3 from image above). - - Mixup of errors and transactions (Scenario 2 from image above). - Returning `event_type:"error"` and `generation:0` with all errors, to help us identify the orphan errors in the frontend and always group them under the first generation. - Added tests for both scenarios. --------- Co-authored-by: Abdullah Khan <[email protected]>
1 parent e868b7b commit bb04ef4

File tree

2 files changed

+217
-5
lines changed

2 files changed

+217
-5
lines changed

src/sentry/api/endpoints/organization_events_trace.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ class TraceError(TypedDict):
8787
project_slug: str
8888
title: str
8989
level: str
90+
timestamp: str
91+
event_type: str
92+
generation: int
9093

9194

9295
class TracePerformanceIssue(TypedDict):
@@ -450,6 +453,9 @@ def serialize_error(event: SnubaError) -> TraceError:
450453
"project_slug": event["project"],
451454
"title": event["title"],
452455
"level": event["tags[level]"],
456+
"timestamp": event["timestamp"],
457+
"event_type": "error",
458+
"generation": 0,
453459
}
454460

455461
@staticmethod
@@ -545,7 +551,15 @@ def get(self, request: HttpRequest, organization: Organization, trace_id: str) -
545551
)
546552

547553
return Response(
548-
self.serialize(transactions, errors, roots, warning_extra, event_id, detailed)
554+
self.serialize(
555+
transactions,
556+
errors,
557+
roots,
558+
warning_extra,
559+
event_id,
560+
detailed,
561+
tracing_without_performance_enabled,
562+
)
549563
)
550564

551565

@@ -608,6 +622,7 @@ def serialize(
608622
warning_extra: Dict[str, str],
609623
event_id: Optional[str],
610624
detailed: bool = False,
625+
allow_orphan_errors: bool = False,
611626
) -> Sequence[LightResponse]:
612627
"""Because the light endpoint could potentially have gaps between root and event we return a flattened list"""
613628
if event_id is None:
@@ -730,6 +745,7 @@ def serialize(
730745
warning_extra: Dict[str, str],
731746
event_id: Optional[str],
732747
detailed: bool = False,
748+
allow_orphan_errors: bool = False,
733749
) -> Sequence[FullResponse]:
734750
"""For the full event trace, we return the results as a graph instead of a flattened list
735751
@@ -754,8 +770,8 @@ def serialize(
754770
results_map[None].append(root_event)
755771
to_check.append(root)
756772

773+
iteration = 0
757774
with sentry_sdk.start_span(op="building.trace", description="full trace"):
758-
iteration = 0
759775
has_orphans = False
760776
while parent_map or to_check:
761777
if len(to_check) == 0:
@@ -855,6 +871,19 @@ def serialize(
855871
)
856872
break
857873

874+
# We are now left with orphan errors in the error_map,
875+
# that we need to serialize and return with our results.
876+
orphan_errors: List[TraceError] = []
877+
if allow_orphan_errors and iteration <= MAX_TRACE_SIZE:
878+
for errors in error_map.values():
879+
for error in errors:
880+
orphan_errors.append(self.serialize_error(error))
881+
iteration += 1
882+
if iteration > MAX_TRACE_SIZE:
883+
break
884+
if iteration > MAX_TRACE_SIZE:
885+
break
886+
858887
root_traces: List[TraceEvent] = []
859888
orphans: List[TraceEvent] = []
860889
for index, result in enumerate(results_map.values()):
@@ -867,14 +896,17 @@ def serialize(
867896
# We sort orphans and roots separately because we always want the root(s) as the first element(s)
868897
root_traces.sort(key=child_sort_key)
869898
orphans.sort(key=child_sort_key)
899+
orphan_errors = sorted(orphan_errors, key=lambda k: k["timestamp"])
870900

871901
if len(orphans) > 0:
872902
sentry_sdk.set_tag("discover.trace-view.contains-orphans", "yes")
873903
logger.warning("discover.trace-view.contains-orphans", extra=warning_extra)
874904

875-
return [trace.full_dict(detailed) for trace in root_traces] + [
876-
orphan.full_dict(detailed) for orphan in orphans
877-
]
905+
return (
906+
[trace.full_dict(detailed) for trace in root_traces]
907+
+ [orphan.full_dict(detailed) for orphan in orphans]
908+
+ [orphan for orphan in orphan_errors]
909+
)
878910

879911

880912
@region_silo_endpoint

tests/snuba/api/endpoints/test_organization_events_trace.py

+180
Original file line numberDiff line numberDiff line change
@@ -1109,6 +1109,9 @@ def test_with_errors(self):
11091109
"project_slug": self.gen1_project.slug,
11101110
"level": "fatal",
11111111
"title": error.title,
1112+
"timestamp": error.timestamp,
1113+
"generation": 0,
1114+
"event_type": "error",
11121115
} in gen1_event["errors"]
11131116
assert {
11141117
"event_id": error1.event_id,
@@ -1118,8 +1121,182 @@ def test_with_errors(self):
11181121
"project_slug": self.gen1_project.slug,
11191122
"level": "warning",
11201123
"title": error1.title,
1124+
"timestamp": error1.timestamp,
1125+
"generation": 0,
1126+
"event_type": "error",
11211127
} in gen1_event["errors"]
11221128

1129+
def test_with_only_orphan_errors_with_same_span_ids(self):
1130+
span_id = uuid4().hex[:16]
1131+
start, end = self.get_start_end(10000)
1132+
1133+
# Error 1
1134+
error_data = load_data(
1135+
"javascript",
1136+
timestamp=end,
1137+
)
1138+
error_data["contexts"]["trace"] = {
1139+
"type": "trace",
1140+
"trace_id": self.trace_id,
1141+
"span_id": span_id,
1142+
}
1143+
error_data["level"] = "fatal"
1144+
error = self.store_event(error_data, project_id=self.project.id)
1145+
1146+
# Error 2 before after Error 1
1147+
error_data1 = load_data(
1148+
"javascript",
1149+
timestamp=start,
1150+
)
1151+
error_data1["level"] = "warning"
1152+
error_data1["contexts"]["trace"] = {
1153+
"type": "trace",
1154+
"trace_id": self.trace_id,
1155+
"span_id": span_id,
1156+
}
1157+
error1 = self.store_event(error_data1, project_id=self.project.id)
1158+
1159+
with self.feature(
1160+
[*self.FEATURES, "organizations:performance-tracing-without-performance"]
1161+
):
1162+
response = self.client.get(
1163+
self.url,
1164+
data={"project": -1},
1165+
format="json",
1166+
)
1167+
assert response.status_code == 200, response.content
1168+
assert len(response.data) == 2
1169+
# Sorting by timestamp puts Error1 after Error2 in the response
1170+
assert {
1171+
"event_id": error.event_id,
1172+
"issue_id": error.group_id,
1173+
"span": span_id,
1174+
"project_id": self.project.id,
1175+
"project_slug": self.project.slug,
1176+
"level": "fatal",
1177+
"title": error.title,
1178+
"timestamp": error.timestamp,
1179+
"generation": 0,
1180+
"event_type": "error",
1181+
} == response.data[1]
1182+
assert {
1183+
"event_id": error1.event_id,
1184+
"issue_id": error1.group_id,
1185+
"span": span_id,
1186+
"project_id": self.project.id,
1187+
"project_slug": self.project.slug,
1188+
"level": "warning",
1189+
"title": error1.title,
1190+
"timestamp": error1.timestamp,
1191+
"generation": 0,
1192+
"event_type": "error",
1193+
} == response.data[0]
1194+
1195+
def test_with_only_orphan_errors_with_different_span_ids(self):
1196+
start, _ = self.get_start_end(1000)
1197+
span_id = uuid4().hex[:16]
1198+
error_data = load_data(
1199+
"javascript",
1200+
timestamp=start,
1201+
)
1202+
error_data["contexts"]["trace"] = {
1203+
"type": "trace",
1204+
"trace_id": self.trace_id,
1205+
"span_id": span_id,
1206+
}
1207+
error_data["level"] = "fatal"
1208+
error = self.store_event(error_data, project_id=self.project.id)
1209+
error_data["level"] = "warning"
1210+
span_id1 = uuid4().hex[:16]
1211+
error_data["contexts"]["trace"] = {
1212+
"type": "trace",
1213+
"trace_id": self.trace_id,
1214+
"span_id": span_id1,
1215+
}
1216+
error1 = self.store_event(error_data, project_id=self.project.id)
1217+
1218+
with self.feature(
1219+
[*self.FEATURES, "organizations:performance-tracing-without-performance"]
1220+
):
1221+
response = self.client.get(
1222+
self.url,
1223+
data={"project": -1},
1224+
format="json",
1225+
)
1226+
assert response.status_code == 200, response.content
1227+
assert len(response.data) == 2
1228+
assert {
1229+
"event_id": error.event_id,
1230+
"issue_id": error.group_id,
1231+
"span": span_id,
1232+
"project_id": self.project.id,
1233+
"project_slug": self.project.slug,
1234+
"level": "fatal",
1235+
"title": error.title,
1236+
"timestamp": error.timestamp,
1237+
"generation": 0,
1238+
"event_type": "error",
1239+
} in response.data
1240+
assert {
1241+
"event_id": error1.event_id,
1242+
"issue_id": error1.group_id,
1243+
"span": span_id1,
1244+
"project_id": self.project.id,
1245+
"project_slug": self.project.slug,
1246+
"level": "warning",
1247+
"title": error1.title,
1248+
"timestamp": error1.timestamp,
1249+
"generation": 0,
1250+
"event_type": "error",
1251+
} in response.data
1252+
1253+
def test_with_mixup_of_orphan_errors_with_simple_trace_data(self):
1254+
self.load_trace()
1255+
start, _ = self.get_start_end(1000)
1256+
span_id = uuid4().hex[:16]
1257+
error_data = load_data(
1258+
"javascript",
1259+
timestamp=start,
1260+
)
1261+
error_data["contexts"]["trace"] = {
1262+
"type": "trace",
1263+
"trace_id": self.trace_id,
1264+
"span_id": span_id,
1265+
}
1266+
error_data["level"] = "fatal"
1267+
error = self.store_event(error_data, project_id=self.project.id)
1268+
error_data["level"] = "warning"
1269+
span_id1 = uuid4().hex[:16]
1270+
error_data["contexts"]["trace"] = {
1271+
"type": "trace",
1272+
"trace_id": self.trace_id,
1273+
"span_id": span_id1,
1274+
}
1275+
1276+
with self.feature(
1277+
[*self.FEATURES, "organizations:performance-tracing-without-performance"]
1278+
):
1279+
response = self.client.get(
1280+
self.url,
1281+
data={"project": -1},
1282+
format="json",
1283+
)
1284+
assert response.status_code == 200, response.content
1285+
assert len(response.data) == 2
1286+
self.assert_trace_data(response.data[0])
1287+
assert {
1288+
"event_id": error.event_id,
1289+
"issue_id": error.group_id,
1290+
"span": span_id,
1291+
"project_id": self.project.id,
1292+
"project_slug": self.project.slug,
1293+
"level": "fatal",
1294+
"title": error.title,
1295+
"timestamp": error.timestamp,
1296+
"generation": 0,
1297+
"event_type": "error",
1298+
} in response.data
1299+
11231300
def test_with_default(self):
11241301
self.load_trace()
11251302
start, _ = self.get_start_end(1000)
@@ -1143,6 +1320,9 @@ def test_with_default(self):
11431320
"project_slug": self.gen1_project.slug,
11441321
"level": "debug",
11451322
"title": "this is a log message",
1323+
"timestamp": default_event.timestamp,
1324+
"generation": 0,
1325+
"event_type": "error",
11461326
} in root_event["errors"]
11471327

11481328
def test_pruning_root(self):

0 commit comments

Comments
 (0)