feat(tracing-without-performance): Returned orphan errors with trace … (#54103)

Abdkhan14 · Abdullah Khan · web-flow · commit bb04ef449cbd · 2023-08-03T17:33:21.000Z
For the ticket: https://getsentry.atlassian.net/browse/PERF-2052. - We want to add new rows to trace views for traces that contain orphan errors. Currently the trace endpoint ignores orphan errors. ![tracing_without_perf (1)](https://github.com/getsentry/sentry/assets/60121741/d84f5de0-14f6-40cc-b834-f0941f41bdf2) - This PR aims to append orphan errors to the response of the trace endpoint and should work for both scenarios: - - Only errors in trace (Scenario 3 from image above). - - Mixup of errors and transactions (Scenario 2 from image above). - Returning `event_type:"error"` and `generation:0` with all errors, to help us identify the orphan errors in the frontend and always group them under the first generation. - Added tests for both scenarios. --------- Co-authored-by: Abdullah Khan <abdullahkhan@PG9Y57YDXQ.local>
diff --git a/src/sentry/api/endpoints/organization_events_trace.py b/src/sentry/api/endpoints/organization_events_trace.py
@@ -87,6 +87,9 @@ class TraceError(TypedDict):
     project_slug: str
     title: str
     level: str
+    timestamp: str
+    event_type: str
+    generation: int
 
 
 class TracePerformanceIssue(TypedDict):
@@ -450,6 +453,9 @@ def serialize_error(event: SnubaError) -> TraceError:
             "project_slug": event["project"],
             "title": event["title"],
             "level": event["tags[level]"],
+            "timestamp": event["timestamp"],
+            "event_type": "error",
+            "generation": 0,
         }
 
     @staticmethod
@@ -545,7 +551,15 @@ def get(self, request: HttpRequest, organization: Organization, trace_id: str) -
             )
 
         return Response(
-            self.serialize(transactions, errors, roots, warning_extra, event_id, detailed)
+            self.serialize(
+                transactions,
+                errors,
+                roots,
+                warning_extra,
+                event_id,
+                detailed,
+                tracing_without_performance_enabled,
+            )
         )
 
 
@@ -608,6 +622,7 @@ def serialize(
         warning_extra: Dict[str, str],
         event_id: Optional[str],
         detailed: bool = False,
+        allow_orphan_errors: bool = False,
     ) -> Sequence[LightResponse]:
         """Because the light endpoint could potentially have gaps between root and event we return a flattened list"""
         if event_id is None:
@@ -730,6 +745,7 @@ def serialize(
         warning_extra: Dict[str, str],
         event_id: Optional[str],
         detailed: bool = False,
+        allow_orphan_errors: bool = False,
     ) -> Sequence[FullResponse]:
         """For the full event trace, we return the results as a graph instead of a flattened list
 
@@ -754,8 +770,8 @@ def serialize(
             results_map[None].append(root_event)
             to_check.append(root)
 
+        iteration = 0
         with sentry_sdk.start_span(op="building.trace", description="full trace"):
-            iteration = 0
             has_orphans = False
             while parent_map or to_check:
                 if len(to_check) == 0:
@@ -855,6 +871,19 @@ def serialize(
                     )
                     break
 
+        # We are now left with orphan errors in the error_map,
+        # that we need to serialize and return with our results.
+        orphan_errors: List[TraceError] = []
+        if allow_orphan_errors and iteration <= MAX_TRACE_SIZE:
+            for errors in error_map.values():
+                for error in errors:
+                    orphan_errors.append(self.serialize_error(error))
+                    iteration += 1
+                    if iteration > MAX_TRACE_SIZE:
+                        break
+                if iteration > MAX_TRACE_SIZE:
+                    break
+
         root_traces: List[TraceEvent] = []
         orphans: List[TraceEvent] = []
         for index, result in enumerate(results_map.values()):
@@ -867,14 +896,17 @@ def serialize(
         # We sort orphans and roots separately because we always want the root(s) as the first element(s)
         root_traces.sort(key=child_sort_key)
         orphans.sort(key=child_sort_key)
+        orphan_errors = sorted(orphan_errors, key=lambda k: k["timestamp"])
 
         if len(orphans) > 0:
             sentry_sdk.set_tag("discover.trace-view.contains-orphans", "yes")
             logger.warning("discover.trace-view.contains-orphans", extra=warning_extra)
 
-        return [trace.full_dict(detailed) for trace in root_traces] + [
-            orphan.full_dict(detailed) for orphan in orphans
-        ]
+        return (
+            [trace.full_dict(detailed) for trace in root_traces]
+            + [orphan.full_dict(detailed) for orphan in orphans]
+            + [orphan for orphan in orphan_errors]
+        )
 
 
 @region_silo_endpoint
diff --git a/tests/snuba/api/endpoints/test_organization_events_trace.py b/tests/snuba/api/endpoints/test_organization_events_trace.py
@@ -1109,6 +1109,9 @@ def test_with_errors(self):
             "project_slug": self.gen1_project.slug,
             "level": "fatal",
             "title": error.title,
+            "timestamp": error.timestamp,
+            "generation": 0,
+            "event_type": "error",
         } in gen1_event["errors"]
         assert {
             "event_id": error1.event_id,
@@ -1118,8 +1121,182 @@ def test_with_errors(self):
             "project_slug": self.gen1_project.slug,
             "level": "warning",
             "title": error1.title,
+            "timestamp": error1.timestamp,
+            "generation": 0,
+            "event_type": "error",
         } in gen1_event["errors"]
 
+    def test_with_only_orphan_errors_with_same_span_ids(self):
+        span_id = uuid4().hex[:16]
+        start, end = self.get_start_end(10000)
+
+        # Error 1
+        error_data = load_data(
+            "javascript",
+            timestamp=end,
+        )
+        error_data["contexts"]["trace"] = {
+            "type": "trace",
+            "trace_id": self.trace_id,
+            "span_id": span_id,
+        }
+        error_data["level"] = "fatal"
+        error = self.store_event(error_data, project_id=self.project.id)
+
+        # Error 2 before after Error 1
+        error_data1 = load_data(
+            "javascript",
+            timestamp=start,
+        )
+        error_data1["level"] = "warning"
+        error_data1["contexts"]["trace"] = {
+            "type": "trace",
+            "trace_id": self.trace_id,
+            "span_id": span_id,
+        }
+        error1 = self.store_event(error_data1, project_id=self.project.id)
+
+        with self.feature(
+            [*self.FEATURES, "organizations:performance-tracing-without-performance"]
+        ):
+            response = self.client.get(
+                self.url,
+                data={"project": -1},
+                format="json",
+            )
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 2
+        # Sorting by timestamp puts Error1 after Error2 in the response
+        assert {
+            "event_id": error.event_id,
+            "issue_id": error.group_id,
+            "span": span_id,
+            "project_id": self.project.id,
+            "project_slug": self.project.slug,
+            "level": "fatal",
+            "title": error.title,
+            "timestamp": error.timestamp,
+            "generation": 0,
+            "event_type": "error",
+        } == response.data[1]
+        assert {
+            "event_id": error1.event_id,
+            "issue_id": error1.group_id,
+            "span": span_id,
+            "project_id": self.project.id,
+            "project_slug": self.project.slug,
+            "level": "warning",
+            "title": error1.title,
+            "timestamp": error1.timestamp,
+            "generation": 0,
+            "event_type": "error",
+        } == response.data[0]
+
+    def test_with_only_orphan_errors_with_different_span_ids(self):
+        start, _ = self.get_start_end(1000)
+        span_id = uuid4().hex[:16]
+        error_data = load_data(
+            "javascript",
+            timestamp=start,
+        )
+        error_data["contexts"]["trace"] = {
+            "type": "trace",
+            "trace_id": self.trace_id,
+            "span_id": span_id,
+        }
+        error_data["level"] = "fatal"
+        error = self.store_event(error_data, project_id=self.project.id)
+        error_data["level"] = "warning"
+        span_id1 = uuid4().hex[:16]
+        error_data["contexts"]["trace"] = {
+            "type": "trace",
+            "trace_id": self.trace_id,
+            "span_id": span_id1,
+        }
+        error1 = self.store_event(error_data, project_id=self.project.id)
+
+        with self.feature(
+            [*self.FEATURES, "organizations:performance-tracing-without-performance"]
+        ):
+            response = self.client.get(
+                self.url,
+                data={"project": -1},
+                format="json",
+            )
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 2
+        assert {
+            "event_id": error.event_id,
+            "issue_id": error.group_id,
+            "span": span_id,
+            "project_id": self.project.id,
+            "project_slug": self.project.slug,
+            "level": "fatal",
+            "title": error.title,
+            "timestamp": error.timestamp,
+            "generation": 0,
+            "event_type": "error",
+        } in response.data
+        assert {
+            "event_id": error1.event_id,
+            "issue_id": error1.group_id,
+            "span": span_id1,
+            "project_id": self.project.id,
+            "project_slug": self.project.slug,
+            "level": "warning",
+            "title": error1.title,
+            "timestamp": error1.timestamp,
+            "generation": 0,
+            "event_type": "error",
+        } in response.data
+
+    def test_with_mixup_of_orphan_errors_with_simple_trace_data(self):
+        self.load_trace()
+        start, _ = self.get_start_end(1000)
+        span_id = uuid4().hex[:16]
+        error_data = load_data(
+            "javascript",
+            timestamp=start,
+        )
+        error_data["contexts"]["trace"] = {
+            "type": "trace",
+            "trace_id": self.trace_id,
+            "span_id": span_id,
+        }
+        error_data["level"] = "fatal"
+        error = self.store_event(error_data, project_id=self.project.id)
+        error_data["level"] = "warning"
+        span_id1 = uuid4().hex[:16]
+        error_data["contexts"]["trace"] = {
+            "type": "trace",
+            "trace_id": self.trace_id,
+            "span_id": span_id1,
+        }
+
+        with self.feature(
+            [*self.FEATURES, "organizations:performance-tracing-without-performance"]
+        ):
+            response = self.client.get(
+                self.url,
+                data={"project": -1},
+                format="json",
+            )
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 2
+        self.assert_trace_data(response.data[0])
+        assert {
+            "event_id": error.event_id,
+            "issue_id": error.group_id,
+            "span": span_id,
+            "project_id": self.project.id,
+            "project_slug": self.project.slug,
+            "level": "fatal",
+            "title": error.title,
+            "timestamp": error.timestamp,
+            "generation": 0,
+            "event_type": "error",
+        } in response.data
+
     def test_with_default(self):
         self.load_trace()
         start, _ = self.get_start_end(1000)
@@ -1143,6 +1320,9 @@ def test_with_default(self):
             "project_slug": self.gen1_project.slug,
             "level": "debug",
             "title": "this is a log message",
+            "timestamp": default_event.timestamp,
+            "generation": 0,
+            "event_type": "error",
         } in root_event["errors"]
 
     def test_pruning_root(self):