feat(annotations): ensure span annotations are included in dataset examples

axiomofjoy · axiomofjoy · commit e60d5acd654c · 2025-05-03T18:38:56.000-07:00
diff --git a/src/phoenix/db/models.py b/src/phoenix/db/models.py
@@ -684,6 +684,7 @@ def _llm_token_count_total_expression(cls) -> ColumnElement[int]:
         )
 
     trace: Mapped["Trace"] = relationship("Trace", back_populates="spans")
+    span_annotations: Mapped[list["SpanAnnotation"]] = relationship(back_populates="span")
     document_annotations: Mapped[list["DocumentAnnotation"]] = relationship(back_populates="span")
     dataset_examples: Mapped[list["DatasetExample"]] = relationship(back_populates="span")
 
@@ -830,6 +831,9 @@ class SpanAnnotation(Base):
     )
     user_id: Mapped[Optional[int]] = mapped_column(ForeignKey("users.id", ondelete="SET NULL"))
 
+    span: Mapped["Span"] = relationship(back_populates="span_annotations")
+    user: Mapped[Optional["User"]] = relationship("User")
+
     __table_args__ = (
         UniqueConstraint(
             "name",
diff --git a/src/phoenix/server/api/mutations/dataset_mutations.py b/src/phoenix/server/api/mutations/dataset_mutations.py
@@ -11,7 +11,9 @@
     ToolCallAttributes,
 )
 from sqlalchemy import and_, delete, distinct, func, insert, select, update
+from sqlalchemy.orm import contains_eager
 from strawberry import UNSET
+from strawberry.relay.types import GlobalID
 from strawberry.types import Info
 
 from phoenix.db import models
@@ -130,44 +132,43 @@ async def add_spans_to_dataset(
                 raise ValueError(
                     f"Unknown dataset: {dataset_id}"
                 )  # todo: implement error types https://github.com/Arize-ai/phoenix/issues/3221
-            dataset_version_rowid = await session.scalar(
-                insert(models.DatasetVersion)
-                .values(
-                    dataset_id=dataset_rowid,
-                    description=dataset_version_description,
-                    metadata_=dataset_version_metadata,
-                )
-                .returning(models.DatasetVersion.id)
+            dataset_version = models.DatasetVersion(
+                dataset_id=dataset_rowid,
+                description=dataset_version_description,
+                metadata_=dataset_version_metadata or {},
             )
+            session.add(dataset_version)
+            await session.flush()
             spans = (
-                await session.scalars(select(models.Span).where(models.Span.id.in_(span_rowids)))
-            ).all()
+                (
+                    await session.scalars(
+                        select(models.Span)
+                        .join(
+                            models.SpanAnnotation,
+                            models.Span.id == models.SpanAnnotation.span_rowid,
+                        )
+                        .outerjoin(models.User, models.SpanAnnotation.user_id == models.User.id)
+                        .order_by(
+                            models.Span.id,
+                            models.SpanAnnotation.name,
+                            models.User.username,
+                        )
+                        .where(models.Span.id.in_(span_rowids))
+                        .options(
+                            contains_eager(models.Span.span_annotations).contains_eager(
+                                models.SpanAnnotation.user
+                            )
+                        )
+                    )
+                )
+                .unique()
+                .all()
+            )
             if missing_span_rowids := span_rowids - {span.id for span in spans}:
                 raise ValueError(
                     f"Could not find spans with rowids: {', '.join(map(str, missing_span_rowids))}"
                 )  # todo: implement error handling types https://github.com/Arize-ai/phoenix/issues/3221
 
-            span_annotations = (
-                await session.scalars(
-                    select(models.SpanAnnotation).where(
-                        models.SpanAnnotation.span_rowid.in_(span_rowids)
-                    )
-                )
-            ).all()
-
-            span_annotations_by_span: dict[int, dict[Any, Any]] = {span.id: {} for span in spans}
-            for annotation in span_annotations:
-                span_id = annotation.span_rowid
-                if span_id not in span_annotations_by_span:
-                    span_annotations_by_span[span_id] = dict()
-                span_annotations_by_span[span_id][annotation.name] = {
-                    "label": annotation.label,
-                    "score": annotation.score,
-                    "explanation": annotation.explanation,
-                    "metadata": annotation.metadata_,
-                    "annotator_kind": annotation.annotator_kind,
-                }
-
             DatasetExample = models.DatasetExample
             dataset_example_rowids = (
                 await session.scalars(
@@ -201,7 +202,7 @@ async def add_spans_to_dataset(
                 [
                     {
                         DatasetExampleRevision.dataset_example_id.key: dataset_example_rowid,
-                        DatasetExampleRevision.dataset_version_id.key: dataset_version_rowid,
+                        DatasetExampleRevision.dataset_version_id.key: dataset_version.id,
                         DatasetExampleRevision.input.key: get_dataset_example_input(span),
                         DatasetExampleRevision.output.key: get_dataset_example_output(span),
                         DatasetExampleRevision.metadata_.key: {
@@ -212,11 +213,7 @@ async def add_spans_to_dataset(
                                 if k in nonprivate_span_attributes
                             },
                             "span_kind": span.span_kind,
-                            **(
-                                {"annotations": annotations}
-                                if (annotations := span_annotations_by_span[span.id])
-                                else {}
-                            ),
+                            "annotations": _gather_span_annotations_by_name(span.span_annotations),
                         },
                         DatasetExampleRevision.revision_kind.key: "CREATE",
                     }
@@ -602,6 +599,32 @@ def _to_orm_revision(
     }
 
 
+def _gather_span_annotations_by_name(
+    span_annotations: list[models.SpanAnnotation],
+) -> dict[str, list[dict[str, Any]]]:
+    """
+    Groups span annotations by annotation name.
+
+    Each annotation is converted to a dictionary with `_to_span_annotation_dict`
+    and appended to the list stored under its name, so annotations that share a
+    name are all kept, in the order in which they appear in `span_annotations`.
+    An empty input yields an empty dictionary.
+    """
+    grouped: dict[str, list[dict[str, Any]]] = {}
+    for annotation in span_annotations:
+        grouped.setdefault(annotation.name, []).append(_to_span_annotation_dict(annotation))
+    return grouped
+
+
+def _to_span_annotation_dict(span_annotation: models.SpanAnnotation) -> dict[str, Any]:
+    """
+    Converts a span annotation into a plain dictionary.
+
+    The result carries the annotation's label, score, explanation, metadata and
+    annotator kind, plus three fields describing the associated user:
+
+    - "user_id": the string form of a `User` global ID built from
+      `span_annotation.user_id`, or None when the annotation has no user ID
+    - "username" / "email": read from `span_annotation.user`, or None when no
+      user is attached
+    """
+    user_id = span_annotation.user_id
+    user = span_annotation.user
+    return {
+        "label": span_annotation.label,
+        "score": span_annotation.score,
+        "explanation": span_annotation.explanation,
+        "metadata": span_annotation.metadata_,
+        "annotator_kind": span_annotation.annotator_kind,
+        # Without this guard a missing user ID would be encoded as a global ID
+        # whose node ID is the literal string "None" instead of being reported
+        # as absent, which is inconsistent with "username" and "email" below.
+        "user_id": (
+            str(GlobalID(models.User.__name__, str(user_id))) if user_id is not None else None
+        ),
+        "username": user.username if user is not None else None,
+        "email": user.email if user is not None else None,
+    }
+
+
 INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
 INPUT_VALUE = SpanAttributes.INPUT_VALUE
 OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE

Original file line number	Diff line number	Diff line change
`@@ -684,6 +684,7 @@ def _llm_token_count_total_expression(cls) -> ColumnElement[int]:`
`684`	`684`	`)`
`685`	`685`
`686`	`686`	`trace: Mapped["Trace"] = relationship("Trace", back_populates="spans")`
	`687`	`+ span_annotations: Mapped[list["SpanAnnotation"]] = relationship(back_populates="span")`
`687`	`688`	`document_annotations: Mapped[list["DocumentAnnotation"]] = relationship(back_populates="span")`
`688`	`689`	`dataset_examples: Mapped[list["DatasetExample"]] = relationship(back_populates="span")`
`689`	`690`
`@@ -830,6 +831,9 @@ class SpanAnnotation(Base):`
`830`	`831`	`)`
`831`	`832`	`user_id: Mapped[Optional[int]] = mapped_column(ForeignKey("users.id", ondelete="SET NULL"))`
`832`	`833`
	`834`	`+ span: Mapped["Span"] = relationship(back_populates="span_annotations")`
	`835`	`+ user: Mapped[Optional["User"]] = relationship("User")`
	`836`	`+`
`833`	`837`	`__table_args__ = (`
`834`	`838`	`UniqueConstraint(`
`835`	`839`	`"name",`