From 96f5bdb73171feb496815078d4fbe2d4c5a06dc5 Mon Sep 17 00:00:00 2001 From: Dustin Ngo Date: Fri, 2 May 2025 20:10:03 -0400 Subject: [PATCH 1/9] Spike out DSL extension for annotation existence --- src/phoenix/trace/dsl/filter.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/phoenix/trace/dsl/filter.py b/src/phoenix/trace/dsl/filter.py index 44d9931cc3..f229c5167a 100644 --- a/src/phoenix/trace/dsl/filter.py +++ b/src/phoenix/trace/dsl/filter.py @@ -13,6 +13,7 @@ from sqlalchemy.orm.util import AliasedClass from sqlalchemy.sql.expression import Select from typing_extensions import TypeAlias, TypeGuard, assert_never +from sqlalchemy import case, literal import phoenix.trace.v1 as pb from phoenix.db import models @@ -31,6 +32,10 @@ r"""\b((annotations|evals)\[(".*?"|'.*?')\][.](label|score))\b""" ) +EVAL_NAME_PATTERN = re.compile( + r"""\b((annotations|evals)\[(\".*?\"|'.*?')\])\b""" +) + @dataclass(frozen=True) class AliasedAnnotationRelation: @@ -46,16 +51,19 @@ class AliasedAnnotationRelation: table: AliasedClass[models.SpanAnnotation] = field(init=False, repr=False) _label_attribute_alias: str = field(init=False, repr=False) _score_attribute_alias: str = field(init=False, repr=False) + _exists_attribute_alias: str = field(init=False, repr=False) def __post_init__(self) -> None: table_alias = f"span_annotation_{self.index}" alias_id = uuid4().hex label_attribute_alias = f"{table_alias}_label_{alias_id}" score_attribute_alias = f"{table_alias}_score_{alias_id}" + exists_attribute_alias = f"{table_alias}_exists_{alias_id}" table = aliased(models.SpanAnnotation, name=table_alias) object.__setattr__(self, "_label_attribute_alias", label_attribute_alias) object.__setattr__(self, "_score_attribute_alias", score_attribute_alias) + object.__setattr__(self, "_exists_attribute_alias", exists_attribute_alias) object.__setattr__(self, "table", table) @property @@ -66,6 +74,9 @@ def attributes(self) -> typing.Iterator[tuple[str, Mapped[typing.Any]]]: """ yield self._label_attribute_alias, self.table.label yield self._score_attribute_alias, self.table.score + yield self._exists_attribute_alias, case( + (self.table.id.is_not(None), literal(True)) + ) def attribute_alias(self, attribute: AnnotationAttribute) -> str: """ @@ -555,6 +566,7 @@ def _validate_expression( isinstance(node, (ast.BoolOp, ast.Compare)) or isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.Not) + or _is_annotation(node) ): continue elif ( @@ -792,6 +804,15 @@ def _apply_eval_aliasing( eval_aliases[annotation_name] = eval_alias alias_name = eval_alias.attribute_alias(annotation_attribute) source = source.replace(annotation_expression, alias_name) + + for match in EVAL_NAME_PATTERN.finditer(source): + annotation_expression, annotation_type, quoted_eval_name = match.groups() + annotation_name = quoted_eval_name[1:-1] + if (eval_alias := eval_aliases.get(annotation_name)) is None: + eval_alias = AliasedAnnotationRelation(index=len(eval_aliases), name=annotation_name) + eval_aliases[annotation_name] = eval_alias + alias_name = eval_alias._exists_attribute_alias + source = source.replace(annotation_expression, alias_name) return source, tuple(eval_aliases.values()) From 0076ce0413af3b53573c815ec18047d7fce28b5c Mon Sep 17 00:00:00 2001 From: Dustin Ngo Date: Fri, 2 May 2025 20:20:57 -0400 Subject: [PATCH 2/9] Tweak query logic --- src/phoenix/trace/dsl/filter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/phoenix/trace/dsl/filter.py b/src/phoenix/trace/dsl/filter.py index f229c5167a..df43ca07c7 100644 --- a/src/phoenix/trace/dsl/filter.py +++ b/src/phoenix/trace/dsl/filter.py @@ -33,7 +33,7 @@ ) EVAL_NAME_PATTERN = re.compile( - r"""\b((annotations|evals)\[(\".*?\"|'.*?')\])\b""" + r"""\b((annotations|evals)\[(".*?"|'.*?')\])\b""" ) @@ -75,7 +75,7 @@ def attributes(self) -> typing.Iterator[tuple[str, Mapped[typing.Any]]]: yield self._label_attribute_alias, self.table.label yield self._score_attribute_alias, self.table.score yield self._exists_attribute_alias, case( - (self.table.id.is_not(None), literal(True)) + (self.table.id.is_not(None), literal(True)), else_=literal(False) ) def attribute_alias(self, attribute: AnnotationAttribute) -> str: From 278d4ae0028bb78c9abf373a13e43df675d51ff8 Mon Sep 17 00:00:00 2001 From: Dustin Ngo Date: Fri, 2 May 2025 20:29:07 -0400 Subject: [PATCH 3/9] Remove boundary matchers --- src/phoenix/trace/dsl/filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/trace/dsl/filter.py b/src/phoenix/trace/dsl/filter.py index df43ca07c7..a3e9a37c33 100644 --- a/src/phoenix/trace/dsl/filter.py +++ b/src/phoenix/trace/dsl/filter.py @@ -33,7 +33,7 @@ ) EVAL_NAME_PATTERN = re.compile( - r"""\b((annotations|evals)\[(".*?"|'.*?')\])\b""" + r"""((annotations|evals)\[(".*?"|'.*?')\])""" ) From 2eb5b4e15842bab4879f830c54d0c036507b827a Mon Sep 17 00:00:00 2001 From: Dustin Ngo Date: Fri, 2 May 2025 20:36:39 -0400 Subject: [PATCH 4/9] Add test cases --- tests/unit/trace/dsl/test_query.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/unit/trace/dsl/test_query.py b/tests/unit/trace/dsl/test_query.py index 1d20f11148..7ac6d8aba5 100644 --- a/tests/unit/trace/dsl/test_query.py +++ b/tests/unit/trace/dsl/test_query.py @@ -914,6 +914,10 @@ async def test_filter_on_trace_id_multiple( ["evals['0'].score is None or evals['1'].label is not None", ["234", "456", "567"]], ["evals['0'].score == 0 or evals['1'].label != '1'", ["345", "567"]], ["evals['0'].score != 0 or evals['1'].label == '1'", ["456"]], + ["evals['0']", ["345", "456"],], + ["annotations['0']", ["345", "456"],], + ["evals['1']", ["456", "567"],], + ["annotations['1']", ["456", "567"],], ], ) async def test_filter_on_span_annotation( From 5c24a942f51df5194c784d3c9c2f876d607514a5 Mon Sep 17 00:00:00 2001 From: Dustin Ngo Date: Fri, 2 May 2025 20:42:40 -0400 Subject: [PATCH 5/9] =?UTF-8?q?Ruff=20=F0=9F=90=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/phoenix/trace/dsl/filter.py | 11 +++++------ tests/unit/trace/dsl/test_query.py | 20 ++++++++++++++++---- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/phoenix/trace/dsl/filter.py b/src/phoenix/trace/dsl/filter.py index a3e9a37c33..f4f3e2fc6e 100644 --- a/src/phoenix/trace/dsl/filter.py +++ b/src/phoenix/trace/dsl/filter.py @@ -9,11 +9,11 @@ from uuid import uuid4 import sqlalchemy +from sqlalchemy import case, literal from sqlalchemy.orm import Mapped, aliased from sqlalchemy.orm.util import AliasedClass from sqlalchemy.sql.expression import Select from typing_extensions import TypeAlias, TypeGuard, assert_never -from sqlalchemy import case, literal import phoenix.trace.v1 as pb from phoenix.db import models @@ -32,9 +32,7 @@ r"""\b((annotations|evals)\[(".*?"|'.*?')\][.](label|score))\b""" ) -EVAL_NAME_PATTERN = re.compile( - r"""((annotations|evals)\[(".*?"|'.*?')\])""" -) +EVAL_NAME_PATTERN = re.compile(r"""((annotations|evals)\[(".*?"|'.*?')\])""") @dataclass(frozen=True) @@ -74,8 +72,9 @@ def attributes(self) -> typing.Iterator[tuple[str, Mapped[typing.Any]]]: """ yield self._label_attribute_alias, self.table.label yield self._score_attribute_alias, self.table.score - yield self._exists_attribute_alias, case( - (self.table.id.is_not(None), literal(True)), else_=literal(False) + yield ( + self._exists_attribute_alias, + case((self.table.id.is_not(None), literal(True)), else_=literal(False)), ) def attribute_alias(self, attribute: AnnotationAttribute) -> str: diff --git a/tests/unit/trace/dsl/test_query.py b/tests/unit/trace/dsl/test_query.py index 7ac6d8aba5..e6e9e57dd2 100644 --- a/tests/unit/trace/dsl/test_query.py +++ b/tests/unit/trace/dsl/test_query.py @@ -914,10 +914,22 @@ async def test_filter_on_trace_id_multiple( ["evals['0'].score is None or evals['1'].label is not None", ["234", "456", "567"]], ["evals['0'].score == 0 or evals['1'].label != '1'", ["345", "567"]], ["evals['0'].score != 0 or evals['1'].label == '1'", ["456"]], - ["evals['0']", ["345", "456"],], - ["annotations['0']", ["345", "456"],], - ["evals['1']", ["456", "567"],], - ["annotations['1']", ["456", "567"],], + [ + "evals['0']", + ["345", "456"], + ], + [ + "annotations['0']", + ["345", "456"], + ], + [ + "evals['1']", + ["456", "567"], + ], + [ + "annotations['1']", + ["456", "567"], + ], ], ) async def test_filter_on_span_annotation( From fe9327c9f4ad680f42071ae298daf7d299019490 Mon Sep 17 00:00:00 2001 From: Dustin Ngo Date: Fri, 2 May 2025 21:35:52 -0400 Subject: [PATCH 6/9] Use more-strict lookarounds --- src/phoenix/trace/dsl/filter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/phoenix/trace/dsl/filter.py b/src/phoenix/trace/dsl/filter.py index f4f3e2fc6e..3348fb58a4 100644 --- a/src/phoenix/trace/dsl/filter.py +++ b/src/phoenix/trace/dsl/filter.py @@ -32,7 +32,9 @@ r"""\b((annotations|evals)\[(".*?"|'.*?')\][.](label|score))\b""" ) -EVAL_NAME_PATTERN = re.compile(r"""((annotations|evals)\[(".*?"|'.*?')\])""") +EVAL_NAME_PATTERN = re.compile( + r"""(? Date: Fri, 2 May 2025 21:58:04 -0400 Subject: [PATCH 7/9] Further refine regex --- src/phoenix/trace/dsl/filter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/phoenix/trace/dsl/filter.py b/src/phoenix/trace/dsl/filter.py index 3348fb58a4..f036f62553 100644 --- a/src/phoenix/trace/dsl/filter.py +++ b/src/phoenix/trace/dsl/filter.py @@ -32,9 +32,7 @@ r"""\b((annotations|evals)\[(".*?"|'.*?')\][.](label|score))\b""" ) -EVAL_NAME_PATTERN = re.compile( - r"""(? Date: Fri, 2 May 2025 23:22:55 -0400 Subject: [PATCH 8/9] Add filter tests --- tests/unit/trace/dsl/test_filter.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/unit/trace/dsl/test_filter.py b/tests/unit/trace/dsl/test_filter.py index 54c5cd2fc2..f0b67d1d45 100644 --- a/tests/unit/trace/dsl/test_filter.py +++ b/tests/unit/trace/dsl/test_filter.py @@ -232,6 +232,17 @@ async def test_filter_translated( "span_annotation_0_label_00000000000000000000000000000000 is not None", id="double-quoted-annotation-name", ), + # Existence checks (bare annotation reference) + pytest.param( + """evals['Hallucination']""", + "span_annotation_0_exists_00000000000000000000000000000000", + id="bare-evals-exists", + ), + pytest.param( + """annotations['Hallucination']""", + "span_annotation_0_exists_00000000000000000000000000000000", + id="bare-annotations-exists", + ), ], ) def test_apply_eval_aliasing(filter_condition: str, expected: str) -> None: From 82bbb57b122c7149a947a0913e504e80c125c0a9 Mon Sep 17 00:00:00 2001 From: Dustin Ngo Date: Sat, 3 May 2025 04:24:55 -0400 Subject: [PATCH 9/9] Clean up types --- src/phoenix/trace/dsl/filter.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/phoenix/trace/dsl/filter.py b/src/phoenix/trace/dsl/filter.py index f036f62553..4dfa21bdf4 100644 --- a/src/phoenix/trace/dsl/filter.py +++ b/src/phoenix/trace/dsl/filter.py @@ -12,7 +12,7 @@ from sqlalchemy import case, literal from sqlalchemy.orm import Mapped, aliased from sqlalchemy.orm.util import AliasedClass -from sqlalchemy.sql.expression import Select +from sqlalchemy.sql.expression import ColumnElement, Select from typing_extensions import TypeAlias, TypeGuard, assert_never import phoenix.trace.v1 as pb @@ -65,7 +65,7 @@ def __post_init__(self) -> None: object.__setattr__(self, "table", table) @property - def attributes(self) -> typing.Iterator[tuple[str, Mapped[typing.Any]]]: + def attributes(self) -> typing.Iterator[tuple[str, ColumnElement[typing.Any]]]: """ Alias names and attributes (i.e., columns) of the `span_annotation` relation. @@ -794,7 +794,7 @@ def _apply_eval_aliasing( eval_aliases: dict[AnnotationName, AliasedAnnotationRelation] = {} for ( annotation_expression, - annotation_type, + _annotation_type, annotation_name, annotation_attribute, ) in _parse_annotation_expressions_and_names(source): @@ -805,7 +805,7 @@ def _apply_eval_aliasing( source = source.replace(annotation_expression, alias_name) for match in EVAL_NAME_PATTERN.finditer(source): - annotation_expression, annotation_type, quoted_eval_name = match.groups() + annotation_expression, _, quoted_eval_name = match.groups() annotation_name = quoted_eval_name[1:-1] if (eval_alias := eval_aliases.get(annotation_name)) is None: eval_alias = AliasedAnnotationRelation(index=len(eval_aliases), name=annotation_name) @@ -831,11 +831,11 @@ def _parse_annotation_expressions_and_names( for match in EVAL_EXPRESSION_PATTERN.finditer(source): ( annotation_expression, - annotation_type, + _annotation_type, quoted_eval_name, evaluation_attribute_name, ) = match.groups() - annotation_type = typing.cast(AnnotationType, annotation_type) + annotation_type = typing.cast(AnnotationType, _annotation_type) yield ( annotation_expression, annotation_type,