From 3b0ad5c802f0c660bc8fe32028b8e1b98ae363af Mon Sep 17 00:00:00 2001
From: Sebastian Sosa <1sebastian1sosa1@gmail.com>
Date: Sat, 26 Apr 2025 02:53:07 -0400
Subject: [PATCH 01/12] pre dynamic executor

---
 .../src/phoenix/evals/__init__.py             |   2 +
 .../src/phoenix/evals/declarative.py          | 258 ++++++++++++++++++
 2 files changed, 260 insertions(+)
 create mode 100644 packages/phoenix-evals/src/phoenix/evals/declarative.py

diff --git a/packages/phoenix-evals/src/phoenix/evals/__init__.py b/packages/phoenix-evals/src/phoenix/evals/__init__.py
index 72691112d6..1240e63cee 100644
--- a/packages/phoenix-evals/src/phoenix/evals/__init__.py
+++ b/packages/phoenix-evals/src/phoenix/evals/__init__.py
@@ -1,4 +1,5 @@
 from .classify import llm_classify, run_evals
+from .declarative import declarative_eval
 from .default_templates import (
     CODE_FUNCTIONALITY_PROMPT_RAILS_MAP,
     CODE_FUNCTIONALITY_PROMPT_TEMPLATE,
@@ -92,6 +93,7 @@
     "TOOL_CALLING_PROMPT_RAILS_MAP",
     "NOT_PARSABLE",
     "run_evals",
+    "declarative_eval",
     "LLMEvaluator",
     "HallucinationEvaluator",
     "QAEvaluator",
diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py
new file mode 100644
index 0000000000..1f5184d2b6
--- /dev/null
+++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py
@@ -0,0 +1,258 @@
+import time
+from phoenix.client.utils.template_formatters import MustacheBaseTemplateFormatter
+from phoenix.evals.models import BaseModel
+
+import inspect
+import logging
+import warnings
+from collections import defaultdict
+from enum import Enum
+from functools import wraps
+from itertools import product
+from typing import (
+    Any,
+    Callable,
+    DefaultDict,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    NamedTuple,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+)
+import json
+import pandas as pd
+from pandas import DataFrame
+from typing_extensions import TypeAlias
+
+from phoenix.evals.evaluators import LLMEvaluator
+from phoenix.evals.exceptions import PhoenixTemplateMappingError
+from phoenix.evals.executors import ExecutionStatus, get_executor_on_sync_context
+from phoenix.evals.models import OpenAIModel, set_verbosity
+from phoenix.evals.templates import (
+    ClassificationTemplate,
+    MultimodalPrompt,
+    PromptOptions,
+    PromptPartTemplate,
+    PromptPartContentType,
+    PromptTemplate,
+    normalize_classification_template,
+)
+from phoenix.evals.utils import (
+    NOT_PARSABLE,
+    get_tqdm_progress_bar_formatter,
+    openai_function_call_kwargs,
+    parse_openai_function_call,
+    printif,
+    snap_to_rail,
+)
+from pydantic import BaseModel, Field, create_model
+from typing import Union, List, Any, Optional, Callable
+import pandas as pd
+from openai import OpenAI, AsyncOpenAI
+from tqdm import tqdm
+import asyncio
+import aiohttp
+
+async def declarative_eval(
+    data: Union[pd.DataFrame, List[Any]],
+    model: Union[OpenAI, AsyncOpenAI],
+    schema: BaseModel,  # Pydantic model class
+    field_mappings: Dict[str, str], # key is the openinference target field value, value is the path to the field in the schema
+    system_instruction: Optional[str] = None,
+    verbose: bool = False,
+    include_prompt: bool = False,
+    include_response: bool = False,
+    include_exceptions: bool = False,
+    provide_explanation: bool = False,
+    max_retries: int = 10,
+    exit_on_error: bool = True,
+    run_sync: bool = False,
+    concurrency: Optional[int] = None,
+    progress_bar_format: Optional[str] = get_tqdm_progress_bar_formatter("declarative_eval"),
+) -> pd.DataFrame:
+    """
+    Evaluates data using an LLM with a Pydantic schema to structure the output.
+    """
+
+    formatter = MustacheBaseTemplateFormatter()
+    template = PromptPartTemplate(
+        content_type=PromptPartContentType.TEXT,
+        template="""an input and output pair passed to an LLM
+        INPUT MESSAGES:
+        ```
+        {{input}}
+        ```
+        OUTPUT MESSAGE:
+        ```
+        {{output}}
+        ```
+        """
+    )
+
+    labels: Iterable[Optional[str]] = [None] * len(data)
+    explanations: Iterable[Optional[str]] = [None] * len(data)
+    scores: Iterable[Optional[float]] = [None] * len(data)
+
+    default_system_instruction = "You will be provided the input passed to the llm and the generated output data to evaluate according to the specified schema."
+
+    # Convert data to consistent format
+    if isinstance(data, pd.DataFrame):
+        dataframe = data
+        dataframe_index = data.index
+    else:
+        dataframe = pd.DataFrame(data)
+        dataframe_index = dataframe.index
+
+    
+    if provide_explanation:
+        # Update the schema
+        ExplainedSchema = create_model(
+            "ExplainedSchema",
+            schema=(schema, Field(..., description="The schema to evaluate")),
+            explanation=(str, Field(..., description="An explanation of the evaluation")),
+        )
+        schema = ExplainedSchema
+
+        # Update the field mappings
+        new_field_mappings = {}
+        for key, value in field_mappings.items():
+            new_field_mappings[key] = f"schema.{value}"
+        # Override the explanation field mapping
+        new_field_mappings["explanation"] = "explanation"
+        # Update the field mappings
+        field_mappings = new_field_mappings
+
+    print("field_mappings", field_mappings)
+
+        
+    def _map_template(data: pd.Series) -> str:
+        output_str = formatter.format(template.template, variables={
+                "input": json.dumps(data["attributes.llm.input_messages"]).replace("\\", "\\\\"),
+                "output": json.dumps(data["attributes.llm.output_messages"]).replace("\\", "\\\\")
+            }
+        )
+        return output_str
+
+    async def _run_llm_eval_async() -> List[Tuple[pd.Series, Dict[str, Any], Optional[str]]]:
+        if type(model) is OpenAI:
+            raise ValueError("OpenAI is not supported for async operations")
+        # Handle async request        
+        async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]:
+            try:
+                start_time = time.time()
+                response = await model.beta.chat.completions.parse(
+                    model="gpt-4o-2024-08-06",
+                    messages=[
+                        {"role": "system", "content": system_instruction or default_system_instruction},
+                        {"role": "user", "content": _map_template(row)}
+                    ],
+                    response_format=schema,
+                )
+                parsed_response = response.choices[0].message.parsed
+                end_time = time.time()
+                execution_seconds = end_time - start_time
+                return idx, row, parsed_response, None, execution_seconds
+            except Exception as e:
+                return idx, row, None, str(e), 0
+        
+        # create tasks
+        tasks = []
+        for idx, (_, row) in enumerate(dataframe.iterrows()):
+            tasks.append(_make_request(idx, row))
+
+        results = [None] * len(tasks)
+        with tqdm(total=len(tasks), desc="Running Declarative Evaluations") as pbar:
+            for coro in asyncio.as_completed(tasks):
+                idx, row, parsed_response, error, execution_seconds = await coro
+                results[idx] = (row, parsed_response, error, execution_seconds)
+                pbar.update(1)
+        
+        return results
+    
+
+    def _run_llm_eval_sync() -> List[Tuple[pd.Series, Dict[str, Any]]]:
+        if type(model) is AsyncOpenAI:
+            raise ValueError("AsyncOpenAI is not supported for sync operations")
+        def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]:
+            try:
+                start_time = time.time()
+                response = model.beta.chat.completions.parse(
+                    model="gpt-4o-2024-08-06",
+                    messages=[
+                        {"role": "system", "content": system_instruction or default_system_instruction},
+                        {"role": "user", "content": _map_template(row)}
+                    ],
+                    response_format=schema,
+                )
+                parsed_response = response.choices[0].message.parsed
+                end_time = time.time()
+                execution_seconds = end_time - start_time
+                return idx, row, parsed_response, None, execution_seconds
+            except Exception as e:
+                return idx, row, None, str(e), 0
+        results = [None] * len(dataframe)
+        for idx, (_, row) in enumerate(dataframe.iterrows()):
+            idx, row, parsed_response, error, execution_seconds = _make_request(idx, row)
+            results[idx] = (row, parsed_response, error, execution_seconds)
+        return results
+    
+    def _get_nested_value(obj: Dict[str, Any], path: str) -> Any:
+        parts = path.split('.')
+        current = obj
+        for part in parts:
+            if part in current:
+                current = current[part]
+            else:
+                return None
+        return current
+    
+    def _extract_data_using_field_mappings(result: Tuple[pd.Series, BaseModel, Optional[str], float]) -> Dict[str, Any]:
+        row, parsed_response, error, execution_seconds = result
+        results_data = {}
+        results_data["execution_seconds"] = execution_seconds
+        results_data["exceptions"] = []
+        if error:
+            results_data["exceptions"].append(error)
+            for schema_field, _ in field_mappings.items():
+                results_data[schema_field] = None
+        else:
+            for schema_field, object_path in field_mappings.items():
+                json_schema_object = parsed_response.model_dump()
+                results_data[schema_field] = _get_nested_value(json_schema_object, object_path)
+        
+        return results_data
+    
+    def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], float]]) -> List[Tuple[pd.Series, Dict[str, Any]]]:
+        results_data = []
+        for result in results:
+            results_data.append((result[0], _extract_data_using_field_mappings(result)))
+        return results_data
+    
+    
+    
+    results = await _run_llm_eval_async()
+    # results = _run_llm_eval_sync()
+    results_data = _parse_results(results)
+
+    rows = []
+    outcome_results = []
+    for result in results_data:
+        rows.append(result[0])
+        outcome_results.append(result[1])
+
+    # transform results from item centric to field centric
+    key_centric_results = {}
+    for field_name, _ in outcome_results[0].items():
+        field_values = [result.get(field_name) for result in outcome_results]
+        if field_values:
+            key_centric_results[field_name] = field_values
+    return pd.DataFrame(
+        data=key_centric_results,
+        index=dataframe_index,
+    )
+    
+    return results_data

From be26c84ecfd5d796500b2ba7ee1899e5fce16253 Mon Sep 17 00:00:00 2001
From: Sebastian Sosa <1sebastian1sosa1@gmail.com>
Date: Sat, 26 Apr 2025 03:39:38 -0400
Subject: [PATCH 02/12] prepare for executor integration & logging & cleanup

---
 .../src/phoenix/evals/declarative.py          | 98 ++++++++++++++-----
 1 file changed, 73 insertions(+), 25 deletions(-)

diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py
index 1f5184d2b6..590e69a9d0 100644
--- a/packages/phoenix-evals/src/phoenix/evals/declarative.py
+++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py
@@ -72,7 +72,7 @@ async def declarative_eval(
     exit_on_error: bool = True,
     run_sync: bool = False,
     concurrency: Optional[int] = None,
-    progress_bar_format: Optional[str] = get_tqdm_progress_bar_formatter("declarative_eval"),
+    progress_bar_format: Optional[str] = get_tqdm_progress_bar_formatter("llm_classify"),
 ) -> pd.DataFrame:
     """
     Evaluates data using an LLM with a Pydantic schema to structure the output.
@@ -126,7 +126,6 @@ async def declarative_eval(
         # Update the field mappings
         field_mappings = new_field_mappings
 
-    print("field_mappings", field_mappings)
 
         
     def _map_template(data: pd.Series) -> str:
@@ -137,9 +136,12 @@ def _map_template(data: pd.Series) -> str:
         )
         return output_str
 
-    async def _run_llm_eval_async() -> List[Tuple[pd.Series, Dict[str, Any], Optional[str]]]:
+    async def _run_llm_eval_async(row_data: Tuple[int, pd.Series]) -> Tuple[pd.Series, Dict[str, Any], Optional[str], float]:
+        # Guard clause
         if type(model) is OpenAI:
             raise ValueError("OpenAI is not supported for async operations")
+        idx, row = row_data
+
         # Handle async request        
         async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]:
             try:
@@ -155,28 +157,33 @@ async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseM
                 parsed_response = response.choices[0].message.parsed
                 end_time = time.time()
                 execution_seconds = end_time - start_time
+                printif(verbose, f"\n\nIndex: {idx}\nExecution time: {execution_seconds} s\nStructured output: {parsed_response.model_dump_json(indent=2)}\n\n")
                 return idx, row, parsed_response, None, execution_seconds
             except Exception as e:
                 return idx, row, None, str(e), 0
         
-        # create tasks
-        tasks = []
-        for idx, (_, row) in enumerate(dataframe.iterrows()):
-            tasks.append(_make_request(idx, row))
+        result = await _make_request(idx, row)
 
-        results = [None] * len(tasks)
-        with tqdm(total=len(tasks), desc="Running Declarative Evaluations") as pbar:
-            for coro in asyncio.as_completed(tasks):
-                idx, row, parsed_response, error, execution_seconds = await coro
-                results[idx] = (row, parsed_response, error, execution_seconds)
-                pbar.update(1)
+        # # create tasks
+        # tasks = []
+        # for idx, (_, row) in enumerate(dataframe.iterrows()):
+        #     tasks.append(_make_request(idx, row))
+
+        # results = [None] * len(tasks)
+        # with tqdm(total=len(tasks), desc="Running Declarative Evaluations") as pbar:
+        #     for coro in asyncio.as_completed(tasks):
+        #         idx, row, parsed_response, error, execution_seconds = await coro
+        #         results[idx] = (row, parsed_response, error, execution_seconds)
+        #         pbar.update(1)
         
-        return results
+        return result
     
 
-    def _run_llm_eval_sync() -> List[Tuple[pd.Series, Dict[str, Any]]]:
+    def _run_llm_eval_sync(row_data: Tuple[int, pd.Series]) -> Tuple[pd.Series, Dict[str, Any]]:
         if type(model) is AsyncOpenAI:
             raise ValueError("AsyncOpenAI is not supported for sync operations")
+        
+        idx, row = row_data
         def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]:
             try:
                 start_time = time.time()
@@ -194,11 +201,12 @@ def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel,
                 return idx, row, parsed_response, None, execution_seconds
             except Exception as e:
                 return idx, row, None, str(e), 0
-        results = [None] * len(dataframe)
-        for idx, (_, row) in enumerate(dataframe.iterrows()):
-            idx, row, parsed_response, error, execution_seconds = _make_request(idx, row)
-            results[idx] = (row, parsed_response, error, execution_seconds)
-        return results
+        result = _make_request(idx, row)
+        # results = [None] * len(dataframe)
+        # for idx, (_, row) in enumerate(dataframe.iterrows()):
+        #     idx, row, parsed_response, error, execution_seconds = _make_request(idx, row)
+        #     results[idx] = (row, parsed_response, error, execution_seconds)
+        return result
     
     def _get_nested_value(obj: Dict[str, Any], path: str) -> Any:
         parts = path.split('.')
@@ -229,15 +237,55 @@ def _extract_data_using_field_mappings(result: Tuple[pd.Series, BaseModel, Optio
     def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], float]]) -> List[Tuple[pd.Series, Dict[str, Any]]]:
         results_data = []
         for result in results:
-            results_data.append((result[0], _extract_data_using_field_mappings(result)))
+            _idx, row, model_response, error, execution_seconds = result
+            results_data.append((result[0], _extract_data_using_field_mappings(
+                (row, model_response, error, execution_seconds)
+            )))
         return results_data
-    
-    
-    
-    results = await _run_llm_eval_async()
+
+
+    # # USING EXECUTOR (cannot be used without acceptable model)
+    # fallback_return_value = (pd.Series(), {}, None, 0)
+    # executor = get_executor_on_sync_context(
+    #     _run_llm_eval_sync,
+    #     _run_llm_eval_async,
+    #     run_sync=run_sync,
+    #     concurrency=concurrency,
+    #     tqdm_bar_format=progress_bar_format,
+    #     max_retries=max_retries,
+    #     exit_on_error=exit_on_error,
+    #     fallback_return_value=fallback_return_value,
+    # )    
+
+    # inputs = [
+    #     row for _, row in dataframe.iterrows()
+    # ]
+    # print("inputs", inputs)
+    # import pdb; pdb.set_trace()
+    # results, execution_details = executor.run(inputs)
+    # print("results", results)
+    # print("execution_details", execution_details)
+
+    inputs = [
+        (idx, row) for idx, row in dataframe.iterrows()
+    ]
+    results = []
+    with tqdm(total=len(inputs), desc="Running Declarative Evaluations") as pbar:
+        tasks = []
+        for input in inputs:
+            task = _run_llm_eval_async(input)
+            tasks.append(task)
+        
+        for task in asyncio.as_completed(tasks):
+            result = await task
+            results.append(result)
+            pbar.update(1)
     # results = _run_llm_eval_sync()
     results_data = _parse_results(results)
 
+
+    
+
     rows = []
     outcome_results = []
     for result in results_data:

From 4a37d7a23e4061598117db14689f59d9b9c6cc6e Mon Sep 17 00:00:00 2001
From: Sebastian Sosa <1sebastian1sosa1@gmail.com>
Date: Sun, 27 Apr 2025 01:34:51 -0400
Subject: [PATCH 03/12] declarative eval tests

---
 .../src/phoenix/evals/__init__.py             |   3 +-
 .../src/phoenix/evals/declarative.py          |  17 +-
 tests/unit/evals/__init__.py                  |   0
 tests/unit/evals/test_declarative_eval.py     | 195 ++++++++++++
 uv.lock                                       | 290 +++++++++---------
 5 files changed, 348 insertions(+), 157 deletions(-)
 create mode 100644 tests/unit/evals/__init__.py
 create mode 100644 tests/unit/evals/test_declarative_eval.py

diff --git a/packages/phoenix-evals/src/phoenix/evals/__init__.py b/packages/phoenix-evals/src/phoenix/evals/__init__.py
index 1240e63cee..b732e9b7c1 100644
--- a/packages/phoenix-evals/src/phoenix/evals/__init__.py
+++ b/packages/phoenix-evals/src/phoenix/evals/__init__.py
@@ -1,5 +1,5 @@
 from .classify import llm_classify, run_evals
-from .declarative import declarative_eval
+from .declarative import declarative_eval, transform_field_mappings_for_explanation
 from .default_templates import (
     CODE_FUNCTIONALITY_PROMPT_RAILS_MAP,
     CODE_FUNCTIONALITY_PROMPT_TEMPLATE,
@@ -94,6 +94,7 @@
     "NOT_PARSABLE",
     "run_evals",
     "declarative_eval",
+    "transform_field_mappings_for_explanation",
     "LLMEvaluator",
     "HallucinationEvaluator",
     "QAEvaluator",
diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py
index 590e69a9d0..2abf58ae38 100644
--- a/packages/phoenix-evals/src/phoenix/evals/declarative.py
+++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py
@@ -1,3 +1,4 @@
+
 import time
 from phoenix.client.utils.template_formatters import MustacheBaseTemplateFormatter
 from phoenix.evals.models import BaseModel
@@ -57,6 +58,14 @@
 import asyncio
 import aiohttp
 
+def transform_field_mappings_for_explanation(field_mappings: Dict[str, str]) -> Dict[str, str]:
+    new_field_mappings = {}
+    for key, value in field_mappings.items():
+        new_field_mappings[key] = f"schema.{value}"
+    # Override the explanation field mapping
+    new_field_mappings["explanation"] = "explanation"
+    return new_field_mappings
+
 async def declarative_eval(
     data: Union[pd.DataFrame, List[Any]],
     model: Union[OpenAI, AsyncOpenAI],
@@ -118,13 +127,7 @@ async def declarative_eval(
         schema = ExplainedSchema
 
         # Update the field mappings
-        new_field_mappings = {}
-        for key, value in field_mappings.items():
-            new_field_mappings[key] = f"schema.{value}"
-        # Override the explanation field mapping
-        new_field_mappings["explanation"] = "explanation"
-        # Update the field mappings
-        field_mappings = new_field_mappings
+        field_mappings = transform_field_mappings_for_explanation(field_mappings)
 
 
         
diff --git a/tests/unit/evals/__init__.py b/tests/unit/evals/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/unit/evals/test_declarative_eval.py b/tests/unit/evals/test_declarative_eval.py
new file mode 100644
index 0000000000..fbb4a723bc
--- /dev/null
+++ b/tests/unit/evals/test_declarative_eval.py
@@ -0,0 +1,195 @@
+"""
+Test Declarative Eval
+"""
+import asyncio
+import pandas as pd
+import pytest
+from typing import Dict, Any, List, Literal
+from unittest.mock import AsyncMock, MagicMock, patch
+
+from pydantic import BaseModel, Field
+from phoenix.evals import declarative_eval, transform_field_mappings_for_explanation
+
+
+class Conciseness(BaseModel):
+    is_concise: bool = Field(..., description="Whether the output is concise")
+class Formatting(BaseModel):
+    language: Literal["High", "Average", "Low"] = Field(..., description="The complexity of the formatting used in the output")
+class Schema(BaseModel):
+    conciseness: Conciseness = Field(..., description="A custom evaluation of the output")
+    formatting: Formatting = Field(..., description="A custom evaluation of the output")
+class SchemaWithExplanation(BaseModel):
+    schema: Schema = Field(..., description="The schema to evaluate")
+    explanation: str = Field(..., description="An explanation of the evaluation")
+
+@pytest.fixture
+def sample_dataframe() -> pd.DataFrame:
+    """Sample dataframe to simulate ArizeExportClient(...).export_model_to_df(...)"""
+    return pd.DataFrame({
+        "attributes.llm.input_messages": [
+            [{"role": "user", "content": "What is 2+2?"}],
+            [{"role": "user", "content": "Who was the first president?"}],
+        ],
+        "attributes.llm.output_messages": [
+            [{"role": "assistant", "content": "4"}],
+            [{"role": "assistant", "content": "George Washington"}],
+        ]
+    })
+
+@pytest.fixture
+def correct_field_mappings() -> Dict[str, str]:
+    """Accurate field mappings for the Schema"""
+    return {
+        "conciseness.label": "conciseness.is_concise",
+        "formatting.label": "formatting.language",
+    }
+
+@pytest.fixture
+def incorrect_field_mappings() -> Dict[str, str]:
+    """Incorrect field mappings for the Schema"""
+    return {
+        "conciseness.label": "not_conciseness.is_concise",
+        "formatting.label": "formatting.not_language",
+    }
+
+@pytest.fixture
+def mock_parse_responses():
+    """Mock responses for two consecutive OpenAI parse API calls."""
+    responses = []
+    for _ in range(2):
+        mock_resp = MagicMock()
+        mock_resp.choices = [MagicMock()]
+        mock_resp.choices[0].message = MagicMock()
+        mock_resp.choices[0].message.parsed = Schema(
+            conciseness=Conciseness(is_concise=True),
+            formatting=Formatting(language="High")
+        )
+        responses.append(mock_resp)
+    return responses
+
+@pytest.fixture
+def mock_client(mock_parse_responses):
+    """Mock OpenAI client with predefined responses."""
+    mock_client = MagicMock()
+    mock_client.beta = MagicMock()
+    mock_client.beta.chat = MagicMock()
+    mock_client.beta.chat.completions = MagicMock()
+    
+    # Set up the async mock to return different responses for each call
+    mock_parse = AsyncMock()
+    # Use side_effect to return a different response for each call
+    mock_parse.side_effect = mock_parse_responses
+    mock_client.beta.chat.completions.parse = mock_parse
+    
+    return mock_client
+
+@pytest.fixture
+def mock_parse_responses_with_explanation():
+    """Mock responses for two consecutive OpenAI parse API calls, each parsed as SchemaWithExplanation."""
+    responses = []
+    for _ in range(2):
+        mock_resp = MagicMock()
+        mock_resp.choices = [MagicMock()]
+        mock_resp.choices[0].message = MagicMock()
+        mock_resp.choices[0].message.parsed = SchemaWithExplanation(
+            schema=Schema(
+                conciseness=Conciseness(is_concise=True),
+                formatting=Formatting(language="High")
+            ),
+            explanation="Explanation"
+        )
+        responses.append(mock_resp)
+    return responses
+
+@pytest.fixture
+def mock_client_with_explanation(mock_parse_responses_with_explanation):
+    """Mock OpenAI client whose predefined parse responses include an explanation."""
+    mock_client = MagicMock()
+    mock_client.beta = MagicMock()
+    mock_client.beta.chat = MagicMock()
+    mock_client.beta.chat.completions = MagicMock()
+    
+    # Set up the async mock to return different responses for each call
+    mock_parse = AsyncMock()
+    # Use side_effect to return a different response for each call
+    mock_parse.side_effect = mock_parse_responses_with_explanation
+    mock_client.beta.chat.completions.parse = mock_parse
+    
+    return mock_client
+
+
+
+
+@pytest.mark.asyncio
+async def test_declarative_eval_correct_field_mappings(
+    sample_dataframe,
+    correct_field_mappings,
+    mock_client
+):
+    """Test declarative_eval with correct field mappings."""
+    result = await declarative_eval(
+        data=sample_dataframe,
+        model=mock_client,
+        schema=Schema,
+        field_mappings=correct_field_mappings,
+    )
+    fm_keys = set(correct_field_mappings.keys())
+    assert isinstance(result, pd.DataFrame)
+    assert result.shape[0] == 2
+    assert fm_keys.issubset(set(result.columns.tolist()))
+    assert result["conciseness.label"].tolist() == [True, True]
+    assert result["formatting.label"].tolist() == ["High", "High"]
+
+
+@pytest.mark.asyncio
+async def test_declarative_eval_incorrect_field_mappings(
+    sample_dataframe,
+    incorrect_field_mappings,
+    mock_client
+):
+    """Test declarative_eval with incorrect field mappings."""
+    result = await declarative_eval(
+        data=sample_dataframe,
+        model=mock_client,
+        schema=Schema,
+        field_mappings=incorrect_field_mappings,
+    )
+    fm_keys = set(incorrect_field_mappings.keys())
+    assert isinstance(result, pd.DataFrame)
+    assert result.shape[0] == 2
+    assert fm_keys.issubset(set(result.columns.tolist()))
+    for fm_key in fm_keys:
+        assert result[fm_key].tolist() == [None, None]
+
+@pytest.mark.asyncio
+async def test_declarative_eval_with_explanation(
+    sample_dataframe,
+    correct_field_mappings,
+    mock_client_with_explanation
+):
+    """Test declarative_eval with explanations."""
+    result = await declarative_eval(
+        data=sample_dataframe,
+        model=mock_client_with_explanation,
+        schema=SchemaWithExplanation,
+        field_mappings=correct_field_mappings,
+        provide_explanation=True,
+    )
+
+    pre_transform_fm = correct_field_mappings
+    pre_transform_fm_keys = set(pre_transform_fm.keys())
+    pre_transform_fm_values = set(pre_transform_fm.values())
+    
+    print(f"pre_transform_fm: {pre_transform_fm}")
+    correct_field_mappings = transform_field_mappings_for_explanation(correct_field_mappings)
+    print(f"correct_field_mappings: {correct_field_mappings}")
+    fm_keys = set(correct_field_mappings.keys())
+    fm_values = set(correct_field_mappings.values())
+    
+    assert isinstance(result, pd.DataFrame)
+    assert result.shape[0] == 2
+    assert fm_keys.issubset(set(result.columns.tolist()))
+    assert pre_transform_fm_values.isdisjoint(fm_values)
+    assert result["conciseness.label"].tolist() == [True, True]
+    assert result["formatting.label"].tolist() == ["High", "High"]
+    assert result["explanation"].tolist() == ["Explanation", "Explanation"]
\ No newline at end of file
diff --git a/uv.lock b/uv.lock
index e7ac256186..5be0dbc66b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,13 +1,11 @@
 version = 1
 requires-python = ">=3.9, <3.14"
 resolution-markers = [
-    "python_version < '0'",
     "python_full_version >= '3.13' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
     "python_full_version < '3.11' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
     "python_full_version == '3.11.*' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
     "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
     "python_full_version >= '3.12.4' and python_full_version < '3.13' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
-    "python_version < '0'",
     "(python_full_version >= '3.13' and platform_machine != 'x86_64') or (python_full_version >= '3.13' and sys_platform != 'darwin')",
     "(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin')",
     "(python_full_version == '3.11.*' and platform_machine != 'x86_64') or (python_full_version == '3.11.*' and sys_platform != 'darwin')",
@@ -187,7 +185,7 @@ wheels = [
 
 [[package]]
 name = "anthropic"
-version = "0.42.0"
+version = "0.49.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -198,9 +196,9 @@ dependencies = [
     { name = "sniffio" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/e7/7c/91b79f5ae4a52497a4e330d66ea5929aec2878ee2c9f8a998dbe4f4c7f01/anthropic-0.42.0.tar.gz", hash = "sha256:bf8b0ed8c8cb2c2118038f29c58099d2f99f7847296cafdaa853910bfff4edf4", size = 192361 }
+sdist = { url = "https://files.pythonhosted.org/packages/86/e3/a88c8494ce4d1a88252b9e053607e885f9b14d0a32273d47b727cbee4228/anthropic-0.49.0.tar.gz", hash = "sha256:c09e885b0f674b9119b4f296d8508907f6cff0009bc20d5cf6b35936c40b4398", size = 210016 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ba/33/b907a6d27dd0d8d3adb4edb5c9e9c85a189719ec6855051cce3814c8ef13/anthropic-0.42.0-py3-none-any.whl", hash = "sha256:46775f65b723c078a2ac9e9de44a46db5c6a4fabeacfd165e5ea78e6817f4eff", size = 203365 },
+    { url = "https://files.pythonhosted.org/packages/76/74/5d90ad14d55fbe3f9c474fdcb6e34b4bed99e3be8efac98734a5ddce88c1/anthropic-0.49.0-py3-none-any.whl", hash = "sha256:bbc17ad4e7094988d2fa86b87753ded8dce12498f4b85fe5810f208f454a8375", size = 243368 },
 ]
 
 [[package]]
@@ -295,22 +293,23 @@ llm-evaluation = [
 
 [[package]]
 name = "arize-phoenix"
-version = "7.6.0"
+version = "8.26.3"
 source = { editable = "." }
 dependencies = [
     { name = "aioitertools" },
     { name = "aiosqlite" },
     { name = "alembic" },
+    { name = "arize-phoenix-client" },
     { name = "arize-phoenix-evals" },
     { name = "arize-phoenix-otel" },
     { name = "authlib" },
     { name = "cachetools" },
+    { name = "email-validator" },
     { name = "fastapi" },
     { name = "grpc-interceptor" },
     { name = "grpcio" },
     { name = "httpx" },
     { name = "jinja2" },
-    { name = "jsonschema" },
     { name = "numpy" },
     { name = "openinference-instrumentation" },
     { name = "openinference-semantic-conventions" },
@@ -333,14 +332,14 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
     { name = "uvicorn" },
-    { name = "websockets" },
-    { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" },
-    { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" },
+    { name = "wrapt" },
 ]
 
 [package.optional-dependencies]
 container = [
+    { name = "aiohttp" },
     { name = "anthropic" },
+    { name = "azure-identity" },
     { name = "fast-hdbscan" },
     { name = "google-generativeai" },
     { name = "numba" },
@@ -414,19 +413,23 @@ pg = [
 
 [package.metadata]
 requires-dist = [
+    { name = "aiohttp", marker = "extra == 'container'" },
     { name = "aioitertools" },
     { name = "aiosqlite" },
     { name = "alembic", specifier = ">=1.3.0,<2" },
-    { name = "anthropic", marker = "extra == 'container'" },
-    { name = "anthropic", marker = "extra == 'dev'" },
+    { name = "anthropic", marker = "extra == 'container'", specifier = ">=0.49.0" },
+    { name = "anthropic", marker = "extra == 'dev'", specifier = ">=0.49.0" },
     { name = "arize", extras = ["autoembeddings", "llm-evaluation"], marker = "extra == 'dev'" },
+    { name = "arize-phoenix-client" },
     { name = "arize-phoenix-evals", specifier = ">=0.13.1" },
     { name = "arize-phoenix-otel", specifier = ">=0.5.1" },
     { name = "asgi-lifespan", marker = "extra == 'dev'" },
     { name = "asyncpg", marker = "extra == 'dev'" },
     { name = "asyncpg", marker = "extra == 'pg'" },
     { name = "authlib" },
+    { name = "azure-identity", marker = "extra == 'container'" },
     { name = "cachetools" },
+    { name = "email-validator" },
     { name = "faker", marker = "extra == 'dev'", specifier = ">=30.1.0" },
     { name = "fast-hdbscan", marker = "extra == 'container'", specifier = ">=0.2.0" },
     { name = "fast-hdbscan", marker = "extra == 'embeddings'", specifier = ">=0.2.0" },
@@ -440,7 +443,6 @@ requires-dist = [
     { name = "hatch", marker = "extra == 'dev'" },
     { name = "httpx" },
     { name = "jinja2" },
-    { name = "jsonschema", specifier = ">=4.0.0,<=4.23.0" },
     { name = "jupyter", marker = "extra == 'dev'" },
     { name = "langchain", marker = "extra == 'dev'", specifier = ">=0.0.334" },
     { name = "litellm", marker = "extra == 'dev'", specifier = ">=1.0.3,<1.57.5" },
@@ -482,7 +484,7 @@ requires-dist = [
     { name = "psycopg", extras = ["binary", "pool"], marker = "extra == 'pg'" },
     { name = "py-grpc-prometheus", marker = "extra == 'container'" },
     { name = "pyarrow" },
-    { name = "pydantic", specifier = ">=1.0,!=2.0.*,<3" },
+    { name = "pydantic", specifier = ">=2.1.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.3" },
     { name = "pytest-asyncio", marker = "extra == 'dev'" },
     { name = "pytest-cov", marker = "extra == 'dev'" },
@@ -494,10 +496,10 @@ requires-dist = [
     { name = "scipy" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.4,<3" },
     { name = "sqlean-py", specifier = ">=3.45.1" },
-    { name = "starlette" },
-    { name = "strawberry-graphql", specifier = "==0.253.1" },
-    { name = "strawberry-graphql", extras = ["debug-server", "opentelemetry"], marker = "extra == 'dev'", specifier = "==0.253.1" },
-    { name = "strawberry-graphql", extras = ["opentelemetry"], marker = "extra == 'container'", specifier = "==0.253.1" },
+    { name = "starlette", specifier = ">=0.46.0" },
+    { name = "strawberry-graphql", specifier = ">=0.262.0" },
+    { name = "strawberry-graphql", extras = ["debug-server", "opentelemetry"], marker = "extra == 'dev'", specifier = "==0.262.5" },
+    { name = "strawberry-graphql", extras = ["opentelemetry"], marker = "extra == 'container'", specifier = "==0.262.5" },
     { name = "tabulate", marker = "extra == 'dev'" },
     { name = "tox", marker = "extra == 'dev'", specifier = "==4.18.1" },
     { name = "tox-uv", marker = "extra == 'dev'", specifier = "==1.11.3" },
@@ -511,9 +513,20 @@ requires-dist = [
     { name = "uvicorn" },
     { name = "uvloop", marker = "platform_system != 'Windows' and extra == 'container'" },
     { name = "uvloop", marker = "platform_system != 'Windows' and extra == 'dev'" },
-    { name = "websockets" },
-    { name = "wrapt", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'", specifier = ">=1.17" },
-    { name = "wrapt", marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'", specifier = "<1.17" },
+    { name = "wrapt", specifier = ">=1.17.2" },
+]
+
+[[package]]
+name = "arize-phoenix-client"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpx" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c7/d2/9170cc95ca4dbd2fa1509f8136c66ef8b26da5ad4e2df531a4e1259ede6f/arize_phoenix_client-1.3.0.tar.gz", hash = "sha256:37c3f72d4acfb9a5ca2f9f3cde2979ddae72dcea131fe08d550249efe8ccb1b3", size = 32841 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ce/83/598a4df37b4b7a4ba69016955dcc9ac24742e44f520efc75a87b86400a84/arize_phoenix_client-1.3.0-py3-none-any.whl", hash = "sha256:f2e2e0fae25d67063b0e6d967ecae47390350a5c89947b733bb5d8d934d6e1f0", size = 36544 },
 ]
 
 [[package]]
@@ -699,6 +712,36 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9e/43/53afb8ba17218f19b77c7834128566c5bbb100a0ad9ba2e8e89d089d7079/autopep8-2.3.2-py2.py3-none-any.whl", hash = "sha256:ce8ad498672c845a0c3de2629c15b635ec2b05ef8177a6e7c91c74f3e9b51128", size = 45807 },
 ]
 
+[[package]]
+name = "azure-core"
+version = "1.33.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "requests" },
+    { name = "six" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 },
+]
+
+[[package]]
+name = "azure-identity"
+version = "1.21.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "azure-core" },
+    { name = "cryptography" },
+    { name = "msal" },
+    { name = "msal-extensions" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b5/a1/f1a683672e7a88ea0e3119f57b6c7843ed52650fdcac8bfa66ed84e86e40/azure_identity-1.21.0.tar.gz", hash = "sha256:ea22ce6e6b0f429bc1b8d9212d5b9f9877bd4c82f1724bfa910760612c07a9a6", size = 266445 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3d/9f/1f9f3ef4f49729ee207a712a5971a9ca747f2ca47d9cbf13cf6953e3478a/azure_identity-1.21.0-py3-none-any.whl", hash = "sha256:258ea6325537352440f71b35c3dffe9d240eae4a5126c1b7ce5efd5766bd9fd9", size = 189190 },
+]
+
 [[package]]
 name = "babel"
 version = "2.16.0"
@@ -1158,8 +1201,7 @@ name = "deprecated"
 version = "1.2.15"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" },
-    { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" },
+    { name = "wrapt" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/2e/a3/53e7d78a6850ffdd394d7048a31a6f14e44900adedf190f9a165f6b69439/deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d", size = 2977612 }
 wheels = [
@@ -1202,6 +1244,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
 ]
 
+[[package]]
+name = "dnspython"
+version = "2.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/4a/263763cb2ba3816dd94b08ad3a33d5fdae34ecb856678773cc40a3605829/dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1", size = 345197 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632 },
+]
+
 [[package]]
 name = "docstring-parser"
 version = "0.16"
@@ -1211,6 +1262,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d5/7c/e9fcff7623954d86bdc17782036cbf715ecab1bec4847c008557affe1ca8/docstring_parser-0.16-py3-none-any.whl", hash = "sha256:bf0a1387354d3691d102edef7ec124f219ef639982d096e26e3b60aeffa90637", size = 36533 },
 ]
 
+[[package]]
+name = "email-validator"
+version = "2.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "dnspython" },
+    { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/48/ce/13508a1ec3f8bb981ae4ca79ea40384becc868bfae97fd1c942bb3a001b1/email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7", size = 48967 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521 },
+]
+
 [[package]]
 name = "evaluate"
 version = "0.4.3"
@@ -1289,16 +1353,16 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.115.6"
+version = "0.115.12"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "pydantic" },
     { name = "starlette" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/93/72/d83b98cd106541e8f5e5bfab8ef2974ab45a62e8a6c5b5e6940f26d2ed4b/fastapi-0.115.6.tar.gz", hash = "sha256:9ec46f7addc14ea472958a96aae5b5de65f39721a46aaf5705c480d9a8b76654", size = 301336 }
+sdist = { url = "https://files.pythonhosted.org/packages/f4/55/ae499352d82338331ca1e28c7f4a63bfd09479b16395dce38cf50a39e2c2/fastapi-0.115.12.tar.gz", hash = "sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681", size = 295236 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/52/b3/7e4df40e585df024fac2f80d1a2d579c854ac37109675db2b0cc22c0bb9e/fastapi-0.115.6-py3-none-any.whl", hash = "sha256:e9240b29e36fa8f4bb7290316988e90c381e5092e0cbe84e7818cc3713bcf305", size = 94843 },
+    { url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164 },
 ]
 
 [[package]]
@@ -2778,8 +2842,7 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
     { name = "typing-inspect" },
-    { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" },
-    { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" },
+    { name = "wrapt" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e8/8e/556d6aec36f475f5316bfb24e000a4d2359c6492d68c778f4a3cab11bb39/llama_index_core-0.11.0.post1.tar.gz", hash = "sha256:0378f750ffbebcd914649df0e2ec27aa94329e64cfcda693090ff1d7b9b86f41", size = 1314856 }
 wheels = [
@@ -3239,6 +3302,32 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 },
 ]
 
+[[package]]
+name = "msal"
+version = "1.32.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography" },
+    { name = "pyjwt", extra = ["crypto"] },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/aa/5f/ef42ef25fba682e83a8ee326a1a788e60c25affb58d014495349e37bce50/msal-1.32.0.tar.gz", hash = "sha256:5445fe3af1da6be484991a7ab32eaa82461dc2347de105b76af92c610c3335c2", size = 149817 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/93/5a/2e663ef56a5d89eba962941b267ebe5be8c5ea340a9929d286e2f5fac505/msal-1.32.0-py3-none-any.whl", hash = "sha256:9dbac5384a10bbbf4dae5c7ea0d707d14e087b92c5aa4954b3feaa2d1aa0bcb7", size = 114655 },
+]
+
+[[package]]
+name = "msal-extensions"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "msal" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583 },
+]
+
 [[package]]
 name = "multidict"
 version = "6.1.0"
@@ -3859,8 +3948,7 @@ dependencies = [
     { name = "opentelemetry-api" },
     { name = "opentelemetry-semantic-conventions" },
     { name = "packaging" },
-    { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" },
-    { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" },
+    { name = "wrapt" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/79/2e/2e59a7cb636dc394bd7cf1758ada5e8ed87590458ca6bb2f9c26e0243847/opentelemetry_instrumentation-0.50b0.tar.gz", hash = "sha256:7d98af72de8dec5323e5202e46122e5f908592b22c6d24733aad619f07d82979", size = 26539 }
 wheels = [
@@ -3907,8 +3995,7 @@ dependencies = [
     { name = "opentelemetry-api" },
     { name = "opentelemetry-instrumentation" },
     { name = "opentelemetry-semantic-conventions" },
-    { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" },
-    { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" },
+    { name = "wrapt" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/51/56/658110193718ddde6e8e68ef3ad3fee7850055820a9fc2bd7ec1347afeca/opentelemetry_instrumentation_grpc-0.50b0.tar.gz", hash = "sha256:12381fbc0a7a91410fb9dad5f26f6de5eb5c30cd19c840fa9bfee78b584af7e7", size = 30746 }
 wheels = [
@@ -3924,8 +4011,7 @@ dependencies = [
     { name = "opentelemetry-instrumentation" },
     { name = "opentelemetry-semantic-conventions" },
     { name = "packaging" },
-    { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" },
-    { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" },
+    { name = "wrapt" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/72/ac/0cc668bb74b3646447936307bc0a56756568602e46be7a53a770cadab5f3/opentelemetry_instrumentation_sqlalchemy-0.50b0.tar.gz", hash = "sha256:8560fe2375d973746907599f360199ba0f658189ef6feba73c1702e8d832bb6e", size = 13632 }
 wheels = [
@@ -4807,6 +4893,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
 ]
 
+[[package]]
+name = "pyjwt"
+version = "2.10.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 },
+]
+
+[package.optional-dependencies]
+crypto = [
+    { name = "cryptography" },
+]
+
 [[package]]
 name = "pynndescent"
 version = "0.5.13"
@@ -5846,29 +5946,30 @@ wheels = [
 
 [[package]]
 name = "starlette"
-version = "0.41.3"
+version = "0.46.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
     { name = "typing-extensions", marker = "python_full_version < '3.10'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/1a/4c/9b5764bd22eec91c4039ef4c55334e9187085da2d8a2df7bd570869aae18/starlette-0.41.3.tar.gz", hash = "sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835", size = 2574159 }
+sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/96/00/2b325970b3060c7cecebab6d295afe763365822b1306a12eeab198f74323/starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7", size = 73225 },
+    { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037 },
 ]
 
 [[package]]
 name = "strawberry-graphql"
-version = "0.253.1"
+version = "0.262.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "graphql-core" },
+    { name = "packaging" },
     { name = "python-dateutil" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f6/f3/0a18ad1f102e50aeb2ae612380bfb0264068e9d8efc4dda4b86e0052c9d1/strawberry_graphql-0.253.1.tar.gz", hash = "sha256:be43eac92e0896a7f1061ab293b89b060d369974e4c1444d16ad377d7a6f030d", size = 207814 }
+sdist = { url = "https://files.pythonhosted.org/packages/1d/9f/77a2611aeeef2b01dbfeea3d4a48be2517ba73935c87ee000e9c14844fd6/strawberry_graphql-0.262.5.tar.gz", hash = "sha256:92a5403133fb22ea4f31a09df9aa70567cbd7c860dc34afe92a32103125c6f26", size = 202428 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/24/b3/4745158fe8ebcd89bea2bdc6c070ef7a750bea52f6db299ae4fdc1bb2691/strawberry_graphql-0.253.1-py3-none-any.whl", hash = "sha256:f24cc55560546968255094aa080fb11a14eace4d61cd27eaf98dded863a2af17", size = 295163 },
+    { url = "https://files.pythonhosted.org/packages/4d/6b/715835515ff21ab9de351df401a769a51b051dad6b67fa43778682de13a5/strawberry_graphql-0.262.5-py3-none-any.whl", hash = "sha256:7bc62e19326d3f5294f473c2ca3418bd01297e6abfd4a5a133f33fc9a5fcd5e1", size = 296015 },
 ]
 
 [package.optional-dependencies]
@@ -6526,82 +6627,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
 ]
 
-[[package]]
-name = "websockets"
-version = "14.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f4/1b/380b883ce05bb5f45a905b61790319a28958a9ab1e4b6b95ff5464b60ca1/websockets-14.1.tar.gz", hash = "sha256:398b10c77d471c0aab20a845e7a60076b6390bfdaac7a6d2edb0d2c59d75e8d8", size = 162840 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/af/91/b1b375dbd856fd5fff3f117de0e520542343ecaf4e8fc60f1ac1e9f5822c/websockets-14.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a0adf84bc2e7c86e8a202537b4fd50e6f7f0e4a6b6bf64d7ccb96c4cd3330b29", size = 161950 },
-    { url = "https://files.pythonhosted.org/packages/61/8f/4d52f272d3ebcd35e1325c646e98936099a348374d4a6b83b524bded8116/websockets-14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90b5d9dfbb6d07a84ed3e696012610b6da074d97453bd01e0e30744b472c8179", size = 159601 },
-    { url = "https://files.pythonhosted.org/packages/c4/b1/29e87b53eb1937992cdee094a0988aadc94f25cf0b37e90c75eed7123d75/websockets-14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2177ee3901075167f01c5e335a6685e71b162a54a89a56001f1c3e9e3d2ad250", size = 159854 },
-    { url = "https://files.pythonhosted.org/packages/3f/e6/752a2f5e8321ae2a613062676c08ff2fccfb37dc837a2ee919178a372e8a/websockets-14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f14a96a0034a27f9d47fd9788913924c89612225878f8078bb9d55f859272b0", size = 168835 },
-    { url = "https://files.pythonhosted.org/packages/60/27/ca62de7877596926321b99071639275e94bb2401397130b7cf33dbf2106a/websockets-14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f874ba705deea77bcf64a9da42c1f5fc2466d8f14daf410bc7d4ceae0a9fcb0", size = 167844 },
-    { url = "https://files.pythonhosted.org/packages/7e/db/f556a1d06635c680ef376be626c632e3f2bbdb1a0189d1d1bffb061c3b70/websockets-14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9607b9a442392e690a57909c362811184ea429585a71061cd5d3c2b98065c199", size = 168157 },
-    { url = "https://files.pythonhosted.org/packages/b3/bc/99e5f511838c365ac6ecae19674eb5e94201aa4235bd1af3e6fa92c12905/websockets-14.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bea45f19b7ca000380fbd4e02552be86343080120d074b87f25593ce1700ad58", size = 168561 },
-    { url = "https://files.pythonhosted.org/packages/c6/e7/251491585bad61c79e525ac60927d96e4e17b18447cc9c3cfab47b2eb1b8/websockets-14.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:219c8187b3ceeadbf2afcf0f25a4918d02da7b944d703b97d12fb01510869078", size = 167979 },
-    { url = "https://files.pythonhosted.org/packages/ac/98/7ac2e4eeada19bdbc7a3a66a58e3ebdf33648b9e1c5b3f08c3224df168cf/websockets-14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ad2ab2547761d79926effe63de21479dfaf29834c50f98c4bf5b5480b5838434", size = 167925 },
-    { url = "https://files.pythonhosted.org/packages/ab/3d/09e65c47ee2396b7482968068f6e9b516221e1032b12dcf843b9412a5dfb/websockets-14.1-cp310-cp310-win32.whl", hash = "sha256:1288369a6a84e81b90da5dbed48610cd7e5d60af62df9851ed1d1d23a9069f10", size = 162831 },
-    { url = "https://files.pythonhosted.org/packages/8a/67/59828a3d09740e6a485acccfbb66600632f2178b6ed1b61388ee96f17d5a/websockets-14.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0744623852f1497d825a49a99bfbec9bea4f3f946df6eb9d8a2f0c37a2fec2e", size = 163266 },
-    { url = "https://files.pythonhosted.org/packages/97/ed/c0d03cb607b7fe1f7ff45e2cd4bb5cd0f9e3299ced79c2c303a6fff44524/websockets-14.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:449d77d636f8d9c17952628cc7e3b8faf6e92a17ec581ec0c0256300717e1512", size = 161949 },
-    { url = "https://files.pythonhosted.org/packages/06/91/bf0a44e238660d37a2dda1b4896235d20c29a2d0450f3a46cd688f43b239/websockets-14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a35f704be14768cea9790d921c2c1cc4fc52700410b1c10948511039be824aac", size = 159606 },
-    { url = "https://files.pythonhosted.org/packages/ff/b8/7185212adad274c2b42b6a24e1ee6b916b7809ed611cbebc33b227e5c215/websockets-14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b1f3628a0510bd58968c0f60447e7a692933589b791a6b572fcef374053ca280", size = 159854 },
-    { url = "https://files.pythonhosted.org/packages/5a/8a/0849968d83474be89c183d8ae8dcb7f7ada1a3c24f4d2a0d7333c231a2c3/websockets-14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c3deac3748ec73ef24fc7be0b68220d14d47d6647d2f85b2771cb35ea847aa1", size = 169402 },
-    { url = "https://files.pythonhosted.org/packages/bd/4f/ef886e37245ff6b4a736a09b8468dae05d5d5c99de1357f840d54c6f297d/websockets-14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7048eb4415d46368ef29d32133134c513f507fff7d953c18c91104738a68c3b3", size = 168406 },
-    { url = "https://files.pythonhosted.org/packages/11/43/e2dbd4401a63e409cebddedc1b63b9834de42f51b3c84db885469e9bdcef/websockets-14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6cf0ad281c979306a6a34242b371e90e891bce504509fb6bb5246bbbf31e7b6", size = 168776 },
-    { url = "https://files.pythonhosted.org/packages/6d/d6/7063e3f5c1b612e9f70faae20ebaeb2e684ffa36cb959eb0862ee2809b32/websockets-14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cc1fc87428c1d18b643479caa7b15db7d544652e5bf610513d4a3478dbe823d0", size = 169083 },
-    { url = "https://files.pythonhosted.org/packages/49/69/e6f3d953f2fa0f8a723cf18cd011d52733bd7f6e045122b24e0e7f49f9b0/websockets-14.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f95ba34d71e2fa0c5d225bde3b3bdb152e957150100e75c86bc7f3964c450d89", size = 168529 },
-    { url = "https://files.pythonhosted.org/packages/70/ff/f31fa14561fc1d7b8663b0ed719996cf1f581abee32c8fb2f295a472f268/websockets-14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9481a6de29105d73cf4515f2bef8eb71e17ac184c19d0b9918a3701c6c9c4f23", size = 168475 },
-    { url = "https://files.pythonhosted.org/packages/f1/15/b72be0e4bf32ff373aa5baef46a4c7521b8ea93ad8b49ca8c6e8e764c083/websockets-14.1-cp311-cp311-win32.whl", hash = "sha256:368a05465f49c5949e27afd6fbe0a77ce53082185bbb2ac096a3a8afaf4de52e", size = 162833 },
-    { url = "https://files.pythonhosted.org/packages/bc/ef/2d81679acbe7057ffe2308d422f744497b52009ea8bab34b6d74a2657d1d/websockets-14.1-cp311-cp311-win_amd64.whl", hash = "sha256:6d24fc337fc055c9e83414c94e1ee0dee902a486d19d2a7f0929e49d7d604b09", size = 163263 },
-    { url = "https://files.pythonhosted.org/packages/55/64/55698544ce29e877c9188f1aee9093712411a8fc9732cca14985e49a8e9c/websockets-14.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ed907449fe5e021933e46a3e65d651f641975a768d0649fee59f10c2985529ed", size = 161957 },
-    { url = "https://files.pythonhosted.org/packages/a2/b1/b088f67c2b365f2c86c7b48edb8848ac27e508caf910a9d9d831b2f343cb/websockets-14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:87e31011b5c14a33b29f17eb48932e63e1dcd3fa31d72209848652310d3d1f0d", size = 159620 },
-    { url = "https://files.pythonhosted.org/packages/c1/89/2a09db1bbb40ba967a1b8225b07b7df89fea44f06de9365f17f684d0f7e6/websockets-14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bc6ccf7d54c02ae47a48ddf9414c54d48af9c01076a2e1023e3b486b6e72c707", size = 159852 },
-    { url = "https://files.pythonhosted.org/packages/ca/c1/f983138cd56e7d3079f1966e81f77ce6643f230cd309f73aa156bb181749/websockets-14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9777564c0a72a1d457f0848977a1cbe15cfa75fa2f67ce267441e465717dcf1a", size = 169675 },
-    { url = "https://files.pythonhosted.org/packages/c1/c8/84191455d8660e2a0bdb33878d4ee5dfa4a2cedbcdc88bbd097303b65bfa/websockets-14.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a655bde548ca98f55b43711b0ceefd2a88a71af6350b0c168aa77562104f3f45", size = 168619 },
-    { url = "https://files.pythonhosted.org/packages/8d/a7/62e551fdcd7d44ea74a006dc193aba370505278ad76efd938664531ce9d6/websockets-14.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3dfff83ca578cada2d19e665e9c8368e1598d4e787422a460ec70e531dbdd58", size = 169042 },
-    { url = "https://files.pythonhosted.org/packages/ad/ed/1532786f55922c1e9c4d329608e36a15fdab186def3ca9eb10d7465bc1cc/websockets-14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6a6c9bcf7cdc0fd41cc7b7944447982e8acfd9f0d560ea6d6845428ed0562058", size = 169345 },
-    { url = "https://files.pythonhosted.org/packages/ea/fb/160f66960d495df3de63d9bcff78e1b42545b2a123cc611950ffe6468016/websockets-14.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4b6caec8576e760f2c7dd878ba817653144d5f369200b6ddf9771d64385b84d4", size = 168725 },
-    { url = "https://files.pythonhosted.org/packages/cf/53/1bf0c06618b5ac35f1d7906444b9958f8485682ab0ea40dee7b17a32da1e/websockets-14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eb6d38971c800ff02e4a6afd791bbe3b923a9a57ca9aeab7314c21c84bf9ff05", size = 168712 },
-    { url = "https://files.pythonhosted.org/packages/e5/22/5ec2f39fff75f44aa626f86fa7f20594524a447d9c3be94d8482cd5572ef/websockets-14.1-cp312-cp312-win32.whl", hash = "sha256:1d045cbe1358d76b24d5e20e7b1878efe578d9897a25c24e6006eef788c0fdf0", size = 162838 },
-    { url = "https://files.pythonhosted.org/packages/74/27/28f07df09f2983178db7bf6c9cccc847205d2b92ced986cd79565d68af4f/websockets-14.1-cp312-cp312-win_amd64.whl", hash = "sha256:90f4c7a069c733d95c308380aae314f2cb45bd8a904fb03eb36d1a4983a4993f", size = 163277 },
-    { url = "https://files.pythonhosted.org/packages/34/77/812b3ba5110ed8726eddf9257ab55ce9e85d97d4aa016805fdbecc5e5d48/websockets-14.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3630b670d5057cd9e08b9c4dab6493670e8e762a24c2c94ef312783870736ab9", size = 161966 },
-    { url = "https://files.pythonhosted.org/packages/8d/24/4fcb7aa6986ae7d9f6d083d9d53d580af1483c5ec24bdec0978307a0f6ac/websockets-14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36ebd71db3b89e1f7b1a5deaa341a654852c3518ea7a8ddfdf69cc66acc2db1b", size = 159625 },
-    { url = "https://files.pythonhosted.org/packages/f8/47/2a0a3a2fc4965ff5b9ce9324d63220156bd8bedf7f90824ab92a822e65fd/websockets-14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5b918d288958dc3fa1c5a0b9aa3256cb2b2b84c54407f4813c45d52267600cd3", size = 159857 },
-    { url = "https://files.pythonhosted.org/packages/dd/c8/d7b425011a15e35e17757e4df75b25e1d0df64c0c315a44550454eaf88fc/websockets-14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00fe5da3f037041da1ee0cf8e308374e236883f9842c7c465aa65098b1c9af59", size = 169635 },
-    { url = "https://files.pythonhosted.org/packages/93/39/6e3b5cffa11036c40bd2f13aba2e8e691ab2e01595532c46437b56575678/websockets-14.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8149a0f5a72ca36720981418eeffeb5c2729ea55fa179091c81a0910a114a5d2", size = 168578 },
-    { url = "https://files.pythonhosted.org/packages/cf/03/8faa5c9576299b2adf34dcccf278fc6bbbcda8a3efcc4d817369026be421/websockets-14.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77569d19a13015e840b81550922056acabc25e3f52782625bc6843cfa034e1da", size = 169018 },
-    { url = "https://files.pythonhosted.org/packages/8c/05/ea1fec05cc3a60defcdf0bb9f760c3c6bd2dd2710eff7ac7f891864a22ba/websockets-14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cf5201a04550136ef870aa60ad3d29d2a59e452a7f96b94193bee6d73b8ad9a9", size = 169383 },
-    { url = "https://files.pythonhosted.org/packages/21/1d/eac1d9ed787f80754e51228e78855f879ede1172c8b6185aca8cef494911/websockets-14.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:88cf9163ef674b5be5736a584c999e98daf3aabac6e536e43286eb74c126b9c7", size = 168773 },
-    { url = "https://files.pythonhosted.org/packages/0e/1b/e808685530185915299740d82b3a4af3f2b44e56ccf4389397c7a5d95d39/websockets-14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:836bef7ae338a072e9d1863502026f01b14027250a4545672673057997d5c05a", size = 168757 },
-    { url = "https://files.pythonhosted.org/packages/b6/19/6ab716d02a3b068fbbeb6face8a7423156e12c446975312f1c7c0f4badab/websockets-14.1-cp313-cp313-win32.whl", hash = "sha256:0d4290d559d68288da9f444089fd82490c8d2744309113fc26e2da6e48b65da6", size = 162834 },
-    { url = "https://files.pythonhosted.org/packages/6c/fd/ab6b7676ba712f2fc89d1347a4b5bdc6aa130de10404071f2b2606450209/websockets-14.1-cp313-cp313-win_amd64.whl", hash = "sha256:8621a07991add373c3c5c2cf89e1d277e49dc82ed72c75e3afc74bd0acc446f0", size = 163277 },
-    { url = "https://files.pythonhosted.org/packages/4d/23/ac9d8c5ec7b90efc3687d60474ef7e698f8b75cb7c9dfedad72701e797c9/websockets-14.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:01bb2d4f0a6d04538d3c5dfd27c0643269656c28045a53439cbf1c004f90897a", size = 161945 },
-    { url = "https://files.pythonhosted.org/packages/c5/6b/ffa450e3b736a86ae6b40ce20a758ac9af80c96a18548f6c323ed60329c5/websockets-14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:414ffe86f4d6f434a8c3b7913655a1a5383b617f9bf38720e7c0799fac3ab1c6", size = 159600 },
-    { url = "https://files.pythonhosted.org/packages/74/62/f90d1fd57ea7337ecaa99f17c31a544b9dcdb7c7c32a3d3997ccc42d57d3/websockets-14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8fda642151d5affdee8a430bd85496f2e2517be3a2b9d2484d633d5712b15c56", size = 159850 },
-    { url = "https://files.pythonhosted.org/packages/35/dd/1e71865de1f3c265e11d02b0b4c76178f84351c6611e515fbe3d2bd1b98c/websockets-14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd7c11968bc3860d5c78577f0dbc535257ccec41750675d58d8dc66aa47fe52c", size = 168616 },
-    { url = "https://files.pythonhosted.org/packages/ba/ae/0d069b52e26d48402dbe90c7581eb6a5bed5d7dbe3d9ca3cf1033859d58e/websockets-14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a032855dc7db987dff813583d04f4950d14326665d7e714d584560b140ae6b8b", size = 167619 },
-    { url = "https://files.pythonhosted.org/packages/1c/3f/d3f2df62704c53e0296f0ce714921b6a15df10e2e463734c737b1d9e2522/websockets-14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7e7ea2f782408c32d86b87a0d2c1fd8871b0399dd762364c731d86c86069a78", size = 167921 },
-    { url = "https://files.pythonhosted.org/packages/e0/e2/2dcb295bdae9393070cea58c790d87d1d36149bb4319b1da6014c8a36d42/websockets-14.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:39450e6215f7d9f6f7bc2a6da21d79374729f5d052333da4d5825af8a97e6735", size = 168343 },
-    { url = "https://files.pythonhosted.org/packages/6b/fd/fa48e8b4e10e2c165cbfc16dada7405b4008818be490fc6b99a4928e232a/websockets-14.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ceada5be22fa5a5a4cdeec74e761c2ee7db287208f54c718f2df4b7e200b8d4a", size = 167745 },
-    { url = "https://files.pythonhosted.org/packages/42/45/79db33f2b744d2014b40946428e6c37ce944fde8791d82e1c2f4d4a67d96/websockets-14.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3fc753451d471cff90b8f467a1fc0ae64031cf2d81b7b34e1811b7e2691bc4bc", size = 167705 },
-    { url = "https://files.pythonhosted.org/packages/da/27/f66507db34ca9c79562f28fa5983433f7b9080fd471cc188906006d36ba4/websockets-14.1-cp39-cp39-win32.whl", hash = "sha256:14839f54786987ccd9d03ed7f334baec0f02272e7ec4f6e9d427ff584aeea8b4", size = 162828 },
-    { url = "https://files.pythonhosted.org/packages/11/25/bb8f81a4ec94f595adb845608c5ec9549cb6b446945b292fe61807c7c95b/websockets-14.1-cp39-cp39-win_amd64.whl", hash = "sha256:d9fd19ecc3a4d5ae82ddbfb30962cf6d874ff943e56e0c81f5169be2fda62979", size = 163271 },
-    { url = "https://files.pythonhosted.org/packages/fb/cd/382a05a1ba2a93bd9fb807716a660751295df72e77204fb130a102fcdd36/websockets-14.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e5dc25a9dbd1a7f61eca4b7cb04e74ae4b963d658f9e4f9aad9cd00b688692c8", size = 159633 },
-    { url = "https://files.pythonhosted.org/packages/b7/a0/fa7c62e2952ef028b422fbf420f9353d9dd4dfaa425de3deae36e98c0784/websockets-14.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:04a97aca96ca2acedf0d1f332c861c5a4486fdcba7bcef35873820f940c4231e", size = 159867 },
-    { url = "https://files.pythonhosted.org/packages/c1/94/954b4924f868db31d5f0935893c7a8446515ee4b36bb8ad75a929469e453/websockets-14.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df174ece723b228d3e8734a6f2a6febbd413ddec39b3dc592f5a4aa0aff28098", size = 161121 },
-    { url = "https://files.pythonhosted.org/packages/7a/2e/f12bbb41a8f2abb76428ba4fdcd9e67b5b364a3e7fa97c88f4d6950aa2d4/websockets-14.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:034feb9f4286476f273b9a245fb15f02c34d9586a5bc936aff108c3ba1b21beb", size = 160731 },
-    { url = "https://files.pythonhosted.org/packages/13/97/b76979401f2373af1fe3e08f960b265cecab112e7dac803446fb98351a52/websockets-14.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c308dabd2b380807ab64b62985eaccf923a78ebc572bd485375b9ca2b7dc7", size = 160681 },
-    { url = "https://files.pythonhosted.org/packages/39/9c/16916d9a436c109a1d7ba78817e8fee357b78968be3f6e6f517f43afa43d/websockets-14.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5a42d3ecbb2db5080fc578314439b1d79eef71d323dc661aa616fb492436af5d", size = 163316 },
-    { url = "https://files.pythonhosted.org/packages/0f/57/50fd09848a80a1b63a572c610f230f8a17590ca47daf256eb28a0851df73/websockets-14.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ddaa4a390af911da6f680be8be4ff5aaf31c4c834c1a9147bc21cbcbca2d4370", size = 159633 },
-    { url = "https://files.pythonhosted.org/packages/d7/2f/db728b0c7962ad6a13ced8286325bf430b59722d943e7f6bdbd8a78e2bfe/websockets-14.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a4c805c6034206143fbabd2d259ec5e757f8b29d0a2f0bf3d2fe5d1f60147a4a", size = 159863 },
-    { url = "https://files.pythonhosted.org/packages/fa/e4/21e7481936fbfffee138edb488a6184eb3468b402a8181b95b9e44f6a676/websockets-14.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:205f672a6c2c671a86d33f6d47c9b35781a998728d2c7c2a3e1cf3333fcb62b7", size = 161119 },
-    { url = "https://files.pythonhosted.org/packages/64/2d/efb6cf716d4f9da60190756e06f8db2066faf1ae4a4a8657ab136dfcc7a8/websockets-14.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef440054124728cc49b01c33469de06755e5a7a4e83ef61934ad95fc327fbb0", size = 160724 },
-    { url = "https://files.pythonhosted.org/packages/40/b0/a70b972d853c3f26040834fcff3dd45c8a0292af9f5f0b36f9fbb82d5d44/websockets-14.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7591d6f440af7f73c4bd9404f3772bfee064e639d2b6cc8c94076e71b2471c1", size = 160676 },
-    { url = "https://files.pythonhosted.org/packages/4a/76/f9da7f97476cc7b8c74829bb4851f1faf660455839689ffcc354b52860a7/websockets-14.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:25225cc79cfebc95ba1d24cd3ab86aaa35bcd315d12fa4358939bd55e9bd74a5", size = 163311 },
-    { url = "https://files.pythonhosted.org/packages/b0/0b/c7e5d11020242984d9d37990310520ed663b942333b83a033c2f20191113/websockets-14.1-py3-none-any.whl", hash = "sha256:4d4fc827a20abe6d544a119896f6b78ee13fe81cbfef416f3f2ddf09a03f0e2e", size = 156277 },
-]
-
 [[package]]
 name = "widgetsnbextension"
 version = "4.0.13"
@@ -6611,43 +6636,10 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/21/02/88b65cc394961a60c43c70517066b6b679738caf78506a5da7b88ffcb643/widgetsnbextension-4.0.13-py3-none-any.whl", hash = "sha256:74b2692e8500525cc38c2b877236ba51d34541e6385eeed5aec15a70f88a6c71", size = 2335872 },
 ]
 
-[[package]]
-name = "wrapt"
-version = "1.16.0"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_version < '0'",
-    "python_full_version >= '3.13' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
-    "python_full_version < '3.11' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
-    "python_full_version == '3.11.*' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
-    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
-    "python_full_version >= '3.12.4' and python_full_version < '3.13' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/95/4c/063a912e20bcef7124e0df97282a8af3ff3e4b603ce84c481d6d7346be0a/wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", size = 53972 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/a8/c6/5375258add3777494671d8cec27cdf5402abd91016dee24aa2972c61fedf/wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4", size = 37315 },
-    { url = "https://files.pythonhosted.org/packages/32/12/e11adfde33444986135d8881b401e4de6cbb4cced046edc6b464e6ad7547/wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020", size = 38160 },
-    { url = "https://files.pythonhosted.org/packages/fd/03/c188ac517f402775b90d6f312955a5e53b866c964b32119f2ed76315697e/wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", size = 37313 },
-    { url = "https://files.pythonhosted.org/packages/0f/16/ea627d7817394db04518f62934a5de59874b587b792300991b3c347ff5e0/wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", size = 38164 },
-    { url = "https://files.pythonhosted.org/packages/92/17/224132494c1e23521868cdd57cd1e903f3b6a7ba6996b7b8f077ff8ac7fe/wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", size = 37614 },
-    { url = "https://files.pythonhosted.org/packages/6a/d7/cfcd73e8f4858079ac59d9db1ec5a1349bc486ae8e9ba55698cc1f4a1dff/wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", size = 38316 },
-    { url = "https://files.pythonhosted.org/packages/70/cc/b92e1da2cad6a9f8ee481000ece07a35e3b24e041e60ff8b850c079f0ebf/wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2", size = 37314 },
-    { url = "https://files.pythonhosted.org/packages/4a/cc/3402bcc897978be00fef608cd9e3e39ec8869c973feeb5e1e277670e5ad2/wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb", size = 38162 },
-    { url = "https://files.pythonhosted.org/packages/ff/21/abdedb4cdf6ff41ebf01a74087740a709e2edb146490e4d9beea054b0b7a/wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", size = 23362 },
-]
-
 [[package]]
 name = "wrapt"
 version = "1.17.2"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_version < '0'",
-    "(python_full_version >= '3.13' and platform_machine != 'x86_64') or (python_full_version >= '3.13' and sys_platform != 'darwin')",
-    "(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin')",
-    "(python_full_version == '3.11.*' and platform_machine != 'x86_64') or (python_full_version == '3.11.*' and sys_platform != 'darwin')",
-    "(python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine != 'x86_64') or (python_full_version >= '3.12' and python_full_version < '3.12.4' and sys_platform != 'darwin')",
-    "(python_full_version >= '3.12.4' and python_full_version < '3.13' and platform_machine != 'x86_64') or (python_full_version >= '3.12.4' and python_full_version < '3.13' and sys_platform != 'darwin')",
-]
 sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307 },

From 663f766040b4b1ddc65337dbd41a0061925e1e30 Mon Sep 17 00:00:00 2001
From: Anthony Powell <apowell@arize.com>
Date: Thu, 24 Apr 2025 20:25:30 -0400
Subject: [PATCH 04/12] fix: Allow scroll on settings pages (#7284)

---
 app/src/pages/settings/SettingsPage.tsx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/src/pages/settings/SettingsPage.tsx b/app/src/pages/settings/SettingsPage.tsx
index 0908306d6b..21aa6631f7 100644
--- a/app/src/pages/settings/SettingsPage.tsx
+++ b/app/src/pages/settings/SettingsPage.tsx
@@ -27,7 +27,6 @@ const settingsPageInnerCSS = css`
   width: 100%;
   margin-left: auto;
   margin-right: auto;
-  height: 100%;
 `;
 
 export function SettingsPage() {

From 854dc2e0498da58ac695cba17fee865c2f288d27 Mon Sep 17 00:00:00 2001
From: Mikyo King <mikyo@arize.com>
Date: Thu, 24 Apr 2025 18:37:09 -0600
Subject: [PATCH 05/12] chore(main): release arize-phoenix 8.27.1 (#7285)

---
 .release-please-manifest.json | 2 +-
 CHANGELOG.md                  | 7 +++++++
 src/phoenix/version.py        | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 613d0284cc..00f9c72232 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1 +1 @@
-{".":"8.27.0","packages/phoenix-evals":"0.20.6","packages/phoenix-otel":"0.9.2","packages/phoenix-client":"1.3.0"}
+{".":"8.27.1","packages/phoenix-evals":"0.20.6","packages/phoenix-otel":"0.9.2","packages/phoenix-client":"1.3.0"}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e7549c7a33..930295ee7d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [8.27.1](https://github.com/Arize-ai/phoenix/compare/arize-phoenix-v8.27.0...arize-phoenix-v8.27.1) (2025-04-25)
+
+
+### Bug Fixes
+
+* Allow scroll on settings pages ([#7284](https://github.com/Arize-ai/phoenix/issues/7284)) ([c25b071](https://github.com/Arize-ai/phoenix/commit/c25b07143b9c714b75e3d9655ca9db161542acb0))
+
 ## [8.27.0](https://github.com/Arize-ai/phoenix/compare/arize-phoenix-v8.26.3...arize-phoenix-v8.27.0) (2025-04-24)
 
 
diff --git a/src/phoenix/version.py b/src/phoenix/version.py
index bd322bdcdb..cb5321f854 100644
--- a/src/phoenix/version.py
+++ b/src/phoenix/version.py
@@ -1 +1 @@
-__version__ = "8.27.0"
+__version__ = "8.27.1"

From 1cb5914fc49702a4e527544f099f12982c36b1f1 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 24 Apr 2025 19:58:49 -0700
Subject: [PATCH 06/12] chore: update Phoenix version to 8.27.1 in Kustomize
 (#7286)

---
 kustomize/base/phoenix.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kustomize/base/phoenix.yaml b/kustomize/base/phoenix.yaml
index 81b9796d0b..e23d1413c3 100644
--- a/kustomize/base/phoenix.yaml
+++ b/kustomize/base/phoenix.yaml
@@ -28,7 +28,7 @@ spec:
                         value: /mnt/data
                       - name: PHOENIX_PORT
                         value: "6006"
-                  image: arizephoenix/phoenix:version-8.27.0
+                  image: arizephoenix/phoenix:version-8.27.1
                   name: phoenix
                   ports:
                       - containerPort: 6006

From 91b6d58ddbdc37b4982ea697a55e40400d745806 Mon Sep 17 00:00:00 2001
From: s-yeddula <syeddula@arize.com>
Date: Fri, 25 Apr 2025 17:22:39 -0700
Subject: [PATCH 07/12] update sessions notebook (#7293)

---
 .../tracing/openai_sessions_tutorial.ipynb    | 35 +++++++++++++------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/tutorials/tracing/openai_sessions_tutorial.ipynb b/tutorials/tracing/openai_sessions_tutorial.ipynb
index d12c607922..6cdba3d430 100644
--- a/tutorials/tracing/openai_sessions_tutorial.ipynb
+++ b/tutorials/tracing/openai_sessions_tutorial.ipynb
@@ -2,7 +2,9 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "4kPKrMTP_n_j"
+   },
    "source": [
     "<center>\n",
     "    <p style=\"text-align:center\">\n",
@@ -25,7 +27,9 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "9y1DA5uu_n_k"
+   },
    "source": [
     "## 1. Install Dependencies and Import Libraries\n",
     "\n",
@@ -43,7 +47,9 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "u9xTBlt__n_l"
+   },
    "source": [
     "## Configure Your OpenAI API Key and Instantiate Your OpenAI Client\n",
     "\n",
@@ -68,7 +74,9 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "hY0NliPg_n_l"
+   },
    "source": [
     "## Instrument Your OpenAI Client\n",
     "\n",
@@ -91,7 +99,9 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "MhVjxN_R_n_l"
+   },
    "source": [
     "## Run Phoenix in the Background\n",
     "\n",
@@ -106,12 +116,14 @@
    "source": [
     "import phoenix as px\n",
     "\n",
-    "px.launch_app()"
+    "px.launch_app().view()"
    ]
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "VoLsIY9o_n_m"
+   },
    "source": [
     "## Create a bare-bones Agent\n",
     "\n",
@@ -126,12 +138,10 @@
    "source": [
     "import uuid\n",
     "\n",
-    "import openai\n",
     "from openinference.instrumentation import using_session\n",
     "from openinference.semconv.trace import SpanAttributes\n",
     "from opentelemetry import trace\n",
     "\n",
-    "client = openai.Client()\n",
     "session_id = str(uuid.uuid4())\n",
     "\n",
     "tracer = trace.get_tracer(__name__)\n",
@@ -173,6 +183,11 @@
     "response = assistant(\n",
     "    messages,\n",
     "    session_id=session_id,\n",
+    ")\n",
+    "messages = messages + [response, {\"role\": \"user\", \"content\": \"what's 4+5?\"}]\n",
+    "response = assistant(\n",
+    "    messages,\n",
+    "    session_id=session_id,\n",
     ")"
    ]
   }
@@ -183,5 +198,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 0
 }

From 9ba32879ab89c812cf31f41efc93e17ba206b27a Mon Sep 17 00:00:00 2001
From: Mikyo King <mikyo@arize.com>
Date: Fri, 25 Apr 2025 19:31:33 -0600
Subject: [PATCH 08/12] docs(client): add general rules for the client (#7290)

* docs(client): add general rules for the client

* cleanup

* Update packages/phoenix-client/.cursor/rules/general.mdc

Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com>

* Update packages/phoenix-client/.cursor/rules/general.mdc

Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com>

* Update packages/phoenix-client/.cursor/rules/general.mdc

Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com>

---------

Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com>
---
 .../phoenix-client/.cursor/rules/general.mdc  | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 packages/phoenix-client/.cursor/rules/general.mdc

diff --git a/packages/phoenix-client/.cursor/rules/general.mdc b/packages/phoenix-client/.cursor/rules/general.mdc
new file mode 100644
index 0000000000..803a6fd5c7
--- /dev/null
+++ b/packages/phoenix-client/.cursor/rules/general.mdc
@@ -0,0 +1,67 @@
+---
+description: 
+globs: 
+alwaysApply: true
+---
+# General Client Design Guidelines
+
+## Dependencies
+
+The client should be as lightweight as possible as it is meant to be integrated into applications directly with no impact on the runtime. This means it should never depend on the core `phoenix` package and should only depend on things under the `phoenix.client` sub-module. The client must never depend on modules that are related to a server such as `starlette`, `sqlalchemy`, `pg` and so on. For libraries like `pandas`, implement lazy importing (importing within the specific function that requires it) rather than importing at the top-level.
+
+## Syntax
+
+All methods that interact with the server should be namespaced via `projects`, `prompts` and so on.
+
+All arguments to the methods MUST use `kwargs` so as to make the signature as self evident as possible.
+
+Do not do:
+
+```python
+client.prompts.get("prompt_version_id")
+```
+
+Prefer:
+
+```python
+client.prompts.get(prompt_version_id="prompt_version_id")
+```
+
+Methods should be prefixed with an action:
+
+- `get` - gets the entity. Correlates to an HTTP `GET` of a specific entity. E.g. `/projects/1`
+- `create` - makes a new entity. Correlates to HTTP `POST`
+- `list` - get a paginated list of an entity. E.g. `GET` a list `/projects`
+- `add` - attach an entity to another. E.g. `add_annotation` would be used to attach an annotation to a `span` or `trace`
+- `delete` - permanently delete an entity
+
+In addition, things can be sent to the platform in bulk.
+
+- `log` - associates a list of entities with something. E.g. `log_annotations` will send a list of annotations to a particular target such as a `span` or a `project`
+
+## Pandas
+
+The client should make affordances to push and pull data from the phoenix server via `pandas` DataFrames. For all bulk operations, the method should be postfixed with `dataframe` so as to make it clear that the input and output is a dataframe.
+
+For example:
+
+```python
+client.log_annotations_dataframe(dataframe=dataframe)
+df = client.get_spans_dataframe(project_name="default")
+```
+
+## Transport
+
+For all IO to the phoenix server, JSON or JSONL over HTTP should be preferred. This is so that clients in other languages can be created (e.g. `TypeScript`), LLMs can easily interpret the data (fine-tuning), and so that non-homogeneous data can be sent over the wire (e.g. `metadata` dictionaries).
+
+In the case that a different format is needed (e.g. `DataFrame` or `CSV`), the client should perform the translation (i.e. be a fat client) unless there is a more specific endpoint that supports that MIME type.
+
+For example:
+
+```python
+client.log_annotations(annotations=annotations)
+
+# Syntactic sugar to log annotations as a dataframe
+# Annotations are still sent over the wire as JSON
+client.log_annotations_dataframe(dataframe=df)
+```

From 1325c2151e167c78e2c848a00f13b50feff302b8 Mon Sep 17 00:00:00 2001
From: Priyan Jindal <pjindal@arize.com>
Date: Mon, 21 Apr 2025 22:31:02 +0000
Subject: [PATCH 09/12] docs: No subject (GITBOOK-1192)

---
 docs/prompt-engineering/overview-prompts/prompt-management.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/prompt-engineering/overview-prompts/prompt-management.md b/docs/prompt-engineering/overview-prompts/prompt-management.md
index 01a8eb8a59..7477d320d3 100644
--- a/docs/prompt-engineering/overview-prompts/prompt-management.md
+++ b/docs/prompt-engineering/overview-prompts/prompt-management.md
@@ -4,7 +4,7 @@ description: Version and track changes made to prompt templates
 
 # Prompt Management
 
-<figure><img src="https://storage.googleapis.com/arize-phoenix-assets/assets/images/prompt_management.gif" alt=""><figcaption><p>Iterate on prampts, ship prompts when they are tested</p></figcaption></figure>
+<figure><img src="https://storage.googleapis.com/arize-phoenix-assets/assets/images/prompt_management.gif" alt=""><figcaption><p>Iterate on prompts, ship prompts when they are tested</p></figcaption></figure>
 
 
 

From 85e0072f8609b7187d3f01c7d409f709d7e984b9 Mon Sep 17 00:00:00 2001
From: Sanjana Yeddula <syeddula@arize.com>
Date: Thu, 24 Apr 2025 18:46:31 +0000
Subject: [PATCH 10/12] docs: phoenix demo updates (GITBOOK-1191)

---
 docs/SUMMARY.md                               | 1 +
 docs/evaluation/llm-evals/agent-evaluation.md | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 47d309e203..2373383245 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -5,6 +5,7 @@
 * [User Guide](user-guide.md)
 * [Deployment](deployment.md)
 * [Environments](environments.md)
+* [Phoenix Demo](https://phoenix-demo.arize.com/projects)
 
 ## 🔭 Tracing
 
diff --git a/docs/evaluation/llm-evals/agent-evaluation.md b/docs/evaluation/llm-evals/agent-evaluation.md
index 40406e45bc..52ef3a2213 100644
--- a/docs/evaluation/llm-evals/agent-evaluation.md
+++ b/docs/evaluation/llm-evals/agent-evaluation.md
@@ -111,7 +111,7 @@ See our Agent Reflection evaluation template for a more specific example.
 
 See our [Agent Reflection evaluation template](../how-to-evals/running-pre-tested-evals/agent-reflection.md) for a specific example.
 
-## Putting it all together
+## Putting it all Together
 
 Through a combination of the evaluations above, you can get a far more accurate picture of how your agent is performing.
 

From edbc8c13baa99afaae5ac1020ecb56a193aaaeb9 Mon Sep 17 00:00:00 2001
From: Sebastian Sosa <1sebastian1sosa1@gmail.com>
Date: Sun, 27 Apr 2025 13:12:14 -0400
Subject: [PATCH 11/12] declarative eval formatting and linting

---
 .../src/phoenix/evals/declarative.py          | 165 +++++++++---------
 tests/unit/evals/test_declarative_eval.py     |  86 ++++-----
 2 files changed, 125 insertions(+), 126 deletions(-)

diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py
index 2abf58ae38..7fb911861a 100644
--- a/packages/phoenix-evals/src/phoenix/evals/declarative.py
+++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py
@@ -1,62 +1,30 @@
-
+import asyncio
+import json
 import time
-from phoenix.client.utils.template_formatters import MustacheBaseTemplateFormatter
-from phoenix.evals.models import BaseModel
-
-import inspect
-import logging
-import warnings
-from collections import defaultdict
-from enum import Enum
-from functools import wraps
-from itertools import product
 from typing import (
     Any,
-    Callable,
-    DefaultDict,
     Dict,
-    Iterable,
     List,
-    Mapping,
-    NamedTuple,
     Optional,
     Tuple,
-    TypeVar,
     Union,
 )
-import json
+
 import pandas as pd
-from pandas import DataFrame
-from typing_extensions import TypeAlias
+from openai import AsyncOpenAI, OpenAI
+from pydantic import BaseModel, Field, create_model
+from tqdm import tqdm
 
-from phoenix.evals.evaluators import LLMEvaluator
-from phoenix.evals.exceptions import PhoenixTemplateMappingError
-from phoenix.evals.executors import ExecutionStatus, get_executor_on_sync_context
-from phoenix.evals.models import OpenAIModel, set_verbosity
+from phoenix.client.utils.template_formatters import MustacheBaseTemplateFormatter
 from phoenix.evals.templates import (
-    ClassificationTemplate,
-    MultimodalPrompt,
-    PromptOptions,
-    PromptPartTemplate,
     PromptPartContentType,
-    PromptTemplate,
-    normalize_classification_template,
+    PromptPartTemplate,
 )
 from phoenix.evals.utils import (
-    NOT_PARSABLE,
     get_tqdm_progress_bar_formatter,
-    openai_function_call_kwargs,
-    parse_openai_function_call,
     printif,
-    snap_to_rail,
 )
-from pydantic import BaseModel, Field, create_model
-from typing import Union, List, Any, Optional, Callable
-import pandas as pd
-from openai import OpenAI, AsyncOpenAI
-from tqdm import tqdm
-import asyncio
-import aiohttp
+
 
 def transform_field_mappings_for_explanation(field_mappings: Dict[str, str]) -> Dict[str, str]:
     new_field_mappings = {}
@@ -66,11 +34,14 @@ def transform_field_mappings_for_explanation(field_mappings: Dict[str, str]) ->
     new_field_mappings["explanation"] = "explanation"
     return new_field_mappings
 
+
 async def declarative_eval(
     data: Union[pd.DataFrame, List[Any]],
     model: Union[OpenAI, AsyncOpenAI],
     schema: BaseModel,  # Pydantic model class
-    field_mappings: Dict[str, str], # key is the openinference target field value, value is the path to the field in the schema
+    field_mappings: Dict[
+        str, str
+    ],  # key is the openinference target field value, value is the path to the field in the schema
     system_instruction: Optional[str] = None,
     verbose: bool = False,
     include_prompt: bool = False,
@@ -99,14 +70,13 @@ async def declarative_eval(
         ```
         {{output}}
         ```
-        """
+        """,
     )
 
-    labels: Iterable[Optional[str]] = [None] * len(data)
-    explanations: Iterable[Optional[str]] = [None] * len(data)
-    scores: Iterable[Optional[float]] = [None] * len(data)
-
-    default_system_instruction = "You will be provided the input passed to the llm and the generated output data to evaluate according to the specified schema."
+    default_system_instruction = """
+    You will be provided the input passed to the llm
+    and the generated output data to evaluate according to the specified schema.
+    """
 
     # Convert data to consistent format
     if isinstance(data, pd.DataFrame):
@@ -116,7 +86,6 @@ async def declarative_eval(
         dataframe = pd.DataFrame(data)
         dataframe_index = dataframe.index
 
-    
     if provide_explanation:
         # Update the schema
         ExplainedSchema = create_model(
@@ -129,42 +98,55 @@ async def declarative_eval(
         # Update the field mappings
         field_mappings = transform_field_mappings_for_explanation(field_mappings)
 
-
-        
     def _map_template(data: pd.Series) -> str:
-        output_str = formatter.format(template.template, variables={
+        output_str = formatter.format(
+            template.template,
+            variables={
                 "input": json.dumps(data["attributes.llm.input_messages"]).replace("\\", "\\\\"),
-                "output": json.dumps(data["attributes.llm.output_messages"]).replace("\\", "\\\\")
-            }
+                "output": json.dumps(data["attributes.llm.output_messages"]).replace("\\", "\\\\"),
+            },
         )
         return output_str
 
-    async def _run_llm_eval_async(row_data: Tuple[int, pd.Series]) -> Tuple[pd.Series, Dict[str, Any], Optional[str], float]:
+    async def _run_llm_eval_async(
+        row_data: Tuple[int, pd.Series],
+    ) -> Tuple[pd.Series, Dict[str, Any], Optional[str], float]:
         # Guard clause
         if type(model) is OpenAI:
             raise ValueError("OpenAI is not supported for async operations")
         idx, row = row_data
 
-        # Handle async request        
-        async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]:
+        # Handle async request
+        async def _make_request(
+            idx: int, row: pd.Series
+        ) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]:
             try:
                 start_time = time.time()
                 response = await model.beta.chat.completions.parse(
                     model="gpt-4o-2024-08-06",
                     messages=[
-                        {"role": "system", "content": system_instruction or default_system_instruction},
-                        {"role": "user", "content": _map_template(row)}
+                        {
+                            "role": "system",
+                            "content": system_instruction or default_system_instruction,
+                        },
+                        {"role": "user", "content": _map_template(row)},
                     ],
                     response_format=schema,
                 )
                 parsed_response = response.choices[0].message.parsed
                 end_time = time.time()
                 execution_seconds = end_time - start_time
-                printif(verbose, f"\n\nIndex: {idx}\nExecution time: {execution_seconds} s\nStructured output: {parsed_response.model_dump_json(indent=2)}\n\n")
+                printif(
+                    verbose,
+                    f"""\n\nIndex: {idx}
+                    Execution time: {execution_seconds} s
+                    Structured output: {parsed_response.model_dump_json(indent=2)}
+                    \n\n""",
+                )
                 return idx, row, parsed_response, None, execution_seconds
             except Exception as e:
                 return idx, row, None, str(e), 0
-        
+
         result = await _make_request(idx, row)
 
         # # create tasks
@@ -178,23 +160,28 @@ async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseM
         #         idx, row, parsed_response, error, execution_seconds = await coro
         #         results[idx] = (row, parsed_response, error, execution_seconds)
         #         pbar.update(1)
-        
+
         return result
-    
 
     def _run_llm_eval_sync(row_data: Tuple[int, pd.Series]) -> Tuple[pd.Series, Dict[str, Any]]:
         if type(model) is AsyncOpenAI:
             raise ValueError("AsyncOpenAI is not supported for sync operations")
-        
+
         idx, row = row_data
-        def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]:
+
+        def _make_request(
+            idx: int, row: pd.Series
+        ) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]:
             try:
                 start_time = time.time()
                 response = model.beta.chat.completions.parse(
                     model="gpt-4o-2024-08-06",
                     messages=[
-                        {"role": "system", "content": system_instruction or default_system_instruction},
-                        {"role": "user", "content": _map_template(row)}
+                        {
+                            "role": "system",
+                            "content": system_instruction or default_system_instruction,
+                        },
+                        {"role": "user", "content": _map_template(row)},
                     ],
                     response_format=schema,
                 )
@@ -204,15 +191,16 @@ def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel,
                 return idx, row, parsed_response, None, execution_seconds
             except Exception as e:
                 return idx, row, None, str(e), 0
+
         result = _make_request(idx, row)
         # results = [None] * len(dataframe)
         # for idx, (_, row) in enumerate(dataframe.iterrows()):
         #     idx, row, parsed_response, error, execution_seconds = _make_request(idx, row)
         #     results[idx] = (row, parsed_response, error, execution_seconds)
         return result
-    
+
     def _get_nested_value(obj: Dict[str, Any], path: str) -> Any:
-        parts = path.split('.')
+        parts = path.split(".")
         current = obj
         for part in parts:
             if part in current:
@@ -220,8 +208,10 @@ def _get_nested_value(obj: Dict[str, Any], path: str) -> Any:
             else:
                 return None
         return current
-    
-    def _extract_data_using_field_mappings(result: Tuple[pd.Series, BaseModel, Optional[str], float]) -> Dict[str, Any]:
+
+    def _extract_data_using_field_mappings(
+        result: Tuple[pd.Series, BaseModel, Optional[str], float],
+    ) -> Dict[str, Any]:
         row, parsed_response, error, execution_seconds = result
         results_data = {}
         results_data["execution_seconds"] = execution_seconds
@@ -234,19 +224,25 @@ def _extract_data_using_field_mappings(result: Tuple[pd.Series, BaseModel, Optio
             for schema_field, object_path in field_mappings.items():
                 json_schema_object = parsed_response.model_dump()
                 results_data[schema_field] = _get_nested_value(json_schema_object, object_path)
-        
+
         return results_data
-    
-    def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], float]]) -> List[Tuple[pd.Series, Dict[str, Any]]]:
+
+    def _parse_results(
+        results: List[Tuple[pd.Series, BaseModel, Optional[str], float]],
+    ) -> List[Tuple[pd.Series, Dict[str, Any]]]:
         results_data = []
         for result in results:
             _idx, row, model_response, error, execution_seconds = result
-            results_data.append((result[0], _extract_data_using_field_mappings(
-                (row, model_response, error, execution_seconds)
-            )))
+            results_data.append(
+                (
+                    result[0],
+                    _extract_data_using_field_mappings(
+                        (row, model_response, error, execution_seconds)
+                    ),
+                )
+            )
         return results_data
 
-
     # # USING EXECUTOR (cannot be used without acceptable model)
     # fallback_return_value = (pd.Series(), {}, None, 0)
     # executor = get_executor_on_sync_context(
@@ -258,7 +254,7 @@ def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], floa
     #     max_retries=max_retries,
     #     exit_on_error=exit_on_error,
     #     fallback_return_value=fallback_return_value,
-    # )    
+    # )
 
     # inputs = [
     #     row for _, row in dataframe.iterrows()
@@ -269,16 +265,14 @@ def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], floa
     # print("results", results)
     # print("execution_details", execution_details)
 
-    inputs = [
-        (idx, row) for idx, row in dataframe.iterrows()
-    ]
+    inputs = [(idx, row) for idx, row in dataframe.iterrows()]
     results = []
     with tqdm(total=len(inputs), desc="Running Declarative Evaluations") as pbar:
         tasks = []
         for input in inputs:
             task = _run_llm_eval_async(input)
             tasks.append(task)
-        
+
         for task in asyncio.as_completed(tasks):
             result = await task
             results.append(result)
@@ -286,9 +280,6 @@ def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], floa
     # results = _run_llm_eval_sync()
     results_data = _parse_results(results)
 
-
-    
-
     rows = []
     outcome_results = []
     for result in results_data:
@@ -305,5 +296,5 @@ def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], floa
         data=key_centric_results,
         index=dataframe_index,
     )
-    
+
     return results_data
diff --git a/tests/unit/evals/test_declarative_eval.py b/tests/unit/evals/test_declarative_eval.py
index fbb4a723bc..7a23ddbc2f 100644
--- a/tests/unit/evals/test_declarative_eval.py
+++ b/tests/unit/evals/test_declarative_eval.py
@@ -1,40 +1,53 @@
 """
 Test Declarative Eval
 """
-import asyncio
+
+from typing import Dict, Literal
+from unittest.mock import AsyncMock, MagicMock
+
 import pandas as pd
 import pytest
-from typing import Dict, Any, List, Literal
-from unittest.mock import AsyncMock, MagicMock, patch
-
 from pydantic import BaseModel, Field
+
 from phoenix.evals import declarative_eval, transform_field_mappings_for_explanation
 
 
 class Conciseness(BaseModel):
     is_concise: bool = Field(..., description="Whether the output is concise")
+
+
 class Formatting(BaseModel):
-    language: Literal["High", "Average", "Low"] = Field(..., description="The complexity of the formatting used in the output")
+    language: Literal["High", "Average", "Low"] = Field(
+        ..., description="The complexity of the formatting used in the output"
+    )
+
+
 class Schema(BaseModel):
     conciseness: Conciseness = Field(..., description="A custom evaluation of the output")
     formatting: Formatting = Field(..., description="A custom evaluation of the output")
+
+
 class SchemaWithExplanation(BaseModel):
     schema: Schema = Field(..., description="The schema to evaluate")
     explanation: str = Field(..., description="An explanation of the evaluation")
 
+
 @pytest.fixture
 def sample_dataframe() -> pd.DataFrame:
     """Sample dataframe to simulate ArizeExportClient(...).export_model_to_df(...)"""
-    return pd.DataFrame({
-        "attributes.llm.input_messages": [
-            [{"role": "user", "content": "What is 2+2?"}],
-            [{"role": "user", "content": "Who was the first president?"}],
-        ],
-        "attributes.llm.output_messages": [
-            [{"role": "assistant", "content": "4"}],
-            [{"role": "assistant", "content": "George Washington"}],
-        ]
-    })
+    return pd.DataFrame(
+        {
+            "attributes.llm.input_messages": [
+                [{"role": "user", "content": "What is 2+2?"}],
+                [{"role": "user", "content": "Who was the first president?"}],
+            ],
+            "attributes.llm.output_messages": [
+                [{"role": "assistant", "content": "4"}],
+                [{"role": "assistant", "content": "George Washington"}],
+            ],
+        }
+    )
+
 
 @pytest.fixture
 def correct_field_mappings() -> Dict[str, str]:
@@ -44,6 +57,7 @@ def correct_field_mappings() -> Dict[str, str]:
         "formatting.label": "formatting.language",
     }
 
+
 @pytest.fixture
 def incorrect_field_mappings() -> Dict[str, str]:
     """Incorrect field mappings for the Schema"""
@@ -52,6 +66,7 @@ def incorrect_field_mappings() -> Dict[str, str]:
         "formatting.label": "formatting.not_language",
     }
 
+
 @pytest.fixture
 def mock_parse_responses():
     """Mock responses for two consecutive OpenAI parse API calls."""
@@ -61,12 +76,12 @@ def mock_parse_responses():
         mock_resp.choices = [MagicMock()]
         mock_resp.choices[0].message = MagicMock()
         mock_resp.choices[0].message.parsed = Schema(
-            conciseness=Conciseness(is_concise=True),
-            formatting=Formatting(language="High")
+            conciseness=Conciseness(is_concise=True), formatting=Formatting(language="High")
         )
         responses.append(mock_resp)
     return responses
 
+
 @pytest.fixture
 def mock_client(mock_parse_responses):
     """Mock OpenAI client with predefined responses."""
@@ -74,15 +89,16 @@ def mock_client(mock_parse_responses):
     mock_client.beta = MagicMock()
     mock_client.beta.chat = MagicMock()
     mock_client.beta.chat.completions = MagicMock()
-    
+
     # Set up the async mock to return different responses for each call
     mock_parse = AsyncMock()
     # Use side_effect to return a different response for each call
     mock_parse.side_effect = mock_parse_responses
     mock_client.beta.chat.completions.parse = mock_parse
-    
+
     return mock_client
 
+
 @pytest.fixture
 def mock_parse_responses_with_explanation():
     """Mock responses for two consecutive OpenAI parse API calls."""
@@ -93,14 +109,14 @@ def mock_parse_responses_with_explanation():
         mock_resp.choices[0].message = MagicMock()
         mock_resp.choices[0].message.parsed = SchemaWithExplanation(
             schema=Schema(
-                conciseness=Conciseness(is_concise=True),
-                formatting=Formatting(language="High")
+                conciseness=Conciseness(is_concise=True), formatting=Formatting(language="High")
             ),
-            explanation="Explanation"
+            explanation="Explanation",
         )
         responses.append(mock_resp)
     return responses
 
+
 @pytest.fixture
 def mock_client_with_explanation(mock_parse_responses_with_explanation):
     """Mock OpenAI client with predefined responses."""
@@ -108,23 +124,19 @@ def mock_client_with_explanation(mock_parse_responses_with_explanation):
     mock_client.beta = MagicMock()
     mock_client.beta.chat = MagicMock()
     mock_client.beta.chat.completions = MagicMock()
-    
+
     # Set up the async mock to return different responses for each call
     mock_parse = AsyncMock()
     # Use side_effect to return a different response for each call
     mock_parse.side_effect = mock_parse_responses_with_explanation
     mock_client.beta.chat.completions.parse = mock_parse
-    
-    return mock_client
-
 
+    return mock_client
 
 
 @pytest.mark.asyncio
 async def test_declarative_eval_correct_field_mappings(
-    sample_dataframe,
-    correct_field_mappings,
-    mock_client
+    sample_dataframe, correct_field_mappings, mock_client
 ):
     """Test declarative_eval with correct field mappings."""
     result = await declarative_eval(
@@ -143,9 +155,7 @@ async def test_declarative_eval_correct_field_mappings(
 
 @pytest.mark.asyncio
 async def test_declarative_eval_incorrect_field_mappings(
-    sample_dataframe,
-    incorrect_field_mappings,
-    mock_client
+    sample_dataframe, incorrect_field_mappings, mock_client
 ):
     """Test declarative_eval with correct field mappings."""
     result = await declarative_eval(
@@ -161,11 +171,10 @@ async def test_declarative_eval_incorrect_field_mappings(
     for fm_key in fm_keys:
         assert result[fm_key].tolist() == [None, None]
 
+
 @pytest.mark.asyncio
 async def test_declarative_eval_with_explanation(
-    sample_dataframe,
-    correct_field_mappings,
-    mock_client_with_explanation
+    sample_dataframe, correct_field_mappings, mock_client_with_explanation
 ):
     """Test declarative_eval with explanations."""
     result = await declarative_eval(
@@ -177,19 +186,18 @@ async def test_declarative_eval_with_explanation(
     )
 
     pre_transform_fm = correct_field_mappings
-    pre_transform_fm_keys = set(pre_transform_fm.keys())
     pre_transform_fm_values = set(pre_transform_fm.values())
-    
+
     print(f"pre_transform_fm: {pre_transform_fm}")
     correct_field_mappings = transform_field_mappings_for_explanation(correct_field_mappings)
     print(f"correct_field_mappings: {correct_field_mappings}")
     fm_keys = set(correct_field_mappings.keys())
     fm_values = set(correct_field_mappings.values())
-    
+
     assert isinstance(result, pd.DataFrame)
     assert result.shape[0] == 2
     assert fm_keys.issubset(set(result.columns.tolist()))
     assert pre_transform_fm_values.isdisjoint(fm_values)
     assert result["conciseness.label"].tolist() == [True, True]
     assert result["formatting.label"].tolist() == ["High", "High"]
-    assert result["explanation"].tolist() == ["Explanation", "Explanation"]
\ No newline at end of file
+    assert result["explanation"].tolist() == ["Explanation", "Explanation"]

From ad06fb1b4500308a9ad86c6e3da2bf66798ecc06 Mon Sep 17 00:00:00 2001
From: Sebastian Sosa <1sebastian1sosa1@gmail.com>
Date: Sun, 27 Apr 2025 14:44:38 -0400
Subject: [PATCH 12/12] declarative eval and util docs

---
 .../src/phoenix/evals/declarative.py          | 128 ++++++++++++++++--
 1 file changed, 118 insertions(+), 10 deletions(-)

diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py
index 7fb911861a..6d070d66c3 100644
--- a/packages/phoenix-evals/src/phoenix/evals/declarative.py
+++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py
@@ -27,6 +27,25 @@
 
 
 def transform_field_mappings_for_explanation(field_mappings: Dict[str, str]) -> Dict[str, str]:
+    """
+    Transforms field mappings to work with a schema that includes an explanation field.
+
+    This function takes field mappings that point to fields within a schema and transforms them
+    to work with a schema that wraps the original schema and adds an explanation field.
+
+    Args:
+        field_mappings (Dict[str, str]): A dictionary mapping target field names to paths
+            within the original schema. For example, {"conciseness.label": "conciseness.is_concise"}.
+
+    Returns:
+        Dict[str, str]: A new dictionary with transformed field mappings where each value is
+            prefixed with "schema." and an additional "explanation" mapping is added.
+
+    Example:
+        >>> field_mappings = {"conciseness.label": "conciseness.is_concise"}
+        >>> transform_field_mappings_for_explanation(field_mappings)
+        {'conciseness.label': 'schema.conciseness.is_concise', 'explanation': 'explanation'}
+    """  # noqa: E501
     new_field_mappings = {}
     for key, value in field_mappings.items():
         new_field_mappings[key] = f"schema.{value}"
@@ -42,7 +61,7 @@ async def declarative_eval(
     field_mappings: Dict[
         str, str
     ],  # key is the openinference target field value, value is the path to the field in the schema
-    system_instruction: Optional[str] = None,
+    system_instruction: str = "You will be provided the input passed to the llm and the generated output data to evaluate according to the specified schema.",  # noqa: E501
     verbose: bool = False,
     include_prompt: bool = False,
     include_response: bool = False,
@@ -55,8 +74,102 @@ async def declarative_eval(
     progress_bar_format: Optional[str] = get_tqdm_progress_bar_formatter("llm_classify"),
 ) -> pd.DataFrame:
     """
-    Evaluates data using an LLM with a Pydantic schema to structure the output.
-    """
+    Evaluates data using a declarative schema with an LLM.
+
+    This function evaluates each row of the input data using a declarative schema with an LLM.
+    It returns a pandas DataFrame with the evaluation results mapped according to the provided field mappings.
+
+    Args:
+        data (Union[pd.DataFrame, List[Any]]): A collection of data to evaluate. It must provide
+            the columns "attributes.llm.input_messages" and "attributes.llm.output_messages",
+            which are JSON-encoded into the prompt template's "input" and "output" variables.
+
+        model (Union[OpenAI, AsyncOpenAI]): An OpenAI client instance to use for evaluation. Only AsyncOpenAI is currently supported; passing a synchronous OpenAI client raises ValueError.
+
+        schema (BaseModel): A Pydantic model class defining the evaluation schema.
+
+        field_mappings (Dict[str, str]): A dictionary mapping target field names to paths
+            within the schema. For example, {"conciseness.label": "conciseness.is_concise"}.
+
+        system_instruction (str): A system message to guide the evaluation, defaults to
+            "You will be provided the input passed to the llm and the generated output data to evaluate according to the specified schema.".
+
+        verbose (bool): If True, prints detailed information during evaluation. Default is False.
+
+        include_prompt (bool): Not currently used.
+
+        include_response (bool): Not currently used.
+
+        include_exceptions (bool): Not currently used.
+
+        provide_explanation (bool): If True, adds an explanation field to the schema and output. Default is False.
+
+        max_retries (int): Not currently used.
+
+        exit_on_error (bool): Not currently used.
+
+        run_sync (bool): Not currently used.
+
+        concurrency (Optional[int]): Not currently used.
+
+        progress_bar_format (Optional[str]): Not currently used; the progress bar is always shown with tqdm's default format.
+
+    Returns:
+        pd.DataFrame: A DataFrame containing the evaluation results with columns mapped according
+            to the field_mappings parameter along with the execution time and any exceptions.
+            The DataFrame has the same length and index as the input data.
+
+    Raises:
+        ValueError: If model is a synchronous OpenAI client (requests are awaited, so only AsyncOpenAI is supported). Field-mapping paths that do not resolve yield None rather than raising.
+
+    Example:
+        ```python
+        # Define a schema with nested models
+        class Conciseness(BaseModel):
+            is_concise: bool = Field(..., description="Whether the output is concise")
+
+        class Formatting(BaseModel):
+            language: Literal["High", "Average", "Low"] = Field(
+                ..., description="The complexity of the formatting used in the output"
+            )
+
+        class Schema(BaseModel):
+            conciseness: Conciseness = Field(..., description="A custom evaluation of the output")
+            formatting: Formatting = Field(..., description="A custom evaluation of the output")
+
+        # Prepare sample data
+        data = pd.DataFrame({
+            "attributes.llm.input_messages": [
+                [{"role": "user", "content": "What is 2+2?"}],
+                [{"role": "user", "content": "Who was the first president?"}],
+            ],
+            "attributes.llm.output_messages": [
+                [{"role": "assistant", "content": "Whenever you add those two numbers, you get 4"}],
+                [{"role": "assistant", "content": "George Washington"}],
+            ],
+        })
+
+        # Define field mappings
+        field_mappings = {
+            "conciseness.label": "conciseness.is_concise",
+            "formatting.label": "formatting.language",
+        }
+
+        # Run the evaluation
+        result = await declarative_eval(
+            data=data,
+            model=openai_client,
+            schema=Schema,
+            field_mappings=field_mappings,
+        )
+
+        # Result will be a DataFrame with columns:
+        # - conciseness.label (containing boolean values)
+        # - formatting.label (containing "High", "Average", or "Low")
+        # - execution_seconds (execution time)
+        # - exceptions (any errors encountered)
+        ```
+    """  # noqa: E501
 
     formatter = MustacheBaseTemplateFormatter()
     template = PromptPartTemplate(
@@ -73,11 +186,6 @@ async def declarative_eval(
         """,
     )
 
-    default_system_instruction = """
-    You will be provided the input passed to the llm
-    and the generated output data to evaluate according to the specified schema.
-    """
-
     # Convert data to consistent format
     if isinstance(data, pd.DataFrame):
         dataframe = data
@@ -127,7 +235,7 @@ async def _make_request(
                     messages=[
                         {
                             "role": "system",
-                            "content": system_instruction or default_system_instruction,
+                            "content": system_instruction,
                         },
                         {"role": "user", "content": _map_template(row)},
                     ],
@@ -179,7 +287,7 @@ def _make_request(
                     messages=[
                         {
                             "role": "system",
-                            "content": system_instruction or default_system_instruction,
+                            "content": system_instruction,
                         },
                         {"role": "user", "content": _map_template(row)},
                     ],