From 3b0ad5c802f0c660bc8fe32028b8e1b98ae363af Mon Sep 17 00:00:00 2001 From: Sebastian Sosa <1sebastian1sosa1@gmail.com> Date: Sat, 26 Apr 2025 02:53:07 -0400 Subject: [PATCH 01/12] pre dynamic executor --- .../src/phoenix/evals/__init__.py | 2 + .../src/phoenix/evals/declarative.py | 258 ++++++++++++++++++ 2 files changed, 260 insertions(+) create mode 100644 packages/phoenix-evals/src/phoenix/evals/declarative.py diff --git a/packages/phoenix-evals/src/phoenix/evals/__init__.py b/packages/phoenix-evals/src/phoenix/evals/__init__.py index 72691112d6..1240e63cee 100644 --- a/packages/phoenix-evals/src/phoenix/evals/__init__.py +++ b/packages/phoenix-evals/src/phoenix/evals/__init__.py @@ -1,4 +1,5 @@ from .classify import llm_classify, run_evals +from .declarative import declarative_eval from .default_templates import ( CODE_FUNCTIONALITY_PROMPT_RAILS_MAP, CODE_FUNCTIONALITY_PROMPT_TEMPLATE, @@ -92,6 +93,7 @@ "TOOL_CALLING_PROMPT_RAILS_MAP", "NOT_PARSABLE", "run_evals", + "declarative_eval", "LLMEvaluator", "HallucinationEvaluator", "QAEvaluator", diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py new file mode 100644 index 0000000000..1f5184d2b6 --- /dev/null +++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py @@ -0,0 +1,258 @@ +import time +from phoenix.client.utils.template_formatters import MustacheBaseTemplateFormatter +from phoenix.evals.models import BaseModel + +import inspect +import logging +import warnings +from collections import defaultdict +from enum import Enum +from functools import wraps +from itertools import product +from typing import ( + Any, + Callable, + DefaultDict, + Dict, + Iterable, + List, + Mapping, + NamedTuple, + Optional, + Tuple, + TypeVar, + Union, +) +import json +import pandas as pd +from pandas import DataFrame +from typing_extensions import TypeAlias + +from phoenix.evals.evaluators import LLMEvaluator +from phoenix.evals.exceptions import 
PhoenixTemplateMappingError +from phoenix.evals.executors import ExecutionStatus, get_executor_on_sync_context +from phoenix.evals.models import OpenAIModel, set_verbosity +from phoenix.evals.templates import ( + ClassificationTemplate, + MultimodalPrompt, + PromptOptions, + PromptPartTemplate, + PromptPartContentType, + PromptTemplate, + normalize_classification_template, +) +from phoenix.evals.utils import ( + NOT_PARSABLE, + get_tqdm_progress_bar_formatter, + openai_function_call_kwargs, + parse_openai_function_call, + printif, + snap_to_rail, +) +from pydantic import BaseModel, Field, create_model +from typing import Union, List, Any, Optional, Callable +import pandas as pd +from openai import OpenAI, AsyncOpenAI +from tqdm import tqdm +import asyncio +import aiohttp + +async def declarative_eval( + data: Union[pd.DataFrame, List[Any]], + model: Union[OpenAI, AsyncOpenAI], + schema: BaseModel, # Pydantic model class + field_mappings: Dict[str, str], # key is the openinference target field value, value is the path to the field in the schema + system_instruction: Optional[str] = None, + verbose: bool = False, + include_prompt: bool = False, + include_response: bool = False, + include_exceptions: bool = False, + provide_explanation: bool = False, + max_retries: int = 10, + exit_on_error: bool = True, + run_sync: bool = False, + concurrency: Optional[int] = None, + progress_bar_format: Optional[str] = get_tqdm_progress_bar_formatter("declarative_eval"), +) -> pd.DataFrame: + """ + Evaluates data using an LLM with a Pydantic schema to structure the output. 
+ """ + + formatter = MustacheBaseTemplateFormatter() + template = PromptPartTemplate( + content_type=PromptPartContentType.TEXT, + template="""an input and output pair passed to an LLM + INPUT MESSAGES: + ``` + {{input}} + ``` + OUTPUT MESSAGE: + ``` + {{output}} + ``` + """ + ) + + labels: Iterable[Optional[str]] = [None] * len(data) + explanations: Iterable[Optional[str]] = [None] * len(data) + scores: Iterable[Optional[float]] = [None] * len(data) + + default_system_instruction = "You will be provided the input passed to the llm and the generated output data to evaluate according to the specified schema." + + # Convert data to consistent format + if isinstance(data, pd.DataFrame): + dataframe = data + dataframe_index = data.index + else: + dataframe = pd.DataFrame(data) + dataframe_index = dataframe.index + + + if provide_explanation: + # Update the schema + ExplainedSchema = create_model( + "ExplainedSchema", + schema=(schema, Field(..., description="The schema to evaluate")), + explanation=(str, Field(..., description="An explanation of the evaluation")), + ) + schema = ExplainedSchema + + # Update the field mappings + new_field_mappings = {} + for key, value in field_mappings.items(): + new_field_mappings[key] = f"schema.{value}" + # Override the explanation field mapping + new_field_mappings["explanation"] = "explanation" + # Update the field mappings + field_mappings = new_field_mappings + + print("field_mappings", field_mappings) + + + def _map_template(data: pd.Series) -> str: + output_str = formatter.format(template.template, variables={ + "input": json.dumps(data["attributes.llm.input_messages"]).replace("\\", "\\\\"), + "output": json.dumps(data["attributes.llm.output_messages"]).replace("\\", "\\\\") + } + ) + return output_str + + async def _run_llm_eval_async() -> List[Tuple[pd.Series, Dict[str, Any], Optional[str]]]: + if type(model) is OpenAI: + raise ValueError("OpenAI is not supported for async operations") + # Handle async request + async def 
_make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]: + try: + start_time = time.time() + response = await model.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": system_instruction or default_system_instruction}, + {"role": "user", "content": _map_template(row)} + ], + response_format=schema, + ) + parsed_response = response.choices[0].message.parsed + end_time = time.time() + execution_seconds = end_time - start_time + return idx, row, parsed_response, None, execution_seconds + except Exception as e: + return idx, row, None, str(e), 0 + + # create tasks + tasks = [] + for idx, (_, row) in enumerate(dataframe.iterrows()): + tasks.append(_make_request(idx, row)) + + results = [None] * len(tasks) + with tqdm(total=len(tasks), desc="Running Declarative Evaluations") as pbar: + for coro in asyncio.as_completed(tasks): + idx, row, parsed_response, error, execution_seconds = await coro + results[idx] = (row, parsed_response, error, execution_seconds) + pbar.update(1) + + return results + + + def _run_llm_eval_sync() -> List[Tuple[pd.Series, Dict[str, Any]]]: + if type(model) is AsyncOpenAI: + raise ValueError("AsyncOpenAI is not supported for sync operations") + def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]: + try: + start_time = time.time() + response = model.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": system_instruction or default_system_instruction}, + {"role": "user", "content": _map_template(row)} + ], + response_format=schema, + ) + parsed_response = response.choices[0].message.parsed + end_time = time.time() + execution_seconds = end_time - start_time + return idx, row, parsed_response, None, execution_seconds + except Exception as e: + return idx, row, None, str(e), 0 + results = [None] * len(dataframe) + for idx, (_, row) in 
enumerate(dataframe.iterrows()): + idx, row, parsed_response, error, execution_seconds = _make_request(idx, row) + results[idx] = (row, parsed_response, error, execution_seconds) + return results + + def _get_nested_value(obj: Dict[str, Any], path: str) -> Any: + parts = path.split('.') + current = obj + for part in parts: + if part in current: + current = current[part] + else: + return None + return current + + def _extract_data_using_field_mappings(result: Tuple[pd.Series, BaseModel, Optional[str], float]) -> Dict[str, Any]: + row, parsed_response, error, execution_seconds = result + results_data = {} + results_data["execution_seconds"] = execution_seconds + results_data["exceptions"] = [] + if error: + results_data["exceptions"].append(error) + for schema_field, _ in field_mappings.items(): + results_data[schema_field] = None + else: + for schema_field, object_path in field_mappings.items(): + json_schema_object = parsed_response.model_dump() + results_data[schema_field] = _get_nested_value(json_schema_object, object_path) + + return results_data + + def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], float]]) -> List[Tuple[pd.Series, Dict[str, Any]]]: + results_data = [] + for result in results: + results_data.append((result[0], _extract_data_using_field_mappings(result))) + return results_data + + + + results = await _run_llm_eval_async() + # results = _run_llm_eval_sync() + results_data = _parse_results(results) + + rows = [] + outcome_results = [] + for result in results_data: + rows.append(result[0]) + outcome_results.append(result[1]) + + # transform results from item centric to field centric + key_centric_results = {} + for field_name, _ in outcome_results[0].items(): + field_values = [result.get(field_name) for result in outcome_results] + if field_values: + key_centric_results[field_name] = field_values + return pd.DataFrame( + data=key_centric_results, + index=dataframe_index, + ) + + return results_data From 
be26c84ecfd5d796500b2ba7ee1899e5fce16253 Mon Sep 17 00:00:00 2001 From: Sebastian Sosa <1sebastian1sosa1@gmail.com> Date: Sat, 26 Apr 2025 03:39:38 -0400 Subject: [PATCH 02/12] prepare for executor integration & logging & cleanup --- .../src/phoenix/evals/declarative.py | 98 ++++++++++++++----- 1 file changed, 73 insertions(+), 25 deletions(-) diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py index 1f5184d2b6..590e69a9d0 100644 --- a/packages/phoenix-evals/src/phoenix/evals/declarative.py +++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py @@ -72,7 +72,7 @@ async def declarative_eval( exit_on_error: bool = True, run_sync: bool = False, concurrency: Optional[int] = None, - progress_bar_format: Optional[str] = get_tqdm_progress_bar_formatter("declarative_eval"), + progress_bar_format: Optional[str] = get_tqdm_progress_bar_formatter("llm_classify"), ) -> pd.DataFrame: """ Evaluates data using an LLM with a Pydantic schema to structure the output. 
@@ -126,7 +126,6 @@ async def declarative_eval( # Update the field mappings field_mappings = new_field_mappings - print("field_mappings", field_mappings) def _map_template(data: pd.Series) -> str: @@ -137,9 +136,12 @@ def _map_template(data: pd.Series) -> str: ) return output_str - async def _run_llm_eval_async() -> List[Tuple[pd.Series, Dict[str, Any], Optional[str]]]: + async def _run_llm_eval_async(row_data: Tuple[int, pd.Series]) -> Tuple[pd.Series, Dict[str, Any], Optional[str], float]: + # Guard clause if type(model) is OpenAI: raise ValueError("OpenAI is not supported for async operations") + idx, row = row_data + # Handle async request async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]: try: @@ -155,28 +157,33 @@ async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseM parsed_response = response.choices[0].message.parsed end_time = time.time() execution_seconds = end_time - start_time + printif(verbose, f"\n\nIndex: {idx}\nExecution time: {execution_seconds} s\nStructured output: {parsed_response.model_dump_json(indent=2)}\n\n") return idx, row, parsed_response, None, execution_seconds except Exception as e: return idx, row, None, str(e), 0 - # create tasks - tasks = [] - for idx, (_, row) in enumerate(dataframe.iterrows()): - tasks.append(_make_request(idx, row)) + result = await _make_request(idx, row) - results = [None] * len(tasks) - with tqdm(total=len(tasks), desc="Running Declarative Evaluations") as pbar: - for coro in asyncio.as_completed(tasks): - idx, row, parsed_response, error, execution_seconds = await coro - results[idx] = (row, parsed_response, error, execution_seconds) - pbar.update(1) + # # create tasks + # tasks = [] + # for idx, (_, row) in enumerate(dataframe.iterrows()): + # tasks.append(_make_request(idx, row)) + + # results = [None] * len(tasks) + # with tqdm(total=len(tasks), desc="Running Declarative Evaluations") as pbar: + # for coro in 
asyncio.as_completed(tasks): + # idx, row, parsed_response, error, execution_seconds = await coro + # results[idx] = (row, parsed_response, error, execution_seconds) + # pbar.update(1) - return results + return result - def _run_llm_eval_sync() -> List[Tuple[pd.Series, Dict[str, Any]]]: + def _run_llm_eval_sync(row_data: Tuple[int, pd.Series]) -> Tuple[pd.Series, Dict[str, Any]]: if type(model) is AsyncOpenAI: raise ValueError("AsyncOpenAI is not supported for sync operations") + + idx, row = row_data def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]: try: start_time = time.time() @@ -194,11 +201,12 @@ def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, return idx, row, parsed_response, None, execution_seconds except Exception as e: return idx, row, None, str(e), 0 - results = [None] * len(dataframe) - for idx, (_, row) in enumerate(dataframe.iterrows()): - idx, row, parsed_response, error, execution_seconds = _make_request(idx, row) - results[idx] = (row, parsed_response, error, execution_seconds) - return results + result = _make_request(idx, row) + # results = [None] * len(dataframe) + # for idx, (_, row) in enumerate(dataframe.iterrows()): + # idx, row, parsed_response, error, execution_seconds = _make_request(idx, row) + # results[idx] = (row, parsed_response, error, execution_seconds) + return result def _get_nested_value(obj: Dict[str, Any], path: str) -> Any: parts = path.split('.') @@ -229,15 +237,55 @@ def _extract_data_using_field_mappings(result: Tuple[pd.Series, BaseModel, Optio def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], float]]) -> List[Tuple[pd.Series, Dict[str, Any]]]: results_data = [] for result in results: - results_data.append((result[0], _extract_data_using_field_mappings(result))) + _idx, row, model_response, error, execution_seconds = result + results_data.append((result[0], _extract_data_using_field_mappings( + (row, 
model_response, error, execution_seconds) + ))) return results_data - - - - results = await _run_llm_eval_async() + + + # # USING EXECUTOR (cannot be used without acceptable model) + # fallback_return_value = (pd.Series(), {}, None, 0) + # executor = get_executor_on_sync_context( + # _run_llm_eval_sync, + # _run_llm_eval_async, + # run_sync=run_sync, + # concurrency=concurrency, + # tqdm_bar_format=progress_bar_format, + # max_retries=max_retries, + # exit_on_error=exit_on_error, + # fallback_return_value=fallback_return_value, + # ) + + # inputs = [ + # row for _, row in dataframe.iterrows() + # ] + # print("inputs", inputs) + # import pdb; pdb.set_trace() + # results, execution_details = executor.run(inputs) + # print("results", results) + # print("execution_details", execution_details) + + inputs = [ + (idx, row) for idx, row in dataframe.iterrows() + ] + results = [] + with tqdm(total=len(inputs), desc="Running Declarative Evaluations") as pbar: + tasks = [] + for input in inputs: + task = _run_llm_eval_async(input) + tasks.append(task) + + for task in asyncio.as_completed(tasks): + result = await task + results.append(result) + pbar.update(1) # results = _run_llm_eval_sync() results_data = _parse_results(results) + + + rows = [] outcome_results = [] for result in results_data: From 4a37d7a23e4061598117db14689f59d9b9c6cc6e Mon Sep 17 00:00:00 2001 From: Sebastian Sosa <1sebastian1sosa1@gmail.com> Date: Sun, 27 Apr 2025 01:34:51 -0400 Subject: [PATCH 03/12] declarative eval tests --- .../src/phoenix/evals/__init__.py | 3 +- .../src/phoenix/evals/declarative.py | 17 +- tests/unit/evals/__init__.py | 0 tests/unit/evals/test_declarative_eval.py | 195 ++++++++++++ uv.lock | 290 +++++++++--------- 5 files changed, 348 insertions(+), 157 deletions(-) create mode 100644 tests/unit/evals/__init__.py create mode 100644 tests/unit/evals/test_declarative_eval.py diff --git a/packages/phoenix-evals/src/phoenix/evals/__init__.py 
b/packages/phoenix-evals/src/phoenix/evals/__init__.py index 1240e63cee..b732e9b7c1 100644 --- a/packages/phoenix-evals/src/phoenix/evals/__init__.py +++ b/packages/phoenix-evals/src/phoenix/evals/__init__.py @@ -1,5 +1,5 @@ from .classify import llm_classify, run_evals -from .declarative import declarative_eval +from .declarative import declarative_eval, transform_field_mappings_for_explanation from .default_templates import ( CODE_FUNCTIONALITY_PROMPT_RAILS_MAP, CODE_FUNCTIONALITY_PROMPT_TEMPLATE, @@ -94,6 +94,7 @@ "NOT_PARSABLE", "run_evals", "declarative_eval", + "transform_field_mappings_for_explanation", "LLMEvaluator", "HallucinationEvaluator", "QAEvaluator", diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py index 590e69a9d0..2abf58ae38 100644 --- a/packages/phoenix-evals/src/phoenix/evals/declarative.py +++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py @@ -1,3 +1,4 @@ + import time from phoenix.client.utils.template_formatters import MustacheBaseTemplateFormatter from phoenix.evals.models import BaseModel @@ -57,6 +58,14 @@ import asyncio import aiohttp +def transform_field_mappings_for_explanation(field_mappings: Dict[str, str]) -> Dict[str, str]: + new_field_mappings = {} + for key, value in field_mappings.items(): + new_field_mappings[key] = f"schema.{value}" + # Override the explanation field mapping + new_field_mappings["explanation"] = "explanation" + return new_field_mappings + async def declarative_eval( data: Union[pd.DataFrame, List[Any]], model: Union[OpenAI, AsyncOpenAI], @@ -118,13 +127,7 @@ async def declarative_eval( schema = ExplainedSchema # Update the field mappings - new_field_mappings = {} - for key, value in field_mappings.items(): - new_field_mappings[key] = f"schema.{value}" - # Override the explanation field mapping - new_field_mappings["explanation"] = "explanation" - # Update the field mappings - field_mappings = new_field_mappings + 
field_mappings = transform_field_mappings_for_explanation(field_mappings) diff --git a/tests/unit/evals/__init__.py b/tests/unit/evals/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/evals/test_declarative_eval.py b/tests/unit/evals/test_declarative_eval.py new file mode 100644 index 0000000000..fbb4a723bc --- /dev/null +++ b/tests/unit/evals/test_declarative_eval.py @@ -0,0 +1,195 @@ +""" +Test Declarative Eval +""" +import asyncio +import pandas as pd +import pytest +from typing import Dict, Any, List, Literal +from unittest.mock import AsyncMock, MagicMock, patch + +from pydantic import BaseModel, Field +from phoenix.evals import declarative_eval, transform_field_mappings_for_explanation + + +class Conciseness(BaseModel): + is_concise: bool = Field(..., description="Whether the output is concise") +class Formatting(BaseModel): + language: Literal["High", "Average", "Low"] = Field(..., description="The complexity of the formatting used in the output") +class Schema(BaseModel): + conciseness: Conciseness = Field(..., description="A custom evaluation of the output") + formatting: Formatting = Field(..., description="A custom evaluation of the output") +class SchemaWithExplanation(BaseModel): + schema: Schema = Field(..., description="The schema to evaluate") + explanation: str = Field(..., description="An explanation of the evaluation") + +@pytest.fixture +def sample_dataframe() -> pd.DataFrame: + """Sample dataframe to simulate ArizeExportClient(...).export_model_to_df(...)""" + return pd.DataFrame({ + "attributes.llm.input_messages": [ + [{"role": "user", "content": "What is 2+2?"}], + [{"role": "user", "content": "Who was the first president?"}], + ], + "attributes.llm.output_messages": [ + [{"role": "assistant", "content": "4"}], + [{"role": "assistant", "content": "George Washington"}], + ] + }) + +@pytest.fixture +def correct_field_mappings() -> Dict[str, str]: + """Accurate field mappings for the Schema""" + return { + 
"conciseness.label": "conciseness.is_concise", + "formatting.label": "formatting.language", + } + +@pytest.fixture +def incorrect_field_mappings() -> Dict[str, str]: + """Incorrect field mappings for the Schema""" + return { + "conciseness.label": "not_conciseness.is_concise", + "formatting.label": "formatting.not_language", + } + +@pytest.fixture +def mock_parse_responses(): + """Mock responses for two consecutive OpenAI parse API calls.""" + responses = [] + for _ in range(2): + mock_resp = MagicMock() + mock_resp.choices = [MagicMock()] + mock_resp.choices[0].message = MagicMock() + mock_resp.choices[0].message.parsed = Schema( + conciseness=Conciseness(is_concise=True), + formatting=Formatting(language="High") + ) + responses.append(mock_resp) + return responses + +@pytest.fixture +def mock_client(mock_parse_responses): + """Mock OpenAI client with predefined responses.""" + mock_client = MagicMock() + mock_client.beta = MagicMock() + mock_client.beta.chat = MagicMock() + mock_client.beta.chat.completions = MagicMock() + + # Set up the async mock to return different responses for each call + mock_parse = AsyncMock() + # Use side_effect to return a different response for each call + mock_parse.side_effect = mock_parse_responses + mock_client.beta.chat.completions.parse = mock_parse + + return mock_client + +@pytest.fixture +def mock_parse_responses_with_explanation(): + """Mock responses for two consecutive OpenAI parse API calls.""" + responses = [] + for _ in range(2): + mock_resp = MagicMock() + mock_resp.choices = [MagicMock()] + mock_resp.choices[0].message = MagicMock() + mock_resp.choices[0].message.parsed = SchemaWithExplanation( + schema=Schema( + conciseness=Conciseness(is_concise=True), + formatting=Formatting(language="High") + ), + explanation="Explanation" + ) + responses.append(mock_resp) + return responses + +@pytest.fixture +def mock_client_with_explanation(mock_parse_responses_with_explanation): + """Mock OpenAI client with predefined 
responses.""" + mock_client = MagicMock() + mock_client.beta = MagicMock() + mock_client.beta.chat = MagicMock() + mock_client.beta.chat.completions = MagicMock() + + # Set up the async mock to return different responses for each call + mock_parse = AsyncMock() + # Use side_effect to return a different response for each call + mock_parse.side_effect = mock_parse_responses_with_explanation + mock_client.beta.chat.completions.parse = mock_parse + + return mock_client + + + + +@pytest.mark.asyncio +async def test_declarative_eval_correct_field_mappings( + sample_dataframe, + correct_field_mappings, + mock_client +): + """Test declarative_eval with correct field mappings.""" + result = await declarative_eval( + data=sample_dataframe, + model=mock_client, + schema=Schema, + field_mappings=correct_field_mappings, + ) + fm_keys = set(correct_field_mappings.keys()) + assert isinstance(result, pd.DataFrame) + assert result.shape[0] == 2 + assert fm_keys.issubset(set(result.columns.tolist())) + assert result["conciseness.label"].tolist() == [True, True] + assert result["formatting.label"].tolist() == ["High", "High"] + + +@pytest.mark.asyncio +async def test_declarative_eval_incorrect_field_mappings( + sample_dataframe, + incorrect_field_mappings, + mock_client +): + """Test declarative_eval with correct field mappings.""" + result = await declarative_eval( + data=sample_dataframe, + model=mock_client, + schema=Schema, + field_mappings=incorrect_field_mappings, + ) + fm_keys = set(incorrect_field_mappings.keys()) + assert isinstance(result, pd.DataFrame) + assert result.shape[0] == 2 + assert fm_keys.issubset(set(result.columns.tolist())) + for fm_key in fm_keys: + assert result[fm_key].tolist() == [None, None] + +@pytest.mark.asyncio +async def test_declarative_eval_with_explanation( + sample_dataframe, + correct_field_mappings, + mock_client_with_explanation +): + """Test declarative_eval with explanations.""" + result = await declarative_eval( + data=sample_dataframe, + 
model=mock_client_with_explanation, + schema=SchemaWithExplanation, + field_mappings=correct_field_mappings, + provide_explanation=True, + ) + + pre_transform_fm = correct_field_mappings + pre_transform_fm_keys = set(pre_transform_fm.keys()) + pre_transform_fm_values = set(pre_transform_fm.values()) + + print(f"pre_transform_fm: {pre_transform_fm}") + correct_field_mappings = transform_field_mappings_for_explanation(correct_field_mappings) + print(f"correct_field_mappings: {correct_field_mappings}") + fm_keys = set(correct_field_mappings.keys()) + fm_values = set(correct_field_mappings.values()) + + assert isinstance(result, pd.DataFrame) + assert result.shape[0] == 2 + assert fm_keys.issubset(set(result.columns.tolist())) + assert pre_transform_fm_values.isdisjoint(fm_values) + assert result["conciseness.label"].tolist() == [True, True] + assert result["formatting.label"].tolist() == ["High", "High"] + assert result["explanation"].tolist() == ["Explanation", "Explanation"] \ No newline at end of file diff --git a/uv.lock b/uv.lock index e7ac256186..5be0dbc66b 100644 --- a/uv.lock +++ b/uv.lock @@ -1,13 +1,11 @@ version = 1 requires-python = ">=3.9, <3.14" resolution-markers = [ - "python_version < '0'", "python_full_version >= '3.13' and platform_machine == 'x86_64' and sys_platform == 'darwin'", "python_full_version < '3.11' and platform_machine == 'x86_64' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'x86_64' and sys_platform == 'darwin'", "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine == 'x86_64' and sys_platform == 'darwin'", "python_full_version >= '3.12.4' and python_full_version < '3.13' and platform_machine == 'x86_64' and sys_platform == 'darwin'", - "python_version < '0'", "(python_full_version >= '3.13' and platform_machine != 'x86_64') or (python_full_version >= '3.13' and sys_platform != 'darwin')", "(python_full_version < '3.11' and platform_machine != 'x86_64') or 
(python_full_version < '3.11' and sys_platform != 'darwin')", "(python_full_version == '3.11.*' and platform_machine != 'x86_64') or (python_full_version == '3.11.*' and sys_platform != 'darwin')", @@ -187,7 +185,7 @@ wheels = [ [[package]] name = "anthropic" -version = "0.42.0" +version = "0.49.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -198,9 +196,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e7/7c/91b79f5ae4a52497a4e330d66ea5929aec2878ee2c9f8a998dbe4f4c7f01/anthropic-0.42.0.tar.gz", hash = "sha256:bf8b0ed8c8cb2c2118038f29c58099d2f99f7847296cafdaa853910bfff4edf4", size = 192361 } +sdist = { url = "https://files.pythonhosted.org/packages/86/e3/a88c8494ce4d1a88252b9e053607e885f9b14d0a32273d47b727cbee4228/anthropic-0.49.0.tar.gz", hash = "sha256:c09e885b0f674b9119b4f296d8508907f6cff0009bc20d5cf6b35936c40b4398", size = 210016 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/33/b907a6d27dd0d8d3adb4edb5c9e9c85a189719ec6855051cce3814c8ef13/anthropic-0.42.0-py3-none-any.whl", hash = "sha256:46775f65b723c078a2ac9e9de44a46db5c6a4fabeacfd165e5ea78e6817f4eff", size = 203365 }, + { url = "https://files.pythonhosted.org/packages/76/74/5d90ad14d55fbe3f9c474fdcb6e34b4bed99e3be8efac98734a5ddce88c1/anthropic-0.49.0-py3-none-any.whl", hash = "sha256:bbc17ad4e7094988d2fa86b87753ded8dce12498f4b85fe5810f208f454a8375", size = 243368 }, ] [[package]] @@ -295,22 +293,23 @@ llm-evaluation = [ [[package]] name = "arize-phoenix" -version = "7.6.0" +version = "8.26.3" source = { editable = "." 
} dependencies = [ { name = "aioitertools" }, { name = "aiosqlite" }, { name = "alembic" }, + { name = "arize-phoenix-client" }, { name = "arize-phoenix-evals" }, { name = "arize-phoenix-otel" }, { name = "authlib" }, { name = "cachetools" }, + { name = "email-validator" }, { name = "fastapi" }, { name = "grpc-interceptor" }, { name = "grpcio" }, { name = "httpx" }, { name = "jinja2" }, - { name = "jsonschema" }, { name = "numpy" }, { name = "openinference-instrumentation" }, { name = "openinference-semantic-conventions" }, @@ -333,14 +332,14 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, { name = "uvicorn" }, - { name = "websockets" }, - { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" }, - { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "wrapt" }, ] [package.optional-dependencies] container = [ + { name = "aiohttp" }, { name = "anthropic" }, + { name = "azure-identity" }, { name = "fast-hdbscan" }, { name = "google-generativeai" }, { name = "numba" }, @@ -414,19 +413,23 @@ pg = [ [package.metadata] requires-dist = [ + { name = "aiohttp", marker = "extra == 'container'" }, { name = "aioitertools" }, { name = "aiosqlite" }, { name = "alembic", specifier = ">=1.3.0,<2" }, - { name = "anthropic", marker = "extra == 'container'" }, - { name = "anthropic", marker = "extra == 'dev'" }, + { name = "anthropic", marker = "extra == 'container'", specifier = ">=0.49.0" }, + { name = "anthropic", marker = "extra == 'dev'", specifier = ">=0.49.0" }, { name = "arize", extras = ["autoembeddings", "llm-evaluation"], marker = "extra == 'dev'" }, + { name = "arize-phoenix-client" }, { name = "arize-phoenix-evals", specifier = ">=0.13.1" }, { name = "arize-phoenix-otel", specifier = ">=0.5.1" }, { name = "asgi-lifespan", marker = 
"extra == 'dev'" }, { name = "asyncpg", marker = "extra == 'dev'" }, { name = "asyncpg", marker = "extra == 'pg'" }, { name = "authlib" }, + { name = "azure-identity", marker = "extra == 'container'" }, { name = "cachetools" }, + { name = "email-validator" }, { name = "faker", marker = "extra == 'dev'", specifier = ">=30.1.0" }, { name = "fast-hdbscan", marker = "extra == 'container'", specifier = ">=0.2.0" }, { name = "fast-hdbscan", marker = "extra == 'embeddings'", specifier = ">=0.2.0" }, @@ -440,7 +443,6 @@ requires-dist = [ { name = "hatch", marker = "extra == 'dev'" }, { name = "httpx" }, { name = "jinja2" }, - { name = "jsonschema", specifier = ">=4.0.0,<=4.23.0" }, { name = "jupyter", marker = "extra == 'dev'" }, { name = "langchain", marker = "extra == 'dev'", specifier = ">=0.0.334" }, { name = "litellm", marker = "extra == 'dev'", specifier = ">=1.0.3,<1.57.5" }, @@ -482,7 +484,7 @@ requires-dist = [ { name = "psycopg", extras = ["binary", "pool"], marker = "extra == 'pg'" }, { name = "py-grpc-prometheus", marker = "extra == 'container'" }, { name = "pyarrow" }, - { name = "pydantic", specifier = ">=1.0,!=2.0.*,<3" }, + { name = "pydantic", specifier = ">=2.1.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.3" }, { name = "pytest-asyncio", marker = "extra == 'dev'" }, { name = "pytest-cov", marker = "extra == 'dev'" }, @@ -494,10 +496,10 @@ requires-dist = [ { name = "scipy" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.4,<3" }, { name = "sqlean-py", specifier = ">=3.45.1" }, - { name = "starlette" }, - { name = "strawberry-graphql", specifier = "==0.253.1" }, - { name = "strawberry-graphql", extras = ["debug-server", "opentelemetry"], marker = "extra == 'dev'", specifier = "==0.253.1" }, - { name = "strawberry-graphql", extras = ["opentelemetry"], marker = "extra == 'container'", specifier = "==0.253.1" }, + { name = "starlette", specifier = ">=0.46.0" }, + { name = "strawberry-graphql", specifier = 
">=0.262.0" }, + { name = "strawberry-graphql", extras = ["debug-server", "opentelemetry"], marker = "extra == 'dev'", specifier = "==0.262.5" }, + { name = "strawberry-graphql", extras = ["opentelemetry"], marker = "extra == 'container'", specifier = "==0.262.5" }, { name = "tabulate", marker = "extra == 'dev'" }, { name = "tox", marker = "extra == 'dev'", specifier = "==4.18.1" }, { name = "tox-uv", marker = "extra == 'dev'", specifier = "==1.11.3" }, @@ -511,9 +513,20 @@ requires-dist = [ { name = "uvicorn" }, { name = "uvloop", marker = "platform_system != 'Windows' and extra == 'container'" }, { name = "uvloop", marker = "platform_system != 'Windows' and extra == 'dev'" }, - { name = "websockets" }, - { name = "wrapt", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'", specifier = ">=1.17" }, - { name = "wrapt", marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'", specifier = "<1.17" }, + { name = "wrapt", specifier = ">=1.17.2" }, +] + +[[package]] +name = "arize-phoenix-client" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/d2/9170cc95ca4dbd2fa1509f8136c66ef8b26da5ad4e2df531a4e1259ede6f/arize_phoenix_client-1.3.0.tar.gz", hash = "sha256:37c3f72d4acfb9a5ca2f9f3cde2979ddae72dcea131fe08d550249efe8ccb1b3", size = 32841 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/83/598a4df37b4b7a4ba69016955dcc9ac24742e44f520efc75a87b86400a84/arize_phoenix_client-1.3.0-py3-none-any.whl", hash = "sha256:f2e2e0fae25d67063b0e6d967ecae47390350a5c89947b733bb5d8d934d6e1f0", size = 36544 }, ] [[package]] @@ -699,6 +712,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/43/53afb8ba17218f19b77c7834128566c5bbb100a0ad9ba2e8e89d089d7079/autopep8-2.3.2-py2.py3-none-any.whl", hash = "sha256:ce8ad498672c845a0c3de2629c15b635ec2b05ef8177a6e7c91c74f3e9b51128", 
size = 45807 }, ] +[[package]] +name = "azure-core" +version = "1.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "six" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 }, +] + +[[package]] +name = "azure-identity" +version = "1.21.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "cryptography" }, + { name = "msal" }, + { name = "msal-extensions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/a1/f1a683672e7a88ea0e3119f57b6c7843ed52650fdcac8bfa66ed84e86e40/azure_identity-1.21.0.tar.gz", hash = "sha256:ea22ce6e6b0f429bc1b8d9212d5b9f9877bd4c82f1724bfa910760612c07a9a6", size = 266445 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/9f/1f9f3ef4f49729ee207a712a5971a9ca747f2ca47d9cbf13cf6953e3478a/azure_identity-1.21.0-py3-none-any.whl", hash = "sha256:258ea6325537352440f71b35c3dffe9d240eae4a5126c1b7ce5efd5766bd9fd9", size = 189190 }, +] + [[package]] name = "babel" version = "2.16.0" @@ -1158,8 +1201,7 @@ name = "deprecated" version = "1.2.15" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" }, - { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = 
"platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "wrapt" }, ] sdist = { url = "https://files.pythonhosted.org/packages/2e/a3/53e7d78a6850ffdd394d7048a31a6f14e44900adedf190f9a165f6b69439/deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d", size = 2977612 } wheels = [ @@ -1202,6 +1244,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, ] +[[package]] +name = "dnspython" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/4a/263763cb2ba3816dd94b08ad3a33d5fdae34ecb856678773cc40a3605829/dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1", size = 345197 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632 }, +] + [[package]] name = "docstring-parser" version = "0.16" @@ -1211,6 +1262,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/7c/e9fcff7623954d86bdc17782036cbf715ecab1bec4847c008557affe1ca8/docstring_parser-0.16-py3-none-any.whl", hash = "sha256:bf0a1387354d3691d102edef7ec124f219ef639982d096e26e3b60aeffa90637", size = 36533 }, ] +[[package]] +name = "email-validator" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/ce/13508a1ec3f8bb981ae4ca79ea40384becc868bfae97fd1c942bb3a001b1/email_validator-2.2.0.tar.gz", hash = 
"sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7", size = 48967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521 }, +] + [[package]] name = "evaluate" version = "0.4.3" @@ -1289,16 +1353,16 @@ wheels = [ [[package]] name = "fastapi" -version = "0.115.6" +version = "0.115.12" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/72/d83b98cd106541e8f5e5bfab8ef2974ab45a62e8a6c5b5e6940f26d2ed4b/fastapi-0.115.6.tar.gz", hash = "sha256:9ec46f7addc14ea472958a96aae5b5de65f39721a46aaf5705c480d9a8b76654", size = 301336 } +sdist = { url = "https://files.pythonhosted.org/packages/f4/55/ae499352d82338331ca1e28c7f4a63bfd09479b16395dce38cf50a39e2c2/fastapi-0.115.12.tar.gz", hash = "sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681", size = 295236 } wheels = [ - { url = "https://files.pythonhosted.org/packages/52/b3/7e4df40e585df024fac2f80d1a2d579c854ac37109675db2b0cc22c0bb9e/fastapi-0.115.6-py3-none-any.whl", hash = "sha256:e9240b29e36fa8f4bb7290316988e90c381e5092e0cbe84e7818cc3713bcf305", size = 94843 }, + { url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164 }, ] [[package]] @@ -2778,8 +2842,7 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, { name = "typing-inspect" }, - { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" }, - { name = "wrapt", 
version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "wrapt" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e8/8e/556d6aec36f475f5316bfb24e000a4d2359c6492d68c778f4a3cab11bb39/llama_index_core-0.11.0.post1.tar.gz", hash = "sha256:0378f750ffbebcd914649df0e2ec27aa94329e64cfcda693090ff1d7b9b86f41", size = 1314856 } wheels = [ @@ -3239,6 +3302,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 }, ] +[[package]] +name = "msal" +version = "1.32.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/aa/5f/ef42ef25fba682e83a8ee326a1a788e60c25affb58d014495349e37bce50/msal-1.32.0.tar.gz", hash = "sha256:5445fe3af1da6be484991a7ab32eaa82461dc2347de105b76af92c610c3335c2", size = 149817 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/5a/2e663ef56a5d89eba962941b267ebe5be8c5ea340a9929d286e2f5fac505/msal-1.32.0-py3-none-any.whl", hash = "sha256:9dbac5384a10bbbf4dae5c7ea0d707d14e087b92c5aa4954b3feaa2d1aa0bcb7", size = 114655 }, +] + +[[package]] +name = "msal-extensions" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "msal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583 }, +] + [[package]] name = "multidict" version = "6.1.0" @@ -3859,8 +3948,7 @@ dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, { name = "packaging" }, - { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" }, - { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "wrapt" }, ] sdist = { url = "https://files.pythonhosted.org/packages/79/2e/2e59a7cb636dc394bd7cf1758ada5e8ed87590458ca6bb2f9c26e0243847/opentelemetry_instrumentation-0.50b0.tar.gz", hash = "sha256:7d98af72de8dec5323e5202e46122e5f908592b22c6d24733aad619f07d82979", size = 26539 } wheels = [ @@ -3907,8 +3995,7 @@ dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-instrumentation" }, { name = "opentelemetry-semantic-conventions" }, - { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" }, - { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "wrapt" }, ] sdist = { url = "https://files.pythonhosted.org/packages/51/56/658110193718ddde6e8e68ef3ad3fee7850055820a9fc2bd7ec1347afeca/opentelemetry_instrumentation_grpc-0.50b0.tar.gz", hash = "sha256:12381fbc0a7a91410fb9dad5f26f6de5eb5c30cd19c840fa9bfee78b584af7e7", size = 30746 } wheels = [ @@ -3924,8 +4011,7 @@ dependencies = [ { name = "opentelemetry-instrumentation" }, { name = "opentelemetry-semantic-conventions" }, 
{ name = "packaging" }, - { name = "wrapt", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'darwin'" }, - { name = "wrapt", version = "1.17.2", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "wrapt" }, ] sdist = { url = "https://files.pythonhosted.org/packages/72/ac/0cc668bb74b3646447936307bc0a56756568602e46be7a53a770cadab5f3/opentelemetry_instrumentation_sqlalchemy-0.50b0.tar.gz", hash = "sha256:8560fe2375d973746907599f360199ba0f658189ef6feba73c1702e8d832bb6e", size = 13632 } wheels = [ @@ -4807,6 +4893,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pynndescent" version = "0.5.13" @@ -5846,29 +5946,30 @@ wheels = [ [[package]] name = "starlette" -version = "0.41.3" +version = "0.46.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.10'" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/1a/4c/9b5764bd22eec91c4039ef4c55334e9187085da2d8a2df7bd570869aae18/starlette-0.41.3.tar.gz", hash = "sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835", size = 2574159 } +sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846 } wheels = [ - { url = "https://files.pythonhosted.org/packages/96/00/2b325970b3060c7cecebab6d295afe763365822b1306a12eeab198f74323/starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7", size = 73225 }, + { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037 }, ] [[package]] name = "strawberry-graphql" -version = "0.253.1" +version = "0.262.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "graphql-core" }, + { name = "packaging" }, { name = "python-dateutil" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/f3/0a18ad1f102e50aeb2ae612380bfb0264068e9d8efc4dda4b86e0052c9d1/strawberry_graphql-0.253.1.tar.gz", hash = "sha256:be43eac92e0896a7f1061ab293b89b060d369974e4c1444d16ad377d7a6f030d", size = 207814 } +sdist = { url = "https://files.pythonhosted.org/packages/1d/9f/77a2611aeeef2b01dbfeea3d4a48be2517ba73935c87ee000e9c14844fd6/strawberry_graphql-0.262.5.tar.gz", hash = "sha256:92a5403133fb22ea4f31a09df9aa70567cbd7c860dc34afe92a32103125c6f26", size = 202428 } wheels = [ - { url = "https://files.pythonhosted.org/packages/24/b3/4745158fe8ebcd89bea2bdc6c070ef7a750bea52f6db299ae4fdc1bb2691/strawberry_graphql-0.253.1-py3-none-any.whl", hash = 
"sha256:f24cc55560546968255094aa080fb11a14eace4d61cd27eaf98dded863a2af17", size = 295163 }, + { url = "https://files.pythonhosted.org/packages/4d/6b/715835515ff21ab9de351df401a769a51b051dad6b67fa43778682de13a5/strawberry_graphql-0.262.5-py3-none-any.whl", hash = "sha256:7bc62e19326d3f5294f473c2ca3418bd01297e6abfd4a5a133f33fc9a5fcd5e1", size = 296015 }, ] [package.optional-dependencies] @@ -6526,82 +6627,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, ] -[[package]] -name = "websockets" -version = "14.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f4/1b/380b883ce05bb5f45a905b61790319a28958a9ab1e4b6b95ff5464b60ca1/websockets-14.1.tar.gz", hash = "sha256:398b10c77d471c0aab20a845e7a60076b6390bfdaac7a6d2edb0d2c59d75e8d8", size = 162840 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/af/91/b1b375dbd856fd5fff3f117de0e520542343ecaf4e8fc60f1ac1e9f5822c/websockets-14.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a0adf84bc2e7c86e8a202537b4fd50e6f7f0e4a6b6bf64d7ccb96c4cd3330b29", size = 161950 }, - { url = "https://files.pythonhosted.org/packages/61/8f/4d52f272d3ebcd35e1325c646e98936099a348374d4a6b83b524bded8116/websockets-14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90b5d9dfbb6d07a84ed3e696012610b6da074d97453bd01e0e30744b472c8179", size = 159601 }, - { url = "https://files.pythonhosted.org/packages/c4/b1/29e87b53eb1937992cdee094a0988aadc94f25cf0b37e90c75eed7123d75/websockets-14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2177ee3901075167f01c5e335a6685e71b162a54a89a56001f1c3e9e3d2ad250", size = 159854 }, - { url = 
"https://files.pythonhosted.org/packages/3f/e6/752a2f5e8321ae2a613062676c08ff2fccfb37dc837a2ee919178a372e8a/websockets-14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f14a96a0034a27f9d47fd9788913924c89612225878f8078bb9d55f859272b0", size = 168835 }, - { url = "https://files.pythonhosted.org/packages/60/27/ca62de7877596926321b99071639275e94bb2401397130b7cf33dbf2106a/websockets-14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f874ba705deea77bcf64a9da42c1f5fc2466d8f14daf410bc7d4ceae0a9fcb0", size = 167844 }, - { url = "https://files.pythonhosted.org/packages/7e/db/f556a1d06635c680ef376be626c632e3f2bbdb1a0189d1d1bffb061c3b70/websockets-14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9607b9a442392e690a57909c362811184ea429585a71061cd5d3c2b98065c199", size = 168157 }, - { url = "https://files.pythonhosted.org/packages/b3/bc/99e5f511838c365ac6ecae19674eb5e94201aa4235bd1af3e6fa92c12905/websockets-14.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bea45f19b7ca000380fbd4e02552be86343080120d074b87f25593ce1700ad58", size = 168561 }, - { url = "https://files.pythonhosted.org/packages/c6/e7/251491585bad61c79e525ac60927d96e4e17b18447cc9c3cfab47b2eb1b8/websockets-14.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:219c8187b3ceeadbf2afcf0f25a4918d02da7b944d703b97d12fb01510869078", size = 167979 }, - { url = "https://files.pythonhosted.org/packages/ac/98/7ac2e4eeada19bdbc7a3a66a58e3ebdf33648b9e1c5b3f08c3224df168cf/websockets-14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ad2ab2547761d79926effe63de21479dfaf29834c50f98c4bf5b5480b5838434", size = 167925 }, - { url = "https://files.pythonhosted.org/packages/ab/3d/09e65c47ee2396b7482968068f6e9b516221e1032b12dcf843b9412a5dfb/websockets-14.1-cp310-cp310-win32.whl", hash = "sha256:1288369a6a84e81b90da5dbed48610cd7e5d60af62df9851ed1d1d23a9069f10", 
size = 162831 }, - { url = "https://files.pythonhosted.org/packages/8a/67/59828a3d09740e6a485acccfbb66600632f2178b6ed1b61388ee96f17d5a/websockets-14.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0744623852f1497d825a49a99bfbec9bea4f3f946df6eb9d8a2f0c37a2fec2e", size = 163266 }, - { url = "https://files.pythonhosted.org/packages/97/ed/c0d03cb607b7fe1f7ff45e2cd4bb5cd0f9e3299ced79c2c303a6fff44524/websockets-14.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:449d77d636f8d9c17952628cc7e3b8faf6e92a17ec581ec0c0256300717e1512", size = 161949 }, - { url = "https://files.pythonhosted.org/packages/06/91/bf0a44e238660d37a2dda1b4896235d20c29a2d0450f3a46cd688f43b239/websockets-14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a35f704be14768cea9790d921c2c1cc4fc52700410b1c10948511039be824aac", size = 159606 }, - { url = "https://files.pythonhosted.org/packages/ff/b8/7185212adad274c2b42b6a24e1ee6b916b7809ed611cbebc33b227e5c215/websockets-14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b1f3628a0510bd58968c0f60447e7a692933589b791a6b572fcef374053ca280", size = 159854 }, - { url = "https://files.pythonhosted.org/packages/5a/8a/0849968d83474be89c183d8ae8dcb7f7ada1a3c24f4d2a0d7333c231a2c3/websockets-14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c3deac3748ec73ef24fc7be0b68220d14d47d6647d2f85b2771cb35ea847aa1", size = 169402 }, - { url = "https://files.pythonhosted.org/packages/bd/4f/ef886e37245ff6b4a736a09b8468dae05d5d5c99de1357f840d54c6f297d/websockets-14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7048eb4415d46368ef29d32133134c513f507fff7d953c18c91104738a68c3b3", size = 168406 }, - { url = "https://files.pythonhosted.org/packages/11/43/e2dbd4401a63e409cebddedc1b63b9834de42f51b3c84db885469e9bdcef/websockets-14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f6cf0ad281c979306a6a34242b371e90e891bce504509fb6bb5246bbbf31e7b6", size = 168776 }, - { url = "https://files.pythonhosted.org/packages/6d/d6/7063e3f5c1b612e9f70faae20ebaeb2e684ffa36cb959eb0862ee2809b32/websockets-14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cc1fc87428c1d18b643479caa7b15db7d544652e5bf610513d4a3478dbe823d0", size = 169083 }, - { url = "https://files.pythonhosted.org/packages/49/69/e6f3d953f2fa0f8a723cf18cd011d52733bd7f6e045122b24e0e7f49f9b0/websockets-14.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f95ba34d71e2fa0c5d225bde3b3bdb152e957150100e75c86bc7f3964c450d89", size = 168529 }, - { url = "https://files.pythonhosted.org/packages/70/ff/f31fa14561fc1d7b8663b0ed719996cf1f581abee32c8fb2f295a472f268/websockets-14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9481a6de29105d73cf4515f2bef8eb71e17ac184c19d0b9918a3701c6c9c4f23", size = 168475 }, - { url = "https://files.pythonhosted.org/packages/f1/15/b72be0e4bf32ff373aa5baef46a4c7521b8ea93ad8b49ca8c6e8e764c083/websockets-14.1-cp311-cp311-win32.whl", hash = "sha256:368a05465f49c5949e27afd6fbe0a77ce53082185bbb2ac096a3a8afaf4de52e", size = 162833 }, - { url = "https://files.pythonhosted.org/packages/bc/ef/2d81679acbe7057ffe2308d422f744497b52009ea8bab34b6d74a2657d1d/websockets-14.1-cp311-cp311-win_amd64.whl", hash = "sha256:6d24fc337fc055c9e83414c94e1ee0dee902a486d19d2a7f0929e49d7d604b09", size = 163263 }, - { url = "https://files.pythonhosted.org/packages/55/64/55698544ce29e877c9188f1aee9093712411a8fc9732cca14985e49a8e9c/websockets-14.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ed907449fe5e021933e46a3e65d651f641975a768d0649fee59f10c2985529ed", size = 161957 }, - { url = "https://files.pythonhosted.org/packages/a2/b1/b088f67c2b365f2c86c7b48edb8848ac27e508caf910a9d9d831b2f343cb/websockets-14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:87e31011b5c14a33b29f17eb48932e63e1dcd3fa31d72209848652310d3d1f0d", size = 159620 }, - { url = 
"https://files.pythonhosted.org/packages/c1/89/2a09db1bbb40ba967a1b8225b07b7df89fea44f06de9365f17f684d0f7e6/websockets-14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bc6ccf7d54c02ae47a48ddf9414c54d48af9c01076a2e1023e3b486b6e72c707", size = 159852 }, - { url = "https://files.pythonhosted.org/packages/ca/c1/f983138cd56e7d3079f1966e81f77ce6643f230cd309f73aa156bb181749/websockets-14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9777564c0a72a1d457f0848977a1cbe15cfa75fa2f67ce267441e465717dcf1a", size = 169675 }, - { url = "https://files.pythonhosted.org/packages/c1/c8/84191455d8660e2a0bdb33878d4ee5dfa4a2cedbcdc88bbd097303b65bfa/websockets-14.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a655bde548ca98f55b43711b0ceefd2a88a71af6350b0c168aa77562104f3f45", size = 168619 }, - { url = "https://files.pythonhosted.org/packages/8d/a7/62e551fdcd7d44ea74a006dc193aba370505278ad76efd938664531ce9d6/websockets-14.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3dfff83ca578cada2d19e665e9c8368e1598d4e787422a460ec70e531dbdd58", size = 169042 }, - { url = "https://files.pythonhosted.org/packages/ad/ed/1532786f55922c1e9c4d329608e36a15fdab186def3ca9eb10d7465bc1cc/websockets-14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6a6c9bcf7cdc0fd41cc7b7944447982e8acfd9f0d560ea6d6845428ed0562058", size = 169345 }, - { url = "https://files.pythonhosted.org/packages/ea/fb/160f66960d495df3de63d9bcff78e1b42545b2a123cc611950ffe6468016/websockets-14.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4b6caec8576e760f2c7dd878ba817653144d5f369200b6ddf9771d64385b84d4", size = 168725 }, - { url = "https://files.pythonhosted.org/packages/cf/53/1bf0c06618b5ac35f1d7906444b9958f8485682ab0ea40dee7b17a32da1e/websockets-14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:eb6d38971c800ff02e4a6afd791bbe3b923a9a57ca9aeab7314c21c84bf9ff05", size = 168712 }, - { url = "https://files.pythonhosted.org/packages/e5/22/5ec2f39fff75f44aa626f86fa7f20594524a447d9c3be94d8482cd5572ef/websockets-14.1-cp312-cp312-win32.whl", hash = "sha256:1d045cbe1358d76b24d5e20e7b1878efe578d9897a25c24e6006eef788c0fdf0", size = 162838 }, - { url = "https://files.pythonhosted.org/packages/74/27/28f07df09f2983178db7bf6c9cccc847205d2b92ced986cd79565d68af4f/websockets-14.1-cp312-cp312-win_amd64.whl", hash = "sha256:90f4c7a069c733d95c308380aae314f2cb45bd8a904fb03eb36d1a4983a4993f", size = 163277 }, - { url = "https://files.pythonhosted.org/packages/34/77/812b3ba5110ed8726eddf9257ab55ce9e85d97d4aa016805fdbecc5e5d48/websockets-14.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3630b670d5057cd9e08b9c4dab6493670e8e762a24c2c94ef312783870736ab9", size = 161966 }, - { url = "https://files.pythonhosted.org/packages/8d/24/4fcb7aa6986ae7d9f6d083d9d53d580af1483c5ec24bdec0978307a0f6ac/websockets-14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36ebd71db3b89e1f7b1a5deaa341a654852c3518ea7a8ddfdf69cc66acc2db1b", size = 159625 }, - { url = "https://files.pythonhosted.org/packages/f8/47/2a0a3a2fc4965ff5b9ce9324d63220156bd8bedf7f90824ab92a822e65fd/websockets-14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5b918d288958dc3fa1c5a0b9aa3256cb2b2b84c54407f4813c45d52267600cd3", size = 159857 }, - { url = "https://files.pythonhosted.org/packages/dd/c8/d7b425011a15e35e17757e4df75b25e1d0df64c0c315a44550454eaf88fc/websockets-14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00fe5da3f037041da1ee0cf8e308374e236883f9842c7c465aa65098b1c9af59", size = 169635 }, - { url = "https://files.pythonhosted.org/packages/93/39/6e3b5cffa11036c40bd2f13aba2e8e691ab2e01595532c46437b56575678/websockets-14.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:8149a0f5a72ca36720981418eeffeb5c2729ea55fa179091c81a0910a114a5d2", size = 168578 }, - { url = "https://files.pythonhosted.org/packages/cf/03/8faa5c9576299b2adf34dcccf278fc6bbbcda8a3efcc4d817369026be421/websockets-14.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77569d19a13015e840b81550922056acabc25e3f52782625bc6843cfa034e1da", size = 169018 }, - { url = "https://files.pythonhosted.org/packages/8c/05/ea1fec05cc3a60defcdf0bb9f760c3c6bd2dd2710eff7ac7f891864a22ba/websockets-14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cf5201a04550136ef870aa60ad3d29d2a59e452a7f96b94193bee6d73b8ad9a9", size = 169383 }, - { url = "https://files.pythonhosted.org/packages/21/1d/eac1d9ed787f80754e51228e78855f879ede1172c8b6185aca8cef494911/websockets-14.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:88cf9163ef674b5be5736a584c999e98daf3aabac6e536e43286eb74c126b9c7", size = 168773 }, - { url = "https://files.pythonhosted.org/packages/0e/1b/e808685530185915299740d82b3a4af3f2b44e56ccf4389397c7a5d95d39/websockets-14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:836bef7ae338a072e9d1863502026f01b14027250a4545672673057997d5c05a", size = 168757 }, - { url = "https://files.pythonhosted.org/packages/b6/19/6ab716d02a3b068fbbeb6face8a7423156e12c446975312f1c7c0f4badab/websockets-14.1-cp313-cp313-win32.whl", hash = "sha256:0d4290d559d68288da9f444089fd82490c8d2744309113fc26e2da6e48b65da6", size = 162834 }, - { url = "https://files.pythonhosted.org/packages/6c/fd/ab6b7676ba712f2fc89d1347a4b5bdc6aa130de10404071f2b2606450209/websockets-14.1-cp313-cp313-win_amd64.whl", hash = "sha256:8621a07991add373c3c5c2cf89e1d277e49dc82ed72c75e3afc74bd0acc446f0", size = 163277 }, - { url = "https://files.pythonhosted.org/packages/4d/23/ac9d8c5ec7b90efc3687d60474ef7e698f8b75cb7c9dfedad72701e797c9/websockets-14.1-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:01bb2d4f0a6d04538d3c5dfd27c0643269656c28045a53439cbf1c004f90897a", size = 161945 }, - { url = "https://files.pythonhosted.org/packages/c5/6b/ffa450e3b736a86ae6b40ce20a758ac9af80c96a18548f6c323ed60329c5/websockets-14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:414ffe86f4d6f434a8c3b7913655a1a5383b617f9bf38720e7c0799fac3ab1c6", size = 159600 }, - { url = "https://files.pythonhosted.org/packages/74/62/f90d1fd57ea7337ecaa99f17c31a544b9dcdb7c7c32a3d3997ccc42d57d3/websockets-14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8fda642151d5affdee8a430bd85496f2e2517be3a2b9d2484d633d5712b15c56", size = 159850 }, - { url = "https://files.pythonhosted.org/packages/35/dd/1e71865de1f3c265e11d02b0b4c76178f84351c6611e515fbe3d2bd1b98c/websockets-14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd7c11968bc3860d5c78577f0dbc535257ccec41750675d58d8dc66aa47fe52c", size = 168616 }, - { url = "https://files.pythonhosted.org/packages/ba/ae/0d069b52e26d48402dbe90c7581eb6a5bed5d7dbe3d9ca3cf1033859d58e/websockets-14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a032855dc7db987dff813583d04f4950d14326665d7e714d584560b140ae6b8b", size = 167619 }, - { url = "https://files.pythonhosted.org/packages/1c/3f/d3f2df62704c53e0296f0ce714921b6a15df10e2e463734c737b1d9e2522/websockets-14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7e7ea2f782408c32d86b87a0d2c1fd8871b0399dd762364c731d86c86069a78", size = 167921 }, - { url = "https://files.pythonhosted.org/packages/e0/e2/2dcb295bdae9393070cea58c790d87d1d36149bb4319b1da6014c8a36d42/websockets-14.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:39450e6215f7d9f6f7bc2a6da21d79374729f5d052333da4d5825af8a97e6735", size = 168343 }, - { url = 
"https://files.pythonhosted.org/packages/6b/fd/fa48e8b4e10e2c165cbfc16dada7405b4008818be490fc6b99a4928e232a/websockets-14.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ceada5be22fa5a5a4cdeec74e761c2ee7db287208f54c718f2df4b7e200b8d4a", size = 167745 }, - { url = "https://files.pythonhosted.org/packages/42/45/79db33f2b744d2014b40946428e6c37ce944fde8791d82e1c2f4d4a67d96/websockets-14.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3fc753451d471cff90b8f467a1fc0ae64031cf2d81b7b34e1811b7e2691bc4bc", size = 167705 }, - { url = "https://files.pythonhosted.org/packages/da/27/f66507db34ca9c79562f28fa5983433f7b9080fd471cc188906006d36ba4/websockets-14.1-cp39-cp39-win32.whl", hash = "sha256:14839f54786987ccd9d03ed7f334baec0f02272e7ec4f6e9d427ff584aeea8b4", size = 162828 }, - { url = "https://files.pythonhosted.org/packages/11/25/bb8f81a4ec94f595adb845608c5ec9549cb6b446945b292fe61807c7c95b/websockets-14.1-cp39-cp39-win_amd64.whl", hash = "sha256:d9fd19ecc3a4d5ae82ddbfb30962cf6d874ff943e56e0c81f5169be2fda62979", size = 163271 }, - { url = "https://files.pythonhosted.org/packages/fb/cd/382a05a1ba2a93bd9fb807716a660751295df72e77204fb130a102fcdd36/websockets-14.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e5dc25a9dbd1a7f61eca4b7cb04e74ae4b963d658f9e4f9aad9cd00b688692c8", size = 159633 }, - { url = "https://files.pythonhosted.org/packages/b7/a0/fa7c62e2952ef028b422fbf420f9353d9dd4dfaa425de3deae36e98c0784/websockets-14.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:04a97aca96ca2acedf0d1f332c861c5a4486fdcba7bcef35873820f940c4231e", size = 159867 }, - { url = "https://files.pythonhosted.org/packages/c1/94/954b4924f868db31d5f0935893c7a8446515ee4b36bb8ad75a929469e453/websockets-14.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df174ece723b228d3e8734a6f2a6febbd413ddec39b3dc592f5a4aa0aff28098", size = 161121 }, - { url = 
"https://files.pythonhosted.org/packages/7a/2e/f12bbb41a8f2abb76428ba4fdcd9e67b5b364a3e7fa97c88f4d6950aa2d4/websockets-14.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:034feb9f4286476f273b9a245fb15f02c34d9586a5bc936aff108c3ba1b21beb", size = 160731 }, - { url = "https://files.pythonhosted.org/packages/13/97/b76979401f2373af1fe3e08f960b265cecab112e7dac803446fb98351a52/websockets-14.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c308dabd2b380807ab64b62985eaccf923a78ebc572bd485375b9ca2b7dc7", size = 160681 }, - { url = "https://files.pythonhosted.org/packages/39/9c/16916d9a436c109a1d7ba78817e8fee357b78968be3f6e6f517f43afa43d/websockets-14.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5a42d3ecbb2db5080fc578314439b1d79eef71d323dc661aa616fb492436af5d", size = 163316 }, - { url = "https://files.pythonhosted.org/packages/0f/57/50fd09848a80a1b63a572c610f230f8a17590ca47daf256eb28a0851df73/websockets-14.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ddaa4a390af911da6f680be8be4ff5aaf31c4c834c1a9147bc21cbcbca2d4370", size = 159633 }, - { url = "https://files.pythonhosted.org/packages/d7/2f/db728b0c7962ad6a13ced8286325bf430b59722d943e7f6bdbd8a78e2bfe/websockets-14.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a4c805c6034206143fbabd2d259ec5e757f8b29d0a2f0bf3d2fe5d1f60147a4a", size = 159863 }, - { url = "https://files.pythonhosted.org/packages/fa/e4/21e7481936fbfffee138edb488a6184eb3468b402a8181b95b9e44f6a676/websockets-14.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:205f672a6c2c671a86d33f6d47c9b35781a998728d2c7c2a3e1cf3333fcb62b7", size = 161119 }, - { url = 
"https://files.pythonhosted.org/packages/64/2d/efb6cf716d4f9da60190756e06f8db2066faf1ae4a4a8657ab136dfcc7a8/websockets-14.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef440054124728cc49b01c33469de06755e5a7a4e83ef61934ad95fc327fbb0", size = 160724 }, - { url = "https://files.pythonhosted.org/packages/40/b0/a70b972d853c3f26040834fcff3dd45c8a0292af9f5f0b36f9fbb82d5d44/websockets-14.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7591d6f440af7f73c4bd9404f3772bfee064e639d2b6cc8c94076e71b2471c1", size = 160676 }, - { url = "https://files.pythonhosted.org/packages/4a/76/f9da7f97476cc7b8c74829bb4851f1faf660455839689ffcc354b52860a7/websockets-14.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:25225cc79cfebc95ba1d24cd3ab86aaa35bcd315d12fa4358939bd55e9bd74a5", size = 163311 }, - { url = "https://files.pythonhosted.org/packages/b0/0b/c7e5d11020242984d9d37990310520ed663b942333b83a033c2f20191113/websockets-14.1-py3-none-any.whl", hash = "sha256:4d4fc827a20abe6d544a119896f6b78ee13fe81cbfef416f3f2ddf09a03f0e2e", size = 156277 }, -] - [[package]] name = "widgetsnbextension" version = "4.0.13" @@ -6611,43 +6636,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/21/02/88b65cc394961a60c43c70517066b6b679738caf78506a5da7b88ffcb643/widgetsnbextension-4.0.13-py3-none-any.whl", hash = "sha256:74b2692e8500525cc38c2b877236ba51d34541e6385eeed5aec15a70f88a6c71", size = 2335872 }, ] -[[package]] -name = "wrapt" -version = "1.16.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_version < '0'", - "python_full_version >= '3.13' and platform_machine == 'x86_64' and sys_platform == 'darwin'", - "python_full_version < '3.11' and platform_machine == 'x86_64' and sys_platform == 'darwin'", - "python_full_version == '3.11.*' and platform_machine == 'x86_64' and sys_platform == 'darwin'", - 
"python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine == 'x86_64' and sys_platform == 'darwin'", - "python_full_version >= '3.12.4' and python_full_version < '3.13' and platform_machine == 'x86_64' and sys_platform == 'darwin'", -] -sdist = { url = "https://files.pythonhosted.org/packages/95/4c/063a912e20bcef7124e0df97282a8af3ff3e4b603ce84c481d6d7346be0a/wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", size = 53972 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/c6/5375258add3777494671d8cec27cdf5402abd91016dee24aa2972c61fedf/wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4", size = 37315 }, - { url = "https://files.pythonhosted.org/packages/32/12/e11adfde33444986135d8881b401e4de6cbb4cced046edc6b464e6ad7547/wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020", size = 38160 }, - { url = "https://files.pythonhosted.org/packages/fd/03/c188ac517f402775b90d6f312955a5e53b866c964b32119f2ed76315697e/wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", size = 37313 }, - { url = "https://files.pythonhosted.org/packages/0f/16/ea627d7817394db04518f62934a5de59874b587b792300991b3c347ff5e0/wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", size = 38164 }, - { url = "https://files.pythonhosted.org/packages/92/17/224132494c1e23521868cdd57cd1e903f3b6a7ba6996b7b8f077ff8ac7fe/wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", size = 37614 }, - { url = 
"https://files.pythonhosted.org/packages/6a/d7/cfcd73e8f4858079ac59d9db1ec5a1349bc486ae8e9ba55698cc1f4a1dff/wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", size = 38316 }, - { url = "https://files.pythonhosted.org/packages/70/cc/b92e1da2cad6a9f8ee481000ece07a35e3b24e041e60ff8b850c079f0ebf/wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2", size = 37314 }, - { url = "https://files.pythonhosted.org/packages/4a/cc/3402bcc897978be00fef608cd9e3e39ec8869c973feeb5e1e277670e5ad2/wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb", size = 38162 }, - { url = "https://files.pythonhosted.org/packages/ff/21/abdedb4cdf6ff41ebf01a74087740a709e2edb146490e4d9beea054b0b7a/wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", size = 23362 }, -] - [[package]] name = "wrapt" version = "1.17.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_version < '0'", - "(python_full_version >= '3.13' and platform_machine != 'x86_64') or (python_full_version >= '3.13' and sys_platform != 'darwin')", - "(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin')", - "(python_full_version == '3.11.*' and platform_machine != 'x86_64') or (python_full_version == '3.11.*' and sys_platform != 'darwin')", - "(python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine != 'x86_64') or (python_full_version >= '3.12' and python_full_version < '3.12.4' and sys_platform != 'darwin')", - "(python_full_version >= '3.12.4' and python_full_version < '3.13' and platform_machine != 'x86_64') or (python_full_version >= '3.12.4' and python_full_version < '3.13' and sys_platform != 'darwin')", -] 
sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } wheels = [ { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307 }, From 663f766040b4b1ddc65337dbd41a0061925e1e30 Mon Sep 17 00:00:00 2001 From: Anthony Powell Date: Thu, 24 Apr 2025 20:25:30 -0400 Subject: [PATCH 04/12] fix: Allow scroll on settings pages (#7284) --- app/src/pages/settings/SettingsPage.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/app/src/pages/settings/SettingsPage.tsx b/app/src/pages/settings/SettingsPage.tsx index 0908306d6b..21aa6631f7 100644 --- a/app/src/pages/settings/SettingsPage.tsx +++ b/app/src/pages/settings/SettingsPage.tsx @@ -27,7 +27,6 @@ const settingsPageInnerCSS = css` width: 100%; margin-left: auto; margin-right: auto; - height: 100%; `; export function SettingsPage() { From 854dc2e0498da58ac695cba17fee865c2f288d27 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Thu, 24 Apr 2025 18:37:09 -0600 Subject: [PATCH 05/12] chore(main): release arize-phoenix 8.27.1 (#7285) --- .release-please-manifest.json | 2 +- CHANGELOG.md | 7 +++++++ src/phoenix/version.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 613d0284cc..00f9c72232 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1 +1 @@ -{".":"8.27.0","packages/phoenix-evals":"0.20.6","packages/phoenix-otel":"0.9.2","packages/phoenix-client":"1.3.0"} +{".":"8.27.1","packages/phoenix-evals":"0.20.6","packages/phoenix-otel":"0.9.2","packages/phoenix-client":"1.3.0"} diff --git a/CHANGELOG.md b/CHANGELOG.md index 
e7549c7a33..930295ee7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [8.27.1](https://github.com/Arize-ai/phoenix/compare/arize-phoenix-v8.27.0...arize-phoenix-v8.27.1) (2025-04-25) + + +### Bug Fixes + +* Allow scroll on settings pages ([#7284](https://github.com/Arize-ai/phoenix/issues/7284)) ([c25b071](https://github.com/Arize-ai/phoenix/commit/c25b07143b9c714b75e3d9655ca9db161542acb0)) + ## [8.27.0](https://github.com/Arize-ai/phoenix/compare/arize-phoenix-v8.26.3...arize-phoenix-v8.27.0) (2025-04-24) diff --git a/src/phoenix/version.py b/src/phoenix/version.py index bd322bdcdb..cb5321f854 100644 --- a/src/phoenix/version.py +++ b/src/phoenix/version.py @@ -1 +1 @@ -__version__ = "8.27.0" +__version__ = "8.27.1" From 1cb5914fc49702a4e527544f099f12982c36b1f1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 24 Apr 2025 19:58:49 -0700 Subject: [PATCH 06/12] chore: update Phoenix version to 8.27.1 in Kustomize (#7286) --- kustomize/base/phoenix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kustomize/base/phoenix.yaml b/kustomize/base/phoenix.yaml index 81b9796d0b..e23d1413c3 100644 --- a/kustomize/base/phoenix.yaml +++ b/kustomize/base/phoenix.yaml @@ -28,7 +28,7 @@ spec: value: /mnt/data - name: PHOENIX_PORT value: "6006" - image: arizephoenix/phoenix:version-8.27.0 + image: arizephoenix/phoenix:version-8.27.1 name: phoenix ports: - containerPort: 6006 From 91b6d58ddbdc37b4982ea697a55e40400d745806 Mon Sep 17 00:00:00 2001 From: s-yeddula Date: Fri, 25 Apr 2025 17:22:39 -0700 Subject: [PATCH 07/12] update sessions notebook (#7293) --- .../tracing/openai_sessions_tutorial.ipynb | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/tutorials/tracing/openai_sessions_tutorial.ipynb b/tutorials/tracing/openai_sessions_tutorial.ipynb index d12c607922..6cdba3d430 100644 --- 
a/tutorials/tracing/openai_sessions_tutorial.ipynb +++ b/tutorials/tracing/openai_sessions_tutorial.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "4kPKrMTP_n_j" + }, "source": [ "
\n", "

\n", @@ -25,7 +27,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "9y1DA5uu_n_k" + }, "source": [ "## 1. Install Dependencies and Import Libraries\n", "\n", @@ -43,7 +47,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "u9xTBlt__n_l" + }, "source": [ "## Configure Your OpenAI API Key and Instantiate Your OpenAI Client\n", "\n", @@ -68,7 +74,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "hY0NliPg_n_l" + }, "source": [ "## Instrument Your OpenAI Client\n", "\n", @@ -91,7 +99,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "MhVjxN_R_n_l" + }, "source": [ "## Run Phoenix in the Background\n", "\n", @@ -106,12 +116,14 @@ "source": [ "import phoenix as px\n", "\n", - "px.launch_app()" + "px.launch_app().view()" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "VoLsIY9o_n_m" + }, "source": [ "## Create a bare-bones Agent\n", "\n", @@ -126,12 +138,10 @@ "source": [ "import uuid\n", "\n", - "import openai\n", "from openinference.instrumentation import using_session\n", "from openinference.semconv.trace import SpanAttributes\n", "from opentelemetry import trace\n", "\n", - "client = openai.Client()\n", "session_id = str(uuid.uuid4())\n", "\n", "tracer = trace.get_tracer(__name__)\n", @@ -173,6 +183,11 @@ "response = assistant(\n", " messages,\n", " session_id=session_id,\n", + ")\n", + "messages = messages + [response, {\"role\": \"user\", \"content\": \"what's 4+5?\"}]\n", + "response = assistant(\n", + " messages,\n", + " session_id=session_id,\n", ")" ] } @@ -183,5 +198,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 0 } From 9ba32879ab89c812cf31f41efc93e17ba206b27a Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Fri, 25 Apr 2025 19:31:33 -0600 Subject: [PATCH 08/12] docs(client): add general rules for the client (#7290) * docs(client): add general rules for the client * cleanup * Update 
packages/phoenix-client/.cursor/rules/general.mdc Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> * Update packages/phoenix-client/.cursor/rules/general.mdc Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> * Update packages/phoenix-client/.cursor/rules/general.mdc Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --------- Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- .../phoenix-client/.cursor/rules/general.mdc | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 packages/phoenix-client/.cursor/rules/general.mdc diff --git a/packages/phoenix-client/.cursor/rules/general.mdc b/packages/phoenix-client/.cursor/rules/general.mdc new file mode 100644 index 0000000000..803a6fd5c7 --- /dev/null +++ b/packages/phoenix-client/.cursor/rules/general.mdc @@ -0,0 +1,67 @@ +--- +description: +globs: +alwaysApply: true +--- +# General Client Design Guidelines + +## Dependencies + +The client should be as light-weight as possible as it is meant to be integrated into applications directly with no impact on the runtime. This means it should never depend on the core `phoenix` package and should only depend on things under the `phoenix.client` sub-module. The client must never depend on modules that are related to a server such as `starlette`, `sqlalchemy`, `pg` and so on. For libraries like `pandas`, implement lazy importing (importing within the specific function that requires it) rather than importing at the top-level. + +## Syntax + +All methods that interact with the server should be namespaced via `projects`, `prompts` and so on. + +All arguments to the methods MUST use `kwargs` so as to make the signature as self-evident as possible. 
+ +Do not do: + +```python +client.prompts.get("prompt_version_id") +``` + +Prefer: + +```python +client.prompts.get(prompt_version_id="prompt_version_id") +``` + +Methods should be prefixed with an action: + +- `get` - gets the entity. Correlates to HTTP `GET` a specific entity. E.g. `/projects/1` +- `create` - makes a new entity. Correlates to HTTP `POST` +- `list` - get a paginated list of an entity. E.g. `GET` a list `/projects` +- `add` - attach an entity to another. E.g. `add_annotation` would be used to attach an annotation to a `span` or `trace` +- `delete` - permanently delete an entity + +In addition, things can be sent to the platform in bulk. + +- `log` - associates a list of entities to something. E.g. `log_annotations` will send a list of annotations to a particular target such as a `span` or a `project` + +## Pandas + +The client should make affordances to push and pull data from the phoenix server via `pandas` DataFrames. For all bulk operations, the method should be postfixed with `dataframe` so as to make it clear that the input and output is a dataframe. + +For example: + +```python +client.log_annotations_dataframe(dataframe=dataframe) +df = client.get_spans_dataframe(project_name="default") +``` + +## Transport + +For all IO to the phoenix server, JSON or JSONL over HTTP should be preferred. This is so that clients in other languages can be created (E.g. `TypeScript`), LLMs can easily interpret the data (fine-tuning), and so that non-homogeneous data can be sent over the wire (e.g. `metadata` dictionaries). + +In the case that a different format is needed (e.g. `DataFrame` or `CSV`), the client should perform the translation (i.e. be a fat client) unless there is a more specific endpoint that supports that MIME type. 
+ +For example: + +```python +client.log_annotations(annotations=annotations) + +# Syntactic sugar to log annotations as a dataframe +# Annotations are still sent over the wire as JSON +client.log_annotations_dataframe(dataframe=df) +``` From 1325c2151e167c78e2c848a00f13b50feff302b8 Mon Sep 17 00:00:00 2001 From: Priyan Jindal Date: Mon, 21 Apr 2025 22:31:02 +0000 Subject: [PATCH 09/12] docs: No subject (GITBOOK-1192) --- docs/prompt-engineering/overview-prompts/prompt-management.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/prompt-engineering/overview-prompts/prompt-management.md b/docs/prompt-engineering/overview-prompts/prompt-management.md index 01a8eb8a59..7477d320d3 100644 --- a/docs/prompt-engineering/overview-prompts/prompt-management.md +++ b/docs/prompt-engineering/overview-prompts/prompt-management.md @@ -4,7 +4,7 @@ description: Version and track changes made to prompt templates # Prompt Management -

Iterate on prampts, ship prompts when they are tested

+

Iterate on prompts, ship prompts when they are tested

From 85e0072f8609b7187d3f01c7d409f709d7e984b9 Mon Sep 17 00:00:00 2001 From: Sanjana Yeddula Date: Thu, 24 Apr 2025 18:46:31 +0000 Subject: [PATCH 10/12] docs: phoenix demo updates (GITBOOK-1191) --- docs/SUMMARY.md | 1 + docs/evaluation/llm-evals/agent-evaluation.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 47d309e203..2373383245 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -5,6 +5,7 @@ * [User Guide](user-guide.md) * [Deployment](deployment.md) * [Environments](environments.md) +* [Phoenix Demo](https://phoenix-demo.arize.com/projects) ## 🔭 Tracing diff --git a/docs/evaluation/llm-evals/agent-evaluation.md b/docs/evaluation/llm-evals/agent-evaluation.md index 40406e45bc..52ef3a2213 100644 --- a/docs/evaluation/llm-evals/agent-evaluation.md +++ b/docs/evaluation/llm-evals/agent-evaluation.md @@ -111,7 +111,7 @@ See our Agent Reflection evaluation template for a more specific example. See our [Agent Reflection evaluation template](../how-to-evals/running-pre-tested-evals/agent-reflection.md) for a specific example. -## Putting it all together +## Putting it all Together Through a combination of the evaluations above, you can get a far more accurate picture of how your agent is performing. 
From edbc8c13baa99afaae5ac1020ecb56a193aaaeb9 Mon Sep 17 00:00:00 2001 From: Sebastian Sosa <1sebastian1sosa1@gmail.com> Date: Sun, 27 Apr 2025 13:12:14 -0400 Subject: [PATCH 11/12] declarative eval formatting and linting --- .../src/phoenix/evals/declarative.py | 165 +++++++++--------- tests/unit/evals/test_declarative_eval.py | 86 ++++----- 2 files changed, 125 insertions(+), 126 deletions(-) diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py index 2abf58ae38..7fb911861a 100644 --- a/packages/phoenix-evals/src/phoenix/evals/declarative.py +++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py @@ -1,62 +1,30 @@ - +import asyncio +import json import time -from phoenix.client.utils.template_formatters import MustacheBaseTemplateFormatter -from phoenix.evals.models import BaseModel - -import inspect -import logging -import warnings -from collections import defaultdict -from enum import Enum -from functools import wraps -from itertools import product from typing import ( Any, - Callable, - DefaultDict, Dict, - Iterable, List, - Mapping, - NamedTuple, Optional, Tuple, - TypeVar, Union, ) -import json + import pandas as pd -from pandas import DataFrame -from typing_extensions import TypeAlias +from openai import AsyncOpenAI, OpenAI +from pydantic import BaseModel, Field, create_model +from tqdm import tqdm -from phoenix.evals.evaluators import LLMEvaluator -from phoenix.evals.exceptions import PhoenixTemplateMappingError -from phoenix.evals.executors import ExecutionStatus, get_executor_on_sync_context -from phoenix.evals.models import OpenAIModel, set_verbosity +from phoenix.client.utils.template_formatters import MustacheBaseTemplateFormatter from phoenix.evals.templates import ( - ClassificationTemplate, - MultimodalPrompt, - PromptOptions, - PromptPartTemplate, PromptPartContentType, - PromptTemplate, - normalize_classification_template, + PromptPartTemplate, ) from 
phoenix.evals.utils import ( - NOT_PARSABLE, get_tqdm_progress_bar_formatter, - openai_function_call_kwargs, - parse_openai_function_call, printif, - snap_to_rail, ) -from pydantic import BaseModel, Field, create_model -from typing import Union, List, Any, Optional, Callable -import pandas as pd -from openai import OpenAI, AsyncOpenAI -from tqdm import tqdm -import asyncio -import aiohttp + def transform_field_mappings_for_explanation(field_mappings: Dict[str, str]) -> Dict[str, str]: new_field_mappings = {} @@ -66,11 +34,14 @@ def transform_field_mappings_for_explanation(field_mappings: Dict[str, str]) -> new_field_mappings["explanation"] = "explanation" return new_field_mappings + async def declarative_eval( data: Union[pd.DataFrame, List[Any]], model: Union[OpenAI, AsyncOpenAI], schema: BaseModel, # Pydantic model class - field_mappings: Dict[str, str], # key is the openinference target field value, value is the path to the field in the schema + field_mappings: Dict[ + str, str + ], # key is the openinference target field value, value is the path to the field in the schema system_instruction: Optional[str] = None, verbose: bool = False, include_prompt: bool = False, @@ -99,14 +70,13 @@ async def declarative_eval( ``` {{output}} ``` - """ + """, ) - labels: Iterable[Optional[str]] = [None] * len(data) - explanations: Iterable[Optional[str]] = [None] * len(data) - scores: Iterable[Optional[float]] = [None] * len(data) - - default_system_instruction = "You will be provided the input passed to the llm and the generated output data to evaluate according to the specified schema." + default_system_instruction = """ + You will be provided the input passed to the llm + and the generated output data to evaluate according to the specified schema. 
+ """ # Convert data to consistent format if isinstance(data, pd.DataFrame): @@ -116,7 +86,6 @@ async def declarative_eval( dataframe = pd.DataFrame(data) dataframe_index = dataframe.index - if provide_explanation: # Update the schema ExplainedSchema = create_model( @@ -129,42 +98,55 @@ async def declarative_eval( # Update the field mappings field_mappings = transform_field_mappings_for_explanation(field_mappings) - - def _map_template(data: pd.Series) -> str: - output_str = formatter.format(template.template, variables={ + output_str = formatter.format( + template.template, + variables={ "input": json.dumps(data["attributes.llm.input_messages"]).replace("\\", "\\\\"), - "output": json.dumps(data["attributes.llm.output_messages"]).replace("\\", "\\\\") - } + "output": json.dumps(data["attributes.llm.output_messages"]).replace("\\", "\\\\"), + }, ) return output_str - async def _run_llm_eval_async(row_data: Tuple[int, pd.Series]) -> Tuple[pd.Series, Dict[str, Any], Optional[str], float]: + async def _run_llm_eval_async( + row_data: Tuple[int, pd.Series], + ) -> Tuple[pd.Series, Dict[str, Any], Optional[str], float]: # Guard clause if type(model) is OpenAI: raise ValueError("OpenAI is not supported for async operations") idx, row = row_data - # Handle async request - async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]: + # Handle async request + async def _make_request( + idx: int, row: pd.Series + ) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]: try: start_time = time.time() response = await model.beta.chat.completions.parse( model="gpt-4o-2024-08-06", messages=[ - {"role": "system", "content": system_instruction or default_system_instruction}, - {"role": "user", "content": _map_template(row)} + { + "role": "system", + "content": system_instruction or default_system_instruction, + }, + {"role": "user", "content": _map_template(row)}, ], response_format=schema, ) parsed_response = 
response.choices[0].message.parsed end_time = time.time() execution_seconds = end_time - start_time - printif(verbose, f"\n\nIndex: {idx}\nExecution time: {execution_seconds} s\nStructured output: {parsed_response.model_dump_json(indent=2)}\n\n") + printif( + verbose, + f"""\n\nIndex: {idx} + Execution time: {execution_seconds} s + Structured output: {parsed_response.model_dump_json(indent=2)} + \n\n""", + ) return idx, row, parsed_response, None, execution_seconds except Exception as e: return idx, row, None, str(e), 0 - + result = await _make_request(idx, row) # # create tasks @@ -178,23 +160,28 @@ async def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseM # idx, row, parsed_response, error, execution_seconds = await coro # results[idx] = (row, parsed_response, error, execution_seconds) # pbar.update(1) - + return result - def _run_llm_eval_sync(row_data: Tuple[int, pd.Series]) -> Tuple[pd.Series, Dict[str, Any]]: if type(model) is AsyncOpenAI: raise ValueError("AsyncOpenAI is not supported for sync operations") - + idx, row = row_data - def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]: + + def _make_request( + idx: int, row: pd.Series + ) -> Tuple[int, pd.Series, BaseModel, Optional[str], float]: try: start_time = time.time() response = model.beta.chat.completions.parse( model="gpt-4o-2024-08-06", messages=[ - {"role": "system", "content": system_instruction or default_system_instruction}, - {"role": "user", "content": _map_template(row)} + { + "role": "system", + "content": system_instruction or default_system_instruction, + }, + {"role": "user", "content": _map_template(row)}, ], response_format=schema, ) @@ -204,15 +191,16 @@ def _make_request(idx: int, row: pd.Series) -> Tuple[int, pd.Series, BaseModel, return idx, row, parsed_response, None, execution_seconds except Exception as e: return idx, row, None, str(e), 0 + result = _make_request(idx, row) # results = [None] * len(dataframe) 
# for idx, (_, row) in enumerate(dataframe.iterrows()): # idx, row, parsed_response, error, execution_seconds = _make_request(idx, row) # results[idx] = (row, parsed_response, error, execution_seconds) return result - + def _get_nested_value(obj: Dict[str, Any], path: str) -> Any: - parts = path.split('.') + parts = path.split(".") current = obj for part in parts: if part in current: @@ -220,8 +208,10 @@ def _get_nested_value(obj: Dict[str, Any], path: str) -> Any: else: return None return current - - def _extract_data_using_field_mappings(result: Tuple[pd.Series, BaseModel, Optional[str], float]) -> Dict[str, Any]: + + def _extract_data_using_field_mappings( + result: Tuple[pd.Series, BaseModel, Optional[str], float], + ) -> Dict[str, Any]: row, parsed_response, error, execution_seconds = result results_data = {} results_data["execution_seconds"] = execution_seconds @@ -234,19 +224,25 @@ def _extract_data_using_field_mappings(result: Tuple[pd.Series, BaseModel, Optio for schema_field, object_path in field_mappings.items(): json_schema_object = parsed_response.model_dump() results_data[schema_field] = _get_nested_value(json_schema_object, object_path) - + return results_data - - def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], float]]) -> List[Tuple[pd.Series, Dict[str, Any]]]: + + def _parse_results( + results: List[Tuple[pd.Series, BaseModel, Optional[str], float]], + ) -> List[Tuple[pd.Series, Dict[str, Any]]]: results_data = [] for result in results: _idx, row, model_response, error, execution_seconds = result - results_data.append((result[0], _extract_data_using_field_mappings( - (row, model_response, error, execution_seconds) - ))) + results_data.append( + ( + result[0], + _extract_data_using_field_mappings( + (row, model_response, error, execution_seconds) + ), + ) + ) return results_data - # # USING EXECUTOR (cannot be used without acceptable model) # fallback_return_value = (pd.Series(), {}, None, 0) # executor = 
get_executor_on_sync_context( @@ -258,7 +254,7 @@ def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], floa # max_retries=max_retries, # exit_on_error=exit_on_error, # fallback_return_value=fallback_return_value, - # ) + # ) # inputs = [ # row for _, row in dataframe.iterrows() @@ -269,16 +265,14 @@ def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], floa # print("results", results) # print("execution_details", execution_details) - inputs = [ - (idx, row) for idx, row in dataframe.iterrows() - ] + inputs = [(idx, row) for idx, row in dataframe.iterrows()] results = [] with tqdm(total=len(inputs), desc="Running Declarative Evaluations") as pbar: tasks = [] for input in inputs: task = _run_llm_eval_async(input) tasks.append(task) - + for task in asyncio.as_completed(tasks): result = await task results.append(result) @@ -286,9 +280,6 @@ def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], floa # results = _run_llm_eval_sync() results_data = _parse_results(results) - - - rows = [] outcome_results = [] for result in results_data: @@ -305,5 +296,5 @@ def _parse_results(results: List[Tuple[pd.Series, BaseModel, Optional[str], floa data=key_centric_results, index=dataframe_index, ) - + return results_data diff --git a/tests/unit/evals/test_declarative_eval.py b/tests/unit/evals/test_declarative_eval.py index fbb4a723bc..7a23ddbc2f 100644 --- a/tests/unit/evals/test_declarative_eval.py +++ b/tests/unit/evals/test_declarative_eval.py @@ -1,40 +1,53 @@ """ Test Declarative Eval """ -import asyncio + +from typing import Dict, Literal +from unittest.mock import AsyncMock, MagicMock + import pandas as pd import pytest -from typing import Dict, Any, List, Literal -from unittest.mock import AsyncMock, MagicMock, patch - from pydantic import BaseModel, Field + from phoenix.evals import declarative_eval, transform_field_mappings_for_explanation class Conciseness(BaseModel): is_concise: bool = Field(..., 
description="Whether the output is concise") + + class Formatting(BaseModel): - language: Literal["High", "Average", "Low"] = Field(..., description="The complexity of the formatting used in the output") + language: Literal["High", "Average", "Low"] = Field( + ..., description="The complexity of the formatting used in the output" + ) + + class Schema(BaseModel): conciseness: Conciseness = Field(..., description="A custom evaluation of the output") formatting: Formatting = Field(..., description="A custom evaluation of the output") + + class SchemaWithExplanation(BaseModel): schema: Schema = Field(..., description="The schema to evaluate") explanation: str = Field(..., description="An explanation of the evaluation") + @pytest.fixture def sample_dataframe() -> pd.DataFrame: """Sample dataframe to simulate ArizeExportClient(...).export_model_to_df(...)""" - return pd.DataFrame({ - "attributes.llm.input_messages": [ - [{"role": "user", "content": "What is 2+2?"}], - [{"role": "user", "content": "Who was the first president?"}], - ], - "attributes.llm.output_messages": [ - [{"role": "assistant", "content": "4"}], - [{"role": "assistant", "content": "George Washington"}], - ] - }) + return pd.DataFrame( + { + "attributes.llm.input_messages": [ + [{"role": "user", "content": "What is 2+2?"}], + [{"role": "user", "content": "Who was the first president?"}], + ], + "attributes.llm.output_messages": [ + [{"role": "assistant", "content": "4"}], + [{"role": "assistant", "content": "George Washington"}], + ], + } + ) + @pytest.fixture def correct_field_mappings() -> Dict[str, str]: @@ -44,6 +57,7 @@ def correct_field_mappings() -> Dict[str, str]: "formatting.label": "formatting.language", } + @pytest.fixture def incorrect_field_mappings() -> Dict[str, str]: """Incorrect field mappings for the Schema""" @@ -52,6 +66,7 @@ def incorrect_field_mappings() -> Dict[str, str]: "formatting.label": "formatting.not_language", } + @pytest.fixture def mock_parse_responses(): """Mock 
responses for two consecutive OpenAI parse API calls.""" @@ -61,12 +76,12 @@ def mock_parse_responses(): mock_resp.choices = [MagicMock()] mock_resp.choices[0].message = MagicMock() mock_resp.choices[0].message.parsed = Schema( - conciseness=Conciseness(is_concise=True), - formatting=Formatting(language="High") + conciseness=Conciseness(is_concise=True), formatting=Formatting(language="High") ) responses.append(mock_resp) return responses + @pytest.fixture def mock_client(mock_parse_responses): """Mock OpenAI client with predefined responses.""" @@ -74,15 +89,16 @@ def mock_client(mock_parse_responses): mock_client.beta = MagicMock() mock_client.beta.chat = MagicMock() mock_client.beta.chat.completions = MagicMock() - + # Set up the async mock to return different responses for each call mock_parse = AsyncMock() # Use side_effect to return a different response for each call mock_parse.side_effect = mock_parse_responses mock_client.beta.chat.completions.parse = mock_parse - + return mock_client + @pytest.fixture def mock_parse_responses_with_explanation(): """Mock responses for two consecutive OpenAI parse API calls.""" @@ -93,14 +109,14 @@ def mock_parse_responses_with_explanation(): mock_resp.choices[0].message = MagicMock() mock_resp.choices[0].message.parsed = SchemaWithExplanation( schema=Schema( - conciseness=Conciseness(is_concise=True), - formatting=Formatting(language="High") + conciseness=Conciseness(is_concise=True), formatting=Formatting(language="High") ), - explanation="Explanation" + explanation="Explanation", ) responses.append(mock_resp) return responses + @pytest.fixture def mock_client_with_explanation(mock_parse_responses_with_explanation): """Mock OpenAI client with predefined responses.""" @@ -108,23 +124,19 @@ def mock_client_with_explanation(mock_parse_responses_with_explanation): mock_client.beta = MagicMock() mock_client.beta.chat = MagicMock() mock_client.beta.chat.completions = MagicMock() - + # Set up the async mock to return different 
responses for each call mock_parse = AsyncMock() # Use side_effect to return a different response for each call mock_parse.side_effect = mock_parse_responses_with_explanation mock_client.beta.chat.completions.parse = mock_parse - - return mock_client - + return mock_client @pytest.mark.asyncio async def test_declarative_eval_correct_field_mappings( - sample_dataframe, - correct_field_mappings, - mock_client + sample_dataframe, correct_field_mappings, mock_client ): """Test declarative_eval with correct field mappings.""" result = await declarative_eval( @@ -143,9 +155,7 @@ async def test_declarative_eval_correct_field_mappings( @pytest.mark.asyncio async def test_declarative_eval_incorrect_field_mappings( - sample_dataframe, - incorrect_field_mappings, - mock_client + sample_dataframe, incorrect_field_mappings, mock_client ): """Test declarative_eval with correct field mappings.""" result = await declarative_eval( @@ -161,11 +171,10 @@ async def test_declarative_eval_incorrect_field_mappings( for fm_key in fm_keys: assert result[fm_key].tolist() == [None, None] + @pytest.mark.asyncio async def test_declarative_eval_with_explanation( - sample_dataframe, - correct_field_mappings, - mock_client_with_explanation + sample_dataframe, correct_field_mappings, mock_client_with_explanation ): """Test declarative_eval with explanations.""" result = await declarative_eval( @@ -177,19 +186,18 @@ async def test_declarative_eval_with_explanation( ) pre_transform_fm = correct_field_mappings - pre_transform_fm_keys = set(pre_transform_fm.keys()) pre_transform_fm_values = set(pre_transform_fm.values()) - + print(f"pre_transform_fm: {pre_transform_fm}") correct_field_mappings = transform_field_mappings_for_explanation(correct_field_mappings) print(f"correct_field_mappings: {correct_field_mappings}") fm_keys = set(correct_field_mappings.keys()) fm_values = set(correct_field_mappings.values()) - + assert isinstance(result, pd.DataFrame) assert result.shape[0] == 2 assert 
fm_keys.issubset(set(result.columns.tolist())) assert pre_transform_fm_values.isdisjoint(fm_values) assert result["conciseness.label"].tolist() == [True, True] assert result["formatting.label"].tolist() == ["High", "High"] - assert result["explanation"].tolist() == ["Explanation", "Explanation"] \ No newline at end of file + assert result["explanation"].tolist() == ["Explanation", "Explanation"] From ad06fb1b4500308a9ad86c6e3da2bf66798ecc06 Mon Sep 17 00:00:00 2001 From: Sebastian Sosa <1sebastian1sosa1@gmail.com> Date: Sun, 27 Apr 2025 14:44:38 -0400 Subject: [PATCH 12/12] declarative eval and util docs --- .../src/phoenix/evals/declarative.py | 128 ++++++++++++++++-- 1 file changed, 118 insertions(+), 10 deletions(-) diff --git a/packages/phoenix-evals/src/phoenix/evals/declarative.py b/packages/phoenix-evals/src/phoenix/evals/declarative.py index 7fb911861a..6d070d66c3 100644 --- a/packages/phoenix-evals/src/phoenix/evals/declarative.py +++ b/packages/phoenix-evals/src/phoenix/evals/declarative.py @@ -27,6 +27,25 @@ def transform_field_mappings_for_explanation(field_mappings: Dict[str, str]) -> Dict[str, str]: + """ + Transforms field mappings to work with a schema that includes an explanation field. + + This function takes field mappings that point to fields within a schema and transforms them + to work with a schema that wraps the original schema and adds an explanation field. + + Args: + field_mappings (Dict[str, str]): A dictionary mapping target field names to paths + within the original schema. For example, {"conciseness.label": "conciseness.is_concise"}. + + Returns: + Dict[str, str]: A new dictionary with transformed field mappings where each value is + prefixed with "schema." and an additional "explanation" mapping is added. 
+ + Example: + >>> field_mappings = {"conciseness.label": "conciseness.is_concise"} + >>> transform_field_mappings_for_explanation(field_mappings) + {'conciseness.label': 'schema.conciseness.is_concise', 'explanation': 'explanation'} + """ # noqa: E501 new_field_mappings = {} for key, value in field_mappings.items(): new_field_mappings[key] = f"schema.{value}" @@ -42,7 +61,7 @@ async def declarative_eval( field_mappings: Dict[ str, str ], # key is the openinference target field value, value is the path to the field in the schema - system_instruction: Optional[str] = None, + system_instruction: str = "You will be provided the input passed to the llm and the generated output data to evaluate according to the specified schema.", # noqa: E501 verbose: bool = False, include_prompt: bool = False, include_response: bool = False, @@ -55,8 +74,102 @@ async def declarative_eval( progress_bar_format: Optional[str] = get_tqdm_progress_bar_formatter("llm_classify"), ) -> pd.DataFrame: """ - Evaluates data using an LLM with a Pydantic schema to structure the output. - """ + Evaluates data using a declarative schema with an LLM. + + This function evaluates each row of the input data using a declarative schema with an LLM. + It returns a pandas DataFrame with the evaluation results mapped according to the provided field mappings. + + Args: + data (Union[pd.DataFrame, List[Any]]): A collection of data to evaluate with columns + that match the template variables "attributes.llm.input_messages" and + "attributes.llm.output_messages". + + model (Union[OpenAI, AsyncOpenAI]): An OpenAI client instance to use for evaluation. + + schema (BaseModel): A Pydantic model class defining the evaluation schema. + + field_mappings (Dict[str, str]): A dictionary mapping target field names to paths + within the schema. For example, {"conciseness.label": "conciseness.is_concise"}.
+ + system_instruction (str): A system message to guide the evaluation, defaults to + "You will be provided the input passed to the llm and the generated output data to evaluate according to the specified schema.". + + verbose (bool): If True, prints detailed information during evaluation. Default is False. + + include_prompt (bool): Not currently used. + + include_response (bool): Not currently used. + + include_exceptions (bool): Not currently used. + + provide_explanation (bool): If True, adds an explanation field to the schema and output. Default is False. + + max_retries (int): Not currently used. + + exit_on_error (bool): Not currently used. + + run_sync (bool): Not currently used. + + concurrency (Optional[int]): Not currently used. + + progress_bar_format (Optional[str]): Format for the progress bar. If None, progress bar is disabled. + + Returns: + pd.DataFrame: A DataFrame containing the evaluation results with columns mapped according + to the field_mappings parameter along with the execution time and any exceptions. + The DataFrame has the same length and index as the input data. + + Raises: + ValueError: If the input data doesn't contain required columns. Field mapping paths that cannot be resolved in the schema do not raise; their result columns contain None.
+ + Example: + ```python + # Define a schema with nested models + class Conciseness(BaseModel): + is_concise: bool = Field(..., description="Whether the output is concise") + + class Formatting(BaseModel): + language: Literal["High", "Average", "Low"] = Field( + ..., description="The complexity of the formatting used in the output" + ) + + class Schema(BaseModel): + conciseness: Conciseness = Field(..., description="A custom evaluation of the output") + formatting: Formatting = Field(..., description="A custom evaluation of the output") + + # Prepare sample data + data = pd.DataFrame({ + "attributes.llm.input_messages": [ + [{"role": "user", "content": "What is 2+2?"}], + [{"role": "user", "content": "Who was the first president?"}], + ], + "attributes.llm.output_messages": [ + [{"role": "assistant", "content": "Whenever you add those two numbers, you get 4"}], + [{"role": "assistant", "content": "George Washington"}], + ], + }) + + # Define field mappings + field_mappings = { + "conciseness.label": "conciseness.is_concise", + "formatting.label": "formatting.language", + } + + # Run the evaluation + result = await declarative_eval( + data=data, + model=openai_client, + schema=Schema, + field_mappings=field_mappings, + ) + + # Result will be a DataFrame with columns: + # - conciseness.label (containing boolean values) + # - formatting.label (containing "High", "Average", or "Low") + # - execution_seconds (execution time) + # - exceptions (any errors encountered) + ``` + """ # noqa: E501 formatter = MustacheBaseTemplateFormatter() template = PromptPartTemplate( @@ -73,11 +186,6 @@ async def declarative_eval( """, ) - default_system_instruction = """ - You will be provided the input passed to the llm - and the generated output data to evaluate according to the specified schema. 
- """ - # Convert data to consistent format if isinstance(data, pd.DataFrame): dataframe = data @@ -127,7 +235,7 @@ async def _make_request( messages=[ { "role": "system", - "content": system_instruction or default_system_instruction, + "content": system_instruction, }, {"role": "user", "content": _map_template(row)}, ], @@ -179,7 +287,7 @@ def _make_request( messages=[ { "role": "system", - "content": system_instruction or default_system_instruction, + "content": system_instruction, }, {"role": "user", "content": _map_template(row)}, ],