Skip to content

Commit 88e3b3c

Browse files
feat: Improve client semantics (#7399)
* Update project_identifier semantics * Improve joining ergonomics for annotations dataframe * Ruff 🐶 * Optimize fetching project name * Ignore some pyright errors w/ pandas * Update type ignore
1 parent 8b8ce43 commit 88e3b3c

File tree

2 files changed

+91
-32
lines changed

2 files changed

+91
-32
lines changed

packages/phoenix-client/src/phoenix/client/resources/spans/__init__.py

+74-10
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import base64
12
import logging
23
from datetime import datetime, timezone, tzinfo
34
from io import StringIO
@@ -46,6 +47,7 @@ def get_spans_dataframe(
4647
end_time: Optional[datetime] = None,
4748
limit: int = 1000,
4849
root_spans_only: Optional[bool] = None,
50+
project_identifier: Optional[str] = None,
4951
project_name: Optional[str] = None,
5052
timeout: Optional[int] = DEFAULT_TIMEOUT_IN_SECONDS,
5153
) -> "pd.DataFrame":
@@ -58,7 +60,9 @@ def get_spans_dataframe(
5860
end_time: Optional end time for filtering.
5961
limit: Maximum number of spans to return.
6062
root_spans_only: Whether to return only root spans.
61-
project_name: Optional project name to filter by.
63+
project_name: Optional project name to filter by. Deprecated, use `project_identifier`
64+
to also specify by the project id.
65+
project_identifier: Optional project identifier (name or id) to filter by.
6266
timeout: Optional request timeout in seconds.
6367
6468
Returns:
@@ -86,6 +90,21 @@ def get_spans_dataframe(
8690

8791
_ = pd # Prevent unused symbol error
8892

93+
if project_identifier and project_name:
94+
raise ValueError("Provide only one of 'project_identifier' or 'project_name'.")
95+
elif project_identifier and not project_name:
96+
if _is_base64_project_identifier(project_identifier):
97+
project_response = self._client.get(
98+
url=f"v1/projects/{project_identifier}",
99+
headers={"accept": "application/json"},
100+
timeout=timeout,
101+
)
102+
project_response.raise_for_status()
103+
project = project_response.json()
104+
project_name = project["data"]["name"]
105+
else:
106+
project_name = project_identifier
107+
89108
response = self._client.post(
90109
url="v1/spans",
91110
headers={"accept": "application/json"},
@@ -120,7 +139,7 @@ def get_span_annotations_dataframe(
120139
*,
121140
spans_dataframe: Optional["pd.DataFrame"] = None,
122141
span_ids: Optional[Iterable[str]] = None,
123-
project: str = "default",
142+
project_identifier: str = "default",
124143
limit: int = 1000,
125144
timeout: Optional[int] = DEFAULT_TIMEOUT_IN_SECONDS,
126145
) -> "pd.DataFrame":
@@ -133,7 +152,7 @@ def get_span_annotations_dataframe(
133152
spans_dataframe: A DataFrame (typically returned by `get_spans_dataframe`) with a
134153
`context.span_id` or `span_id` column.
135154
span_ids: An iterable of span IDs.
136-
project: The project identifier (name or ID) used in the API path.
155+
project_identifier: The project identifier (name or ID) used in the API path.
137156
limit: Maximum number of annotations returned per request page.
138157
timeout: Optional request timeout in seconds.
139158
@@ -173,7 +192,7 @@ def get_span_annotations_dataframe(
173192
return pd.DataFrame()
174193

175194
annotations: list[v1.SpanAnnotation] = []
176-
path = f"v1/projects/{project}/span_annotations"
195+
path = f"v1/projects/{project_identifier}/span_annotations"
177196

178197
for i in range(0, len(span_ids_list), _MAX_SPAN_IDS_PER_REQUEST):
179198
batch_ids = span_ids_list[i : i + _MAX_SPAN_IDS_PER_REQUEST]
@@ -202,14 +221,16 @@ def get_span_annotations_dataframe(
202221
break # finished paginating this batch
203222

204223
df = pd.DataFrame(annotations)
205-
df.set_index("span_id", inplace=True)
224+
df = _flatten_nested_column(df, "result")
225+
df.rename(columns={"name": "annotation_name"}, inplace=True)
226+
df.set_index("span_id", inplace=True) # type: ignore[unused-ignore]
206227
return df
207228

208229
def get_span_annotations(
209230
self,
210231
*,
211232
span_ids: Iterable[str],
212-
project: str,
233+
project_identifier: str,
213234
limit: int = 1000,
214235
timeout: Optional[int] = DEFAULT_TIMEOUT_IN_SECONDS,
215236
) -> list[v1.SpanAnnotation]:
@@ -218,7 +239,7 @@ def get_span_annotations(
218239
219240
Args:
220241
span_ids: An iterable of span IDs.
221-
project: The project identifier (name or ID) used in the API path.
242+
project_identifier: The project identifier (name or ID) used in the API path.
222243
limit: Maximum number of annotations returned per request page.
223244
timeout: Optional request timeout in seconds.
224245
@@ -234,7 +255,7 @@ def get_span_annotations(
234255
return []
235256

236257
annotations: list[v1.SpanAnnotation] = []
237-
path = f"v1/projects/{project}/span_annotations"
258+
path = f"v1/projects/{project_identifier}/span_annotations"
238259

239260
for i in range(0, len(span_ids_list), _MAX_SPAN_IDS_PER_REQUEST):
240261
batch_ids = span_ids_list[i : i + _MAX_SPAN_IDS_PER_REQUEST]
@@ -290,6 +311,7 @@ async def get_spans_dataframe(
290311
limit: int = 1000,
291312
root_spans_only: Optional[bool] = None,
292313
project_name: Optional[str] = None,
314+
project_identifier: Optional[str] = None,
293315
timeout: Optional[int] = DEFAULT_TIMEOUT_IN_SECONDS,
294316
) -> "pd.DataFrame":
295317
"""
@@ -301,7 +323,9 @@ async def get_spans_dataframe(
301323
end_time: Optional end time for filtering.
302324
limit: Maximum number of spans to return.
303325
root_spans_only: Whether to return only root spans.
304-
project_name: Optional project name to filter by.
326+
project_name: Optional project name to filter by. Deprecated, use `project_identifier`
327+
to also specify by the project id.
328+
project_identifier: Optional project identifier (name or id) to filter by.
305329
timeout: Optional request timeout in seconds.
306330
307331
Returns:
@@ -329,6 +353,21 @@ async def get_spans_dataframe(
329353

330354
_ = pd # Prevent unused symbol error
331355

356+
if project_identifier and project_name:
357+
raise ValueError("Provide only one of 'project_identifier' or 'project_name'.")
358+
elif project_identifier and not project_name:
359+
if _is_base64_project_identifier(project_identifier):
360+
project_response = await self._client.get(
361+
url=f"v1/projects/{project_identifier}",
362+
headers={"accept": "application/json"},
363+
timeout=timeout,
364+
)
365+
project_response.raise_for_status()
366+
project = project_response.json()
367+
project_name = project["data"]["name"]
368+
else:
369+
project_name = project_identifier
370+
332371
response = await self._client.post(
333372
url="v1/spans",
334373
headers={"accept": "application/json"},
@@ -444,7 +483,9 @@ async def get_span_annotations_dataframe(
444483
break
445484

446485
df = pd.DataFrame(annotations)
447-
df.set_index("span_id", inplace=True)
486+
df = _flatten_nested_column(df, "result")
487+
df.rename(columns={"name": "annotation_name"}, inplace=True)
488+
df.set_index("span_id", inplace=True) # type: ignore[unused-ignore]
448489
return df
449490

450491
async def get_span_annotations(
@@ -565,4 +606,27 @@ def _process_span_dataframe(response: httpx.Response) -> "pd.DataFrame":
565606
return pd.DataFrame()
566607

567608

609+
def _is_base64_project_identifier(s: str) -> bool:
    """Return whether ``s`` is strict base64 whose decoded bytes start with ``Project:``.

    Callers use this to decide whether a project identifier should be treated
    as an ID (and resolved to a name through the API) or used directly as a
    project name.

    Args:
        s: The candidate project identifier.

    Returns:
        ``True`` if ``s`` decodes as base64 (with ``validate=True``, so any
        character outside the base64 alphabet is rejected) and the decoded
        payload begins with ``b"Project:"``; ``False`` otherwise, including
        when ``s`` cannot be decoded at all.
    """
    try:
        decoded = base64.b64decode(s, validate=True)
    except (ValueError, TypeError):
        # ValueError covers binascii.Error (bad alphabet / padding) and
        # non-ASCII strings; TypeError covers non-string input. Anything that
        # cannot be decoded is simply "not an encoded project ID".
        return False
    return decoded.startswith(b"Project:")
617+
618+
619+
def _flatten_nested_column(df: "pd.DataFrame", column_name: str) -> "pd.DataFrame":
    """Expand a column of dictionaries into flat, prefixed columns.

    Each key of the dictionaries stored in ``column_name`` becomes its own
    column named ``"<column_name>.<key>"`` (e.g. ``"result.label"``), and the
    original nested column is dropped.

    Args:
        df: The DataFrame to transform. It is not modified in place.
        column_name: Name of the column holding the nested dictionaries.

    Returns:
        A new DataFrame with the nested column replaced by the flattened
        columns, or ``df`` itself (unchanged) when ``column_name`` is absent.
    """
    import pandas as pd

    if column_name not in df.columns:
        return df

    # Cells that are not dictionaries (e.g. None for a missing value)
    # contribute no flattened values; mapping them to {} keeps one output row
    # per input row regardless of how json_normalize treats such entries.
    records = [cell if isinstance(cell, dict) else {} for cell in df[column_name]]
    nested_df = pd.json_normalize(records).add_prefix(f"{column_name}.")
    # The concat below aligns on index labels, so the flattened frame must
    # carry the same index as `df`; otherwise a non-default index on `df`
    # would yield misaligned / NaN-filled rows.
    nested_df.index = df.index
    return pd.concat([df.drop(columns=[column_name]), nested_df], axis=1)
630+
631+
568632
# Module-level timeout exception type.
# NOTE(review): this name shadows Python's builtin `TimeoutError` within this
# module — confirm that is intentional.
class TimeoutError(Exception): ...

tutorials/human_feedback/chatbot_with_human_feedback.ipynb

+17-22
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
},
3535
{
3636
"cell_type": "code",
37-
"execution_count": 1,
37+
"execution_count": null,
3838
"metadata": {},
3939
"outputs": [],
4040
"source": [
@@ -89,7 +89,7 @@
8989
},
9090
{
9191
"cell_type": "code",
92-
"execution_count": 2,
92+
"execution_count": null,
9393
"metadata": {},
9494
"outputs": [],
9595
"source": [
@@ -114,7 +114,7 @@
114114
},
115115
{
116116
"cell_type": "code",
117-
"execution_count": 4,
117+
"execution_count": null,
118118
"metadata": {},
119119
"outputs": [],
120120
"source": [
@@ -236,13 +236,13 @@
236236
},
237237
{
238238
"cell_type": "code",
239-
"execution_count": 1,
239+
"execution_count": null,
240240
"metadata": {},
241241
"outputs": [],
242242
"source": [
243-
"spans_df = client.spans.get_spans_dataframe(project_name=\"default\")\n",
243+
"spans_df = client.spans.get_spans_dataframe(project_identifier=\"default\")\n",
244244
"annotations_df = client.spans.get_span_annotations_dataframe(\n",
245-
" spans_dataframe=spans_df, project=\"default\"\n",
245+
" spans_dataframe=spans_df, project_identifier=\"default\"\n",
246246
")"
247247
]
248248
},
@@ -252,27 +252,22 @@
252252
"metadata": {},
253253
"outputs": [],
254254
"source": [
255-
"annotations_df.join(spans_df, how=\"inner\", lsuffix=\"_annotation\", rsuffix=\"_span\")"
255+
"annotations_df.join(spans_df, how=\"inner\")"
256+
]
257+
},
258+
{
259+
"cell_type": "code",
260+
"execution_count": null,
261+
"metadata": {},
262+
"outputs": [],
263+
"source": [
264+
"client.spans.get_span_annotations(span_ids=spans_df.index, project_identifier=\"default\")"
256265
]
257266
}
258267
],
259268
"metadata": {
260-
"kernelspec": {
261-
"display_name": "dev",
262-
"language": "python",
263-
"name": "python3"
264-
},
265269
"language_info": {
266-
"codemirror_mode": {
267-
"name": "ipython",
268-
"version": 3
269-
},
270-
"file_extension": ".py",
271-
"mimetype": "text/x-python",
272-
"name": "python",
273-
"nbconvert_exporter": "python",
274-
"pygments_lexer": "ipython3",
275-
"version": "3.11.9"
270+
"name": "python"
276271
}
277272
},
278273
"nbformat": 4,

0 commit comments

Comments
 (0)