1
+ import base64
1
2
import logging
2
3
from datetime import datetime , timezone , tzinfo
3
4
from io import StringIO
@@ -46,6 +47,7 @@ def get_spans_dataframe(
46
47
end_time : Optional [datetime ] = None ,
47
48
limit : int = 1000 ,
48
49
root_spans_only : Optional [bool ] = None ,
50
+ project_identifier : Optional [str ] = None ,
49
51
project_name : Optional [str ] = None ,
50
52
timeout : Optional [int ] = DEFAULT_TIMEOUT_IN_SECONDS ,
51
53
) -> "pd.DataFrame" :
@@ -58,7 +60,9 @@ def get_spans_dataframe(
58
60
end_time: Optional end time for filtering.
59
61
limit: Maximum number of spans to return.
60
62
root_spans_only: Whether to return only root spans.
61
- project_name: Optional project name to filter by.
63
+ project_name: Optional project name to filter by. Deprecated, use `project_identifier`
64
+ to also specify by the project id.
65
+ project_identifier: Optional project identifier (name or id) to filter by.
62
66
timeout: Optional request timeout in seconds.
63
67
64
68
Returns:
@@ -86,6 +90,21 @@ def get_spans_dataframe(
86
90
87
91
_ = pd # Prevent unused symbol error
88
92
93
+ if project_identifier and project_name :
94
+ raise ValueError ("Provide only one of 'project_identifier' or 'project_name'." )
95
+ elif project_identifier and not project_name :
96
+ if _is_base64_project_identifier (project_identifier ):
97
+ project_response = self ._client .get (
98
+ url = f"v1/projects/{ project_identifier } " ,
99
+ headers = {"accept" : "application/json" },
100
+ timeout = timeout ,
101
+ )
102
+ project_response .raise_for_status ()
103
+ project = project_response .json ()
104
+ project_name = project ["data" ]["name" ]
105
+ else :
106
+ project_name = project_identifier
107
+
89
108
response = self ._client .post (
90
109
url = "v1/spans" ,
91
110
headers = {"accept" : "application/json" },
@@ -120,7 +139,7 @@ def get_span_annotations_dataframe(
120
139
* ,
121
140
spans_dataframe : Optional ["pd.DataFrame" ] = None ,
122
141
span_ids : Optional [Iterable [str ]] = None ,
123
- project : str = "default" ,
142
+ project_identifier : str = "default" ,
124
143
limit : int = 1000 ,
125
144
timeout : Optional [int ] = DEFAULT_TIMEOUT_IN_SECONDS ,
126
145
) -> "pd.DataFrame" :
@@ -133,7 +152,7 @@ def get_span_annotations_dataframe(
133
152
spans_dataframe: A DataFrame (typically returned by `get_spans_dataframe`) with a
134
153
`context.span_id` or `span_id` column.
135
154
span_ids: An iterable of span IDs.
136
- project : The project identifier (name or ID) used in the API path.
155
+ project_identifier : The project identifier (name or ID) used in the API path.
137
156
limit: Maximum number of annotations returned per request page.
138
157
timeout: Optional request timeout in seconds.
139
158
@@ -173,7 +192,7 @@ def get_span_annotations_dataframe(
173
192
return pd .DataFrame ()
174
193
175
194
annotations : list [v1 .SpanAnnotation ] = []
176
- path = f"v1/projects/{ project } /span_annotations"
195
+ path = f"v1/projects/{ project_identifier } /span_annotations"
177
196
178
197
for i in range (0 , len (span_ids_list ), _MAX_SPAN_IDS_PER_REQUEST ):
179
198
batch_ids = span_ids_list [i : i + _MAX_SPAN_IDS_PER_REQUEST ]
@@ -202,14 +221,16 @@ def get_span_annotations_dataframe(
202
221
break # finished paginating this batch
203
222
204
223
df = pd .DataFrame (annotations )
205
- df .set_index ("span_id" , inplace = True )
224
+ df = _flatten_nested_column (df , "result" )
225
+ df .rename (columns = {"name" : "annotation_name" }, inplace = True )
226
+ df .set_index ("span_id" , inplace = True ) # type: ignore[unused-ignore]
206
227
return df
207
228
208
229
def get_span_annotations (
209
230
self ,
210
231
* ,
211
232
span_ids : Iterable [str ],
212
- project : str ,
233
+ project_identifier : str ,
213
234
limit : int = 1000 ,
214
235
timeout : Optional [int ] = DEFAULT_TIMEOUT_IN_SECONDS ,
215
236
) -> list [v1 .SpanAnnotation ]:
@@ -218,7 +239,7 @@ def get_span_annotations(
218
239
219
240
Args:
220
241
span_ids: An iterable of span IDs.
221
- project : The project identifier (name or ID) used in the API path.
242
+ project_identifier : The project identifier (name or ID) used in the API path.
222
243
limit: Maximum number of annotations returned per request page.
223
244
timeout: Optional request timeout in seconds.
224
245
@@ -234,7 +255,7 @@ def get_span_annotations(
234
255
return []
235
256
236
257
annotations : list [v1 .SpanAnnotation ] = []
237
- path = f"v1/projects/{ project } /span_annotations"
258
+ path = f"v1/projects/{ project_identifier } /span_annotations"
238
259
239
260
for i in range (0 , len (span_ids_list ), _MAX_SPAN_IDS_PER_REQUEST ):
240
261
batch_ids = span_ids_list [i : i + _MAX_SPAN_IDS_PER_REQUEST ]
@@ -290,6 +311,7 @@ async def get_spans_dataframe(
290
311
limit : int = 1000 ,
291
312
root_spans_only : Optional [bool ] = None ,
292
313
project_name : Optional [str ] = None ,
314
+ project_identifier : Optional [str ] = None ,
293
315
timeout : Optional [int ] = DEFAULT_TIMEOUT_IN_SECONDS ,
294
316
) -> "pd.DataFrame" :
295
317
"""
@@ -301,7 +323,9 @@ async def get_spans_dataframe(
301
323
end_time: Optional end time for filtering.
302
324
limit: Maximum number of spans to return.
303
325
root_spans_only: Whether to return only root spans.
304
- project_name: Optional project name to filter by.
326
+ project_name: Optional project name to filter by. Deprecated, use `project_identifier`
327
+ to also specify by the project id.
328
+ project_identifier: Optional project identifier (name or id) to filter by.
305
329
timeout: Optional request timeout in seconds.
306
330
307
331
Returns:
@@ -329,6 +353,21 @@ async def get_spans_dataframe(
329
353
330
354
_ = pd # Prevent unused symbol error
331
355
356
+ if project_identifier and project_name :
357
+ raise ValueError ("Provide only one of 'project_identifier' or 'project_name'." )
358
+ elif project_identifier and not project_name :
359
+ if _is_base64_project_identifier (project_identifier ):
360
+ project_response = await self ._client .get (
361
+ url = f"v1/projects/{ project_identifier } " ,
362
+ headers = {"accept" : "application/json" },
363
+ timeout = timeout ,
364
+ )
365
+ project_response .raise_for_status ()
366
+ project = project_response .json ()
367
+ project_name = project ["name" ]
368
+ else :
369
+ project_name = project_identifier
370
+
332
371
response = await self ._client .post (
333
372
url = "v1/spans" ,
334
373
headers = {"accept" : "application/json" },
@@ -444,7 +483,9 @@ async def get_span_annotations_dataframe(
444
483
break
445
484
446
485
df = pd .DataFrame (annotations )
447
- df .set_index ("span_id" , inplace = True )
486
+ df = _flatten_nested_column (df , "result" )
487
+ df .rename (columns = {"name" : "annotation_name" }, inplace = True )
488
+ df .set_index ("span_id" , inplace = True ) # type: ignore[unused-ignore]
448
489
return df
449
490
450
491
async def get_span_annotations (
@@ -565,4 +606,27 @@ def _process_span_dataframe(response: httpx.Response) -> "pd.DataFrame":
565
606
return pd .DataFrame ()
566
607
567
608
609
def _is_base64_project_identifier(s: str) -> bool:
    """Return whether ``s`` is strict base64 whose decoded bytes begin with ``Project:``.

    Args:
        s: Candidate project identifier.

    Returns:
        True only when ``s`` decodes under strict base64 validation and the decoded
        payload starts with ``b"Project:"``. False for anything else, including
        input that cannot be decoded at all.
    """
    try:
        # validate=True rejects characters outside the base64 alphabet instead of
        # discarding them before decoding.
        decoded = base64.b64decode(s, validate=True)
    except (ValueError, TypeError):
        # binascii.Error (malformed base64) subclasses ValueError; non-ASCII text
        # also raises ValueError, and non-string input raises TypeError.
        return False
    return decoded.startswith(b"Project:")
617
+
618
+
619
def _flatten_nested_column(df: "pd.DataFrame", column_name: str) -> "pd.DataFrame":
    """Expand a column of dictionaries into one column per key.

    Each new column is named ``"<column_name>.<key>"`` (e.g. ``"result.label"``)
    and the original column is dropped.

    Args:
        df: DataFrame that may contain ``column_name``.
        column_name: Name of the column holding nested dictionaries.

    Returns:
        ``df`` itself, unchanged, when ``column_name`` is not one of its columns;
        otherwise a new DataFrame with the nested column replaced by its
        flattened columns. Rows whose value is not a dict (for example NaN)
        get missing values in the flattened columns.
    """
    import pandas as pd

    if column_name not in df.columns:
        return df

    # json_normalize cannot handle non-dict entries such as NaN, so treat those
    # rows as empty records.
    records = [value if isinstance(value, dict) else {} for value in df[column_name]]
    nested_df = pd.json_normalize(records).rename(
        columns=lambda col: f"{column_name}.{col}"
    )
    # The normalized frame is numbered 0..n-1; reuse the caller's index so the
    # column-wise concat below lines rows up by position, not by label.
    nested_df.index = df.index
    return pd.concat([df.drop(columns=[column_name]), nested_df], axis=1)
630
+
631
+
568
632
class TimeoutError(Exception):
    """Module-level timeout error deriving directly from ``Exception``.

    Within this module the name shadows the builtin ``TimeoutError``.
    """
0 commit comments