3535from galileo .utils .metrics import populate_local_metrics
3636from galileo .utils .nop_logger import nop_async , nop_sync
3737from galileo .utils .serialization import serialize_to_str
38+ from galileo_core .helpers .execution import async_run
3839from galileo_core .schemas .logging .agent import AgentType
3940from galileo_core .schemas .logging .span import (
4041 AgentSpan ,
@@ -1016,22 +1017,8 @@ def conclude(
10161017
10171018 return current_parent
10181019
1019- @nop_sync
1020- def flush (self ) -> list [Trace ]:
1021- """
1022- Upload all traces to Galileo.
1023-
1024- Returns:
1025- -------
1026- List[Trace]: The list of uploaded traces.
1027- """
1028- if self .mode == "batch" :
1029- return self ._flush_batch ()
1030- else :
1031- self ._logger .warning ("Flushing in streaming mode is not supported." )
1032- return list ()
1033-
1034- def _flush_batch (self ):
1020+ async def _flush_batch (self , is_async : bool = False ) -> list [Trace ]:
1021+ # import pdb; pdb.set_trace()
10351022 if not self .traces :
10361023 self ._logger .info ("No traces to flush." )
10371024 return list ()
@@ -1044,7 +1031,7 @@ def _flush_batch(self):
10441031
10451032 if self .local_metrics :
10461033 self ._logger .info ("Computing local metrics..." )
1047- # TODO: parallelize, possibly with ThreadPoolExecutor
1034+ # TODO: parallelize, possibly with ThreadPoolExecutor/asyncio
10481035 for trace in self .traces :
10491036 populate_local_metrics (trace , self .local_metrics )
10501037
@@ -1053,7 +1040,11 @@ def _flush_batch(self):
10531040 traces_ingest_request = TracesIngestRequest (
10541041 traces = self .traces , experiment_id = self .experiment_id , session_id = self .session_id
10551042 )
1056- self ._client .ingest_traces_sync (traces_ingest_request )
1043+
1044+ if is_async :
1045+ await self ._client .ingest_traces (traces_ingest_request )
1046+ async_run (self ._client .ingest_traces (traces_ingest_request ))
1047+
10571048 logged_traces = self .traces
10581049
10591050 self ._logger .info ("Successfully flushed %d traces." , len (logged_traces ))
@@ -1072,39 +1063,38 @@ async def async_flush(self) -> list[Trace]:
10721063 List[Trace]: The list of uploaded workflows.
10731064 """
10741065 if self .mode == "batch" :
1075- return await self ._async_flush_batch ( )
1066+ return await self ._flush_batch ( is_async = True )
10761067 else :
10771068 self ._logger .warning ("Flushing in streaming mode is not supported." )
10781069 return list ()
10791070
1080- async def _async_flush_batch (self ) -> list [Trace ]:
1081- if not self .traces :
1082- self ._logger .info ("No traces to flush." )
1083- return list ()
1084-
1085- current_parent = self .current_parent ()
1086- if current_parent is not None :
1087- self ._logger .info ("Concluding the active trace..." )
1088- last_output = get_last_output (current_parent )
1089- self .conclude (output = last_output , conclude_all = True )
1090-
1091- if self .local_metrics :
1092- self ._logger .info ("Computing metrics for local scorers..." )
1093- # TODO: parallelize, possibly with asyncio to_thread/gather
1094- for trace in self .traces :
1095- populate_local_metrics (trace , self .local_metrics )
1096-
1097- self ._logger .info ("Flushing %d traces..." , len (self .traces ))
1098-
1099- traces_ingest_request = TracesIngestRequest (traces = self .traces , session_id = self .session_id )
1100- await self ._client .ingest_traces (traces_ingest_request )
1101- logged_traces = self .traces
1102-
1103- self ._logger .info ("Successfully flushed %d traces." , len (logged_traces ))
1071+ @nop_sync
1072+ def flush (self ) -> list [Trace ]:
1073+ """
1074+ Upload all traces to Galileo.
11041075
1105- self .traces = list ()
1106- self ._parent_stack = deque ()
1107- return logged_traces
1076+ Returns:
1077+ -------
1078+ List[Trace]: The list of uploaded traces.
1079+ """
1080+ if self .mode == "batch" :
1081+ # This is bad because asyncio.run() fails in environments with existing event loops
1082+ # (e.g. jupyter notebooks, FastAPI, etc. would fail with "cannot be called from a running event loop"
1083+ # Even though flush() is sync, it can be called from async contexts like:
1084+ # - Jupyter notebooks (which have their own event loop)
1085+ # - pytest-asyncio tests (where @mark.asyncio creates an event loop)
1086+ # - FastAPI/Django async views (where the web framework has an event loop)
1087+ # - Any async function that calls sync code
1088+ # The EventLoopThreadPool approach works in ALL environments by using dedicated threads
1089+ # return asyncio.run(self._flush_batch(is_async=False))
1090+
1091+ # This is good because async_run() uses EventLoopThreadPool which works in all environments
1092+ # by running async code in dedicated threads with their own event loops
1093+
1094+ return async_run (self ._flush_batch (is_async = False ))
1095+ else :
1096+ self ._logger .warning ("Flushing in streaming mode is not supported." )
1097+ return list ()
11081098
11091099 @nop_sync
11101100 def terminate (self ) -> None :
0 commit comments