Merge pull request #1066 from UiPath/fix/eval-sequential-runtime-isolation

Chibionos · web-flow · commit 2a557c8a11dd · 2026-01-05T18:57:36.000-08:00
fix: create isolated runtime per eval execution
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uipath"
-version = "2.4.2"
+version = "2.4.3"
 description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py
@@ -613,42 +613,55 @@ async def execute_runtime(
             "evalId": eval_item.id,
             "span_type": "eval",
         }
-        execution_runtime = UiPathExecutionRuntime(
-            delegate=runtime,
-            trace_manager=self.trace_manager,
-            log_handler=log_handler,
-            execution_id=execution_id,
-            span_attributes=attributes,
-        )
 
-        start_time = time()
+        # Create a new runtime with a unique runtime_id for this eval execution.
+        # This ensures each eval has its own LangGraph thread_id (clean state),
+        # preventing message accumulation across eval runs.
+        eval_runtime = None
         try:
-            result = await execution_runtime.execute(
-                input=eval_item.inputs,
+            eval_runtime = await self.factory.new_runtime(
+                entrypoint=self.context.entrypoint or "",
+                runtime_id=execution_id,
             )
-        except Exception as e:
+            execution_runtime = UiPathExecutionRuntime(
+                delegate=eval_runtime,
+                trace_manager=self.trace_manager,
+                log_handler=log_handler,
+                execution_id=execution_id,
+                span_attributes=attributes,
+            )
+
+            start_time = time()
+            try:
+                result = await execution_runtime.execute(
+                    input=eval_item.inputs,
+                )
+            except Exception as e:
+                end_time = time()
+                spans, logs = self._get_and_clear_execution_data(execution_id)
+
+                raise EvaluationRuntimeException(
+                    spans=spans,
+                    logs=logs,
+                    root_exception=e,
+                    execution_time=end_time - start_time,
+                ) from e
+
             end_time = time()
             spans, logs = self._get_and_clear_execution_data(execution_id)
 
-            raise EvaluationRuntimeException(
+            if result is None:
+                raise ValueError("Execution result cannot be None for eval runs")
+
+            return UiPathEvalRunExecutionOutput(
+                execution_time=end_time - start_time,
                 spans=spans,
                 logs=logs,
-                root_exception=e,
-                execution_time=end_time - start_time,
-            ) from e
-
-        end_time = time()
-        spans, logs = self._get_and_clear_execution_data(execution_id)
-
-        if result is None:
-            raise ValueError("Execution result cannot be None for eval runs")
-
-        return UiPathEvalRunExecutionOutput(
-            execution_time=end_time - start_time,
-            spans=spans,
-            logs=logs,
-            result=result,
-        )
+                result=result,
+            )
+        finally:
+            if eval_runtime is not None:
+                await eval_runtime.dispose()
 
     def _setup_execution_logging(
         self, eval_item_id: str
diff --git a/uv.lock b/uv.lock