Skip to content

Commit ac54608

Browse files
address review feedback
1 parent: 72eb775 · commit: ac54608

2 files changed

Lines changed: 21 additions & 25 deletions

File tree

src/agentevals/custom_evaluators.py

Lines changed: 20 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -68,12 +68,8 @@ def extensions(self) -> tuple[str, ...]:
6868
"""File extensions this runtime handles (e.g. ``(".py",)``)."""
6969

7070
@abc.abstractmethod
71-
def build_command(self, path: Path, python: Path | None = None) -> list[str]:
72-
"""Return the argv list to execute *path*.
73-
74-
For Python runtimes, *python* may point to a venv interpreter.
75-
Non-Python runtimes ignore this parameter.
76-
"""
71+
def build_command(self, path: Path) -> list[str]:
72+
"""Return the argv list to execute *path*."""
7773

7874
def is_available(self) -> bool:
7975
"""Return True if the runtime's interpreter is found on the system."""
@@ -85,6 +81,9 @@ def is_available(self) -> bool:
8581

8682

8783
class PythonRuntime(Runtime):
84+
def __init__(self, python_path: Path | None = None):
85+
self._python_path = python_path
86+
8887
@property
8988
def name(self) -> str:
9089
return "Python"
@@ -93,8 +92,8 @@ def name(self) -> str:
9392
def extensions(self) -> tuple[str, ...]:
9493
return (".py",)
9594

96-
def build_command(self, path: Path, python: Path | None = None) -> list[str]:
97-
exe = str(python) if python else sys.executable
95+
def build_command(self, path: Path) -> list[str]:
96+
exe = str(self._python_path) if self._python_path else sys.executable
9897
return [exe, str(path)]
9998

10099
def is_available(self) -> bool:
@@ -110,7 +109,7 @@ def name(self) -> str:
110109
def extensions(self) -> tuple[str, ...]:
111110
return (".js", ".ts")
112111

113-
def build_command(self, path: Path, python: Path | None = None) -> list[str]:
112+
def build_command(self, path: Path) -> list[str]:
114113
node = shutil.which("node")
115114
if not node:
116115
raise RuntimeError("Node.js not found on PATH (required for .js/.ts evaluators)")
@@ -208,22 +207,20 @@ class SubprocessBackend(EvaluatorBackend):
208207
"""Runs a local code file (.py, .js, .ts, …) as a subprocess.
209208
210209
The correct interpreter is resolved from the file extension via the
211-
:data:`_RUNTIMES` registry. When *venv_python* is provided, Python
212-
evaluators run inside that virtual environment instead of the host
213-
interpreter.
210+
:data:`_RUNTIMES` registry. Pass a pre-configured *runtime* to override
211+
the default (e.g. a :class:`PythonRuntime` with a venv interpreter).
214212
"""
215213

216-
def __init__(self, path: Path, timeout: int = 30, venv_python: Path | None = None):
214+
def __init__(self, path: Path, timeout: int = 30, runtime: Runtime | None = None):
217215
self._path = path.resolve()
218-
self._runtime = _resolve_runtime(self._path)
216+
self._runtime = runtime or _resolve_runtime(self._path)
219217
self._timeout = timeout
220-
self._venv_python = venv_python
221218

222219
if not self._path.exists():
223220
raise FileNotFoundError(f"Evaluator file not found: {self._path}")
224221

225222
async def run(self, eval_input: EvalInput, metric_name: str) -> EvalResult:
226-
cmd = self._runtime.build_command(self._path, self._venv_python)
223+
cmd = self._runtime.build_command(self._path)
227224
return await _run_subprocess(cmd, eval_input.model_dump_json(), self._timeout, metric_name)
228225

229226

@@ -232,18 +229,18 @@ async def run(self, eval_input: EvalInput, metric_name: str) -> EvalResult:
232229
# ---------------------------------------------------------------------------
233230

234231
_EXECUTOR_FACTORIES: dict[str, Callable[..., EvaluatorBackend]] = {
235-
"local": lambda path, timeout, venv_python=None: SubprocessBackend(path, timeout, venv_python),
232+
"local": lambda path, timeout, runtime=None: SubprocessBackend(path, timeout, runtime),
236233
}
237234

238235

239236
def create_executor(
240-
executor_name: str, path: Path, timeout: int = 30, venv_python: Path | None = None
237+
executor_name: str, path: Path, timeout: int = 30, runtime: Runtime | None = None
241238
) -> EvaluatorBackend:
242239
"""Construct an EvaluatorBackend by executor name (e.g. 'local', 'docker')."""
243240
factory = _EXECUTOR_FACTORIES.get(executor_name)
244241
if factory is None:
245242
raise ValueError(f"Unknown executor '{executor_name}'. Available: {sorted(_EXECUTOR_FACTORIES.keys())}")
246-
return factory(path, timeout, venv_python)
243+
return factory(path, timeout, runtime)
247244

248245

249246
def register_executor(name: str, factory: Callable[..., EvaluatorBackend]) -> None:
@@ -437,8 +434,7 @@ async def evaluate_custom_evaluator(
437434
if isinstance(evaluator_def, CodeEvaluatorDef):
438435
evaluator_path = Path(evaluator_def.path)
439436

440-
# Set up a venv if the evaluator ships a requirements.txt.
441-
venv_python: Path | None = None
437+
runtime: Runtime | None = None
442438
if evaluator_path.suffix == ".py":
443439
from .evaluator.venv import ensure_venv_async
444440

@@ -450,10 +446,10 @@ async def evaluate_custom_evaluator(
450446
metric_name=evaluator_def.name,
451447
error=f"Dependency installation failed: {exc}",
452448
)
449+
if venv_python:
450+
runtime = PythonRuntime(python_path=venv_python)
453451

454-
backend = create_executor(
455-
evaluator_def.executor, evaluator_path, evaluator_def.timeout, venv_python=venv_python
456-
)
452+
backend = create_executor(evaluator_def.executor, evaluator_path, evaluator_def.timeout, runtime=runtime)
457453
else:
458454
raise ValueError(f"Unsupported custom evaluator type: {type(evaluator_def).__name__}")
459455

src/agentevals/evaluator/venv.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ def _install_deps(venv_dir: Path, requirements: Path, uv: str | None) -> None:
6464

6565
subprocess.run(base + [sdk_spec], check=True, capture_output=True)
6666
logger.info("Installing dependencies from %s ...", requirements.name)
67-
subprocess.run(base + ["-r", str(requirements)], check=True)
67+
subprocess.run(base + ["-r", str(requirements)], check=True, capture_output=True)
6868

6969

7070
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)