Skip to content

Commit cd6620b

Browse files
committed
pr feedback
Signed-off-by: Peter Jausovec <peter.jausovec@solo.io>
1 parent 270908a commit cd6620b

7 files changed

Lines changed: 38 additions & 16 deletions

File tree

docs/custom-evaluators.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,12 @@ def response_quality(input: EvalInput) -> EvalResult:
5252
score=sum(scores) / len(scores) if scores else 0.0,
5353
per_invocation_scores=scores,
5454
)
55+
56+
if __name__ == "__main__":
57+
response_quality.run()
5558
```
5659

57-
The `@evaluator` decorator handles all the stdin/stdout plumbing. Your function receives an `EvalInput` and returns an `EvalResult`.
60+
The `@evaluator` decorator marks your function as an evaluator. Call `.run()` to execute it as a stdin/stdout script: it reads the `EvalInput` as JSON from stdin, calls your function, and writes the returned `EvalResult` to stdout. The decorated function can still be called directly in tests.
5861

5962
### 3. Add it to your eval config
6063

examples/custom_evaluators/response_quality.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,7 @@ def response_quality(input: EvalInput) -> EvalResult:
5555
per_invocation_scores=scores,
5656
details={"issues": issues} if issues else None,
5757
)
58+
59+
60+
if __name__ == "__main__":
61+
response_quality.run()

examples/custom_evaluators/tool_call_checker.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,7 @@ def tool_call_checker(input: EvalInput) -> EvalResult:
3030
score=overall,
3131
per_invocation_scores=scores,
3232
)
33+
34+
35+
if __name__ == "__main__":
36+
tool_call_checker.run()

packages/evaluator-sdk-py/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,12 @@ def my_evaluator(input: EvalInput) -> EvalResult:
2626
score=sum(scores) / len(scores) if scores else 0.0,
2727
per_invocation_scores=scores,
2828
)
29+
30+
if __name__ == "__main__":
31+
my_evaluator.run()
2932
```
3033

31-
The `@evaluator` decorator turns your function into a runnable script -- just execute it with `python my_evaluator.py`. It reads JSON from stdin, calls your function, and writes the result to stdout.
34+
The `@evaluator` decorator marks your function as a runnable evaluator. Call `.run()` to execute it as a stdin/stdout script -- it reads JSON from stdin, calls your function, and writes the result to stdout. The decorated function can still be called directly in tests.
3235

3336
## Types
3437

packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ def my_evaluator(input: EvalInput) -> EvalResult:
1313
if not inv.final_response:
1414
score -= 0.5
1515
return EvalResult(score=max(0.0, score))
16+
17+
if __name__ == "__main__":
18+
my_evaluator.run()
1619
"""
1720

1821
from .decorator import evaluator

packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
import asyncio
6+
import functools
67
import inspect
78
import sys
89
import traceback
@@ -12,16 +13,10 @@
1213

1314

1415
def evaluator(fn: Callable[[EvalInput], EvalResult]) -> Callable[[EvalInput], EvalResult]:
15-
"""Decorator that turns an evaluator function into a runnable stdin/stdout script.
16+
"""Decorator that marks a function as a runnable evaluator.
1617
17-
When the decorated module is executed (``python my_evaluator.py``), it:
18-
1. Reads JSON from stdin and parses it into an :class:`EvalInput`.
19-
2. Calls the decorated function with the parsed input.
20-
3. Serializes the returned :class:`EvalResult` to stdout as JSON.
21-
22-
The decorated function can be sync or async.
23-
24-
Example::
18+
The decorated function can still be called normally (e.g. in tests).
19+
To run it as a stdin/stdout script, call ``.run()``::
2520
2621
from agentevals_evaluator_sdk import evaluator, EvalInput, EvalResult
2722
@@ -32,9 +27,17 @@ def format_check(input: EvalInput) -> EvalResult:
3227
if not inv.final_response:
3328
score -= 0.5
3429
return EvalResult(score=max(0.0, score))
30+
31+
if __name__ == "__main__":
32+
format_check.run()
3533
"""
3634

37-
def _run() -> None:
35+
@functools.wraps(fn)
36+
def wrapper(*args, **kwargs):
37+
return fn(*args, **kwargs)
38+
39+
def run() -> None:
40+
"""Read EvalInput from stdin, call the evaluator, write EvalResult to stdout."""
3841
raw = sys.stdin.read()
3942
if not raw.strip():
4043
_write_error("No input received on stdin")
@@ -65,10 +68,8 @@ def _run() -> None:
6568
sys.stdout.write("\n")
6669
sys.stdout.flush()
6770

68-
import atexit
69-
atexit.register(_run)
70-
71-
return fn
71+
wrapper.run = run
72+
return wrapper
7273

7374

7475
def _write_error(msg: str) -> None:

src/agentevals/evaluator/templates.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ def ${name}(input: EvalInput) -> EvalResult:
4343
score=overall,
4444
per_invocation_scores=scores,
4545
)
46+
47+
48+
if __name__ == "__main__":
49+
${name}.run()
4650
''')
4751

4852

0 commit comments

Comments
 (0)