@@ -52,23 +52,6 @@ def _relative_time(iso_str: str | None) -> str:
5252 return ""
5353
5454
def _apply_builtin_overrides(evaluators, *, judge_model, threshold, trajectory_match_type):
    """Return a new list of evaluators with overrides applied to builtin ones.

    An evaluator whose ``type`` attribute equals ``"builtin"`` is dumped with
    ``model_dump(by_alias=False)``, patched with every override that is not
    ``None``, and rebuilt through ``type(evaluator).model_validate`` so the
    patched payload goes through validation again. Every other evaluator
    (including objects with no ``type`` attribute) is passed through as the
    very same object.

    Args:
        evaluators: Iterable of evaluator entries. Builtin entries must
            provide ``model_dump`` and a class-level ``model_validate``
            (presumably pydantic models -- TODO confirm against ``.config``).
        judge_model: New value for the ``"judge_model"`` field, or ``None``
            to leave the existing value untouched.
        threshold: New value for the ``"threshold"`` field, or ``None`` to
            leave the existing value untouched.
        trajectory_match_type: New value for the ``"trajectory_match_type"``
            field, or ``None`` to leave the existing value untouched.

    Returns:
        A new list in the same order as ``evaluators``. The input sequence
        itself is not modified.
    """
    # The set of overrides is the same for every evaluator, so build it once
    # instead of re-checking each option inside the loop.
    overrides = {
        field: value
        for field, value in (
            ("judge_model", judge_model),
            ("threshold", threshold),
            ("trajectory_match_type", trajectory_match_type),
        )
        if value is not None
    }

    updated = []
    for evaluator in evaluators:
        if getattr(evaluator, "type", None) != "builtin":
            # Non-builtin entries are kept as-is (same object identity).
            updated.append(evaluator)
            continue
        payload = evaluator.model_dump(by_alias=False)
        payload.update(overrides)
        # Rebuild through the evaluator's own class so a subclass stays a
        # subclass and the overridden values are validated.
        updated.append(type(evaluator).model_validate(payload))
    return updated
70-
71-
7255@click .group ()
7356@click .version_option (version = __version__ , prog_name = "agentevals" )
7457@click .option (
@@ -160,61 +143,54 @@ def run(
160143 config_file : str | None ,
161144) -> None :
162145 """Evaluate trace file(s) against the configured evaluators."""
163- from .config import EvalRunConfig , make_builtin_evaluator_entries
146+ from .config import EvalRunConfig , apply_builtin_overrides , make_builtin_evaluator_entries
164147 from .output import format_results
165148 from .runner import run_evaluation
166149
167150 explicit_metrics = list (metric ) if metric else []
168151
169152 if config_file :
170- from .eval_config_loader import load_eval_config , merge_configs
153+ from .eval_config_loader import load_eval_config
171154
172- file_config = load_eval_config (config_file )
173- config = file_config
155+ config = load_eval_config (config_file )
174156 if explicit_metrics :
175- cli_config = EvalRunConfig (
176- trace_files = [],
177- evaluators = make_builtin_evaluator_entries (
178- explicit_metrics ,
179- judge_model = judge_model ,
180- threshold = threshold ,
181- trajectory_match_type = trajectory_match_type ,
182- ),
157+ cli_evaluators = make_builtin_evaluator_entries (
158+ explicit_metrics ,
159+ judge_model = judge_model ,
160+ threshold = threshold ,
161+ trajectory_match_type = trajectory_match_type ,
183162 )
184- config = merge_configs (file_config , cli_config )
163+ by_name = {e .name : e for e in config .evaluators }
164+ for ev in cli_evaluators :
165+ by_name [ev .name ] = ev
166+ config .evaluators = list (by_name .values ())
185167 elif judge_model is not None or threshold is not None or trajectory_match_type is not None :
186- config = config .model_copy (
187- update = {
188- "evaluators" : _apply_builtin_overrides (
189- config .evaluators ,
190- judge_model = judge_model ,
191- threshold = threshold ,
192- trajectory_match_type = trajectory_match_type ,
193- )
194- }
168+ config .evaluators = apply_builtin_overrides (
169+ config .evaluators ,
170+ judge_model = judge_model ,
171+ threshold = threshold ,
172+ trajectory_match_type = trajectory_match_type ,
195173 )
196- if trace_files :
197- config .trace_files = list (trace_files )
198- if eval_set is not None :
199- config .eval_set_file = eval_set
200- if trace_format is not None :
201- config .trace_format = trace_format
202- if output != "table" :
203- config .output_format = output
204174 else :
205175 config = EvalRunConfig (
206- trace_files = list (trace_files ),
207- eval_set_file = eval_set ,
176+ trace_files = [],
208177 evaluators = make_builtin_evaluator_entries (
209- explicit_metrics if explicit_metrics else None ,
178+ explicit_metrics or None ,
210179 judge_model = judge_model ,
211180 threshold = threshold ,
212181 trajectory_match_type = trajectory_match_type ,
213182 ),
214- trace_format = trace_format ,
215- output_format = output ,
216183 )
217184
185+ if trace_files :
186+ config .trace_files = list (trace_files )
187+ if eval_set is not None :
188+ config .eval_set_file = eval_set
189+ if trace_format is not None :
190+ config .trace_format = trace_format
191+ if output != "table" :
192+ config .output_format = output
193+
218194 result = asyncio .run (run_evaluation (config ))
219195 formatted = format_results (result , fmt = config .output_format )
220196 click .echo (formatted )
0 commit comments