# eval_config.yaml
# Example eval config combining built-in evaluators with custom code-based
# evaluators, a remote (GitHub-hosted) evaluator, and OpenAI Evals API graders.
#
# Run with:
#   agentevals run samples/helm.json \
#     --config examples/custom_evaluators/eval_config.yaml \
#     --eval-set samples/eval_set_helm.json
# List of evaluators to run. Each entry has a `name` and a `type`; the other
# keys depend on the type (`path`/`config` for code, `source`/`ref`/`executor`
# for remote, `grader` for openai_eval).
evaluators:
  # Built-in metric
  - name: tool_trajectory_avg_score
    type: builtin

  # Custom code evaluators (local scripts)
  # NOTE(review): paths look relative to the repository root (the directory
  # the run command is issued from) — confirm against the loader.
  - name: tool_call_checker
    type: code
    path: ./examples/custom_evaluators/tool_call_checker.py
    threshold: 1.0
    # Free-form settings handed to the script; keys are defined by the script.
    config:
      min_tool_calls: 1

  - name: response_quality
    type: code
    path: ./examples/custom_evaluators/response_quality.py
    threshold: 0.7
    config:
      min_response_length: 20

  # Reference an evaluator from GitHub
  - name: random_evaluator
    type: remote
    source: github
    ref: evaluators/random_evaluator/random_evaluator.py
    # NOTE(review): parses as the float 0.11 — confirm this is the intended
    # value and not a typo.
    threshold: 0.110
    executor: local

  # OpenAI Evals API graders (requires OPENAI_API_KEY)
  - name: response_similarity
    type: openai_eval
    threshold: 0.8
    grader:
      type: text_similarity
      evaluation_metric: fuzzy_match

  # NOTE(review): no `threshold` is set for this evaluator — presumably a
  # default applies; verify.
  - name: city_name_check
    type: openai_eval
    grader:
      type: string_check
      operation: eq
      # Quoted so the leading `{{` is not parsed as a YAML flow mapping.
      reference: "{{ item.expected_response }}"