# eval.yaml
# Forked from NVIDIA-NeMo/RL.
# Evaluation Configuration
eval:
  # Scoring metric; "pass@k" and "cons@k" are supported.
  metric: "pass@k"
  # Every prompt is tested num_tests_per_prompt times and the average score
  # is used as the final score.
  num_tests_per_prompt: 1
  seed: 42
  # NOTE(review): presumably the k used by the pass@k / cons@k metric — confirm.
  k_value: 1
  # Path to save the evaluation results and the evaluation configuration.
  # Set to null to disable saving.
  # Example: "results/eval_output" or "/path/to/evaluation_results"
  save_path: null
generation:
  backend: "vllm"  # only vllm is supported for evaluation
  # Resolved from vllm_cfg.max_model_len below via interpolation.
  max_new_tokens: ${generation.vllm_cfg.max_model_len}
  temperature: 0.0
  top_p: 1.0
  top_k: -1  # -1 means disable
  num_prompts_per_step: -1  # -1 means pass all prompts at once
  model_name: "Qwen/Qwen2.5-Math-1.5B-Instruct"
  stop_token_ids: null
  stop_strings: null
  vllm_cfg:
    async_engine: false
    precision: "bfloat16"
    tensor_parallel_size: 1
    pipeline_parallel_size: 1
    expert_parallel_size: 1
    gpu_memory_utilization: 0.9
    # Also referenced by generation.max_new_tokens and
    # data.max_input_seq_length through interpolation.
    max_model_len: 2048
    enforce_eager: false
  colocated:
    # true: generation shares training GPUs
    # false: uses dedicated generation resources
    enabled: true
    # only relevant when enabled is false
    resources:
      # Decides the number of GPUs dedicated to generation when there is one
      # node in the cluster, i.e., cluster.num_nodes == 1.
      gpus_per_node: null
      # Decides the number of nodes dedicated to generation.
      num_nodes: null
tokenizer:
  # Defaults to the generation model; specify a different name if you'd like
  # to use a tokenizer other than the model's default.
  name: ${generation.model_name}
  chat_template: "default"
  # Can be used to pass kwargs to the chat template,
  # e.g., enable_thinking=true.
  chat_template_kwargs: null
data:
  # Has no effect in evaluation, since prompts are used directly.
  max_input_seq_length: ${generation.vllm_cfg.max_model_len}
  prompt_file: null
  system_prompt_file: null
  dataset_name: "aime2024"
# Per-environment settings, keyed by environment name.
env:
  math:
    num_workers: 8
# Cluster size; these keys are distinct from the
# generation.colocated.resources keys of the same name.
cluster:
  gpus_per_node: 1
  num_nodes: 1