-
Notifications
You must be signed in to change notification settings - Fork 56
/
Copy pathrun_specs.conf
62 lines (47 loc) · 4.42 KB
/
run_specs.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Run entries for the `neurips/local` model. Each element of `entries` is an
# object with a `description` string ("<scenario>:<comma-separated key=value
# arguments>") and a `priority`; every entry in this list uses priority 1.
#
# Formatting: elements are separated by newlines only (no commas), one entry
# per line, with the comment describing an entry placed directly above it.
# NOTE(review): several BIG-bench entries end in an empty `subtask=`;
# presumably that selects the task without a subtask -- confirm with the
# consumer of this file.
entries: [
  # ---- BIG-bench (max_train_instances=3 for every task) ----
  # analytic_entailment: https://github.com/google/BIG-bench/blob/main/bigbench/benchmark_tasks/analytic_entailment
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=analytic_entailment,subtask=", priority: 1}
  # causal_judgment: https://github.com/google/BIG-bench/blob/main/bigbench/benchmark_tasks/causal_judgment
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=causal_judgment,subtask=", priority: 1}
  # emoji_movie: https://github.com/google/big-bench/tree/main/bigbench/benchmark_tasks/emoji_movie
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=emoji_movie,subtask=", priority: 1}
  # empirical_judgments: https://github.com/google/big-bench/tree/main/bigbench/benchmark_tasks/empirical_judgments
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=empirical_judgments,subtask=", priority: 1}
  # known_unknowns: https://github.com/google/big-bench/tree/main/bigbench/benchmark_tasks/known_unknowns
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=known_unknowns,subtask=", priority: 1}
  # logical_deduction (three_objects subtask): https://github.com/google/big-bench/tree/main/bigbench/benchmark_tasks/logical_deduction
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=logical_deduction,subtask=three_objects", priority: 1}
  # strange_stories (multiple_choice subtask): https://github.com/google/big-bench/tree/main/bigbench/benchmark_tasks/strange_stories
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=strange_stories,subtask=multiple_choice", priority: 1}
  # snarks: https://github.com/google/BIG-bench/tree/main/bigbench/benchmark_tasks/snarks
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=snarks,subtask=", priority: 1}
  # dark_humor_detection: https://github.com/google/BIG-bench/tree/main/bigbench/benchmark_tasks/dark_humor_detection
  {description: "big_bench:model=neurips/local,max_train_instances=3,task=dark_humor_detection,subtask=", priority: 1}

  # ---- MMLU (one entry per subject, data_augmentation=canonical) ----
  {description: "mmlu:model=neurips/local,subject=philosophy,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_biology,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_chemistry,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_computer_science,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_european_history,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_geography,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_government_and_politics,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_macroeconomics,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_mathematics,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_microeconomics,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_physics,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_psychology,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_statistics,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_us_history,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=high_school_world_history,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=moral_disputes,data_augmentation=canonical", priority: 1}
  {description: "mmlu:model=neurips/local,subject=moral_scenarios,data_augmentation=canonical", priority: 1}

  # ---- TruthfulQA (task=mc_single) ----
  {description: "truthful_qa:task=mc_single,model=neurips/local", priority: 1}

  # ---- CNN/DailyMail summarization ----
  {description: "summarization_cnndm:model=neurips/local", priority: 1}

  # ---- GSM ----
  {description: "gsm:model=neurips/local", priority: 1}

  # ---- BBQ (subject=all) ----
  {description: "bbq:subject=all,model=neurips/local", priority: 1}
]