
Commit ff28b70

committed: trying again
Signed-off-by: dafnapension <[email protected]>
1 parent c90534f commit ff28b70

File tree

5 files changed: +53 -1 lines changed


prepare/metrics/llm_as_judge/pairwise_rating/llama_3_arena_hard_template.py

Lines changed: 5 additions & 1 deletion
@@ -6,7 +6,11 @@
 )
 from unitxt.llm_as_judge import LLMAsJudge
 
-model_list = ["meta-llama/llama-3-8b-instruct", "meta-llama/llama-3-70b-instruct"]
+model_list = [
+    "meta-llama/llama-3-8b-instruct",
+    "meta-llama/llama-3-70b-instruct",
+    "meta-llama/llama-3-3-70b-instruct",
+]
 format = "formats.llama3_instruct"
 templates = [
     "templates.response_assessment.pairwise_comparative_rating.arena_hard",
New file
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+{
+    "__type__": "llm_as_judge",
+    "inference_model": {
+        "__type__": "generic_inference_engine",
+        "default": "engines.ibm_gen_ai.llama_3_70b_instruct"
+    },
+    "template": "templates.response_assessment.pairwise_comparative_rating.arena_hard",
+    "task": "pairwise_comparative_rating.single_turn",
+    "format": "formats.llama3_instruct",
+    "main_score": "llama_3_3_70b_instruct_generic_engine_template_arena_hard"
+}
New file
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+{
+    "__type__": "llm_as_judge",
+    "inference_model": {
+        "__type__": "generic_inference_engine",
+        "default": "engines.ibm_gen_ai.llama_3_70b_instruct"
+    },
+    "template": "templates.response_assessment.pairwise_comparative_rating.arena_hard_with_shuffling",
+    "task": "pairwise_comparative_rating.single_turn",
+    "format": "formats.llama3_instruct",
+    "main_score": "llama_3_3_70b_instruct_generic_engine_template_arena_hard_with_shuffling"
+}
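
The two JSON entries above are serialized catalog artifacts of the kind such a prepare script writes out: one judging with the plain arena_hard template and one with arena_hard_with_shuffling, which randomly swaps the two compared answers to counter position bias. Because the inference model is a generic_inference_engine, the engine that actually runs the judge is resolved at run time, falling back to the listed default. A small sketch of fetching such an entry from the local catalog; the catalog name is hypothetical, since the file paths are not visible in this view:

# Sketch: load a judge metric from the local catalog after running the prepare script.
# The catalog name below is hypothetical; use the name under which it was registered.
from unitxt.artifact import fetch_artifact

judge, _ = fetch_artifact(
    "metrics.llm_as_judge.pairwise_comparative_rating."
    "llama_3_3_70b_instruct_generic_engine_template_arena_hard"
)
print(type(judge).__name__)  # expected: LLMAsJudge
print(judge.main_score)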
New file
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+{
+    "__type__": "llm_as_judge",
+    "inference_model": {
+        "__type__": "wml_inference_engine",
+        "model_name": "meta-llama/llama-3-3-70b-instruct",
+        "max_new_tokens": 2048,
+        "random_seed": 42
+    },
+    "template": "templates.response_assessment.pairwise_comparative_rating.arena_hard",
+    "task": "pairwise_comparative_rating.single_turn",
+    "format": "formats.llama3_instruct",
+    "main_score": "llama_3_3_70b_instruct_ibm_wml_template_arena_hard"
+}
New file
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+{
+    "__type__": "llm_as_judge",
+    "inference_model": {
+        "__type__": "wml_inference_engine",
+        "model_name": "meta-llama/llama-3-3-70b-instruct",
+        "max_new_tokens": 2048,
+        "random_seed": 42
+    },
+    "template": "templates.response_assessment.pairwise_comparative_rating.arena_hard_with_shuffling",
+    "task": "pairwise_comparative_rating.single_turn",
+    "format": "formats.llama3_instruct",
+    "main_score": "llama_3_3_70b_instruct_ibm_wml_template_arena_hard_with_shuffling"
+}
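
The last two entries instead pin the judge to a concrete watsonx.ai model through a wml_inference_engine. A rough Python equivalent of the configuration above, assuming the generation parameters are passed to WMLInferenceEngine as keyword arguments as the JSON suggests; WML credentials must be configured in the environment for it to actually run:

# Sketch: rough Python equivalent of the WML-backed catalog entry above.
# Field values are copied from the JSON; running it requires watsonx.ai credentials.
from unitxt.inference import WMLInferenceEngine
from unitxt.llm_as_judge import LLMAsJudge

judge = LLMAsJudge(
    inference_model=WMLInferenceEngine(
        model_name="meta-llama/llama-3-3-70b-instruct",
        max_new_tokens=2048,
        random_seed=42,
    ),
    template="templates.response_assessment.pairwise_comparative_rating.arena_hard_with_shuffling",
    task="pairwise_comparative_rating.single_turn",
    format="formats.llama3_instruct",
    main_score="llama_3_3_70b_instruct_ibm_wml_template_arena_hard_with_shuffling",
)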
