File tree 40 files changed +80
-102
lines changed
src/unitxt/catalog/metrics/llm_as_judge
40 files changed +80
-102
lines changed Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
7
"credentials" : {
8
8
"api_base" : " https://eteopenai.azure-api.net/openai/deployments/gpt-4o-2024-08-06/chat/completions?api-version=2024-08-01-preview"
9
9
},
10
- "model" : " azure/ gpt-4o-2024-08-06"
10
+ "model" : " gpt-4o-2024-08-06"
11
11
},
12
12
"evaluator_name" : " GPT4" ,
13
13
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
7
"credentials" : {
8
- "api_base" : " https://eteopenai.azure-api.net/openai/deployments/o1-mini-2024-09-12 /chat/completions?api-version=2024-08-01-preview"
8
+ "api_base" : " https://eteopenai.azure-api.net/openai/deployments/o1-mini/chat/completions?api-version=2024-08-01-preview"
9
9
},
10
- "model" : " azure/ o1-mini-2024-09-12 "
10
+ "model" : " o1-mini"
11
11
},
12
12
"evaluator_name" : " O1_MINI" ,
13
13
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
7
"credentials" : {
8
- "api_base" : " https://eteopenai.azure-api.net/openai/deployments/o1-preview-2024-09-12 /chat/completions?api-version=2024-08-01-preview"
8
+ "api_base" : " https://eteopenai.azure-api.net/openai/deployments/o1-preview/chat/completions?api-version=2024-08-01-preview"
9
9
},
10
- "model" : " azure/ o1-preview-2024-09-12 "
10
+ "model" : " o1-preview"
11
11
},
12
12
"evaluator_name" : " O1_PREVIEW" ,
13
13
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " openai/ gpt-4o-2024-08-06"
7
+ "model" : " gpt-4o-2024-08-06"
8
8
},
9
9
"evaluator_name" : " GPT4" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " openai/ o1-mini-2024-09-12 "
7
+ "model" : " o1-mini"
8
8
},
9
9
"evaluator_name" : " O1_MINI" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " openai/ o1-preview-2024-09-12 "
7
+ "model" : " o1-preview"
8
8
},
9
9
"evaluator_name" : " O1_PREVIEW" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " ibm- granite/granite-3.0 -8b-instruct"
7
+ "model" : " granite-3-8b-instruct"
8
8
},
9
9
"evaluator_name" : " GRANITE3_8B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " ibm- granite/granite-3. 1-8b-instruct"
7
+ "model" : " granite-3-1-8b-instruct"
8
8
},
9
9
"evaluator_name" : " GRANITE3_1_8B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " meta-llama/ llama-3-1-405b-instruct-fp8"
7
+ "model " : " llama-3-1-405b-instruct-fp8"
8
8
},
9
9
"evaluator_name" : " LLAMA3_1_405B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " meta-llama/ llama-3-1-70b-instruct"
7
+ "model " : " llama-3-1-70b-instruct"
8
8
},
9
9
"evaluator_name" : " LLAMA3_1_70B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " meta- llama/Llama-3.1-8B-Instruct "
7
+ "model" : " llama-3-1-8b-instruct"
8
8
},
9
9
"evaluator_name" : " LLAMA3_1_8B" ,
10
10
"generate_summaries" : false
Load Diff This file was deleted.
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " mistralai/ mixtral-8x7B -instruct-v0.1 "
7
+ "model" : " mixtral-8x7b-instruct"
8
8
},
9
9
"evaluator_name" : " MIXTRAL8_7b" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " mistralai/ mistral-large-instruct-2407 "
7
+ "model " : " mistral-large-instruct"
8
8
},
9
9
"evaluator_name" : " MIXTRAL_LARGE" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " watsonx/ibm/ granite-3-8b-instruct"
7
+ "model" : " granite-3-8b-instruct"
8
8
},
9
9
"evaluator_name" : " GRANITE3_8B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " watsonx/meta- llama/llama -3-405b-instruct"
7
+ "model" : " llama-3-1-405b-instruct-fp8"
8
8
},
9
9
"evaluator_name" : " LLAMA3_1_405B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " watsonx/meta-llama/ llama-3-1-70b-instruct"
7
+ "model" : " llama-3-1-70b-instruct"
8
8
},
9
9
"evaluator_name" : " LLAMA3_1_70B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " watsonx/meta- llama/llama -3-1-8b -instruct"
7
+ "model" : " llama-3-1-8b-instruct"
8
8
},
9
9
"evaluator_name" : " LLAMA3_1_8B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " watsonx/mistralai/ mixtral-8x7b-instruct-v01 "
7
+ "model" : " mixtral-8x7b-instruct"
8
8
},
9
9
"evaluator_name" : " MIXTRAL8_7b" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_direct" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " watsonx/mistralai/ mistral-large"
7
+ "model" : " mistral-large-instruct"
8
8
},
9
9
"evaluator_name" : " MIXTRAL_LARGE" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
7
"credentials" : {
8
8
"api_base" : " https://eteopenai.azure-api.net/openai/deployments/gpt-4o-2024-08-06/chat/completions?api-version=2024-08-01-preview"
9
9
},
10
- "model" : " azure/ gpt-4o-2024-08-06"
10
+ "model" : " gpt-4o-2024-08-06"
11
11
},
12
12
"evaluator_name" : " GPT4" ,
13
13
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
7
"credentials" : {
8
- "api_base" : " https://eteopenai.azure-api.net/openai/deployments/o1-mini-2024-09-12 /chat/completions?api-version=2024-08-01-preview"
8
+ "api_base" : " https://eteopenai.azure-api.net/openai/deployments/o1-mini/chat/completions?api-version=2024-08-01-preview"
9
9
},
10
- "model" : " azure/ o1-mini-2024-09-12 "
10
+ "model" : " o1-mini"
11
11
},
12
12
"evaluator_name" : " O1_MINI" ,
13
13
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
7
"credentials" : {
8
- "api_base" : " https://eteopenai.azure-api.net/openai/deployments/o1-preview-2024-09-12 /chat/completions?api-version=2024-08-01-preview"
8
+ "api_base" : " https://eteopenai.azure-api.net/openai/deployments/o1-preview/chat/completions?api-version=2024-08-01-preview"
9
9
},
10
- "model" : " azure/ o1-preview-2024-09-12 "
10
+ "model" : " o1-preview"
11
11
},
12
12
"evaluator_name" : " O1_PREVIEW" ,
13
13
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " openai/ gpt-4o-2024-08-06"
7
+ "model" : " gpt-4o-2024-08-06"
8
8
},
9
9
"evaluator_name" : " GPT4" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " openai/ o1-mini-2024-09-12 "
7
+ "model" : " o1-mini"
8
8
},
9
9
"evaluator_name" : " O1_MINI" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " lite_llm_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model" : " openai/ o1-preview-2024-09-12 "
7
+ "model" : " o1-preview"
8
8
},
9
9
"evaluator_name" : " O1_PREVIEW" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " ibm- granite/granite-3.0 -8b-instruct"
7
+ "model" : " granite-3-8b-instruct"
8
8
},
9
9
"evaluator_name" : " GRANITE3_8B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " ibm- granite/granite-3. 1-8b-instruct"
7
+ "model" : " granite-3-1-8b-instruct"
8
8
},
9
9
"evaluator_name" : " GRANITE3_1_8B" ,
10
10
"generate_summaries" : false
Original file line number Diff line number Diff line change 1
1
{
2
2
"__type__" : " llm_judge_pairwise" ,
3
3
"inference_engine" : {
4
- "__type__" : " rits_inference_engine " ,
4
+ "__type__" : " cross_provider_inference_engine " ,
5
5
"max_tokens" : 1024 ,
6
6
"seed" : 42 ,
7
- "model_name " : " meta-llama/ llama-3-1-405b-instruct-fp8"
7
+ "model " : " llama-3-1-405b-instruct-fp8"
8
8
},
9
9
"evaluator_name" : " LLAMA3_1_405B" ,
10
10
"generate_summaries" : false
You can’t perform that action at this time.
0 commit comments