Skip to content

Commit ab736be

Browse files
committed
Adapt LLM Judge catalog
Signed-off-by: Martín Santillán Cooper <[email protected]>
1 parent b6d67da commit ab736be

40 files changed

+80
-102
lines changed

src/unitxt/catalog/metrics/llm_as_judge/direct/azure_openai/gpt_4o.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
77
"credentials": {
88
"api_base": "https://eteopenai.azure-api.net/openai/deployments/gpt-4o-2024-08-06/chat/completions?api-version=2024-08-01-preview"
99
},
10-
"model": "azure/gpt-4o-2024-08-06"
10+
"model": "gpt-4o-2024-08-06"
1111
},
1212
"evaluator_name": "GPT4",
1313
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/azure_openai/o1_mini.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
77
"credentials": {
8-
"api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-mini-2024-09-12/chat/completions?api-version=2024-08-01-preview"
8+
"api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-mini/chat/completions?api-version=2024-08-01-preview"
99
},
10-
"model": "azure/o1-mini-2024-09-12"
10+
"model": "o1-mini"
1111
},
1212
"evaluator_name": "O1_MINI",
1313
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/azure_openai/o1_preview.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
77
"credentials": {
8-
"api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-preview-2024-09-12/chat/completions?api-version=2024-08-01-preview"
8+
"api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-preview/chat/completions?api-version=2024-08-01-preview"
99
},
10-
"model": "azure/o1-preview-2024-09-12"
10+
"model": "o1-preview"
1111
},
1212
"evaluator_name": "O1_PREVIEW",
1313
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/openai/gpt_4o.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "openai/gpt-4o-2024-08-06"
7+
"model": "gpt-4o-2024-08-06"
88
},
99
"evaluator_name": "GPT4",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/openai/o1_mini.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "openai/o1-mini-2024-09-12"
7+
"model": "o1-mini"
88
},
99
"evaluator_name": "O1_MINI",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/openai/o1_preview.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "openai/o1-preview-2024-09-12"
7+
"model": "o1-preview"
88
},
99
"evaluator_name": "O1_PREVIEW",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/rits/granite3_0_8b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "ibm-granite/granite-3.0-8b-instruct"
7+
"model": "granite-3-8b-instruct"
88
},
99
"evaluator_name": "GRANITE3_8B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/rits/granite3_1_8b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "ibm-granite/granite-3.1-8b-instruct"
7+
"model": "granite-3-1-8b-instruct"
88
},
99
"evaluator_name": "GRANITE3_1_8B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/rits/llama3_1_405b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "meta-llama/llama-3-1-405b-instruct-fp8"
7+
"model": "llama-3-1-405b-instruct-fp8"
88
},
99
"evaluator_name": "LLAMA3_1_405B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/rits/llama3_1_70b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "meta-llama/llama-3-1-70b-instruct"
7+
"model": "llama-3-1-70b-instruct"
88
},
99
"evaluator_name": "LLAMA3_1_70B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/rits/llama3_1_8b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "meta-llama/Llama-3.1-8B-Instruct"
7+
"model": "llama-3-1-8b-instruct"
88
},
99
"evaluator_name": "LLAMA3_1_8B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/rits/mixtral8_22b.json

-11
This file was deleted.

src/unitxt/catalog/metrics/llm_as_judge/direct/rits/mixtral8_7b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "mistralai/mixtral-8x7B-instruct-v0.1"
7+
"model": "mixtral-8x7b-instruct"
88
},
99
"evaluator_name": "MIXTRAL8_7b",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/rits/mixtral_large.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "mistralai/mistral-large-instruct-2407"
7+
"model": "mistral-large-instruct"
88
},
99
"evaluator_name": "MIXTRAL_LARGE",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/granite3_0_8b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "watsonx/ibm/granite-3-8b-instruct"
7+
"model": "granite-3-8b-instruct"
88
},
99
"evaluator_name": "GRANITE3_8B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/llama3_1_405b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "watsonx/meta-llama/llama-3-405b-instruct"
7+
"model": "llama-3-1-405b-instruct-fp8"
88
},
99
"evaluator_name": "LLAMA3_1_405B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/llama3_1_70b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "watsonx/meta-llama/llama-3-1-70b-instruct"
7+
"model": "llama-3-1-70b-instruct"
88
},
99
"evaluator_name": "LLAMA3_1_70B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/llama3_1_8b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "watsonx/meta-llama/llama-3-1-8b-instruct"
7+
"model": "llama-3-1-8b-instruct"
88
},
99
"evaluator_name": "LLAMA3_1_8B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/mixtral8_7b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "watsonx/mistralai/mixtral-8x7b-instruct-v01"
7+
"model": "mixtral-8x7b-instruct"
88
},
99
"evaluator_name": "MIXTRAL8_7b",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/mixtral_large.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_direct",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "watsonx/mistralai/mistral-large"
7+
"model": "mistral-large-instruct"
88
},
99
"evaluator_name": "MIXTRAL_LARGE",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/azure_openai/gpt_4o.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
77
"credentials": {
88
"api_base": "https://eteopenai.azure-api.net/openai/deployments/gpt-4o-2024-08-06/chat/completions?api-version=2024-08-01-preview"
99
},
10-
"model": "azure/gpt-4o-2024-08-06"
10+
"model": "gpt-4o-2024-08-06"
1111
},
1212
"evaluator_name": "GPT4",
1313
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/azure_openai/o1_mini.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
77
"credentials": {
8-
"api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-mini-2024-09-12/chat/completions?api-version=2024-08-01-preview"
8+
"api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-mini/chat/completions?api-version=2024-08-01-preview"
99
},
10-
"model": "azure/o1-mini-2024-09-12"
10+
"model": "o1-mini"
1111
},
1212
"evaluator_name": "O1_MINI",
1313
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/azure_openai/o1_preview.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
77
"credentials": {
8-
"api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-preview-2024-09-12/chat/completions?api-version=2024-08-01-preview"
8+
"api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-preview/chat/completions?api-version=2024-08-01-preview"
99
},
10-
"model": "azure/o1-preview-2024-09-12"
10+
"model": "o1-preview"
1111
},
1212
"evaluator_name": "O1_PREVIEW",
1313
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/openai/gpt_4o.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "openai/gpt-4o-2024-08-06"
7+
"model": "gpt-4o-2024-08-06"
88
},
99
"evaluator_name": "GPT4",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/openai/o1_mini.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "openai/o1-mini-2024-09-12"
7+
"model": "o1-mini"
88
},
99
"evaluator_name": "O1_MINI",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/openai/o1_preview.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "lite_llm_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model": "openai/o1-preview-2024-09-12"
7+
"model": "o1-preview"
88
},
99
"evaluator_name": "O1_PREVIEW",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/granite3_0_8b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "ibm-granite/granite-3.0-8b-instruct"
7+
"model": "granite-3-8b-instruct"
88
},
99
"evaluator_name": "GRANITE3_8B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/granite3_1_8b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "ibm-granite/granite-3.1-8b-instruct"
7+
"model": "granite-3-1-8b-instruct"
88
},
99
"evaluator_name": "GRANITE3_1_8B",
1010
"generate_summaries": false

src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/llama3_1_405b.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"__type__": "llm_judge_pairwise",
33
"inference_engine": {
4-
"__type__": "rits_inference_engine",
4+
"__type__": "cross_provider_inference_engine",
55
"max_tokens": 1024,
66
"seed": 42,
7-
"model_name": "meta-llama/llama-3-1-405b-instruct-fp8"
7+
"model": "llama-3-1-405b-instruct-fp8"
88
},
99
"evaluator_name": "LLAMA3_1_405B",
1010
"generate_summaries": false

0 commit comments

Comments
 (0)