Fix Azure OpenAI based LLM judges (#1619)
* Fix azure openai evaluators (wrong url) and code refactor

Signed-off-by: Martín Santillán Cooper <[email protected]>

* Catalog changes because of code refactor

The order of the items in a dict changes

Signed-off-by: Martín Santillán Cooper <[email protected]>

---------

Signed-off-by: Martín Santillán Cooper <[email protected]>
Co-authored-by: Elron Bandel <[email protected]>
martinscooper and elronbandel authored Feb 23, 2025
1 parent e24eccb commit 4d8047d
Showing 41 changed files with 106 additions and 85 deletions.
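
The second commit message explains the churn in the catalog files: the refactor no longer builds a fresh params dict with the model key first, it assigns the model key into the shared inference_params dict after the other parameters. Because Python dicts preserve insertion order and the catalog JSON is serialized in that order, the "model" / "model_name" key now lands at the end of every inference_engine block. A minimal, illustrative sketch of that ordering effect, using made-up values:

# Illustrative only (not part of the repository): shows why moving the
# model assignment after the other parameters reorders the serialized JSON.
import json

inference_params = {"max_tokens": 1024, "seed": 42}

# Old flow: a new dict was built with the model key inserted first.
old = {"model": "openai/gpt-4o-2024-08-06", **inference_params}

# New flow: the model key is assigned into the existing params dict last.
new = dict(inference_params)
new["model"] = "openai/gpt-4o-2024-08-06"

print(json.dumps(old, indent=4))  # "model" appears first
print(json.dumps(new, indent=4))  # "model" appears last
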
13 changes: 8 additions & 5 deletions prepare/metrics/llm_as_judge/llm_as_judge.py
@@ -30,13 +30,16 @@ def get_evaluator(
         model_name = f"watsonx/{model_name}"
     elif provider == ModelProviderEnum.OPENAI:
         model_name = f"openai/{model_name}"
+    elif provider == ModelProviderEnum.AZURE_OPENAI:
+        inference_params["credentials"] = {}
+        inference_params["credentials"]["api_base"] = (
+            f"https://eteopenai.azure-api.net/openai/deployments/{model_name}/chat/completions?api-version=2024-08-01-preview"
+        )
+        model_name = "azure/" + model_name
 
-    params = {
-        f"{'model' if provider != ModelProviderEnum.RITS else 'model_name'}": model_name,
-        **inference_params,
-    }
+    inference_params[f"{'model' if provider != ModelProviderEnum.RITS else 'model_name'}"] = model_name
 
-    inference_engine = INFERENCE_ENGINE_NAME_TO_CLASS[provider](**params)
+    inference_engine = INFERENCE_ENGINE_NAME_TO_CLASS[provider](**inference_params)
 
     params = {
         "inference_engine": inference_engine,
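
Putting the hunk together: for Azure OpenAI the fix injects a credentials["api_base"] pointing at the deployment-specific chat-completions URL and prefixes the model with "azure/", and for every provider the model key is now written straight into inference_params instead of into a separate params dict. A simplified, self-contained sketch of that branching; the enum, the hard-coded defaults, and the final print are stand-ins for the repository's real ModelProviderEnum, inference_params, and INFERENCE_ENGINE_NAME_TO_CLASS:

# Simplified sketch of the refactored provider handling; the real code lives in
# prepare/metrics/llm_as_judge/llm_as_judge.py and passes the kwargs to an
# inference-engine class rather than printing them.
from enum import Enum


class ModelProviderEnum(Enum):  # stand-in for the repository's enum
    WATSONX = "watsonx"
    OPENAI = "openai"
    AZURE_OPENAI = "azure_openai"
    RITS = "rits"


def build_inference_kwargs(model_name: str, provider: ModelProviderEnum) -> dict:
    inference_params = {"max_tokens": 1024, "seed": 42}  # assumed defaults

    if provider == ModelProviderEnum.WATSONX:
        model_name = f"watsonx/{model_name}"
    elif provider == ModelProviderEnum.OPENAI:
        model_name = f"openai/{model_name}"
    elif provider == ModelProviderEnum.AZURE_OPENAI:
        # The fix: route through the deployment URL and the azure/ prefix.
        inference_params["credentials"] = {
            "api_base": f"https://eteopenai.azure-api.net/openai/deployments/"
            f"{model_name}/chat/completions?api-version=2024-08-01-preview"
        }
        model_name = "azure/" + model_name

    # RITS engines take "model_name"; the LiteLLM-based engines take "model".
    key = "model_name" if provider == ModelProviderEnum.RITS else "model"
    inference_params[key] = model_name
    return inference_params


print(build_inference_kwargs("gpt-4o-2024-08-06", ModelProviderEnum.AZURE_OPENAI))
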

@@ -2,9 +2,12 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "gpt-4o-2024-08-06",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "credentials": {
+            "api_base": "https://eteopenai.azure-api.net/openai/deployments/gpt-4o-2024-08-06/chat/completions?api-version=2024-08-01-preview"
+        },
+        "model": "azure/gpt-4o-2024-08-06"
     },
     "evaluator_name": "GPT4",
     "generate_summaries": false
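
These lite_llm_inference_engine entries are ultimately served through LiteLLM, so the catalog fields above map roughly onto a direct LiteLLM call like the following. This is a hypothetical sketch, assuming litellm is installed and AZURE_API_KEY is set in the environment; how the engine actually forwards the credentials dict is not shown here.

# Rough, hypothetical equivalent of the catalog entry above as a direct
# LiteLLM call; assumes `pip install litellm` and AZURE_API_KEY in the env.
import litellm

response = litellm.completion(
    model="azure/gpt-4o-2024-08-06",  # the azure/ prefix selects LiteLLM's Azure provider
    api_base=(
        "https://eteopenai.azure-api.net/openai/deployments/"
        "gpt-4o-2024-08-06/chat/completions?api-version=2024-08-01-preview"
    ),
    max_tokens=1024,
    seed=42,
    messages=[{"role": "user", "content": "Assess the following answer ..."}],
)
print(response.choices[0].message.content)
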

@@ -2,9 +2,12 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "o1-mini-2024-09-12",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "credentials": {
+            "api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-mini-2024-09-12/chat/completions?api-version=2024-08-01-preview"
+        },
+        "model": "azure/o1-mini-2024-09-12"
     },
     "evaluator_name": "O1_MINI",
     "generate_summaries": false

@@ -2,9 +2,12 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "o1-preview-2024-09-12",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "credentials": {
+            "api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-preview-2024-09-12/chat/completions?api-version=2024-08-01-preview"
+        },
+        "model": "azure/o1-preview-2024-09-12"
     },
     "evaluator_name": "O1_PREVIEW",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "openai/gpt-4o-2024-08-06",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "openai/gpt-4o-2024-08-06"
     },
     "evaluator_name": "GPT4",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "openai/o1-mini-2024-09-12",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "openai/o1-mini-2024-09-12"
     },
     "evaluator_name": "O1_MINI",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "openai/o1-preview-2024-09-12",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "openai/o1-preview-2024-09-12"
     },
     "evaluator_name": "O1_PREVIEW",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "ibm-granite/granite-3.0-8b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "ibm-granite/granite-3.0-8b-instruct"
     },
     "evaluator_name": "GRANITE3_8B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "ibm-granite/granite-3.1-8b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "ibm-granite/granite-3.1-8b-instruct"
     },
     "evaluator_name": "GRANITE3_1_8B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "meta-llama/llama-3-1-405b-instruct-fp8",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "meta-llama/llama-3-1-405b-instruct-fp8"
     },
     "evaluator_name": "LLAMA3_1_405B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "meta-llama/llama-3-1-70b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "meta-llama/llama-3-1-70b-instruct"
     },
     "evaluator_name": "LLAMA3_1_70B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "meta-llama/Llama-3.1-8B-Instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "meta-llama/Llama-3.1-8B-Instruct"
     },
     "evaluator_name": "LLAMA3_1_8B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "mistralai/mixtral-8x22B-instruct-v0.1",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "mistralai/mixtral-8x22B-instruct-v0.1"
     },
     "evaluator_name": "MIXTRAL8_22b",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "mistralai/mixtral-8x7B-instruct-v0.1",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "mistralai/mixtral-8x7B-instruct-v0.1"
     },
     "evaluator_name": "MIXTRAL8_7b",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "mistralai/mistral-large-instruct-2407",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "mistralai/mistral-large-instruct-2407"
     },
     "evaluator_name": "MIXTRAL_LARGE",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "watsonx/ibm/granite-3-8b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "watsonx/ibm/granite-3-8b-instruct"
     },
     "evaluator_name": "GRANITE3_8B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "watsonx/meta-llama/llama-3-405b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "watsonx/meta-llama/llama-3-405b-instruct"
     },
     "evaluator_name": "LLAMA3_1_405B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "watsonx/meta-llama/llama-3-1-70b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "watsonx/meta-llama/llama-3-1-70b-instruct"
     },
     "evaluator_name": "LLAMA3_1_70B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "watsonx/meta-llama/llama-3-1-8b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "watsonx/meta-llama/llama-3-1-8b-instruct"
     },
     "evaluator_name": "LLAMA3_1_8B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "watsonx/mistralai/mixtral-8x7b-instruct-v01",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "watsonx/mistralai/mixtral-8x7b-instruct-v01"
     },
     "evaluator_name": "MIXTRAL8_7b",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "watsonx/mistralai/mistral-large",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "watsonx/mistralai/mistral-large"
     },
     "evaluator_name": "MIXTRAL_LARGE",
     "generate_summaries": false

@@ -2,9 +2,12 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "gpt-4o-2024-08-06",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "credentials": {
+            "api_base": "https://eteopenai.azure-api.net/openai/deployments/gpt-4o-2024-08-06/chat/completions?api-version=2024-08-01-preview"
+        },
+        "model": "azure/gpt-4o-2024-08-06"
     },
     "evaluator_name": "GPT4",
     "generate_summaries": false

@@ -2,9 +2,12 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "o1-mini-2024-09-12",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "credentials": {
+            "api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-mini-2024-09-12/chat/completions?api-version=2024-08-01-preview"
+        },
+        "model": "azure/o1-mini-2024-09-12"
     },
     "evaluator_name": "O1_MINI",
     "generate_summaries": false

@@ -2,9 +2,12 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "o1-preview-2024-09-12",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "credentials": {
+            "api_base": "https://eteopenai.azure-api.net/openai/deployments/o1-preview-2024-09-12/chat/completions?api-version=2024-08-01-preview"
+        },
+        "model": "azure/o1-preview-2024-09-12"
     },
     "evaluator_name": "O1_PREVIEW",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "openai/gpt-4o-2024-08-06",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "openai/gpt-4o-2024-08-06"
     },
     "evaluator_name": "GPT4",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "openai/o1-mini-2024-09-12",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "openai/o1-mini-2024-09-12"
     },
     "evaluator_name": "O1_MINI",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "lite_llm_inference_engine",
-        "model": "openai/o1-preview-2024-09-12",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model": "openai/o1-preview-2024-09-12"
     },
     "evaluator_name": "O1_PREVIEW",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "ibm-granite/granite-3.0-8b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "ibm-granite/granite-3.0-8b-instruct"
     },
     "evaluator_name": "GRANITE3_8B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "ibm-granite/granite-3.1-8b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "ibm-granite/granite-3.1-8b-instruct"
     },
     "evaluator_name": "GRANITE3_1_8B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "meta-llama/llama-3-1-405b-instruct-fp8",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "meta-llama/llama-3-1-405b-instruct-fp8"
     },
     "evaluator_name": "LLAMA3_1_405B",
     "generate_summaries": false

@@ -2,9 +2,9 @@
     "__type__": "llm_judge_pairwise",
     "inference_engine": {
         "__type__": "rits_inference_engine",
-        "model_name": "meta-llama/llama-3-1-70b-instruct",
         "max_tokens": 1024,
-        "seed": 42
+        "seed": 42,
+        "model_name": "meta-llama/llama-3-1-70b-instruct"
     },
     "evaluator_name": "LLAMA3_1_70B",
     "generate_summaries": false