Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions deepeval/models/base_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from abc import ABC, abstractmethod
from typing import Any, Optional, List
from deepeval.models.utils import parse_model_name


class DeepEvalBaseModel(ABC):
Expand Down Expand Up @@ -32,7 +31,7 @@ def _call(self, *args, **kwargs):

class DeepEvalBaseLLM(ABC):
def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
self.model_name = parse_model_name(model_name)
self.model_name = model_name
self.model = self.load_model(*args, **kwargs)

@abstractmethod
Expand Down Expand Up @@ -77,7 +76,7 @@ def get_model_name(self, *args, **kwargs) -> str:

class DeepEvalBaseMLLM(ABC):
def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
self.model_name = parse_model_name(model_name)
self.model_name = model_name

@abstractmethod
def generate(self, *args, **kwargs) -> str:
Expand All @@ -104,7 +103,7 @@ def get_model_name(self, *args, **kwargs) -> str:

class DeepEvalBaseEmbeddingModel(ABC):
def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
self.model_name = parse_model_name(model_name)
self.model_name = model_name

self.model = self.load_model(*args, **kwargs)

Expand Down
3 changes: 1 addition & 2 deletions deepeval/models/llms/anthropic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from deepeval.models import DeepEvalBaseLLM
from deepeval.models.llms.utils import trim_and_load_json
from deepeval.models.utils import parse_model_name

model_pricing = {
"claude-3-7-sonnet-latest": {"input": 3.00 / 1e6, "output": 15.00 / 1e6},
Expand All @@ -25,7 +24,7 @@ def __init__(
temperature: float = 0,
_anthropic_api_key: Optional[str] = None,
):
model_name = parse_model_name(model)
model_name = model
self._anthropic_api_key = _anthropic_api_key

if temperature < 0:
Expand Down
3 changes: 1 addition & 2 deletions deepeval/models/llms/azure_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
log_retry_error,
)
from deepeval.models.llms.utils import trim_and_load_json
from deepeval.models.utils import parse_model_name

retryable_exceptions = (
openai.RateLimitError,
Expand Down Expand Up @@ -61,7 +60,7 @@ def __init__(
# args and kwargs will be passed to the underlying model, in load_model function
self.args = args
self.kwargs = kwargs
super().__init__(parse_model_name(model_name))
super().__init__(model_name)

###############################################
# Other generate functions
Expand Down
3 changes: 1 addition & 2 deletions deepeval/models/llms/openai_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

from deepeval.models import DeepEvalBaseLLM
from deepeval.models.llms.utils import trim_and_load_json
from deepeval.models.utils import parse_model_name


def log_retry_error(retry_state: RetryCallState):
Expand Down Expand Up @@ -160,7 +159,7 @@ def __init__(
):
model_name = None
if isinstance(model, str):
model_name = parse_model_name(model)
model_name = model
if model_name not in valid_gpt_models:
raise ValueError(
f"Invalid model. Available GPT models: {', '.join(model for model in valid_gpt_models)}"
Expand Down
3 changes: 1 addition & 2 deletions deepeval/models/mlllms/openai_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

from deepeval.models import DeepEvalBaseMLLM
from deepeval.test_case import MLLMImage
from deepeval.models.utils import parse_model_name

retryable_exceptions = (
openai.RateLimitError,
Expand Down Expand Up @@ -87,7 +86,7 @@ def __init__(
):
model_name = None
if isinstance(model, str):
model_name = parse_model_name(model)
model_name = model
if model_name not in valid_multimodal_gpt_models:
raise ValueError(
f"Invalid model. Available Multimodal GPT models: {', '.join(model for model in valid_multimodal_gpt_models)}"
Expand Down
36 changes: 24 additions & 12 deletions deepeval/models/utils.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,43 @@
from typing import Optional


def parse_model_name(model_name: Optional[str] = None) -> Optional[str]:
    """Extract the base model name from a provider-prefixed identifier.

    Some model-serving proxies (e.g. LiteLLM) address models in the form
    ``"<provider>/<model>"`` — for example ``"openai/gpt-4o"`` means
    "GPT-4o served via the OpenAI endpoint". For those identifiers we strip
    the provider prefix and return only the model name.

    Not every prefix is a provider, however: ``"local/llama-3"`` may be a
    valid model path in a self-hosted setup. The prefix is therefore only
    stripped when it matches a known provider (compared case-insensitively,
    so ``"OpenAI/GPT-4o"`` is treated the same as ``"openai/GPT-4o"``).

    Args:
        model_name: The original model identifier, potentially in
            ``"<provider>/<model>"`` format. May be ``None``.

    Returns:
        The model name without the provider prefix for known providers;
        the unmodified input for unknown prefixes (e.g. ``"local"``,
        ``"custom"``) or names without a ``"/"``; ``None`` if the input
        is ``None``.

    Examples:
        parse_model_name("openai/gpt-4o") -> "gpt-4o"
        parse_model_name("gpt-4o") -> "gpt-4o"
        parse_model_name("local/llama-3") -> "local/llama-3"
        parse_model_name(None) -> None
    """
    # Propagate None unchanged so callers may pass an unset model through.
    if model_name is None:
        return None

    # Providers whose "<provider>/" prefix should be stripped. Kept as a
    # frozenset for O(1) membership tests.
    known_providers = frozenset(
        {"openai", "anthropic", "cohere", "mistral", "groq", "huggingface"}
    )

    if "/" in model_name:
        # Split only on the first slash: "provider/model/version" keeps
        # "model/version" intact.
        provider, parsed_model_name = model_name.split("/", 1)
        if provider.lower() in known_providers:
            return parsed_model_name
        # Unknown prefix (e.g. "local/", "custom-provider/") — preserve as-is.
        return model_name

    return model_name
90 changes: 27 additions & 63 deletions tests/test_models_utils.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,34 @@
import pytest
from deepeval.models.utils import parse_model_name


class TestModelNameFormat:
    """Model names are expected to be passed through to providers verbatim.

    NOTE(review): this suite's premise is that ``parse_model_name`` was
    removed and model names are used directly in full ``provider/model``
    format — but the same change set still defines ``parse_model_name`` in
    ``deepeval/models/utils.py``. Confirm which is intended before merging.
    """

    # Representative identifiers: provider-prefixed, custom-prefixed,
    # and bare model names — all must be accepted unmodified.
    MODEL_NAMES = [
        "openai/gpt-4o",
        "anthropic/claude-3-opus",
        "cohere/command",
        "local/llama-3",
        "custom-provider/model-123_test",
        "gpt-4o",
        "claude-3-sonnet",
    ]

    def test_model_name_direct_usage(self):
        # Model names should be usable as-is: non-empty strings, with no
        # parsing or prefix stripping applied anywhere.
        for model_name in self.MODEL_NAMES:
            assert isinstance(model_name, str)
            assert len(model_name) > 0

    def test_model_name_format_contains_provider(self):
        # Provider-qualified names keep their "<provider>/<model>" slash.
        for name in ("openai/gpt-4o", "anthropic/claude-3-opus"):
            assert "/" in name

    def test_model_name_format_no_slash(self):
        # Bare model names (no provider prefix) are equally valid.
        model = "gpt-4o"
        assert "/" not in model
Loading