Skip to content

Commit ff68cbd

Browse files
Fix azure dall e 3 call with custom model name + Handle Bearer $LITELLM_API_KEY in x-litellm-api-key custom header (#10776)
* fix(main.py): use base model instead of user model if given. Fixes #10760
* feat(azure/image_generation/__init__.py): make azure image gen check more robust. Fixes #10760
* fix(user_api_key_auth.py): support bearer token auth for `x-litellm-api-key` header. Fixes earlier regression on vertex ai passthrough auth
* fix(user_api_key_auth.py): refactor get api key into separate function; enables easier testing
* fix: cleanup
* fix: fix linting error
* test: update tests
1 parent 53f6514 commit ff68cbd

File tree

11 files changed

+285
-143
lines changed

11 files changed

+285
-143
lines changed

litellm/llms/azure/image_generation/__init__.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from litellm._logging import verbose_logger
12
from litellm.llms.base_llm.image_generation.transformation import (
23
BaseImageGenerationConfig,
34
)
@@ -14,9 +15,15 @@
1415

1516

1617
def get_azure_image_generation_config(model: str) -> BaseImageGenerationConfig:
    """Select the Azure image-generation config for the given model name.

    The name is normalized (lowercased, dashes and underscores removed) so
    spelling variants like ``dall-e-3``, ``dalle-3`` and ``openai_dall_e_3``
    all resolve to the same config. An empty model name means DALL-E 2.
    Anything else falls through to the gpt-image-1 style config.
    """
    model = model.lower().replace("-", "").replace("_", "")
    if not model or "dalle2" in model:
        # empty model is dall-e-2
        return AzureDallE2ImageGenerationConfig()
    if "dalle3" in model:
        return AzureDallE3ImageGenerationConfig()
    verbose_logger.debug(
        f"Using AzureGPTImageGenerationConfig for model: {model}. This follows the gpt-image-1 model format."
    )
    return AzureGPTImageGenerationConfig()

litellm/main.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -4661,6 +4661,7 @@ def image_generation( # noqa: PLR0915
46614661
client = kwargs.get("client", None)
46624662
extra_headers = kwargs.get("extra_headers", None)
46634663
headers: dict = kwargs.get("headers", None) or {}
4664+
base_model = kwargs.get("base_model", None)
46644665
if extra_headers is not None:
46654666
headers.update(extra_headers)
46664667
model_response: ImageResponse = litellm.utils.ImageResponse()
@@ -4705,13 +4706,13 @@ def image_generation( # noqa: PLR0915
47054706
):
47064707
image_generation_config = (
47074708
ProviderConfigManager.get_provider_image_generation_config(
4708-
model=model,
4709+
model=base_model or model,
47094710
provider=LlmProviders(custom_llm_provider),
47104711
)
47114712
)
47124713

47134714
optional_params = get_optional_params_image_gen(
4714-
model=model,
4715+
model=base_model or model,
47154716
n=n,
47164717
quality=quality,
47174718
response_format=response_format,

litellm/proxy/_experimental/out/onboarding.html

-1
This file was deleted.

litellm/proxy/auth/user_api_key_auth.py

+70-23
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import asyncio
1111
import secrets
1212
from datetime import datetime, timezone
13-
from typing import Optional, cast
13+
from typing import List, Optional, Tuple, cast
1414

1515
import fastapi
1616
from fastapi import HTTPException, Request, WebSocket, status
@@ -89,6 +89,17 @@
8989
)
9090

9191

92+
def _get_bearer_token_or_received_api_key(api_key: str) -> str:
93+
if api_key.startswith("Bearer "): # ensure Bearer token passed in
94+
api_key = api_key.replace("Bearer ", "") # extract the token
95+
elif api_key.startswith("Basic "):
96+
api_key = api_key.replace("Basic ", "") # handle langfuse input
97+
elif api_key.startswith("bearer "):
98+
api_key = api_key.replace("bearer ", "")
99+
100+
return api_key
101+
102+
92103
def _get_bearer_token(
93104
api_key: str,
94105
):
@@ -217,6 +228,53 @@ def get_rbac_role(jwt_handler: JWTHandler, scopes: List[str]) -> str:
217228
return LitellmUserRoles.TEAM
218229

219230

231+
def get_api_key(
    custom_litellm_key_header: Optional[str],
    api_key: str,
    azure_api_key_header: Optional[str],
    anthropic_api_key_header: Optional[str],
    google_ai_studio_api_key_header: Optional[str],
    azure_apim_header: Optional[str],
    pass_through_endpoints: Optional[List[dict]],
    route: str,
    request: Request,
) -> Tuple[str, Optional[str]]:
    """
    Resolve the effective API key from the headers a request may carry.

    Precedence (first match wins):
        1. custom litellm key header (e.g. ``x-litellm-api-key``) — supports
           "Bearer "/"bearer "/"Basic " prefixes
        2. the Authorization-derived ``api_key`` — Bearer token extracted
        3. azure / anthropic / google-ai-studio / azure-apim provider headers
        4. a pass-through endpoint whose ``path`` matches ``route``, read from
           its configured ``litellm_user_api_key`` header

    Returns:
        Tuple[str, Optional[str]]: the resolved api_key and the raw
        passed-in value (before any prefix stripping), if one was found.
    """
    passed_in_key: Optional[str] = None
    if isinstance(custom_litellm_key_header, str):
        passed_in_key = custom_litellm_key_header
        api_key = _get_bearer_token_or_received_api_key(custom_litellm_key_header)
    elif isinstance(api_key, str):
        passed_in_key = api_key
        api_key = _get_bearer_token(api_key=api_key)
    elif isinstance(azure_api_key_header, str):
        passed_in_key = azure_api_key_header
        api_key = azure_api_key_header
    elif isinstance(anthropic_api_key_header, str):
        passed_in_key = anthropic_api_key_header
        api_key = anthropic_api_key_header
    elif isinstance(google_ai_studio_api_key_header, str):
        passed_in_key = google_ai_studio_api_key_header
        api_key = google_ai_studio_api_key_header
    elif isinstance(azure_apim_header, str):
        passed_in_key = azure_apim_header
        api_key = azure_apim_header
    elif pass_through_endpoints is not None:
        for endpoint in pass_through_endpoints:
            if endpoint.get("path", "") == route:
                headers: Optional[dict] = endpoint.get("headers", None)
                if headers is not None:
                    header_key: str = headers.get("litellm_user_api_key", "")
                    # NOTE(review): .get(key=...) may return None here even
                    # though api_key is typed str — preserved from original.
                    if request.headers.get(key=header_key) is not None:
                        api_key = request.headers.get(key=header_key)
                        passed_in_key = api_key
    return api_key, passed_in_key
276+
277+
220278
async def _user_api_key_auth_builder( # noqa: PLR0915
221279
request: Request,
222280
api_key: str,
@@ -260,28 +318,17 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
260318
)
261319
passed_in_key: Optional[str] = None
262320
## CHECK IF X-LITELM-API-KEY IS PASSED IN - supercedes Authorization header
263-
if isinstance(custom_litellm_key_header, str):
264-
api_key = custom_litellm_key_header
265-
elif isinstance(api_key, str):
266-
passed_in_key = api_key
267-
api_key = _get_bearer_token(api_key=api_key)
268-
elif isinstance(azure_api_key_header, str):
269-
api_key = azure_api_key_header
270-
elif isinstance(anthropic_api_key_header, str):
271-
api_key = anthropic_api_key_header
272-
elif isinstance(google_ai_studio_api_key_header, str):
273-
api_key = google_ai_studio_api_key_header
274-
elif isinstance(azure_apim_header, str):
275-
api_key = azure_apim_header
276-
elif pass_through_endpoints is not None:
277-
for endpoint in pass_through_endpoints:
278-
if endpoint.get("path", "") == route:
279-
headers: Optional[dict] = endpoint.get("headers", None)
280-
if headers is not None:
281-
header_key: str = headers.get("litellm_user_api_key", "")
282-
if request.headers.get(key=header_key) is not None:
283-
api_key = request.headers.get(key=header_key)
284-
321+
api_key, passed_in_key = get_api_key(
322+
custom_litellm_key_header=custom_litellm_key_header,
323+
api_key=api_key,
324+
azure_api_key_header=azure_api_key_header,
325+
anthropic_api_key_header=anthropic_api_key_header,
326+
google_ai_studio_api_key_header=google_ai_studio_api_key_header,
327+
azure_apim_header=azure_apim_header,
328+
pass_through_endpoints=pass_through_endpoints,
329+
route=route,
330+
request=request,
331+
)
285332
# if user wants to pass LiteLLM_Master_Key as a custom header, example pass litellm keys as X-LiteLLM-Key: Bearer sk-1234
286333
custom_litellm_key_header_name = general_settings.get("litellm_key_header_name")
287334
if custom_litellm_key_header_name is not None:

litellm/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2271,7 +2271,7 @@ def _check_valid_arg(supported_params):
22712271
elif k not in supported_params:
22722272
raise UnsupportedParamsError(
22732273
status_code=500,
2274-
message=f"Setting `{k}` is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.",
2274+
message=f"Setting `{k}` is not supported by {custom_llm_provider}, {model}. To drop it from the call, set `litellm.drop_params = True`.",
22752275
)
22762276
return non_default_params
22772277

tests/image_gen_tests/test_image_generation.py

+1
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,4 @@ async def test_aimage_generation_bedrock_with_optional_params():
240240
pass
241241
else:
242242
pytest.fail(f"An exception occurred - {str(e)}")
243+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import json
2+
import os
3+
import sys
4+
import traceback
5+
from typing import Callable, Optional
6+
from unittest.mock import MagicMock, patch
7+
8+
import pytest
9+
10+
sys.path.insert(
11+
0, os.path.abspath("../../../../..")
12+
) # Adds the parent directory to the system path
13+
import litellm
14+
from litellm.llms.azure.image_generation import (
15+
AzureDallE3ImageGenerationConfig,
16+
get_azure_image_generation_config,
17+
)
18+
19+
20+
@pytest.mark.parametrize(
    "received_model, expected_config",
    [
        ("dall-e-3", AzureDallE3ImageGenerationConfig),
        ("dalle-3", AzureDallE3ImageGenerationConfig),
        ("openai_dall_e_3", AzureDallE3ImageGenerationConfig),
    ],
)
def test_azure_image_generation_config(received_model, expected_config):
    """All spelling variants of dall-e-3 resolve to the DALL-E 3 config."""
    resolved_config = get_azure_image_generation_config(received_model)
    assert isinstance(resolved_config, expected_config)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import asyncio
2+
import json
3+
import os
4+
import sys
5+
from typing import Tuple
6+
from unittest.mock import AsyncMock, MagicMock, patch
7+
8+
sys.path.insert(
9+
0, os.path.abspath("../../..")
10+
) # Adds the parent directory to the system path
11+
12+
from unittest.mock import MagicMock
13+
14+
import pytest
15+
16+
from litellm.proxy.auth.user_api_key_auth import get_api_key
17+
18+
19+
def test_get_api_key():
    """A Bearer-prefixed Authorization key is stripped; the raw value is echoed back."""
    raw_header_value = "Bearer sk-12345678"
    expected_key = "sk-12345678"
    result = get_api_key(
        custom_litellm_key_header=None,
        api_key=raw_header_value,
        azure_api_key_header=None,
        anthropic_api_key_header=None,
        google_ai_studio_api_key_header=None,
        azure_apim_header=None,
        pass_through_endpoints=None,
        route="",
        request=MagicMock(),
    )
    assert result == (expected_key, raw_header_value)
34+
35+
36+
@pytest.mark.parametrize(
    "custom_litellm_key_header, api_key, passed_in_key",
    [
        ("Bearer sk-12345678", "sk-12345678", "Bearer sk-12345678"),
        ("Basic sk-12345678", "sk-12345678", "Basic sk-12345678"),
        ("bearer sk-12345678", "sk-12345678", "bearer sk-12345678"),
        ("sk-12345678", "sk-12345678", "sk-12345678"),
    ],
)
def test_get_api_key_with_custom_litellm_key_header(
    custom_litellm_key_header, api_key, passed_in_key
):
    """The x-litellm-api-key header takes precedence and known auth prefixes are stripped."""
    resolved = get_api_key(
        custom_litellm_key_header=custom_litellm_key_header,
        api_key=None,
        azure_api_key_header=None,
        anthropic_api_key_header=None,
        google_ai_studio_api_key_header=None,
        azure_apim_header=None,
        pass_through_endpoints=None,
        route="",
        request=MagicMock(),
    )
    assert resolved == (api_key, passed_in_key)

tests/litellm/test_utils.py

-113
Original file line numberDiff line numberDiff line change
@@ -32,119 +32,6 @@ def test_get_optional_params_image_gen():
3232
assert optional_params["n"] == 3
3333

3434

35-
def return_mocked_response(model: str):
36-
if model == "bedrock/mistral.mistral-large-2407-v1:0":
37-
return {
38-
"metrics": {"latencyMs": 316},
39-
"output": {
40-
"message": {
41-
"content": [{"text": "Hello! How are you doing today? How can"}],
42-
"role": "assistant",
43-
}
44-
},
45-
"stopReason": "max_tokens",
46-
"usage": {"inputTokens": 5, "outputTokens": 10, "totalTokens": 15},
47-
}
48-
49-
50-
@pytest.mark.parametrize(
51-
"model",
52-
[
53-
"bedrock/mistral.mistral-large-2407-v1:0",
54-
],
55-
)
56-
@pytest.mark.asyncio()
57-
async def test_bedrock_max_completion_tokens(model: str):
58-
"""
59-
Tests that:
60-
- max_completion_tokens is passed as max_tokens to bedrock models
61-
"""
62-
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
63-
64-
litellm.set_verbose = True
65-
66-
client = AsyncHTTPHandler()
67-
68-
mock_response = return_mocked_response(model)
69-
_model = model.split("/")[1]
70-
print("\n\nmock_response: ", mock_response)
71-
72-
with patch.object(client, "post") as mock_client:
73-
try:
74-
response = await litellm.acompletion(
75-
model=model,
76-
max_completion_tokens=10,
77-
messages=[{"role": "user", "content": "Hello!"}],
78-
client=client,
79-
)
80-
except Exception as e:
81-
print(f"Error: {e}")
82-
83-
mock_client.assert_called_once()
84-
request_body = json.loads(mock_client.call_args.kwargs["data"])
85-
86-
print("request_body: ", request_body)
87-
88-
assert request_body == {
89-
"messages": [{"role": "user", "content": [{"text": "Hello!"}]}],
90-
"additionalModelRequestFields": {},
91-
"system": [],
92-
"inferenceConfig": {"maxTokens": 10},
93-
}
94-
95-
96-
@pytest.mark.parametrize(
97-
"model",
98-
["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229"],
99-
)
100-
@pytest.mark.asyncio()
101-
async def test_anthropic_api_max_completion_tokens(model: str):
102-
"""
103-
Tests that:
104-
- max_completion_tokens is passed as max_tokens to anthropic models
105-
"""
106-
litellm.set_verbose = True
107-
from litellm.llms.custom_httpx.http_handler import HTTPHandler
108-
109-
mock_response = {
110-
"content": [{"text": "Hi! My name is Claude.", "type": "text"}],
111-
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
112-
"model": "claude-3-5-sonnet-20240620",
113-
"role": "assistant",
114-
"stop_reason": "end_turn",
115-
"stop_sequence": None,
116-
"type": "message",
117-
"usage": {"input_tokens": 2095, "output_tokens": 503},
118-
}
119-
120-
client = HTTPHandler()
121-
122-
print("\n\nmock_response: ", mock_response)
123-
124-
with patch.object(client, "post") as mock_client:
125-
try:
126-
response = await litellm.acompletion(
127-
model=model,
128-
max_completion_tokens=10,
129-
messages=[{"role": "user", "content": "Hello!"}],
130-
client=client,
131-
)
132-
except Exception as e:
133-
print(f"Error: {e}")
134-
mock_client.assert_called_once()
135-
request_body = mock_client.call_args.kwargs["json"]
136-
137-
print("request_body: ", request_body)
138-
139-
assert request_body == {
140-
"messages": [
141-
{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}
142-
],
143-
"max_tokens": 10,
144-
"model": model.split("/")[-1],
145-
}
146-
147-
14835
def test_all_model_configs():
14936
from litellm.llms.vertex_ai.vertex_ai_partner_models.ai21.transformation import (
15037
VertexAIAi21Config,

0 commit comments

Comments
 (0)