2 changes: 1 addition & 1 deletion litellm/integrations/custom_logger.py
@@ -240,7 +240,7 @@ async def async_pre_routing_hook(
self,
model: str,
request_kwargs: Dict,
-        messages: Optional[List[Dict[str, str]]] = None,
+        messages: Optional[List[Dict[str, Any]]] = None,
input: Optional[Union[str, List]] = None,
specific_deployment: Optional[bool] = False,
) -> Optional[PreRoutingHookResponse]:
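Reviewer note: the widening from Dict[str, str] to Dict[str, Any] matters for multimodal chat messages, whose "content" field is a list of parts rather than a plain string. A minimal sketch (message contents are illustrative, not from this PR):

from typing import Any, Dict, List, Optional

# A text-only message would satisfy Dict[str, str] ...
text_message: Dict[str, Any] = {"role": "user", "content": "hi"}

# ... but a multimodal message cannot: "content" is a list, not a str.
multimodal_message: Dict[str, Any] = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ],
}

messages: Optional[List[Dict[str, Any]]] = [text_message, multimodal_message]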
13 changes: 8 additions & 5 deletions litellm/integrations/prometheus.py
@@ -51,6 +51,7 @@
else:
    AsyncIOScheduler = Any

+
class PrometheusLogger(CustomLogger):
# Class variables or attributes

@@ -991,9 +992,7 @@ def _inc_labeled_counter(
amount: float = 1.0,
) -> None:
_labels = prometheus_label_factory(
-            supported_enum_labels=self.get_labels_for_metric(
-                metric_name=metric_name
-            ),
+            supported_enum_labels=self.get_labels_for_metric(metric_name=metric_name),
enum_values=enum_values,
label_context=label_context,
)
@@ -1118,7 +1117,9 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti

user_api_key = hash_token(user_api_key)

-        label_context = PrometheusLabelFactoryContext(enum_values) #amortized per request.
+        label_context = PrometheusLabelFactoryContext(
+            enum_values
+        )  # amortized per request.

# increment total LLM requests and spend metric
self._increment_top_level_request_and_spend_metrics(
@@ -3490,7 +3491,9 @@ def _prometheus_labels_from_context(
}

if UserAPIKeyLabelNames.END_USER.value in filtered_labels:
-        filtered_labels[UserAPIKeyLabelNames.END_USER.value] = ctx.get_resolved_end_user()
+        filtered_labels[UserAPIKeyLabelNames.END_USER.value] = (
+            ctx.get_resolved_end_user()
+        )

for sk, val in ctx._custom_by_sanitized_key.items():
if sk in supported_enum_labels:
3 changes: 1 addition & 2 deletions litellm/integrations/prometheus_helpers/__init__.py
@@ -51,8 +51,7 @@ def __init__(self, enum_values: UserAPIKeyLabelValues) -> None:
self.enum_values = enum_values
enum_dict = enum_values.model_dump()
self._sanitized_enum: Dict[str, Optional[str]] = {
-            k: _sanitize_prometheus_label_value(v)
-            for k, v in enum_dict.items()
+            k: _sanitize_prometheus_label_value(v) for k, v in enum_dict.items()
}
self._custom_by_sanitized_key: Dict[str, Optional[str]] = {}
if enum_values.custom_metadata_labels is not None:
4 changes: 3 additions & 1 deletion litellm/integrations/websearch_interception/handler.py
@@ -847,7 +847,9 @@ async def _build_anthropic_request_patch(
kwargs_for_followup = self._prepare_followup_kwargs(kwargs)

if logging_obj is not None:
-            agentic_params = logging_obj.model_call_details.get("agentic_loop_params", {})
+            agentic_params = logging_obj.model_call_details.get(
+                "agentic_loop_params", {}
+            )
full_model_name = agentic_params.get("model", model)
verbose_logger.debug(
"WebSearchInterception: Built anthropic request patch "
2 changes: 1 addition & 1 deletion litellm/litellm_core_utils/llm_cost_calc/utils.py
@@ -684,7 +684,7 @@ def generic_cost_per_token( # noqa: PLR0915
- cache_creation
- image_tokens
)
-    # Clamp to zero: inconsistent streaming usage
+        # Clamp to zero: inconsistent streaming usage
if text_tokens < 0:
text_tokens = 0
prompt_tokens_details["text_tokens"] = text_tokens
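Reviewer note: a minimal sketch of the clamp being exercised; the numbers are illustrative, not from the PR.

# Inconsistent streaming usage can report more cached/image tokens than
# total prompt tokens, driving the derived text_tokens negative.
prompt_tokens = 100
cache_read = 60
cache_creation = 30
image_tokens = 20

text_tokens = prompt_tokens - cache_read - cache_creation - image_tokens  # -10
# Clamp to zero so the cost math never bills a negative token count.
if text_tokens < 0:
    text_tokens = 0
assert text_tokens == 0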
37 changes: 28 additions & 9 deletions litellm/llms/anthropic/chat/guardrail_translation/handler.py
@@ -34,6 +34,7 @@
)
from litellm.types.llms.openai import (
AllMessageValues,
+    ChatCompletionRequest,
ChatCompletionToolCallChunk,
ChatCompletionToolParam,
)
@@ -67,6 +68,32 @@ def __init__(self):
super().__init__()
self.adapter = LiteLLMAnthropicMessagesAdapter()

+    def _translate_to_openai(self, data: dict) -> ChatCompletionRequest:
+        """Translate Anthropic request to OpenAI chat completion format."""
+        (
+            chat_completion_compatible_request,
+            _tool_name_mapping,
+        ) = LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
+            anthropic_message_request=cast(AnthropicMessagesRequest, data.copy())
+        )
+        return chat_completion_compatible_request
+
+    def get_structured_messages(self, data: dict) -> Optional[List[AllMessageValues]]:
+        """
+        Convert Anthropic messages request data to OpenAI-spec structured messages.
+
+        Uses the Anthropic-to-OpenAI adapter to translate message format.
+        """
+        messages = data.get("messages")
+        if messages is None:
+            return None
+        chat_completion_compatible_request = self._translate_to_openai(data)
+        result = cast(
+            List[AllMessageValues],
+            chat_completion_compatible_request.get("messages", []),
+        )
+        return result if result else None

async def process_input_messages(
self,
data: dict,
@@ -82,13 +109,7 @@ async def process_input_messages(

skip_system = effective_skip_system_message_for_guardrail(guardrail_to_apply)

-        (
-            chat_completion_compatible_request,
-            _tool_name_mapping,
-        ) = LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
-            # Use a shallow copy to avoid mutating request data (pop on litellm_metadata).
-            anthropic_message_request=cast(AnthropicMessagesRequest, data.copy())
-        )
+        chat_completion_compatible_request = self._translate_to_openai(data)

structured_messages = cast(
List[AllMessageValues],
@@ -103,8 +124,6 @@
chat_completion_compatible_request.get("tools", [])
)
task_mappings: List[Tuple[int, Optional[int]]] = []
-        # Track (message_index, content_index) for each text
-        # content_index is None for string content, int for list content

# Step 1: Extract all text content and images
for msg_idx, message in enumerate(messages):
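Reviewer note: a sketch of the round trip through the new hook. The handler class name below is a stand-in (the class definition is outside this hunk), and the translated output shown is a plausible adapter result, not asserted by this diff.

# Hypothetical usage; "AnthropicGuardrailHandler" stands in for whatever
# class in this module defines get_structured_messages.
handler = AnthropicGuardrailHandler()

anthropic_request = {
    "model": "claude-sonnet-4",
    "system": "You are terse.",
    "messages": [{"role": "user", "content": "hi"}],
    "max_tokens": 64,
}

structured = handler.get_structured_messages(anthropic_request)
# Plausible result (exact mapping is up to translate_anthropic_to_openai):
# [{"role": "system", "content": "You are terse."},
#  {"role": "user", "content": "hi"}]

# An absent "messages" key short-circuits to None:
assert handler.get_structured_messages({"model": "claude-sonnet-4"}) is None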
@@ -216,9 +216,11 @@ async def _process_agentic_hooks(self) -> None:
return

[
-            f"{b.get('type')}({b.get('name', '')})"
-            if b.get("type") == "tool_use"
-            else b.get("type")
+            (
+                f"{b.get('type')}({b.get('name', '')})"
+                if b.get("type") == "tool_use"
+                else b.get("type")
+            )
for b in rebuilt.get("content", [])
]

11 changes: 11 additions & 0 deletions litellm/llms/base_llm/guardrail_translation/base_translation.py
@@ -5,6 +5,7 @@
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.llms.openai import AllMessageValues


class BaseTranslation(ABC):
@@ -101,6 +102,16 @@ async def process_output_streaming_response(
"""
return responses_so_far

+    def get_structured_messages(self, data: dict) -> Optional[List["AllMessageValues"]]:
+        """
+        Convert request data to OpenAI-spec structured messages.
+        Override in subclasses for format-specific conversion.
+        Returns None if no convertible content is found.
+        """
+        return None

def extract_request_tool_names(self, data: dict) -> List[str]:
"""
Extract tool names from the request body for allowlist/policy checks.
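Reviewer note: a minimal sketch of a subclass overriding the new base hook. The subclass, its docstring, and the "prompt" field are invented for illustration; only BaseTranslation and the method signature come from this diff.

from typing import List, Optional

from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation


class PromptOnlyTranslation(BaseTranslation):  # hypothetical subclass
    """Example format whose requests carry a single 'prompt' string."""

    def get_structured_messages(self, data: dict) -> Optional[List["AllMessageValues"]]:
        prompt = data.get("prompt")  # 'prompt' is an invented field name
        if prompt is None:
            return None
        # Normalize to OpenAI-spec messages so guardrails can inspect them.
        return [{"role": "user", "content": prompt}]  # type: ignore[list-item]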
4 changes: 1 addition & 3 deletions litellm/llms/github_copilot/authenticator.py
@@ -294,9 +294,7 @@ def _poll_for_access_token(self, device_code: str) -> str:
access_token_url = os.getenv(
"GITHUB_COPILOT_ACCESS_TOKEN_URL", DEFAULT_GITHUB_ACCESS_TOKEN_URL
)
-        client_id = os.getenv(
-            "GITHUB_COPILOT_CLIENT_ID", DEFAULT_GITHUB_CLIENT_ID
-        )
+        client_id = os.getenv("GITHUB_COPILOT_CLIENT_ID", DEFAULT_GITHUB_CLIENT_ID)

for attempt in range(max_attempts):
try:
22 changes: 15 additions & 7 deletions litellm/llms/openai/chat/guardrail_translation/handler.py
@@ -48,6 +48,17 @@ class OpenAIChatCompletionsHandler(BaseTranslation):
Methods can be overridden to customize behavior for different message formats.
"""

+    def get_structured_messages(self, data: dict) -> Optional[List[AllMessageValues]]:
+        """
+        Convert chat completions request data to OpenAI-spec structured messages.
+
+        Messages are already in OpenAI format, so this is a simple extraction.
+        """
+        messages = data.get("messages")
+        if messages is None:
+            return None
+        return cast(List[AllMessageValues], messages)

async def process_input_messages(
self,
data: dict,
@@ -68,9 +79,6 @@ async def process_input_messages(
tool_calls_to_check: List[ChatCompletionToolParam] = []
text_task_mappings: List[Tuple[int, Optional[int]]] = []
tool_call_task_mappings: List[Tuple[int, int]] = []
-        # text_task_mappings: Track (message_index, content_index) for each text
-        # content_index is None for string content, int for list content
-        # tool_call_task_mappings: Track (message_index, tool_call_index) for each tool call

# Step 1: Extract all text content, images, and tool calls
for msg_idx, message in enumerate(messages):
@@ -92,12 +100,12 @@ async def process_input_messages(
inputs["images"] = images_to_check
if tool_calls_to_check:
inputs["tool_calls"] = tool_calls_to_check # type: ignore
-        if messages:
-            msg_list = cast(List[AllMessageValues], messages)
+        structured_messages = self.get_structured_messages(data)
+        if structured_messages:
inputs["structured_messages"] = (
-                openai_messages_without_system(msg_list)
+                openai_messages_without_system(structured_messages)
if skip_system
-                else msg_list
+                else structured_messages
)
# Pass tools (function definitions) to the guardrail
tools = data.get("tools")
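Reviewer note: a minimal usage sketch of the new hook on a chat-completions body (request contents are illustrative; a no-arg constructor is assumed). Since these bodies are already OpenAI-spec, the hook is a pass-through extraction.

handler = OpenAIChatCompletionsHandler()  # no-arg construction assumed
data = {
    "model": "gpt-4.1-mini",
    "messages": [
        {"role": "system", "content": "Be brief."},
        {"role": "user", "content": "hello"},
    ],
}

structured = handler.get_structured_messages(data)
# Returns data["messages"] unchanged, or None when "messages" is absent.
assert structured == data["messages"]

When skip_system is set, process_input_messages then strips system messages via openai_messages_without_system before handing the list to the guardrail.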
26 changes: 20 additions & 6 deletions litellm/llms/openai/responses/guardrail_translation/handler.py
@@ -43,6 +43,7 @@
LiteLLMCompletionResponsesConfig,
)
from litellm.types.llms.openai import (
+    AllMessageValues,
ChatCompletionToolCallChunk,
ChatCompletionToolParam,
)
@@ -70,6 +71,24 @@ class OpenAIResponsesHandler(BaseTranslation):
Methods can be overridden to customize behavior for different message formats.
"""

+    def get_structured_messages(self, data: dict) -> Optional[List[AllMessageValues]]:
+        """
+        Convert Responses API request data to OpenAI-spec structured messages.
+
+        Transforms `input` (string or ResponseInputParam) and optional
+        `instructions` into chat completion messages.
+        """
+        input_data = data.get("input")
+        if input_data is None:
+            return None
+        messages = (
+            LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
+                input=input_data,
+                responses_api_request=data,
+            )
+        )
+        return cast(List[AllMessageValues], messages) if messages else None

async def process_input_messages(
self,
data: dict,
@@ -86,12 +105,7 @@ async def process_input_messages(
if input_data is None:
return data

-        structured_messages = (
-            LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
-                input=input_data,
-                responses_api_request=data,
-            )
-        )
+        structured_messages = self.get_structured_messages(data)

# Handle simple string input
if isinstance(input_data, str):
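Reviewer note: a usage sketch for the Responses API variant. The request body is illustrative, a no-arg constructor is assumed, and the expected output is a plausible translation, since the exact mapping belongs to transform_responses_api_input_to_messages.

handler = OpenAIResponsesHandler()  # no-arg construction assumed
data = {
    "model": "gpt-4.1-mini",
    "instructions": "Answer in one sentence.",
    "input": "What is LiteLLM?",
}

structured = handler.get_structured_messages(data)
# Plausible translated shape:
# [{"role": "system", "content": "Answer in one sentence."},
#  {"role": "user", "content": "What is LiteLLM?"}]

# A body with no "input" returns None, matching the early exit in
# process_input_messages.
assert handler.get_structured_messages({"model": "gpt-4.1-mini"}) is None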
4 changes: 3 additions & 1 deletion litellm/passthrough/utils.py
@@ -79,7 +79,9 @@ def forward_headers_from_request(
for header_name, header_value in request_headers.items():
if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX):
# Strip the 'x-pass-' prefix and normalize to lowercase
-            actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX) :].lower()
+            actual_header_name = header_name[
+                len(PASS_THROUGH_HEADER_PREFIX) :
+            ].lower()
if actual_header_name in _PASS_THROUGH_PROTECTED_HEADERS or any(
actual_header_name.startswith(p)
for p in _PASS_THROUGH_PROTECTED_HEADER_PREFIXES
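Reviewer note: a minimal sketch of the prefix stripping this hunk reformats. The 'x-pass-' value comes from the code comment in the hunk; the header name below is illustrative.

PASS_THROUGH_HEADER_PREFIX = "x-pass-"  # value taken from the hunk's comment

header_name = "X-Pass-Anthropic-Beta"
if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX):
    # Strip the 'x-pass-' prefix and normalize to lowercase.
    actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX):].lower()
    assert actual_header_name == "anthropic-beta"
    # Protected names and prefixes (the _PASS_THROUGH_PROTECTED_* checks)
    # are then filtered out before the header is forwarded upstream.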
20 changes: 10 additions & 10 deletions litellm/proxy/_new_secret_config.yaml
@@ -30,13 +30,13 @@ model_list:
id: claude-sonnet-4-custom-pricing
input_cost_per_token: 0.0003 # 100x standard ($0.000003)
output_cost_per_token: 0.0015 # 100x standard ($0.000015)

-litellm_settings:
-  callbacks: ["compression_interception"]
-  compression_interception_params:
-    enabled: true
-    compression_trigger: 100000
-    # # optional:
-    # # embedding_model: "text-embedding-3-small"
-    # # embedding_model_params:
-    # #   dimensions: 512
+  - model_name: my-auto
+    litellm_params:
+      model: auto_router/complexity_router
+      complexity_router_config:
+        tiers:
+          SIMPLE: "gpt-4.1-mini"
+          COMPLEX: claude-sonnet-4-6
+        tier_boundaries:
+          simple_medium: 0.30
+      complexity_router_default_model: small-model
11 changes: 5 additions & 6 deletions litellm/proxy/auth/auth_checks.py
@@ -3126,9 +3126,7 @@ async def _virtual_key_max_budget_alert_check(
alert_email_config: Optional[Dict[str, List[str]]] = (
_merge_budget_alert_email_configs(
global_cfg=litellm.default_key_max_budget_alert_emails,
-                per_key_cfg=(valid_token.metadata or {}).get(
-                    "max_budget_alert_emails"
-                ),
+                per_key_cfg=(valid_token.metadata or {}).get("max_budget_alert_emails"),
)
)

@@ -3138,7 +3136,9 @@
(int(k) for k in alert_email_config if k.isdigit()),
default=None,
)
-        if min_pct is None or valid_token.spend < valid_token.max_budget * (min_pct / 100.0):
+        if min_pct is None or valid_token.spend < valid_token.max_budget * (
+            min_pct / 100.0
+        ):
return

call_info = CallInfo(
@@ -3164,8 +3164,7 @@
else:
# Old path: existing single 80% threshold — completely unchanged
alert_threshold = (
-            valid_token.max_budget
-            * EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE
+            valid_token.max_budget * EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE
)

if (
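Reviewer note: a worked sketch of the threshold selection this hunk touches. The config values are illustrative; the mapping shape (percentage string to recipient list) is inferred from the Dict[str, List[str]] annotation and the k.isdigit() filter in the hunk.

# Per-key config maps percentage thresholds to recipient lists, e.g.:
alert_email_config = {"50": ["ops@example.com"], "80": ["finance@example.com"]}

# The lowest configured percentage gates whether any alert fires.
min_pct = min(
    (int(k) for k in alert_email_config if k.isdigit()),
    default=None,
)  # -> 50

spend, max_budget = 30.0, 100.0
# 30.0 < 100.0 * 0.50, so no alert is sent yet.
if min_pct is None or spend < max_budget * (min_pct / 100.0):
    print("below lowest alert threshold; skip alerting")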
9 changes: 4 additions & 5 deletions litellm/proxy/health_check.py
@@ -306,7 +306,9 @@ def _health_check_deployment_is_wildcard(litellm_params: dict) -> bool:
return "*" in _deployment_model_string_for_health_check(litellm_params)


-def _resolve_health_check_max_tokens(model_info: dict, litellm_params: dict) -> Optional[int]:
+def _resolve_health_check_max_tokens(
+    model_info: dict, litellm_params: dict
+) -> Optional[int]:
"""
Pick max_tokens for the health check request.
@@ -341,10 +343,7 @@ def _resolve_health_check_max_tokens(model_info: dict, litellm_params: dict) ->
return int(tokens_reasoning)
if not is_reasoning and tokens_non_reasoning is not None:
return int(tokens_non_reasoning)
-    if (
-        is_reasoning
-        and BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING is not None
-    ):
+    if is_reasoning and BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING is not None:
return int(BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING)

if BACKGROUND_HEALTH_CHECK_MAX_TOKENS is not None:
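Reviewer note: the resolution order visible in this hunk, restated as a self-contained sketch. Parameter names and the sample values are illustrative; the real function reads its globals and model_info/litellm_params.

# Precedence in the hunk: model-specific values first, then the
# reasoning-specific global, then the generic global, else None.
def resolve_max_tokens_sketch(is_reasoning, tokens_reasoning, tokens_non_reasoning,
                              global_reasoning=None, global_default=None):
    if is_reasoning and tokens_reasoning is not None:
        return int(tokens_reasoning)
    if not is_reasoning and tokens_non_reasoning is not None:
        return int(tokens_non_reasoning)
    if is_reasoning and global_reasoning is not None:
        return int(global_reasoning)
    if global_default is not None:
        return int(global_default)
    return None

assert resolve_max_tokens_sketch(True, None, None, global_reasoning=1024) == 1024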
6 changes: 3 additions & 3 deletions litellm/proxy/hooks/parallel_request_limiter_v3.py
@@ -1570,9 +1570,9 @@ def _build_success_event_pipeline_operations(
user_api_key_project_id = standard_logging_metadata.get(
"user_api_key_project_id"
)
-            user_api_key_end_user_id = kwargs.get(
-                "user"
-            ) or standard_logging_metadata.get("user_api_key_end_user_id")
+            user_api_key_end_user_id = kwargs.get("user") or standard_logging_metadata.get(
+                "user_api_key_end_user_id"
+            )
model_group = get_model_group_from_litellm_kwargs(kwargs)

# Get total tokens from response