2 changes: 1 addition & 1 deletion litellm/integrations/custom_logger.py
@@ -240,7 +240,7 @@ async def async_pre_routing_hook(
self,
model: str,
request_kwargs: Dict,
-        messages: Optional[List[Dict[str, str]]] = None,
+        messages: Optional[List[Dict[str, Any]]] = None,
input: Optional[Union[str, List]] = None,
specific_deployment: Optional[bool] = False,
) -> Optional[PreRoutingHookResponse]:
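Reviewer note: the widening from Dict[str, str] to Dict[str, Any] matters for multimodal chat messages, whose "content" field is a list of parts rather than a plain string. A minimal sketch (message contents are illustrative, not from this PR):

from typing import Any, Dict, List, Optional

# A text-only message would satisfy Dict[str, str] ...
text_message: Dict[str, Any] = {"role": "user", "content": "hi"}

# ... but a multimodal message cannot: "content" is a list, not a str.
multimodal_message: Dict[str, Any] = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ],
}

messages: Optional[List[Dict[str, Any]]] = [text_message, multimodal_message]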
13 changes: 8 additions & 5 deletions litellm/integrations/prometheus.py
@@ -51,6 +51,7 @@
else:
    AsyncIOScheduler = Any

+
class PrometheusLogger(CustomLogger):
# Class variables or attributes

@@ -991,9 +992,7 @@ def _inc_labeled_counter(
amount: float = 1.0,
) -> None:
_labels = prometheus_label_factory(
-            supported_enum_labels=self.get_labels_for_metric(
-                metric_name=metric_name
-            ),
+            supported_enum_labels=self.get_labels_for_metric(metric_name=metric_name),
enum_values=enum_values,
label_context=label_context,
)
@@ -1118,7 +1117,9 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti

user_api_key = hash_token(user_api_key)

-        label_context = PrometheusLabelFactoryContext(enum_values) #amortized per request.
+        label_context = PrometheusLabelFactoryContext(
+            enum_values
+        )  # amortized per request.

# increment total LLM requests and spend metric
self._increment_top_level_request_and_spend_metrics(
@@ -3490,7 +3491,9 @@ def _prometheus_labels_from_context(
}

if UserAPIKeyLabelNames.END_USER.value in filtered_labels:
-        filtered_labels[UserAPIKeyLabelNames.END_USER.value] = ctx.get_resolved_end_user()
+        filtered_labels[UserAPIKeyLabelNames.END_USER.value] = (
+            ctx.get_resolved_end_user()
+        )

for sk, val in ctx._custom_by_sanitized_key.items():
if sk in supported_enum_labels:
3 changes: 1 addition & 2 deletions litellm/integrations/prometheus_helpers/__init__.py
@@ -51,8 +51,7 @@ def __init__(self, enum_values: UserAPIKeyLabelValues) -> None:
self.enum_values = enum_values
enum_dict = enum_values.model_dump()
self._sanitized_enum: Dict[str, Optional[str]] = {
-            k: _sanitize_prometheus_label_value(v)
-            for k, v in enum_dict.items()
+            k: _sanitize_prometheus_label_value(v) for k, v in enum_dict.items()
}
self._custom_by_sanitized_key: Dict[str, Optional[str]] = {}
if enum_values.custom_metadata_labels is not None:
4 changes: 3 additions & 1 deletion litellm/integrations/websearch_interception/handler.py
@@ -847,7 +847,9 @@ async def _build_anthropic_request_patch(
kwargs_for_followup = self._prepare_followup_kwargs(kwargs)

if logging_obj is not None:
-            agentic_params = logging_obj.model_call_details.get("agentic_loop_params", {})
+            agentic_params = logging_obj.model_call_details.get(
+                "agentic_loop_params", {}
+            )
full_model_name = agentic_params.get("model", model)
verbose_logger.debug(
"WebSearchInterception: Built anthropic request patch "
2 changes: 1 addition & 1 deletion litellm/litellm_core_utils/llm_cost_calc/utils.py
@@ -684,7 +684,7 @@ def generic_cost_per_token( # noqa: PLR0915
- cache_creation
- image_tokens
)
-    # Clamp to zero: inconsistent streaming usage
+        # Clamp to zero: inconsistent streaming usage
if text_tokens < 0:
text_tokens = 0
prompt_tokens_details["text_tokens"] = text_tokens
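Reviewer note: a minimal sketch of the clamp being exercised; the numbers are illustrative, not from the PR.

# Inconsistent streaming usage can report more cached/image tokens than
# total prompt tokens, driving the derived text_tokens negative.
prompt_tokens = 100
cache_read = 60
cache_creation = 30
image_tokens = 20

text_tokens = prompt_tokens - cache_read - cache_creation - image_tokens  # -10
# Clamp to zero so the cost math never bills a negative token count.
if text_tokens < 0:
    text_tokens = 0
assert text_tokens == 0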
37 changes: 28 additions & 9 deletions litellm/llms/anthropic/chat/guardrail_translation/handler.py
@@ -34,6 +34,7 @@
)
from litellm.types.llms.openai import (
AllMessageValues,
+    ChatCompletionRequest,
ChatCompletionToolCallChunk,
ChatCompletionToolParam,
)
@@ -67,6 +68,32 @@ def __init__(self):
super().__init__()
self.adapter = LiteLLMAnthropicMessagesAdapter()

+    def _translate_to_openai(self, data: dict) -> ChatCompletionRequest:
+        """Translate Anthropic request to OpenAI chat completion format."""
+        (
+            chat_completion_compatible_request,
+            _tool_name_mapping,
+        ) = LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
+            anthropic_message_request=cast(AnthropicMessagesRequest, data.copy())
+        )
+        return chat_completion_compatible_request
+
+    def get_structured_messages(self, data: dict) -> Optional[List[AllMessageValues]]:
+        """
+        Convert Anthropic messages request data to OpenAI-spec structured messages.
+
+        Uses the Anthropic-to-OpenAI adapter to translate message format.
+        """
+        messages = data.get("messages")
+        if messages is None:
+            return None
+        chat_completion_compatible_request = self._translate_to_openai(data)
+        result = cast(
+            List[AllMessageValues],
+            chat_completion_compatible_request.get("messages", []),
+        )
+        return result if result else None

async def process_input_messages(
self,
data: dict,
@@ -82,13 +109,7 @@ async def process_input_messages(

skip_system = effective_skip_system_message_for_guardrail(guardrail_to_apply)

-        (
-            chat_completion_compatible_request,
-            _tool_name_mapping,
-        ) = LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
-            # Use a shallow copy to avoid mutating request data (pop on litellm_metadata).
-            anthropic_message_request=cast(AnthropicMessagesRequest, data.copy())
-        )
+        chat_completion_compatible_request = self._translate_to_openai(data)

structured_messages = cast(
List[AllMessageValues],
@@ -103,8 +124,6 @@
chat_completion_compatible_request.get("tools", [])
)
task_mappings: List[Tuple[int, Optional[int]]] = []
-        # Track (message_index, content_index) for each text
-        # content_index is None for string content, int for list content

# Step 1: Extract all text content and images
for msg_idx, message in enumerate(messages):
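Reviewer note: a sketch of the round trip through the new hook. The handler class name below is a stand-in (the class definition is outside this hunk), and the translated output shown is a plausible adapter result, not asserted by this diff.

# Hypothetical usage; "AnthropicGuardrailHandler" stands in for whatever
# class in this module defines get_structured_messages.
handler = AnthropicGuardrailHandler()

anthropic_request = {
    "model": "claude-sonnet-4",
    "system": "You are terse.",
    "messages": [{"role": "user", "content": "hi"}],
    "max_tokens": 64,
}

structured = handler.get_structured_messages(anthropic_request)
# Plausible result (exact mapping is up to translate_anthropic_to_openai):
# [{"role": "system", "content": "You are terse."},
#  {"role": "user", "content": "hi"}]

# An absent "messages" key short-circuits to None:
assert handler.get_structured_messages({"model": "claude-sonnet-4"}) is None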
@@ -216,9 +216,11 @@ async def _process_agentic_hooks(self) -> None:
return

[
-            f"{b.get('type')}({b.get('name', '')})"
-            if b.get("type") == "tool_use"
-            else b.get("type")
+            (
+                f"{b.get('type')}({b.get('name', '')})"
+                if b.get("type") == "tool_use"
+                else b.get("type")
+            )
for b in rebuilt.get("content", [])
]

11 changes: 11 additions & 0 deletions litellm/llms/base_llm/guardrail_translation/base_translation.py
@@ -5,6 +5,7 @@
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.llms.openai import AllMessageValues


class BaseTranslation(ABC):
@@ -101,6 +102,16 @@ async def process_output_streaming_response(
"""
return responses_so_far

+    def get_structured_messages(self, data: dict) -> Optional[List["AllMessageValues"]]:
+        """
+        Convert request data to OpenAI-spec structured messages.
+        Override in subclasses for format-specific conversion.
+        Returns None if no convertible content is found.
+        """
+        return None

def extract_request_tool_names(self, data: dict) -> List[str]:
"""
Extract tool names from the request body for allowlist/policy checks.
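Reviewer note: a minimal sketch of a subclass overriding the new base hook. The subclass, its docstring, and the "prompt" field are invented for illustration; only BaseTranslation and the method signature come from this diff.

from typing import List, Optional

from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation


class PromptOnlyTranslation(BaseTranslation):  # hypothetical subclass
    """Example format whose requests carry a single 'prompt' string."""

    def get_structured_messages(self, data: dict) -> Optional[List["AllMessageValues"]]:
        prompt = data.get("prompt")  # 'prompt' is an invented field name
        if prompt is None:
            return None
        # Normalize to OpenAI-spec messages so guardrails can inspect them.
        return [{"role": "user", "content": prompt}]  # type: ignore[list-item]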
4 changes: 1 addition & 3 deletions litellm/llms/github_copilot/authenticator.py
@@ -294,9 +294,7 @@ def _poll_for_access_token(self, device_code: str) -> str:
access_token_url = os.getenv(
"GITHUB_COPILOT_ACCESS_TOKEN_URL", DEFAULT_GITHUB_ACCESS_TOKEN_URL
)
-        client_id = os.getenv(
-            "GITHUB_COPILOT_CLIENT_ID", DEFAULT_GITHUB_CLIENT_ID
-        )
+        client_id = os.getenv("GITHUB_COPILOT_CLIENT_ID", DEFAULT_GITHUB_CLIENT_ID)

for attempt in range(max_attempts):
try:
22 changes: 15 additions & 7 deletions litellm/llms/openai/chat/guardrail_translation/handler.py
@@ -48,6 +48,17 @@ class OpenAIChatCompletionsHandler(BaseTranslation):
Methods can be overridden to customize behavior for different message formats.
"""

+    def get_structured_messages(self, data: dict) -> Optional[List[AllMessageValues]]:
+        """
+        Convert chat completions request data to OpenAI-spec structured messages.
+
+        Messages are already in OpenAI format, so this is a simple extraction.
+        """
+        messages = data.get("messages")
+        if messages is None:
+            return None
+        return cast(List[AllMessageValues], messages)

async def process_input_messages(
self,
data: dict,
@@ -68,9 +79,6 @@ async def process_input_messages(
tool_calls_to_check: List[ChatCompletionToolParam] = []
text_task_mappings: List[Tuple[int, Optional[int]]] = []
tool_call_task_mappings: List[Tuple[int, int]] = []
-        # text_task_mappings: Track (message_index, content_index) for each text
-        # content_index is None for string content, int for list content
-        # tool_call_task_mappings: Track (message_index, tool_call_index) for each tool call

# Step 1: Extract all text content, images, and tool calls
for msg_idx, message in enumerate(messages):
@@ -92,12 +100,12 @@ async def process_input_messages(
inputs["images"] = images_to_check
if tool_calls_to_check:
inputs["tool_calls"] = tool_calls_to_check # type: ignore
-        if messages:
-            msg_list = cast(List[AllMessageValues], messages)
+        structured_messages = self.get_structured_messages(data)
+        if structured_messages:
inputs["structured_messages"] = (
-                openai_messages_without_system(msg_list)
+                openai_messages_without_system(structured_messages)
if skip_system
-                else msg_list
+                else structured_messages
)
# Pass tools (function definitions) to the guardrail
tools = data.get("tools")
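Reviewer note: a minimal usage sketch of the new hook on a chat-completions body (request contents are illustrative; a no-arg constructor is assumed). Since these bodies are already OpenAI-spec, the hook is a pass-through extraction.

handler = OpenAIChatCompletionsHandler()  # no-arg construction assumed
data = {
    "model": "gpt-4.1-mini",
    "messages": [
        {"role": "system", "content": "Be brief."},
        {"role": "user", "content": "hello"},
    ],
}

structured = handler.get_structured_messages(data)
# Returns data["messages"] unchanged, or None when "messages" is absent.
assert structured == data["messages"]

When skip_system is set, process_input_messages then strips system messages via openai_messages_without_system before handing the list to the guardrail.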
26 changes: 20 additions & 6 deletions litellm/llms/openai/responses/guardrail_translation/handler.py
@@ -43,6 +43,7 @@
LiteLLMCompletionResponsesConfig,
)
from litellm.types.llms.openai import (
+    AllMessageValues,
ChatCompletionToolCallChunk,
ChatCompletionToolParam,
)
@@ -70,6 +71,24 @@ class OpenAIResponsesHandler(BaseTranslation):
Methods can be overridden to customize behavior for different message formats.
"""

+    def get_structured_messages(self, data: dict) -> Optional[List[AllMessageValues]]:
+        """
+        Convert Responses API request data to OpenAI-spec structured messages.
+
+        Transforms `input` (string or ResponseInputParam) and optional
+        `instructions` into chat completion messages.
+        """
+        input_data = data.get("input")
+        if input_data is None:
+            return None
+        messages = (
+            LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
+                input=input_data,
+                responses_api_request=data,
+            )
+        )
+        return cast(List[AllMessageValues], messages) if messages else None

async def process_input_messages(
self,
data: dict,
@@ -86,12 +105,7 @@ async def process_input_messages(
if input_data is None:
return data

-        structured_messages = (
-            LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
-                input=input_data,
-                responses_api_request=data,
-            )
-        )
+        structured_messages = self.get_structured_messages(data)

# Handle simple string input
if isinstance(input_data, str):
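Reviewer note: a usage sketch for the Responses API variant. The request body is illustrative, a no-arg constructor is assumed, and the expected output is a plausible translation, since the exact mapping belongs to transform_responses_api_input_to_messages.

handler = OpenAIResponsesHandler()  # no-arg construction assumed
data = {
    "model": "gpt-4.1-mini",
    "instructions": "Answer in one sentence.",
    "input": "What is LiteLLM?",
}

structured = handler.get_structured_messages(data)
# Plausible translated shape:
# [{"role": "system", "content": "Answer in one sentence."},
#  {"role": "user", "content": "What is LiteLLM?"}]

# A body with no "input" returns None, matching the early exit in
# process_input_messages.
assert handler.get_structured_messages({"model": "gpt-4.1-mini"}) is None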
4 changes: 3 additions & 1 deletion litellm/passthrough/utils.py
@@ -79,7 +79,9 @@ def forward_headers_from_request(
for header_name, header_value in request_headers.items():
if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX):
# Strip the 'x-pass-' prefix and normalize to lowercase
-            actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX) :].lower()
+            actual_header_name = header_name[
+                len(PASS_THROUGH_HEADER_PREFIX) :
+            ].lower()
if actual_header_name in _PASS_THROUGH_PROTECTED_HEADERS or any(
actual_header_name.startswith(p)
for p in _PASS_THROUGH_PROTECTED_HEADER_PREFIXES
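Reviewer note: a minimal sketch of the prefix stripping this hunk reformats. The 'x-pass-' value comes from the code comment in the hunk; the header name below is illustrative.

PASS_THROUGH_HEADER_PREFIX = "x-pass-"  # value taken from the hunk's comment

header_name = "X-Pass-Anthropic-Beta"
if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX):
    # Strip the 'x-pass-' prefix and normalize to lowercase.
    actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX):].lower()
    assert actual_header_name == "anthropic-beta"
    # Protected names and prefixes (the _PASS_THROUGH_PROTECTED_* checks)
    # are then filtered out before the header is forwarded upstream.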
20 changes: 10 additions & 10 deletions litellm/proxy/_new_secret_config.yaml
@@ -30,13 +30,13 @@ model_list:
id: claude-sonnet-4-custom-pricing
input_cost_per_token: 0.0003 # 100x standard ($0.000003)
output_cost_per_token: 0.0015 # 100x standard ($0.000015)

-litellm_settings:
-  callbacks: ["compression_interception"]
-  compression_interception_params:
-    enabled: true
-    compression_trigger: 100000
-    # # optional:
-    # # embedding_model: "text-embedding-3-small"
-    # # embedding_model_params:
-    # #   dimensions: 512
+  - model_name: my-auto
+    litellm_params:
+      model: auto_router/complexity_router
+      complexity_router_config:
+        tiers:
+          SIMPLE: "gpt-4.1-mini"
+          COMPLEX: claude-sonnet-4-6
+        tier_boundaries:
+          simple_medium: 0.30
+      complexity_router_default_model: small-model
11 changes: 5 additions & 6 deletions litellm/proxy/auth/auth_checks.py
@@ -3126,9 +3126,7 @@ async def _virtual_key_max_budget_alert_check(
alert_email_config: Optional[Dict[str, List[str]]] = (
_merge_budget_alert_email_configs(
global_cfg=litellm.default_key_max_budget_alert_emails,
-                per_key_cfg=(valid_token.metadata or {}).get(
-                    "max_budget_alert_emails"
-                ),
+                per_key_cfg=(valid_token.metadata or {}).get("max_budget_alert_emails"),
)
)

@@ -3138,7 +3136,9 @@
(int(k) for k in alert_email_config if k.isdigit()),
default=None,
)
-        if min_pct is None or valid_token.spend < valid_token.max_budget * (min_pct / 100.0):
+        if min_pct is None or valid_token.spend < valid_token.max_budget * (
+            min_pct / 100.0
+        ):
return

call_info = CallInfo(
@@ -3164,8 +3164,7 @@
else:
# Old path: existing single 80% threshold — completely unchanged
alert_threshold = (
-            valid_token.max_budget
-            * EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE
+            valid_token.max_budget * EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE
)

if (
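Reviewer note: a worked sketch of the threshold selection this hunk touches. The config values are illustrative; the mapping shape (percentage string to recipient list) is inferred from the Dict[str, List[str]] annotation and the k.isdigit() filter in the hunk.

# Per-key config maps percentage thresholds to recipient lists, e.g.:
alert_email_config = {"50": ["ops@example.com"], "80": ["finance@example.com"]}

# The lowest configured percentage gates whether any alert fires.
min_pct = min(
    (int(k) for k in alert_email_config if k.isdigit()),
    default=None,
)  # -> 50

spend, max_budget = 30.0, 100.0
# 30.0 < 100.0 * 0.50, so no alert is sent yet.
if min_pct is None or spend < max_budget * (min_pct / 100.0):
    print("below lowest alert threshold; skip alerting")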
9 changes: 4 additions & 5 deletions litellm/proxy/health_check.py
@@ -306,7 +306,9 @@ def _health_check_deployment_is_wildcard(litellm_params: dict) -> bool:
return "*" in _deployment_model_string_for_health_check(litellm_params)


-def _resolve_health_check_max_tokens(model_info: dict, litellm_params: dict) -> Optional[int]:
+def _resolve_health_check_max_tokens(
+    model_info: dict, litellm_params: dict
+) -> Optional[int]:
"""
Pick max_tokens for the health check request.
@@ -341,10 +343,7 @@ def _resolve_health_check_max_tokens(model_info: dict, litellm_params: dict) ->
return int(tokens_reasoning)
if not is_reasoning and tokens_non_reasoning is not None:
return int(tokens_non_reasoning)
-    if (
-        is_reasoning
-        and BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING is not None
-    ):
+    if is_reasoning and BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING is not None:
return int(BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING)

if BACKGROUND_HEALTH_CHECK_MAX_TOKENS is not None:
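Reviewer note: the resolution order visible in this hunk, restated as a self-contained sketch. Parameter names and the sample values are illustrative; the real function reads its globals and model_info/litellm_params.

# Precedence in the hunk: model-specific values first, then the
# reasoning-specific global, then the generic global, else None.
def resolve_max_tokens_sketch(is_reasoning, tokens_reasoning, tokens_non_reasoning,
                              global_reasoning=None, global_default=None):
    if is_reasoning and tokens_reasoning is not None:
        return int(tokens_reasoning)
    if not is_reasoning and tokens_non_reasoning is not None:
        return int(tokens_non_reasoning)
    if is_reasoning and global_reasoning is not None:
        return int(global_reasoning)
    if global_default is not None:
        return int(global_default)
    return None

assert resolve_max_tokens_sketch(True, None, None, global_reasoning=1024) == 1024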
6 changes: 3 additions & 3 deletions litellm/proxy/hooks/parallel_request_limiter_v3.py
@@ -1570,9 +1570,9 @@ def _build_success_event_pipeline_operations(
user_api_key_project_id = standard_logging_metadata.get(
"user_api_key_project_id"
)
-            user_api_key_end_user_id = kwargs.get(
-                "user"
-            ) or standard_logging_metadata.get("user_api_key_end_user_id")
+            user_api_key_end_user_id = kwargs.get("user") or standard_logging_metadata.get(
+                "user_api_key_end_user_id"
+            )
model_group = get_model_group_from_litellm_kwargs(kwargs)

# Get total tokens from response