Skip to content
8 changes: 4 additions & 4 deletions litellm/litellm_core_utils/llm_request_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ def get_proxy_server_request_headers(litellm_params: Optional[dict]) -> dict:
if litellm_params is None:
return {}

proxy_request_headers = (
litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
)
proxy_server_request = litellm_params.get("proxy_server_request") or {}
if not isinstance(proxy_server_request, dict):
return {}

return proxy_request_headers
return proxy_server_request.get("headers") or {}
10 changes: 6 additions & 4 deletions litellm/litellm_core_utils/logging_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,11 +507,13 @@ def _flush_on_exit(self):
except asyncio.QueueEmpty:
break

# Run the coroutine synchronously in new loop
# Note: We run the coroutine directly, not via create_task,
# since we're in a new event loop context
# Run the coroutine synchronously in new loop with a per-task
# timeout so a single hung coroutine cannot block the entire
# flush past the outer time budget.
try:
loop.run_until_complete(task["coroutine"])
loop.run_until_complete(
asyncio.wait_for(task["coroutine"], timeout=2.0)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Hardcoded per-task timeout may exceed configurable outer budget

The per-task timeout of 2.0 s is a magic number that does not track the configurable MAX_TIME_TO_CLEAR_QUEUE constant (defaults to 5.0 s, overridable via an environment variable). If an operator sets MAX_TIME_TO_CLEAR_QUEUE=1.0, a single hung task can block for up to 2.0 s — twice the entire outer budget — before the loop-level time check gets a chance to fire, so the outer limit is effectively dead code for any queue that contains at least one slow coroutine.

Consider deriving the per-task limit from the outer budget or at least capping it:

    _per_task_timeout = min(2.0, MAX_TIME_TO_CLEAR_QUEUE)
    loop.run_until_complete(
        asyncio.wait_for(task["coroutine"], timeout=_per_task_timeout)
    )

)
processed += 1
except Exception:
# Silent failure to not break user's program
Expand Down
148 changes: 147 additions & 1 deletion litellm/model_prices_and_context_window_backup.json
Original file line number Diff line number Diff line change
Expand Up @@ -19275,13 +19275,24 @@
},
"gpt-5.5": {
"cache_read_input_token_cost": 5e-07,
"cache_read_input_token_cost_above_272k_tokens": 1e-06,
"cache_read_input_token_cost_flex": 2.5e-07,
"cache_read_input_token_cost_priority": 1e-06,
"input_cost_per_token": 5e-06,
"input_cost_per_token_above_272k_tokens": 1e-05,
"input_cost_per_token_flex": 2.5e-06,
"input_cost_per_token_batches": 2.5e-06,
"input_cost_per_token_priority": 1e-05,
"litellm_provider": "openai",
"max_input_tokens": 272000,
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 3e-05,
"output_cost_per_token_above_272k_tokens": 4.5e-05,
"output_cost_per_token_flex": 1.5e-05,
"output_cost_per_token_batches": 1.5e-05,
"output_cost_per_token_priority": 6e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
Expand All @@ -19305,10 +19316,145 @@
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
"gpt-5.5-2026-04-23": {
"cache_read_input_token_cost": 5e-07,
"cache_read_input_token_cost_above_272k_tokens": 1e-06,
"cache_read_input_token_cost_flex": 2.5e-07,
"cache_read_input_token_cost_priority": 1e-06,
"input_cost_per_token": 5e-06,
"input_cost_per_token_above_272k_tokens": 1e-05,
"input_cost_per_token_flex": 2.5e-06,
"input_cost_per_token_batches": 2.5e-06,
"input_cost_per_token_priority": 1e-05,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 3e-05,
"output_cost_per_token_above_272k_tokens": 4.5e-05,
"output_cost_per_token_flex": 1.5e-05,
"output_cost_per_token_batches": 1.5e-05,
"output_cost_per_token_priority": 6e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
"gpt-5.5-pro": {
"cache_read_input_token_cost": 6e-06,
"cache_read_input_token_cost_above_272k_tokens": 1.2e-05,
"input_cost_per_token": 6e-05,
"input_cost_per_token_above_272k_tokens": 0.00012,
"input_cost_per_token_flex": 3e-05,
"input_cost_per_token_batches": 3e-05,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 0.00036,
"output_cost_per_token_above_272k_tokens": 0.00054,
"output_cost_per_token_flex": 0.00018,
"output_cost_per_token_batches": 0.00018,
"supported_endpoints": [
"/v1/responses",
"/v1/batch"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
"gpt-5.5-pro-2026-04-23": {
"cache_read_input_token_cost": 6e-06,
"cache_read_input_token_cost_above_272k_tokens": 1.2e-05,
"input_cost_per_token": 6e-05,
"input_cost_per_token_above_272k_tokens": 0.00012,
"input_cost_per_token_flex": 3e-05,
"input_cost_per_token_batches": 3e-05,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 0.00036,
"output_cost_per_token_above_272k_tokens": 0.00054,
"output_cost_per_token_flex": 0.00018,
"output_cost_per_token_batches": 0.00018,
"supported_endpoints": [
"/v1/responses",
"/v1/batch"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
"gpt-5.4": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_above_272k_tokens": 5e-07,
Expand Down
148 changes: 147 additions & 1 deletion model_prices_and_context_window.json
Original file line number Diff line number Diff line change
Expand Up @@ -19289,13 +19289,24 @@
},
"gpt-5.5": {
"cache_read_input_token_cost": 5e-07,
"cache_read_input_token_cost_above_272k_tokens": 1e-06,
"cache_read_input_token_cost_flex": 2.5e-07,
"cache_read_input_token_cost_priority": 1e-06,
"input_cost_per_token": 5e-06,
"input_cost_per_token_above_272k_tokens": 1e-05,
"input_cost_per_token_flex": 2.5e-06,
"input_cost_per_token_batches": 2.5e-06,
"input_cost_per_token_priority": 1e-05,
"litellm_provider": "openai",
"max_input_tokens": 272000,
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 3e-05,
"output_cost_per_token_above_272k_tokens": 4.5e-05,
"output_cost_per_token_flex": 1.5e-05,
"output_cost_per_token_batches": 1.5e-05,
"output_cost_per_token_priority": 6e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
Expand All @@ -19319,10 +19330,145 @@
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
"gpt-5.5-2026-04-23": {
"cache_read_input_token_cost": 5e-07,
"cache_read_input_token_cost_above_272k_tokens": 1e-06,
"cache_read_input_token_cost_flex": 2.5e-07,
"cache_read_input_token_cost_priority": 1e-06,
"input_cost_per_token": 5e-06,
"input_cost_per_token_above_272k_tokens": 1e-05,
"input_cost_per_token_flex": 2.5e-06,
"input_cost_per_token_batches": 2.5e-06,
"input_cost_per_token_priority": 1e-05,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 3e-05,
"output_cost_per_token_above_272k_tokens": 4.5e-05,
"output_cost_per_token_flex": 1.5e-05,
"output_cost_per_token_batches": 1.5e-05,
"output_cost_per_token_priority": 6e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
"gpt-5.5-pro": {
"cache_read_input_token_cost": 6e-06,
"cache_read_input_token_cost_above_272k_tokens": 1.2e-05,
"input_cost_per_token": 6e-05,
"input_cost_per_token_above_272k_tokens": 0.00012,
"input_cost_per_token_flex": 3e-05,
"input_cost_per_token_batches": 3e-05,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 0.00036,
"output_cost_per_token_above_272k_tokens": 0.00054,
"output_cost_per_token_flex": 0.00018,
"output_cost_per_token_batches": 0.00018,
"supported_endpoints": [
"/v1/responses",
"/v1/batch"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
"gpt-5.5-pro-2026-04-23": {
"cache_read_input_token_cost": 6e-06,
"cache_read_input_token_cost_above_272k_tokens": 1.2e-05,
"input_cost_per_token": 6e-05,
"input_cost_per_token_above_272k_tokens": 0.00012,
"input_cost_per_token_flex": 3e-05,
"input_cost_per_token_batches": 3e-05,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 0.00036,
"output_cost_per_token_above_272k_tokens": 0.00054,
"output_cost_per_token_flex": 0.00018,
"output_cost_per_token_batches": 0.00018,
"supported_endpoints": [
"/v1/responses",
"/v1/batch"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
"gpt-5.4": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_above_272k_tokens": 5e-07,
Expand Down
1 change: 0 additions & 1 deletion tests/local_testing/test_amazing_vertex_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3688,7 +3688,6 @@ def test_vertex_ai_llama_tool_calling():

def test_vertex_schema_test():
load_vertex_ai_credentials()
litellm._turn_on_debug()

def tool_call(text: str | None) -> str:
return text or "No text provided"
Expand Down
Loading
Loading