Skip to content

Commit beae5cf

Browse files
Litellm staging 05 10 2025 - openai pdf url support + sagemaker chat content length error fix (#10724)
* Support pdf URLs to openai (#10640) * fix(gpt_transformation.py): support pdf url input to openai pass as base64 as openai doesn't support image URLs * fix(openai.py): support async message transformation allows async get request to convert url to base64 * fix(gpt_transformation.py): fix linting errors and use common components across sync + async flows * fix: fix linting errors * fix(openai.py): pop correct var * Fix sagemaker chat calls - content length error (#10607) * fix(sagemaker_chat/): support passing dynamic aws params previously being ignored * refactor(sagemaker/chat): more refactoring * fix(sagemaker_chat/): make sure streaming is correctly handled post-refactor * refactor: more refactoring to support using signed json str * fix(sagemaker/chat): working sync streaming post refactor * fix(sagemaker/chat): support async streaming post refactor * fix(llm_http_handler.py): await async function * fix: remove print statements * test: update test * test: update test * fix(llm_http_handler.py): retain passing in data as json str * test: update test * fix(base_model_iterator.py): fix linting error * test: test auth * fix: fix linting error * test: update test * test: update translation test * fix(gpt_transformation.py): handle awaitable/non-awaitable object * fix: handle async flow for message transformation on openai compatible api's * test: cleanup testing * test: update test * test(test_router.py): use model with higher quota * test: simplify test * test: update test
1 parent b1fae09 commit beae5cf

30 files changed

+809
-184
lines changed

litellm/llms/base_llm/anthropic_messages/transformation.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from abc import ABC, abstractmethod
2-
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional
2+
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional, Tuple
33

44
import httpx
55

@@ -87,15 +87,15 @@ def sign_request(
8787
model: Optional[str] = None,
8888
stream: Optional[bool] = None,
8989
fake_stream: Optional[bool] = None,
90-
) -> dict:
90+
) -> Tuple[dict, Optional[bytes]]:
9191
"""
9292
OPTIONAL
9393
9494
Sign the request, providers like Bedrock need to sign the request before sending it to the API
9595
9696
For all other providers, this is a no-op and we just return the headers
9797
"""
98-
return headers
98+
return headers, None
9999

100100
def get_async_streaming_response_iterator(
101101
self,

litellm/llms/base_llm/base_model_iterator.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,13 @@ def _handle_string_chunk(
4141
self, str_line: str
4242
) -> Union[GenericStreamingChunk, ModelResponseStream]:
4343
# chunk is a str at this point
44-
44+
stripped_json_chunk: Optional[dict] = None
4545
stripped_chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(
4646
str_line
4747
)
4848
try:
4949
if stripped_chunk is not None:
50-
stripped_json_chunk: Optional[dict] = json.loads(stripped_chunk)
50+
stripped_json_chunk = json.loads(stripped_chunk)
5151
else:
5252
stripped_json_chunk = None
5353
except json.JSONDecodeError:

litellm/llms/base_llm/chat/transformation.py

+27-3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
Iterator,
1212
List,
1313
Optional,
14+
Tuple,
1415
Type,
1516
Union,
1617
cast,
@@ -277,7 +278,7 @@ def sign_request(
277278
model: Optional[str] = None,
278279
stream: Optional[bool] = None,
279280
fake_stream: Optional[bool] = None,
280-
) -> dict:
281+
) -> Tuple[dict, Optional[bytes]]:
281282
"""
282283
Some providers like Bedrock require signing the request. The sign request function needs access to `request_data` and `complete_url`
283284
Args:
@@ -290,7 +291,7 @@ def sign_request(
290291
291292
Update the headers with the signed headers in this function. The return values will be sent as headers in the http request.
292293
"""
293-
return headers
294+
return headers, None
294295

295296
def get_complete_url(
296297
self,
@@ -323,6 +324,27 @@ def transform_request(
323324
) -> dict:
324325
pass
325326

327+
async def async_transform_request(
328+
self,
329+
model: str,
330+
messages: List[AllMessageValues],
331+
optional_params: dict,
332+
litellm_params: dict,
333+
headers: dict,
334+
) -> dict:
335+
"""
336+
Override to allow for http requests on async calls - e.g. converting url to base64
337+
338+
Currently only used by openai.py
339+
"""
340+
return self.transform_request(
341+
model=model,
342+
messages=messages,
343+
optional_params=optional_params,
344+
litellm_params=litellm_params,
345+
headers=headers,
346+
)
347+
326348
@abstractmethod
327349
def transform_response(
328350
self,
@@ -354,7 +376,7 @@ def get_model_response_iterator(
354376
) -> Any:
355377
pass
356378

357-
def get_async_custom_stream_wrapper(
379+
async def get_async_custom_stream_wrapper(
358380
self,
359381
model: str,
360382
custom_llm_provider: str,
@@ -365,6 +387,7 @@ def get_async_custom_stream_wrapper(
365387
messages: list,
366388
client: Optional[AsyncHTTPHandler] = None,
367389
json_mode: Optional[bool] = None,
390+
signed_json_body: Optional[bytes] = None,
368391
) -> CustomStreamWrapper:
369392
raise NotImplementedError
370393

@@ -379,6 +402,7 @@ def get_sync_custom_stream_wrapper(
379402
messages: list,
380403
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
381404
json_mode: Optional[bool] = None,
405+
signed_json_body: Optional[bytes] = None,
382406
) -> CustomStreamWrapper:
383407
raise NotImplementedError
384408

litellm/llms/bedrock/base_aws_llm.py

+82-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,17 @@
22
import json
33
import os
44
from datetime import datetime
5-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast, get_args
5+
from typing import (
6+
TYPE_CHECKING,
7+
Any,
8+
Dict,
9+
List,
10+
Literal,
11+
Optional,
12+
Tuple,
13+
cast,
14+
get_args,
15+
)
616

717
import httpx
818
from pydantic import BaseModel
@@ -625,3 +635,74 @@ def get_request_headers(
625635
prepped = request.prepare()
626636

627637
return prepped
638+
639+
def _sign_request(
640+
self,
641+
service_name: Literal["bedrock", "sagemaker"],
642+
headers: dict,
643+
optional_params: dict,
644+
request_data: dict,
645+
api_base: str,
646+
model: Optional[str] = None,
647+
stream: Optional[bool] = None,
648+
fake_stream: Optional[bool] = None,
649+
) -> Tuple[dict, Optional[bytes]]:
650+
"""
651+
Sign a request for Bedrock or Sagemaker
652+
653+
Returns:
654+
Tuple[dict, Optional[bytes]]: A tuple containing the signed headers and the JSON-encoded body of the request
655+
"""
656+
try:
657+
from botocore.auth import SigV4Auth
658+
from botocore.awsrequest import AWSRequest
659+
from botocore.credentials import Credentials
660+
except ImportError:
661+
raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
662+
663+
## CREDENTIALS ##
664+
# read aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name, etc. from optional_params (read with .get here, not popped)
665+
aws_secret_access_key = optional_params.get("aws_secret_access_key", None)
666+
aws_access_key_id = optional_params.get("aws_access_key_id", None)
667+
aws_session_token = optional_params.get("aws_session_token", None)
668+
aws_role_name = optional_params.get("aws_role_name", None)
669+
aws_session_name = optional_params.get("aws_session_name", None)
670+
aws_profile_name = optional_params.get("aws_profile_name", None)
671+
aws_web_identity_token = optional_params.get("aws_web_identity_token", None)
672+
aws_sts_endpoint = optional_params.get("aws_sts_endpoint", None)
673+
aws_region_name = self._get_aws_region_name(
674+
optional_params=optional_params, model=model
675+
)
676+
677+
credentials: Credentials = self.get_credentials(
678+
aws_access_key_id=aws_access_key_id,
679+
aws_secret_access_key=aws_secret_access_key,
680+
aws_session_token=aws_session_token,
681+
aws_region_name=aws_region_name,
682+
aws_session_name=aws_session_name,
683+
aws_profile_name=aws_profile_name,
684+
aws_role_name=aws_role_name,
685+
aws_web_identity_token=aws_web_identity_token,
686+
aws_sts_endpoint=aws_sts_endpoint,
687+
)
688+
689+
sigv4 = SigV4Auth(credentials, service_name, aws_region_name)
690+
if headers is not None:
691+
headers = {"Content-Type": "application/json", **headers}
692+
else:
693+
headers = {"Content-Type": "application/json"}
694+
695+
request = AWSRequest(
696+
method="POST",
697+
url=api_base,
698+
data=json.dumps(request_data),
699+
headers=headers,
700+
)
701+
sigv4.add_auth(request)
702+
703+
request_headers_dict = dict(request.headers)
704+
if (
705+
headers is not None and "Authorization" in headers
706+
): # prevent sigv4 from overwriting the auth header
707+
request_headers_dict["Authorization"] = headers["Authorization"]
708+
return request_headers_dict, request.body

litellm/llms/bedrock/chat/invoke_handler.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ def make_sync_call(
272272
api_base: str,
273273
headers: dict,
274274
data: str,
275+
signed_json_body: Optional[bytes],
275276
model: str,
276277
messages: list,
277278
logging_obj: Logging,
@@ -286,7 +287,7 @@ def make_sync_call(
286287
response = client.post(
287288
api_base,
288289
headers=headers,
289-
data=data,
290+
data=signed_json_body if signed_json_body is not None else data,
290291
stream=not fake_stream,
291292
logging_obj=logging_obj,
292293
)

litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py

+13-53
Original file line numberDiff line numberDiff line change
@@ -121,60 +121,17 @@ def sign_request(
121121
model: Optional[str] = None,
122122
stream: Optional[bool] = None,
123123
fake_stream: Optional[bool] = None,
124-
) -> dict:
125-
try:
126-
from botocore.auth import SigV4Auth
127-
from botocore.awsrequest import AWSRequest
128-
from botocore.credentials import Credentials
129-
except ImportError:
130-
raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
131-
132-
## CREDENTIALS ##
133-
# pop aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name from kwargs, since completion calls fail with them
134-
aws_secret_access_key = optional_params.get("aws_secret_access_key", None)
135-
aws_access_key_id = optional_params.get("aws_access_key_id", None)
136-
aws_session_token = optional_params.get("aws_session_token", None)
137-
aws_role_name = optional_params.get("aws_role_name", None)
138-
aws_session_name = optional_params.get("aws_session_name", None)
139-
aws_profile_name = optional_params.get("aws_profile_name", None)
140-
aws_web_identity_token = optional_params.get("aws_web_identity_token", None)
141-
aws_sts_endpoint = optional_params.get("aws_sts_endpoint", None)
142-
aws_region_name = self._get_aws_region_name(
143-
optional_params=optional_params, model=model
144-
)
145-
146-
credentials: Credentials = self.get_credentials(
147-
aws_access_key_id=aws_access_key_id,
148-
aws_secret_access_key=aws_secret_access_key,
149-
aws_session_token=aws_session_token,
150-
aws_region_name=aws_region_name,
151-
aws_session_name=aws_session_name,
152-
aws_profile_name=aws_profile_name,
153-
aws_role_name=aws_role_name,
154-
aws_web_identity_token=aws_web_identity_token,
155-
aws_sts_endpoint=aws_sts_endpoint,
156-
)
157-
158-
sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
159-
if headers is not None:
160-
headers = {"Content-Type": "application/json", **headers}
161-
else:
162-
headers = {"Content-Type": "application/json"}
163-
164-
request = AWSRequest(
165-
method="POST",
166-
url=api_base,
167-
data=json.dumps(request_data),
124+
) -> Tuple[dict, Optional[bytes]]:
125+
return self._sign_request(
126+
service_name="bedrock",
168127
headers=headers,
128+
optional_params=optional_params,
129+
request_data=request_data,
130+
api_base=api_base,
131+
model=model,
132+
stream=stream,
133+
fake_stream=fake_stream,
169134
)
170-
sigv4.add_auth(request)
171-
172-
request_headers_dict = dict(request.headers)
173-
if (
174-
headers is not None and "Authorization" in headers
175-
): # prevent sigv4 from overwriting the auth header
176-
request_headers_dict["Authorization"] = headers["Authorization"]
177-
return request_headers_dict
178135

179136
def transform_request(
180137
self,
@@ -454,7 +411,7 @@ def get_error_class(
454411
return BedrockError(status_code=status_code, message=error_message)
455412

456413
@track_llm_api_timing()
457-
def get_async_custom_stream_wrapper(
414+
async def get_async_custom_stream_wrapper(
458415
self,
459416
model: str,
460417
custom_llm_provider: str,
@@ -465,6 +422,7 @@ def get_async_custom_stream_wrapper(
465422
messages: list,
466423
client: Optional[AsyncHTTPHandler] = None,
467424
json_mode: Optional[bool] = None,
425+
signed_json_body: Optional[bytes] = None,
468426
) -> CustomStreamWrapper:
469427
streaming_response = CustomStreamWrapper(
470428
completion_stream=None,
@@ -499,6 +457,7 @@ def get_sync_custom_stream_wrapper(
499457
messages: list,
500458
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
501459
json_mode: Optional[bool] = None,
460+
signed_json_body: Optional[bytes] = None,
502461
) -> CustomStreamWrapper:
503462
if client is None or isinstance(client, AsyncHTTPHandler):
504463
client = _get_httpx_client(params={})
@@ -510,6 +469,7 @@ def get_sync_custom_stream_wrapper(
510469
api_base=api_base,
511470
headers=headers,
512471
data=json.dumps(data),
472+
signed_json_body=signed_json_body,
513473
model=model,
514474
messages=messages,
515475
logging_obj=logging_obj,

litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional, Union
1+
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional, Tuple, Union
22

33
import httpx
44

@@ -47,7 +47,7 @@ def sign_request(
4747
model: Optional[str] = None,
4848
stream: Optional[bool] = None,
4949
fake_stream: Optional[bool] = None,
50-
) -> dict:
50+
) -> Tuple[dict, Optional[bytes]]:
5151
return AmazonInvokeConfig.sign_request(
5252
self=self,
5353
headers=headers,

0 commit comments

Comments
 (0)