Commit f6535ae

Support format param for specifying image type (#9019)
* fix(transformation.py): support a 'format' parameter for images, allowing the user to specify the mime type
* fix: pass mimetype via 'format' param
* feat(gemini/chat/transformation.py): support 'format' param for gemini
* fix(factory.py): support 'format' param on sync bedrock converse calls
* feat(bedrock/converse_transformation.py): support 'format' param for bedrock async calls
* refactor(factory.py): move to supporting 'format' param in base helper, ensuring consistency in param support
* feat(gpt_transformation.py): filter out 'format' param; don't send invalid param to openai
* fix(gpt_transformation.py): fix translation
* fix: fix translation error
1 parent a271297 commit f6535ae

File tree: 11 files changed, +278 −29 lines changed
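
In practice, the litellm-specific `format` key rides inside the `image_url` object of a chat message and overrides the inferred mime type when the request is translated for Anthropic, Bedrock, or Gemini; for OpenAI it is stripped before the request is sent. A minimal usage sketch (the model name, image URL, and credentials are placeholders, not part of this commit):

```python
import litellm

# Hedged usage sketch: requires valid provider credentials; the URL is a placeholder
# whose path does not reveal the image type, so "format" supplies it explicitly.
response = litellm.completion(
    model="gemini/gemini-1.5-flash",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/photo",  # no file extension
                        "format": "image/jpeg",  # litellm-specific: explicit mime type
                    },
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```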

litellm/litellm_core_utils/prompt_templates/factory.py

+45 −14

@@ -680,12 +680,13 @@ def convert_generic_image_chunk_to_openai_image_obj(
     Return:
         "data:image/jpeg;base64,{base64_image}"
     """
-    return "data:{};{},{}".format(
-        image_chunk["media_type"], image_chunk["type"], image_chunk["data"]
-    )
+    media_type = image_chunk["media_type"]
+    return "data:{};{},{}".format(media_type, image_chunk["type"], image_chunk["data"])
 
 
-def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsingChunk:
+def convert_to_anthropic_image_obj(
+    openai_image_url: str, format: Optional[str]
+) -> GenericImageParsingChunk:
     """
     Input:
         "image_url": "data:image/jpeg;base64,{base64_image}",
@@ -702,7 +703,11 @@ def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsing
             openai_image_url = convert_url_to_base64(url=openai_image_url)
         # Extract the media type and base64 data
         media_type, base64_data = openai_image_url.split("data:")[1].split(";base64,")
-        media_type = media_type.replace("\\/", "/")
+
+        if format:
+            media_type = format
+        else:
+            media_type = media_type.replace("\\/", "/")
 
         return GenericImageParsingChunk(
             type="base64",
@@ -820,11 +825,12 @@ def anthropic_messages_pt_xml(messages: list):
         if isinstance(messages[msg_i]["content"], list):
             for m in messages[msg_i]["content"]:
                 if m.get("type", "") == "image_url":
+                    format = m["image_url"].get("format")
                     user_content.append(
                         {
                             "type": "image",
                             "source": convert_to_anthropic_image_obj(
-                                m["image_url"]["url"]
+                                m["image_url"]["url"], format=format
                             ),
                         }
                     )
@@ -1156,10 +1162,13 @@ def convert_to_anthropic_tool_result(
             )
         elif content["type"] == "image_url":
             if isinstance(content["image_url"], str):
-                image_chunk = convert_to_anthropic_image_obj(content["image_url"])
+                image_chunk = convert_to_anthropic_image_obj(
+                    content["image_url"], format=None
+                )
             else:
+                format = content["image_url"].get("format")
                 image_chunk = convert_to_anthropic_image_obj(
-                    content["image_url"]["url"]
+                    content["image_url"]["url"], format=format
                 )
             anthropic_content_list.append(
                 AnthropicMessagesImageParam(
@@ -1318,6 +1327,7 @@ def _anthropic_content_element_factory(
                 data=image_chunk["data"],
             ),
         )
+
     return _anthropic_content_element
 
 
@@ -1369,13 +1379,16 @@ def anthropic_messages_pt( # noqa: PLR0915
                 for m in user_message_types_block["content"]:
                     if m.get("type", "") == "image_url":
                         m = cast(ChatCompletionImageObject, m)
+                        format: Optional[str] = None
                         if isinstance(m["image_url"], str):
                             image_chunk = convert_to_anthropic_image_obj(
-                                openai_image_url=m["image_url"]
+                                openai_image_url=m["image_url"], format=None
                             )
                         else:
+                            format = m["image_url"].get("format")
                             image_chunk = convert_to_anthropic_image_obj(
-                                openai_image_url=m["image_url"]["url"]
+                                openai_image_url=m["image_url"]["url"],
+                                format=format,
                             )
 
                         _anthropic_content_element = (
@@ -2303,8 +2316,11 @@ def _create_bedrock_block(
         )
 
     @classmethod
-    def process_image_sync(cls, image_url: str) -> BedrockContentBlock:
+    def process_image_sync(
+        cls, image_url: str, format: Optional[str] = None
+    ) -> BedrockContentBlock:
         """Synchronous image processing."""
+
         if "base64" in image_url:
             img_bytes, mime_type, image_format = cls._parse_base64_image(image_url)
         elif "http://" in image_url or "https://" in image_url:
@@ -2315,11 +2331,17 @@ def process_image_sync(cls, image_url: str) -> BedrockContentBlock:
                 "Unsupported image type. Expected either image url or base64 encoded string"
             )
 
+        if format:
+            mime_type = format
+            image_format = mime_type.split("/")[1]
+
         image_format = cls._validate_format(mime_type, image_format)
         return cls._create_bedrock_block(img_bytes, mime_type, image_format)
 
     @classmethod
-    async def process_image_async(cls, image_url: str) -> BedrockContentBlock:
+    async def process_image_async(
+        cls, image_url: str, format: Optional[str]
+    ) -> BedrockContentBlock:
         """Asynchronous image processing."""
 
         if "base64" in image_url:
@@ -2334,6 +2356,10 @@ async def process_image_async(cls, image_url: str) -> BedrockContentBlock:
                 "Unsupported image type. Expected either image url or base64 encoded string"
             )
 
+        if format:  # override with user-defined params
+            mime_type = format
+            image_format = mime_type.split("/")[1]
+
         image_format = cls._validate_format(mime_type, image_format)
         return cls._create_bedrock_block(img_bytes, mime_type, image_format)
 
@@ -2821,12 +2847,14 @@ async def _bedrock_converse_messages_pt_async( # noqa: PLR0915
                         _part = BedrockContentBlock(text=element["text"])
                         _parts.append(_part)
                     elif element["type"] == "image_url":
+                        format: Optional[str] = None
                         if isinstance(element["image_url"], dict):
                             image_url = element["image_url"]["url"]
+                            format = element["image_url"].get("format")
                         else:
                             image_url = element["image_url"]
                         _part = await BedrockImageProcessor.process_image_async( # type: ignore
-                            image_url=image_url
+                            image_url=image_url, format=format
                         )
                         _parts.append(_part)  # type: ignore
                 _cache_point_block = (
@@ -3059,12 +3087,15 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
                         _part = BedrockContentBlock(text=element["text"])
                         _parts.append(_part)
                     elif element["type"] == "image_url":
+                        format: Optional[str] = None
                         if isinstance(element["image_url"], dict):
                             image_url = element["image_url"]["url"]
+                            format = element["image_url"].get("format")
                         else:
                             image_url = element["image_url"]
                         _part = BedrockImageProcessor.process_image_sync( # type: ignore
-                            image_url=image_url
+                            image_url=image_url,
+                            format=format,
                         )
                         _parts.append(_part)  # type: ignore
                 _cache_point_block = (
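
The core of the change is the new `format` argument on `convert_to_anthropic_image_obj`, which takes precedence over the mime type parsed out of the data URL. A small sketch of the behavior, based on the diff above (the base64 payload is a placeholder):

```python
from litellm.litellm_core_utils.prompt_templates.factory import (
    convert_to_anthropic_image_obj,
)

data_url = "data:application/octet-stream;base64,iVBORw0KGgoAAAANSUhEUg=="

# Without a format, the media type comes from the data URL itself.
chunk = convert_to_anthropic_image_obj(data_url, format=None)
print(chunk["media_type"])  # application/octet-stream

# With a format, the caller-supplied mime type wins.
chunk = convert_to_anthropic_image_obj(data_url, format="image/png")
print(chunk["media_type"])  # image/png
```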

litellm/llms/gemini/chat/transformation.py

+5 −1

@@ -114,12 +114,16 @@ def _transform_messages(
                     if element.get("type") == "image_url":
                         img_element = element
                         _image_url: Optional[str] = None
+                        format: Optional[str] = None
                         if isinstance(img_element.get("image_url"), dict):
                             _image_url = img_element["image_url"].get("url")  # type: ignore
+                            format = img_element["image_url"].get("format")  # type: ignore
                         else:
                             _image_url = img_element.get("image_url")  # type: ignore
                         if _image_url and "https://" in _image_url:
-                            image_obj = convert_to_anthropic_image_obj(_image_url)
+                            image_obj = convert_to_anthropic_image_obj(
+                                _image_url, format=format
+                            )
                             img_element["image_url"] = (  # type: ignore
                                 convert_generic_image_chunk_to_openai_image_obj(
                                     image_obj
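
For Gemini, the https image is converted to a generic chunk and then re-emitted as an OpenAI-style data URL, so a caller-supplied `format` ends up in the rebuilt URL. A sketch of that round trip using a placeholder base64 payload (in the Gemini path above, an https URL would be downloaded and encoded first):

```python
from litellm.litellm_core_utils.prompt_templates.factory import (
    convert_generic_image_chunk_to_openai_image_obj,
    convert_to_anthropic_image_obj,
)

data_url = "data:application/octet-stream;base64,iVBORw0KGgoAAAANSUhEUg=="

image_obj = convert_to_anthropic_image_obj(data_url, format="image/jpeg")
rebuilt = convert_generic_image_chunk_to_openai_image_obj(image_obj)
print(rebuilt)  # data:image/jpeg;base64,iVBORw0KGgoAAAANSUhEUg==
```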

litellm/llms/openai/chat/gpt_transformation.py

+15 −1

@@ -20,7 +20,11 @@
 from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
 from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionImageObject,
+    ChatCompletionImageUrlObject,
+)
 from litellm.types.utils import ModelResponse, ModelResponseStream
 from litellm.utils import convert_to_model_response_object
 
@@ -189,6 +193,16 @@ def _transform_messages(
                             content_item["image_url"] = {
                                 "url": content_item["image_url"],
                             }
+                        elif isinstance(content_item["image_url"], dict):
+                            litellm_specific_params = {"format"}
+                            new_image_url_obj = ChatCompletionImageUrlObject(
+                                **{  # type: ignore
+                                    k: v
+                                    for k, v in content_item["image_url"].items()
+                                    if k not in litellm_specific_params
+                                }
+                            )
+                            content_item["image_url"] = new_image_url_obj
         return messages
 
     def transform_request(
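
Since `format` is not part of the OpenAI image_url schema, the transformation above strips litellm-specific keys before the request goes out. The filtering step in isolation (placeholder values):

```python
litellm_specific_params = {"format"}
image_url = {
    "url": "https://example.com/cat.png",  # placeholder
    "detail": "high",
    "format": "image/png",  # litellm-specific, dropped for OpenAI
}
filtered = {k: v for k, v in image_url.items() if k not in litellm_specific_params}
print(filtered)  # {'url': 'https://example.com/cat.png', 'detail': 'high'}
```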

litellm/llms/vertex_ai/gemini/transformation.py

+19 −9

@@ -55,36 +55,42 @@
 LiteLLMLoggingObj = Any
 
 
-def _process_gemini_image(image_url: str) -> PartType:
+def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartType:
     """
     Given an image URL, return the appropriate PartType for Gemini
     """
+
     try:
         # GCS URIs
         if "gs://" in image_url:
             # Figure out file type
             extension_with_dot = os.path.splitext(image_url)[-1]  # Ex: ".png"
             extension = extension_with_dot[1:]  # Ex: "png"
 
-            file_type = get_file_type_from_extension(extension)
+            if not format:
+                file_type = get_file_type_from_extension(extension)
 
-            # Validate the file type is supported by Gemini
-            if not is_gemini_1_5_accepted_file_type(file_type):
-                raise Exception(f"File type not supported by gemini - {file_type}")
+                # Validate the file type is supported by Gemini
+                if not is_gemini_1_5_accepted_file_type(file_type):
+                    raise Exception(f"File type not supported by gemini - {file_type}")
 
-            mime_type = get_file_mime_type_for_file_type(file_type)
+                mime_type = get_file_mime_type_for_file_type(file_type)
+            else:
+                mime_type = format
             file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
 
             return PartType(file_data=file_data)
         elif (
             "https://" in image_url
-            and (image_type := _get_image_mime_type_from_url(image_url)) is not None
+            and (image_type := format or _get_image_mime_type_from_url(image_url))
+            is not None
         ):
+
             file_data = FileDataType(file_uri=image_url, mime_type=image_type)
             return PartType(file_data=file_data)
         elif "http://" in image_url or "https://" in image_url or "base64" in image_url:
             # https links for unsupported mime types and base64 images
-            image = convert_to_anthropic_image_obj(image_url)
+            image = convert_to_anthropic_image_obj(image_url, format=format)
             _blob = BlobType(data=image["data"], mime_type=image["media_type"])
             return PartType(inline_data=_blob)
         raise Exception("Invalid image received - {}".format(image_url))
@@ -159,11 +165,15 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915
                     elif element["type"] == "image_url":
                         element = cast(ChatCompletionImageObject, element)
                         img_element = element
+                        format: Optional[str] = None
                         if isinstance(img_element["image_url"], dict):
                             image_url = img_element["image_url"]["url"]
+                            format = img_element["image_url"].get("format")
                         else:
                             image_url = img_element["image_url"]
-                        _part = _process_gemini_image(image_url=image_url)
+                        _part = _process_gemini_image(
+                            image_url=image_url, format=format
+                        )
                         _parts.append(_part)
                 user_content.extend(_parts)
             elif (
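
On the Vertex AI / Gemini path, a caller-supplied `format` now takes precedence over extension- or URL-based mime detection, which matters for GCS objects without a useful extension. A hedged sketch calling the helper directly (the bucket and object name are placeholders):

```python
from litellm.llms.vertex_ai.gemini.transformation import _process_gemini_image

# The gs:// URI has no extension, so the explicit format is used as the mime type.
part = _process_gemini_image(
    image_url="gs://my-bucket/objects/12345",  # placeholder URI
    format="image/png",
)
print(part["file_data"])  # {'mime_type': 'image/png', 'file_uri': 'gs://my-bucket/objects/12345'}
```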

litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py

−1

@@ -1023,7 +1023,6 @@ async def async_completion(
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
-
         should_use_v1beta1_features = self.is_using_v1beta1_features(
             optional_params=optional_params
         )

litellm/types/llms/openai.py

+1

@@ -378,6 +378,7 @@ class ChatCompletionTextObject(
 class ChatCompletionImageUrlObject(TypedDict, total=False):
     url: Required[str]
     detail: str
+    format: str
 
 
 class ChatCompletionImageObject(TypedDict):
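
The new key is optional (the TypedDict uses `total=False`, with only `url` marked `Required`), so existing message payloads keep working. A minimal sketch of the resulting shape:

```python
from litellm.types.llms.openai import ChatCompletionImageUrlObject

image_url: ChatCompletionImageUrlObject = {
    "url": "https://example.com/cat.png",  # placeholder; the only required key
    "detail": "auto",
    "format": "image/png",  # litellm-specific mime-type override
}
```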

tests/litellm/log.txt

+28

@@ -0,0 +1,28 @@
+============================= test session starts ==============================
+platform darwin -- Python 3.11.4, pytest-7.4.1, pluggy-1.2.0 -- /Library/Frameworks/Python.framework/Versions/3.11/bin/python3
+cachedir: .pytest_cache
+rootdir: /Users/krrishdholakia/Documents/litellm/tests/litellm
+plugins: snapshot-0.9.0, cov-5.0.0, timeout-2.2.0, respx-0.21.1, asyncio-0.21.1, langsmith-0.3.4, anyio-4.8.0, mock-3.11.1, Faker-25.9.2
+asyncio: mode=Mode.STRICT
+collecting ... collected 4 items
+
+test_main.py::test_url_with_format_param[True-gemini/gemini-1.5-flash] PASSED [ 25%]
+test_main.py::test_url_with_format_param[True-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0] PASSED [ 50%]
+test_main.py::test_url_with_format_param[False-gemini/gemini-1.5-flash] PASSED [ 75%]
+test_main.py::test_url_with_format_param[False-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0] PASSED [100%]
+
+=============================== warnings summary ===============================
+../../../../../../Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pydantic/_internal/_config.py:295
+  /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pydantic/_internal/_config.py:295: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
+    warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)
+
+../../litellm/litellm_core_utils/get_model_cost_map.py:24
+  /Users/krrishdholakia/Documents/litellm/litellm/litellm_core_utils/get_model_cost_map.py:24: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
+    with importlib.resources.open_text(
+
+../../litellm/utils.py:168
+  /Users/krrishdholakia/Documents/litellm/litellm/utils.py:168: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
+    with resources.open_text(
+
+-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
+======================== 4 passed, 3 warnings in 2.80s =========================
