Commit f6535ae

Support format param for specifying image type (#9019)
* fix(transformation.py): support a 'format' parameter for images, allowing the user to specify the mime type
* fix: pass mimetype via 'format' param
* feat(gemini/chat/transformation.py): support 'format' param for gemini
* fix(factory.py): support 'format' param on sync bedrock converse calls
* feat(bedrock/converse_transformation.py): support 'format' param for bedrock async calls
* refactor(factory.py): move to supporting 'format' param in base helper, ensuring consistency in param support
* feat(gpt_transformation.py): filter out 'format' param; don't send invalid param to openai
* fix(gpt_transformation.py): fix translation
* fix: fix translation error
1 parent a271297 commit f6535ae

File tree: 11 files changed, +278 −29 lines changed
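
In practice, the litellm-specific `format` key rides inside the `image_url` object of a chat message and overrides the inferred mime type when the request is translated for Anthropic, Bedrock, or Gemini; for OpenAI it is stripped before the request is sent. A minimal usage sketch (the model name, image URL, and credentials are placeholders, not part of this commit):

```python
import litellm

# Hedged usage sketch: requires valid provider credentials; the URL is a placeholder
# whose path does not reveal the image type, so "format" supplies it explicitly.
response = litellm.completion(
    model="gemini/gemini-1.5-flash",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/photo",  # no file extension
                        "format": "image/jpeg",  # litellm-specific: explicit mime type
                    },
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```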

litellm/litellm_core_utils/prompt_templates/factory.py

+45 −14

@@ -680,12 +680,13 @@ def convert_generic_image_chunk_to_openai_image_obj(
     Return:
         "data:image/jpeg;base64,{base64_image}"
     """
-    return "data:{};{},{}".format(
-        image_chunk["media_type"], image_chunk["type"], image_chunk["data"]
-    )
+    media_type = image_chunk["media_type"]
+    return "data:{};{},{}".format(media_type, image_chunk["type"], image_chunk["data"])
 
 
-def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsingChunk:
+def convert_to_anthropic_image_obj(
+    openai_image_url: str, format: Optional[str]
+) -> GenericImageParsingChunk:
     """
     Input:
         "image_url": "data:image/jpeg;base64,{base64_image}",
@@ -702,7 +703,11 @@ def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsing
             openai_image_url = convert_url_to_base64(url=openai_image_url)
         # Extract the media type and base64 data
         media_type, base64_data = openai_image_url.split("data:")[1].split(";base64,")
-        media_type = media_type.replace("\\/", "/")
+
+        if format:
+            media_type = format
+        else:
+            media_type = media_type.replace("\\/", "/")
 
         return GenericImageParsingChunk(
             type="base64",
@@ -820,11 +825,12 @@ def anthropic_messages_pt_xml(messages: list):
         if isinstance(messages[msg_i]["content"], list):
             for m in messages[msg_i]["content"]:
                 if m.get("type", "") == "image_url":
+                    format = m["image_url"].get("format")
                     user_content.append(
                         {
                             "type": "image",
                             "source": convert_to_anthropic_image_obj(
-                                m["image_url"]["url"]
+                                m["image_url"]["url"], format=format
                             ),
                         }
                     )
@@ -1156,10 +1162,13 @@ def convert_to_anthropic_tool_result(
             )
         elif content["type"] == "image_url":
             if isinstance(content["image_url"], str):
-                image_chunk = convert_to_anthropic_image_obj(content["image_url"])
+                image_chunk = convert_to_anthropic_image_obj(
+                    content["image_url"], format=None
+                )
             else:
+                format = content["image_url"].get("format")
                 image_chunk = convert_to_anthropic_image_obj(
-                    content["image_url"]["url"]
+                    content["image_url"]["url"], format=format
                 )
             anthropic_content_list.append(
                 AnthropicMessagesImageParam(
@@ -1318,6 +1327,7 @@ def _anthropic_content_element_factory(
                 data=image_chunk["data"],
             ),
         )
+
     return _anthropic_content_element
 
 
@@ -1369,13 +1379,16 @@ def anthropic_messages_pt( # noqa: PLR0915
                 for m in user_message_types_block["content"]:
                     if m.get("type", "") == "image_url":
                         m = cast(ChatCompletionImageObject, m)
+                        format: Optional[str] = None
                         if isinstance(m["image_url"], str):
                             image_chunk = convert_to_anthropic_image_obj(
-                                openai_image_url=m["image_url"]
+                                openai_image_url=m["image_url"], format=None
                             )
                         else:
+                            format = m["image_url"].get("format")
                             image_chunk = convert_to_anthropic_image_obj(
-                                openai_image_url=m["image_url"]["url"]
+                                openai_image_url=m["image_url"]["url"],
+                                format=format,
                             )
 
                         _anthropic_content_element = (
@@ -2303,8 +2316,11 @@ def _create_bedrock_block(
         )
 
     @classmethod
-    def process_image_sync(cls, image_url: str) -> BedrockContentBlock:
+    def process_image_sync(
+        cls, image_url: str, format: Optional[str] = None
+    ) -> BedrockContentBlock:
         """Synchronous image processing."""
+
         if "base64" in image_url:
             img_bytes, mime_type, image_format = cls._parse_base64_image(image_url)
         elif "http://" in image_url or "https://" in image_url:
@@ -2315,11 +2331,17 @@ def process_image_sync(cls, image_url: str) -> BedrockContentBlock:
                 "Unsupported image type. Expected either image url or base64 encoded string"
             )
 
+        if format:
+            mime_type = format
+            image_format = mime_type.split("/")[1]
+
         image_format = cls._validate_format(mime_type, image_format)
         return cls._create_bedrock_block(img_bytes, mime_type, image_format)
 
     @classmethod
-    async def process_image_async(cls, image_url: str) -> BedrockContentBlock:
+    async def process_image_async(
+        cls, image_url: str, format: Optional[str]
+    ) -> BedrockContentBlock:
         """Asynchronous image processing."""
 
         if "base64" in image_url:
@@ -2334,6 +2356,10 @@ async def process_image_async(cls, image_url: str) -> BedrockContentBlock:
                 "Unsupported image type. Expected either image url or base64 encoded string"
             )
 
+        if format:  # override with user-defined params
+            mime_type = format
+            image_format = mime_type.split("/")[1]
+
         image_format = cls._validate_format(mime_type, image_format)
         return cls._create_bedrock_block(img_bytes, mime_type, image_format)
 
@@ -2821,12 +2847,14 @@ async def _bedrock_converse_messages_pt_async( # noqa: PLR0915
                         _part = BedrockContentBlock(text=element["text"])
                         _parts.append(_part)
                     elif element["type"] == "image_url":
+                        format: Optional[str] = None
                         if isinstance(element["image_url"], dict):
                             image_url = element["image_url"]["url"]
+                            format = element["image_url"].get("format")
                         else:
                             image_url = element["image_url"]
                         _part = await BedrockImageProcessor.process_image_async( # type: ignore
-                            image_url=image_url
+                            image_url=image_url, format=format
                         )
                         _parts.append(_part)  # type: ignore
                 _cache_point_block = (
@@ -3059,12 +3087,15 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
                         _part = BedrockContentBlock(text=element["text"])
                         _parts.append(_part)
                     elif element["type"] == "image_url":
+                        format: Optional[str] = None
                         if isinstance(element["image_url"], dict):
                             image_url = element["image_url"]["url"]
+                            format = element["image_url"].get("format")
                         else:
                             image_url = element["image_url"]
                         _part = BedrockImageProcessor.process_image_sync( # type: ignore
-                            image_url=image_url
+                            image_url=image_url,
+                            format=format,
                         )
                         _parts.append(_part)  # type: ignore
                 _cache_point_block = (
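
The core of the change is the new `format` argument on `convert_to_anthropic_image_obj`, which takes precedence over the mime type parsed out of the data URL. A small sketch of the behavior, based on the diff above (the base64 payload is a placeholder):

```python
from litellm.litellm_core_utils.prompt_templates.factory import (
    convert_to_anthropic_image_obj,
)

data_url = "data:application/octet-stream;base64,iVBORw0KGgoAAAANSUhEUg=="

# Without a format, the media type comes from the data URL itself.
chunk = convert_to_anthropic_image_obj(data_url, format=None)
print(chunk["media_type"])  # application/octet-stream

# With a format, the caller-supplied mime type wins.
chunk = convert_to_anthropic_image_obj(data_url, format="image/png")
print(chunk["media_type"])  # image/png
```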

litellm/llms/gemini/chat/transformation.py

+5 −1

@@ -114,12 +114,16 @@ def _transform_messages(
                     if element.get("type") == "image_url":
                         img_element = element
                         _image_url: Optional[str] = None
+                        format: Optional[str] = None
                         if isinstance(img_element.get("image_url"), dict):
                             _image_url = img_element["image_url"].get("url")  # type: ignore
+                            format = img_element["image_url"].get("format")  # type: ignore
                         else:
                             _image_url = img_element.get("image_url")  # type: ignore
                         if _image_url and "https://" in _image_url:
-                            image_obj = convert_to_anthropic_image_obj(_image_url)
+                            image_obj = convert_to_anthropic_image_obj(
+                                _image_url, format=format
+                            )
                             img_element["image_url"] = (  # type: ignore
                                 convert_generic_image_chunk_to_openai_image_obj(
                                     image_obj
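
For Gemini, the https image is converted to a generic chunk and then re-emitted as an OpenAI-style data URL, so a caller-supplied `format` ends up in the rebuilt URL. A sketch of that round trip using a placeholder base64 payload (in the Gemini path above, an https URL would be downloaded and encoded first):

```python
from litellm.litellm_core_utils.prompt_templates.factory import (
    convert_generic_image_chunk_to_openai_image_obj,
    convert_to_anthropic_image_obj,
)

data_url = "data:application/octet-stream;base64,iVBORw0KGgoAAAANSUhEUg=="

image_obj = convert_to_anthropic_image_obj(data_url, format="image/jpeg")
rebuilt = convert_generic_image_chunk_to_openai_image_obj(image_obj)
print(rebuilt)  # data:image/jpeg;base64,iVBORw0KGgoAAAANSUhEUg==
```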

litellm/llms/openai/chat/gpt_transformation.py

+15 −1

@@ -20,7 +20,11 @@
 from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
 from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionImageObject,
+    ChatCompletionImageUrlObject,
+)
 from litellm.types.utils import ModelResponse, ModelResponseStream
 from litellm.utils import convert_to_model_response_object
 
@@ -189,6 +193,16 @@ def _transform_messages(
                             content_item["image_url"] = {
                                 "url": content_item["image_url"],
                             }
+                        elif isinstance(content_item["image_url"], dict):
+                            litellm_specific_params = {"format"}
+                            new_image_url_obj = ChatCompletionImageUrlObject(
+                                **{  # type: ignore
+                                    k: v
+                                    for k, v in content_item["image_url"].items()
+                                    if k not in litellm_specific_params
+                                }
+                            )
+                            content_item["image_url"] = new_image_url_obj
         return messages
 
     def transform_request(
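
Since `format` is not part of the OpenAI image_url schema, the transformation above strips litellm-specific keys before the request goes out. The filtering step in isolation (placeholder values):

```python
litellm_specific_params = {"format"}
image_url = {
    "url": "https://example.com/cat.png",  # placeholder
    "detail": "high",
    "format": "image/png",  # litellm-specific, dropped for OpenAI
}
filtered = {k: v for k, v in image_url.items() if k not in litellm_specific_params}
print(filtered)  # {'url': 'https://example.com/cat.png', 'detail': 'high'}
```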

litellm/llms/vertex_ai/gemini/transformation.py

+19 −9

@@ -55,36 +55,42 @@
 LiteLLMLoggingObj = Any
 
 
-def _process_gemini_image(image_url: str) -> PartType:
+def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartType:
     """
     Given an image URL, return the appropriate PartType for Gemini
     """
+
     try:
         # GCS URIs
         if "gs://" in image_url:
             # Figure out file type
             extension_with_dot = os.path.splitext(image_url)[-1]  # Ex: ".png"
             extension = extension_with_dot[1:]  # Ex: "png"
 
-            file_type = get_file_type_from_extension(extension)
+            if not format:
+                file_type = get_file_type_from_extension(extension)
 
-            # Validate the file type is supported by Gemini
-            if not is_gemini_1_5_accepted_file_type(file_type):
-                raise Exception(f"File type not supported by gemini - {file_type}")
+                # Validate the file type is supported by Gemini
+                if not is_gemini_1_5_accepted_file_type(file_type):
+                    raise Exception(f"File type not supported by gemini - {file_type}")
 
-            mime_type = get_file_mime_type_for_file_type(file_type)
+                mime_type = get_file_mime_type_for_file_type(file_type)
+            else:
+                mime_type = format
             file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
 
             return PartType(file_data=file_data)
         elif (
             "https://" in image_url
-            and (image_type := _get_image_mime_type_from_url(image_url)) is not None
+            and (image_type := format or _get_image_mime_type_from_url(image_url))
+            is not None
         ):
+
             file_data = FileDataType(file_uri=image_url, mime_type=image_type)
             return PartType(file_data=file_data)
         elif "http://" in image_url or "https://" in image_url or "base64" in image_url:
             # https links for unsupported mime types and base64 images
-            image = convert_to_anthropic_image_obj(image_url)
+            image = convert_to_anthropic_image_obj(image_url, format=format)
             _blob = BlobType(data=image["data"], mime_type=image["media_type"])
             return PartType(inline_data=_blob)
         raise Exception("Invalid image received - {}".format(image_url))
@@ -159,11 +165,15 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915
                     elif element["type"] == "image_url":
                         element = cast(ChatCompletionImageObject, element)
                         img_element = element
+                        format: Optional[str] = None
                         if isinstance(img_element["image_url"], dict):
                             image_url = img_element["image_url"]["url"]
+                            format = img_element["image_url"].get("format")
                         else:
                             image_url = img_element["image_url"]
-                        _part = _process_gemini_image(image_url=image_url)
+                        _part = _process_gemini_image(
+                            image_url=image_url, format=format
+                        )
                         _parts.append(_part)
                 user_content.extend(_parts)
             elif (
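
On the Vertex AI / Gemini path, a caller-supplied `format` now takes precedence over extension- or URL-based mime detection, which matters for GCS objects without a useful extension. A hedged sketch calling the helper directly (the bucket and object name are placeholders):

```python
from litellm.llms.vertex_ai.gemini.transformation import _process_gemini_image

# The gs:// URI has no extension, so the explicit format is used as the mime type.
part = _process_gemini_image(
    image_url="gs://my-bucket/objects/12345",  # placeholder URI
    format="image/png",
)
print(part["file_data"])  # {'mime_type': 'image/png', 'file_uri': 'gs://my-bucket/objects/12345'}
```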

litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py

−1

@@ -1023,7 +1023,6 @@ async def async_completion(
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
-
         should_use_v1beta1_features = self.is_using_v1beta1_features(
             optional_params=optional_params
         )

litellm/types/llms/openai.py

+1

@@ -378,6 +378,7 @@ class ChatCompletionTextObject(
 class ChatCompletionImageUrlObject(TypedDict, total=False):
     url: Required[str]
     detail: str
+    format: str
 
 
 class ChatCompletionImageObject(TypedDict):
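
The new key is optional (the TypedDict uses `total=False`, with only `url` marked `Required`), so existing message payloads keep working. A minimal sketch of the resulting shape:

```python
from litellm.types.llms.openai import ChatCompletionImageUrlObject

image_url: ChatCompletionImageUrlObject = {
    "url": "https://example.com/cat.png",  # placeholder; the only required key
    "detail": "auto",
    "format": "image/png",  # litellm-specific mime-type override
}
```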

tests/litellm/log.txt

+28

@@ -0,0 +1,28 @@
+============================= test session starts ==============================
+platform darwin -- Python 3.11.4, pytest-7.4.1, pluggy-1.2.0 -- /Library/Frameworks/Python.framework/Versions/3.11/bin/python3
+cachedir: .pytest_cache
+rootdir: /Users/krrishdholakia/Documents/litellm/tests/litellm
+plugins: snapshot-0.9.0, cov-5.0.0, timeout-2.2.0, respx-0.21.1, asyncio-0.21.1, langsmith-0.3.4, anyio-4.8.0, mock-3.11.1, Faker-25.9.2
+asyncio: mode=Mode.STRICT
+collecting ... collected 4 items
+
+test_main.py::test_url_with_format_param[True-gemini/gemini-1.5-flash] PASSED [ 25%]
+test_main.py::test_url_with_format_param[True-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0] PASSED [ 50%]
+test_main.py::test_url_with_format_param[False-gemini/gemini-1.5-flash] PASSED [ 75%]
+test_main.py::test_url_with_format_param[False-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0] PASSED [100%]
+
+=============================== warnings summary ===============================
+../../../../../../Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pydantic/_internal/_config.py:295
+  /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pydantic/_internal/_config.py:295: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
+    warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)
+
+../../litellm/litellm_core_utils/get_model_cost_map.py:24
+  /Users/krrishdholakia/Documents/litellm/litellm/litellm_core_utils/get_model_cost_map.py:24: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
+    with importlib.resources.open_text(
+
+../../litellm/utils.py:168
+  /Users/krrishdholakia/Documents/litellm/litellm/utils.py:168: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
+    with resources.open_text(
+
+-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
+======================== 4 passed, 3 warnings in 2.80s =========================
