Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
19c998b
fix(sdk): resize Anthropic many-image inputs
Zheng-Lu Mar 23, 2026
b3f67af
Merge branch 'main' into fix/2467-image-downscale
openhands-agent Mar 27, 2026
7dbe9e0
Merge branch 'OpenHands:main' into fix/2467-image-downscale
Zheng-Lu Mar 28, 2026
079183f
Merge branch 'OpenHands:main' into fix/2467-image-downscale
Zheng-Lu Apr 2, 2026
4d047ed
fix(sdk): handle Anthropic single-image limits
Zheng-Lu Mar 28, 2026
487da93
Merge branch 'OpenHands:main' into fix/2467-image-downscale
Zheng-Lu Apr 6, 2026
82447d2
Merge branch 'main' into fix/2467-image-downscale
xingyaoww Apr 10, 2026
aef2b73
Merge branch 'main' into fix/2467-image-downscale
xingyaoww Apr 16, 2026
e2ed9aa
fix(sdk): simplify image resize utilities
Zheng-Lu Apr 18, 2026
7fce88f
Merge branch 'OpenHands:main' into fix/2467-image-downscale
Zheng-Lu Apr 18, 2026
64f0104
fix(sdk): require pillow for image resizing
Zheng-Lu Apr 19, 2026
5394fef
Merge branch 'main' into fix/2467-image-downscale
Zheng-Lu Apr 22, 2026
c400168
Merge branch 'main' into fix/2467-image-downscale
xingyaoww Apr 27, 2026
4303819
feat(example): add many-image send_message to exercise Anthropic resize
openhands-agent Apr 27, 2026
813919d
Merge remote-tracking branch 'origin/main' into fix/2467-image-downscale
openhands-agent Apr 27, 2026
5095dd0
fix(ci): use head SHA for wait-on-check-action in fork PRs
openhands-agent Apr 27, 2026
f339ec1
fix(ci): handle missing Build & Push check for fork PRs
openhands-agent Apr 27, 2026
76aec9c
fix(ci): make PR comment posting non-fatal for fork PRs
openhands-agent Apr 27, 2026
fc2bd09
Merge branch 'main' into fix/2467-image-downscale
xingyaoww Apr 27, 2026
2c4287e
Merge branch 'main' into fix/2467-image-downscale
Zheng-Lu Apr 28, 2026
09e84fa
Merge main into fix/2467-image-downscale, resolve pyproject.toml conf…
openhands-agent May 5, 2026
75390e7
fix(sdk): resize proxy Anthropic images
xingyaoww May 5, 2026
1e420e2
chore: remove run examples workflow changes
xingyaoww May 5, 2026
574aa42
Merge branch 'main' into fix/2467-image-downscale
xingyaoww May 5, 2026
172e0ee
Merge branch 'main' into fix/2467-image-downscale
xingyaoww May 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions openhands-sdk/openhands/sdk/llm/llm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import base64
import copy
import io
import json
import os
import warnings
Expand Down Expand Up @@ -73,6 +75,7 @@
supports_vision,
token_counter,
)
from PIL import Image
Comment thread
Zheng-Lu marked this conversation as resolved.
Outdated

from openhands.sdk.llm.exceptions import (
LLMContextWindowTooSmallError,
Expand All @@ -83,6 +86,7 @@
# OpenHands utilities
from openhands.sdk.llm.llm_response import LLMResponse
from openhands.sdk.llm.message import (
ImageContent,
Message,
)
from openhands.sdk.llm.mixins.non_native_fc import NonNativeToolCallingMixin
Expand Down Expand Up @@ -127,6 +131,8 @@
# This cap prevents requesting output that exceeds the context window.
# 16384 is a safe default that works for most models (GPT-4o: 16k, Claude: 8k).
DEFAULT_MAX_OUTPUT_TOKENS_CAP: Final[int] = 16384
# Image-count threshold for Anthropic requests: outgoing base64 images are only
# downscaled when a request carries MORE than this many images in total
# (a request with exactly this many images is left untouched).
# NOTE(review): presumably mirrors Anthropic's documented many-image limit --
# confirm against the provider docs.
ANTHROPIC_MANY_IMAGE_THRESHOLD: Final[int] = 20
# Upper bound, in pixels, for the longest side of each base64 image once the
# image-count threshold is exceeded.
ANTHROPIC_MANY_IMAGE_MAX_DIMENSION: Final[int] = 2000
Comment thread
Zheng-Lu marked this conversation as resolved.
Outdated


class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
Expand Down Expand Up @@ -1259,6 +1265,83 @@ def _apply_prompt_caching(self, messages: list[Message]) -> None:
].cache_prompt = True # Last item inside the message content
break

def _apply_outgoing_image_resize(
    self, messages: list[Message], *, vision_enabled: bool
) -> None:
    """Downscale base64 images in ``messages`` when a size limit applies.

    The limit comes from ``_get_outgoing_image_max_dimension``; when that
    returns ``None`` nothing is touched. Otherwise every ``ImageContent``
    item has its ``image_urls`` list replaced by a new list in which each
    URL has been passed through ``_resize_base64_data_image_url``.

    Args:
        messages: Messages about to be sent. Their ``ImageContent`` items
            are modified in place.
            NOTE(review): callers that must keep the originals intact are
            presumably expected to pass a copy -- confirm at the call site.
        vision_enabled: Whether vision is active for this request.
    """
    limit = self._get_outgoing_image_max_dimension(
        messages=messages, vision_enabled=vision_enabled
    )
    if limit is None:
        return

    for msg in messages:
        for part in msg.content:
            if not isinstance(part, ImageContent):
                continue
            downsized: list[str] = []
            for source_url in part.image_urls:
                downsized.append(
                    self._resize_base64_data_image_url(
                        source_url, max_dimension=limit
                    )
                )
            part.image_urls = downsized

def _get_outgoing_image_max_dimension(
    self, messages: list[Message], *, vision_enabled: bool
) -> int | None:
    """Return the per-image size limit for this request, if any.

    Args:
        messages: Messages whose ``ImageContent`` items are counted.
        vision_enabled: Whether vision is active for this request.

    Returns:
        ``ANTHROPIC_MANY_IMAGE_MAX_DIMENSION`` when vision is enabled, the
        inferred LiteLLM provider is ``"anthropic"`` and the request holds
        more than ``ANTHROPIC_MANY_IMAGE_THRESHOLD`` image URLs in total;
        ``None`` in every other case.
    """
    if not vision_enabled:
        return None
    # Only consulted once vision is known to be on, as in a short-circuit `or`.
    if self._infer_litellm_provider() != "anthropic":
        return None

    image_count = 0
    for msg in messages:
        for part in msg.content:
            if isinstance(part, ImageContent):
                image_count += len(part.image_urls)

    if image_count > ANTHROPIC_MANY_IMAGE_THRESHOLD:
        return ANTHROPIC_MANY_IMAGE_MAX_DIMENSION
    return None

@staticmethod
def _resize_base64_data_image_url(url: str, *, max_dimension: int) -> str:
    """Shrink a base64 ``data:image/...`` URL so it fits ``max_dimension``.

    Args:
        url: Image URL. Anything that is not a ``data:image/`` URL carrying
            a ``;base64,`` marker is returned untouched.
        max_dimension: Largest allowed value for either side of the image.

    Returns:
        The original ``url`` when no resize is needed (or possible), else a
        new data URL with the same header and the re-encoded, downscaled
        image as payload.

    Decoding, resizing and re-encoding are best-effort: any exception is
    logged as a warning and the original ``url`` is returned.
    """
    marker = ";base64,"
    if not url.startswith("data:image/") or marker not in url:
        return url

    prefix, _, payload = url.partition(marker)
    media_type = prefix.removeprefix("data:")
    bounding_box = (max_dimension, max_dimension)

    try:
        decoded = base64.b64decode(payload)
        with Image.open(io.BytesIO(decoded)) as source:
            longest_side = max(source.size)
            if longest_side <= max_dimension:
                return url

            # Work on a copy: thumbnail() modifies the image in place.
            scaled = source.copy()
            scaled.thumbnail(bounding_box, Image.Resampling.LANCZOS)

            # Prefer the format Pillow detected; fall back to the subtype
            # declared in the data URL header.
            output_format = source.format
            if not output_format:
                output_format = media_type.split("/", 1)[1].upper()
            if output_format == "JPG":
                output_format = "JPEG"

            # Modes other than RGB/L are converted to RGB before a JPEG save.
            needs_rgb = scaled.mode not in ("RGB", "L")
            if output_format == "JPEG" and needs_rgb:
                scaled = scaled.convert("RGB")

            out_stream = io.BytesIO()
            scaled.save(out_stream, format=output_format)
    except Exception:
        logger.warning(
            "Failed to resize base64 data image for outgoing LLM request",
            exc_info=True,
        )
        return url

    new_payload = base64.b64encode(out_stream.getvalue()).decode("ascii")
    return f"data:{media_type};base64,{new_payload}"
Comment thread
xingyaoww marked this conversation as resolved.
Outdated

def format_messages_for_llm(self, messages: list[Message]) -> list[dict]:
"""Formats Message objects for LLM consumption."""

Expand All @@ -1277,6 +1360,8 @@ def format_messages_for_llm(self, messages: list[Message]) -> list[dict]:
)
send_reasoning_content = model_features.send_reasoning_content

self._apply_outgoing_image_resize(messages, vision_enabled=vision_enabled)

formatted_messages = [
message.to_chat_dict(
cache_enabled=cache_enabled,
Expand Down
1 change: 1 addition & 0 deletions openhands-sdk/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependencies = [
"filelock>=3.20.1",
"httpx>=0.27.0",
"litellm==1.80.10",
"pillow>=12.1.1",
Comment thread
Zheng-Lu marked this conversation as resolved.
"pydantic>=2.12.5",
"python-frontmatter>=1.1.0",
"python-json-logger>=3.3.0",
Expand Down
130 changes: 130 additions & 0 deletions tests/sdk/llm/test_llm_image_resizing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import base64
import io
from unittest.mock import patch

from PIL import Image
from pydantic import SecretStr

from openhands.sdk.llm import LLM, ImageContent, Message, TextContent


def _make_png_data_url(width: int, height: int) -> str:
    """Return a base64 PNG data URL for a solid red RGB image of the given size."""
    png_stream = io.BytesIO()
    Image.new("RGB", (width, height), color="red").save(png_stream, format="PNG")
    payload = base64.b64encode(png_stream.getvalue()).decode("ascii")
    return "data:image/png;base64," + payload


def _data_url_dimensions(url: str) -> tuple[int, int]:
    """Decode the base64 payload of a data URL and return the image's size."""
    payload = url.partition(";base64,")[2]
    decoded_stream = io.BytesIO(base64.b64decode(payload))
    with Image.open(decoded_stream) as picture:
        dimensions = picture.size
    return dimensions


def _image_urls_from_chat_message(chat_message: dict) -> list[str]:
    """Collect the URLs of all ``image_url`` items in a formatted chat message.

    Items in ``chat_message["content"]`` whose ``"type"`` is not
    ``"image_url"`` are skipped; order is preserved.
    """
    urls: list[str] = []
    for part in chat_message["content"]:
        if part.get("type") != "image_url":
            continue
        urls.append(part["image_url"]["url"])
    return urls


def _format_for_provider(
    llm: LLM, messages: list[Message], *, provider: str
) -> list[dict]:
    """Format ``messages`` with vision forced on and the provider forced.

    ``LLM.vision_is_active`` is patched to return ``True`` and
    ``LLM._infer_litellm_provider`` to return ``provider`` for the duration
    of the ``format_messages_for_llm`` call.
    """
    vision_patch = patch.object(LLM, "vision_is_active", return_value=True)
    provider_patch = patch.object(
        LLM, "_infer_litellm_provider", return_value=provider
    )
    with vision_patch:
        with provider_patch:
            formatted = llm.format_messages_for_llm(messages)
    return formatted
Comment thread
Zheng-Lu marked this conversation as resolved.

Comment thread
Zheng-Lu marked this conversation as resolved.

def test_anthropic_many_image_requests_resize_base64_images():
    """21 base64 images for Anthropic are downscaled in the formatted output."""
    oversized_url = _make_png_data_url(2400, 1200)
    parts = [
        TextContent(text="Describe these images."),
        ImageContent(image_urls=[oversized_url for _ in range(21)]),
    ]
    user_message = Message(role="user", content=parts)
    client = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-many-image",
    )

    chat_messages = _format_for_provider(
        client, [user_message], provider="anthropic"
    )
    sent_urls = _image_urls_from_chat_message(chat_messages[0])

    assert len(sent_urls) == 21
    # 2400x1200 scaled to a 2000 px longest side keeps the 2:1 aspect ratio.
    assert _data_url_dimensions(sent_urls[0]) == (2000, 1000)

    # The message object handed in by the caller must still hold the
    # full-size image.
    source_content = user_message.content[1]
    assert isinstance(source_content, ImageContent)
    assert source_content.image_urls[0] == oversized_url


def test_anthropic_exactly_twenty_images_do_not_resize():
    """Exactly 20 images is at the threshold, so nothing is downscaled."""
    oversized_url = _make_png_data_url(2400, 1200)
    parts = [
        TextContent(text="Describe these images."),
        ImageContent(image_urls=[oversized_url for _ in range(20)]),
    ]
    user_message = Message(role="user", content=parts)
    client = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-twenty-images",
    )

    chat_messages = _format_for_provider(
        client, [user_message], provider="anthropic"
    )
    sent_urls = _image_urls_from_chat_message(chat_messages[0])

    assert len(sent_urls) == 20
    assert _data_url_dimensions(sent_urls[0]) == (2400, 1200)


def test_anthropic_single_image_requests_do_not_resize():
    """A single oversized image is passed through byte-for-byte."""
    oversized_url = _make_png_data_url(2400, 2400)
    parts = [
        TextContent(text="Describe this image."),
        ImageContent(image_urls=[oversized_url]),
    ]
    user_message = Message(role="user", content=parts)
    client = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-single-image",
    )

    chat_messages = _format_for_provider(
        client, [user_message], provider="anthropic"
    )
    sent_urls = _image_urls_from_chat_message(chat_messages[0])

    assert sent_urls == [oversized_url]
    assert _data_url_dimensions(sent_urls[0]) == (2400, 2400)


def test_anthropic_many_image_requests_leave_url_images_unchanged():
    """Remote (non-data) image URLs are never rewritten, even above 20 images."""
    remote_url = "https://example.com/image.png"
    parts = [
        TextContent(text="Describe these images."),
        ImageContent(image_urls=[remote_url for _ in range(21)]),
    ]
    user_message = Message(role="user", content=parts)
    client = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-url-images",
    )

    chat_messages = _format_for_provider(
        client, [user_message], provider="anthropic"
    )

    expected_urls = [remote_url] * 21
    assert _image_urls_from_chat_message(chat_messages[0]) == expected_urls
Loading