Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
19c998b
fix(sdk): resize Anthropic many-image inputs
Zheng-Lu Mar 23, 2026
b3f67af
Merge branch 'main' into fix/2467-image-downscale
openhands-agent Mar 27, 2026
7dbe9e0
Merge branch 'OpenHands:main' into fix/2467-image-downscale
Zheng-Lu Mar 28, 2026
079183f
Merge branch 'OpenHands:main' into fix/2467-image-downscale
Zheng-Lu Apr 2, 2026
4d047ed
fix(sdk): handle Anthropic single-image limits
Zheng-Lu Mar 28, 2026
487da93
Merge branch 'OpenHands:main' into fix/2467-image-downscale
Zheng-Lu Apr 6, 2026
82447d2
Merge branch 'main' into fix/2467-image-downscale
xingyaoww Apr 10, 2026
aef2b73
Merge branch 'main' into fix/2467-image-downscale
xingyaoww Apr 16, 2026
e2ed9aa
fix(sdk): simplify image resize utilities
Zheng-Lu Apr 18, 2026
7fce88f
Merge branch 'OpenHands:main' into fix/2467-image-downscale
Zheng-Lu Apr 18, 2026
64f0104
fix(sdk): require pillow for image resizing
Zheng-Lu Apr 19, 2026
5394fef
Merge branch 'main' into fix/2467-image-downscale
Zheng-Lu Apr 22, 2026
c400168
Merge branch 'main' into fix/2467-image-downscale
xingyaoww Apr 27, 2026
4303819
feat(example): add many-image send_message to exercise Anthropic resize
openhands-agent Apr 27, 2026
813919d
Merge remote-tracking branch 'origin/main' into fix/2467-image-downscale
openhands-agent Apr 27, 2026
5095dd0
fix(ci): use head SHA for wait-on-check-action in fork PRs
openhands-agent Apr 27, 2026
f339ec1
fix(ci): handle missing Build & Push check for fork PRs
openhands-agent Apr 27, 2026
76aec9c
fix(ci): make PR comment posting non-fatal for fork PRs
openhands-agent Apr 27, 2026
fc2bd09
Merge branch 'main' into fix/2467-image-downscale
xingyaoww Apr 27, 2026
2c4287e
Merge branch 'main' into fix/2467-image-downscale
Zheng-Lu Apr 28, 2026
09e84fa
Merge main into fix/2467-image-downscale, resolve pyproject.toml conf…
openhands-agent May 5, 2026
75390e7
fix(sdk): resize proxy Anthropic images
xingyaoww May 5, 2026
1e420e2
chore: remove run examples workflow changes
xingyaoww May 5, 2026
574aa42
Merge branch 'main' into fix/2467-image-downscale
xingyaoww May 5, 2026
172e0ee
Merge branch 'main' into fix/2467-image-downscale
xingyaoww May 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions openhands-sdk/openhands/sdk/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
from openhands.sdk.llm.streaming import (
TokenCallbackType,
)
from openhands.sdk.llm.utils.image_resize import maybe_resize_messages_for_provider
from openhands.sdk.llm.utils.litellm_provider import infer_litellm_provider
from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
from openhands.sdk.llm.utils.model_features import get_features
Expand Down Expand Up @@ -1392,6 +1393,12 @@ def format_messages_for_llm(self, messages: list[Message]) -> list[dict]:
)
send_reasoning_content = model_features.send_reasoning_content

maybe_resize_messages_for_provider(
messages,
provider=self._infer_litellm_provider(),
vision_enabled=vision_enabled,
)

formatted_messages = [
message.to_chat_dict(
cache_enabled=cache_enabled,
Expand Down
122 changes: 122 additions & 0 deletions openhands-sdk/openhands/sdk/llm/utils/image_resize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from __future__ import annotations

import base64
import io
from typing import Any

from openhands.sdk.llm.message import ImageContent, Message
from openhands.sdk.logger import get_logger


logger = get_logger(__name__)

# Anthropic vision docs: requests with more than 20 images cap each image at
# 2000x2000 pixels. Requests with 20 or fewer images cap each image at
# 8000x8000 pixels.
# https://docs.anthropic.com/en/docs/build-with-claude/vision

# Largest image count that still gets the standard (larger) per-image limit;
# a request with more images than this uses the many-image limit.
ANTHROPIC_MANY_IMAGE_THRESHOLD = 20
# Maximum width/height in pixels per image when the threshold is exceeded.
ANTHROPIC_MANY_IMAGE_MAX_DIMENSION = 2000
# Maximum width/height in pixels per image at or below the threshold.
ANTHROPIC_STANDARD_IMAGE_MAX_DIMENSION = 8000


def maybe_resize_messages_for_provider(
    messages: list[Message], *, provider: str | None, vision_enabled: bool
) -> None:
    """Downscale oversized inline images in ``messages`` for the given provider.

    The applicable per-image size limit is decided by
    ``_get_image_max_dimension``; when it returns ``None`` nothing is changed.
    Resizing happens in place: the ``image_urls`` list of each affected
    ``ImageContent`` item is replaced with a list in which oversized base64
    data URLs are re-encoded at the allowed size. Other URLs are kept as-is.

    Args:
        messages: Messages about to be sent; their image content may be mutated.
        provider: Provider name used to select the size limit, if known.
        vision_enabled: Whether images will be sent at all.
    """
    max_dimension = _get_image_max_dimension(
        messages=messages,
        provider=provider,
        vision_enabled=vision_enabled,
    )
    if max_dimension is None:
        return

    image_contents = [
        content_item
        for message in messages
        for content_item in message.content
        if isinstance(content_item, ImageContent)
    ]

    # Only inline ``data:image/`` URLs can be resized locally. When there are
    # none, skip loading Pillow entirely so a missing optional dependency does
    # not produce a warning for requests that could not be resized anyway.
    has_inline_image = any(
        url.startswith("data:image/")
        for content_item in image_contents
        for url in content_item.image_urls
    )
    if not has_inline_image:
        return

    image_module = _load_pillow_image_module()
    if image_module is None:
        return

    # The same data URL is often attached many times; decode and resize each
    # distinct URL once per call and reuse the result.
    resized_by_url: dict[str, str] = {}
    for content_item in image_contents:
        resized_urls = []
        for url in content_item.image_urls:
            if url not in resized_by_url:
                resized_by_url[url] = resize_base64_data_url(
                    url,
                    max_dimension=max_dimension,
                    image_module=image_module,
                )
            resized_urls.append(resized_by_url[url])
        content_item.image_urls = resized_urls

Comment thread
Zheng-Lu marked this conversation as resolved.
Outdated
def _get_image_max_dimension(
messages: list[Message], *, provider: str | None, vision_enabled: bool
) -> int | None:
if not vision_enabled or provider != "anthropic":
return None

total_images = sum(
len(content_item.image_urls)
for message in messages
for content_item in message.content
if isinstance(content_item, ImageContent)
)
if total_images == 0:
return None
if total_images <= ANTHROPIC_MANY_IMAGE_THRESHOLD:
return ANTHROPIC_STANDARD_IMAGE_MAX_DIMENSION

return ANTHROPIC_MANY_IMAGE_MAX_DIMENSION


def _load_pillow_image_module() -> Any | None:
    """Import and return Pillow's ``Image`` module, or ``None`` if unavailable.

    The import is done lazily so that Pillow stays an optional dependency;
    when it is missing a warning is logged and the caller skips resizing.
    """
    try:
        from PIL import Image as pillow_image
    except ImportError:
        logger.warning(
            "pillow is not installed; skipping Anthropic image resizing. "
            "Install openhands-sdk[pillow] to enable base64 image downscaling."
        )
        pillow_image = None

    return pillow_image


Comment thread
Zheng-Lu marked this conversation as resolved.
Outdated
def resize_base64_data_url(url: str, *, max_dimension: int, image_module: Any) -> str:
    """Return ``url`` with its inline image shrunk to fit ``max_dimension``.

    Args:
        url: Any image URL. Only ``data:image/...;base64,...`` URLs are
            candidates for resizing; everything else is returned unchanged.
        max_dimension: Maximum allowed width and height in pixels.
        image_module: Pillow's ``Image`` module (passed in by the caller).

    Returns:
        The original ``url`` when it is not a base64 image data URL, when the
        image already fits, or when decoding/resizing fails (a warning is
        logged in that case). Otherwise a new data URL with the same media
        type prefix and the re-encoded, downscaled image.
    """
    if not url.startswith("data:image/"):
        return url

    prefix, marker, payload = url.partition(";base64,")
    if not marker:
        # A data URL without the base64 marker is not something we can decode.
        return url

    mime_type = prefix.removeprefix("data:")

    try:
        decoded = base64.b64decode(payload)
        with image_module.open(io.BytesIO(decoded)) as image:
            longest_side = max(image.size)
            if longest_side <= max_dimension:
                return url

            # thumbnail() shrinks in place and keeps the aspect ratio.
            image.thumbnail(
                (max_dimension, max_dimension),
                image_module.Resampling.LANCZOS,
            )

            # Prefer the detected format; fall back to the declared subtype.
            image_format = image.format or mime_type.split("/", 1)[1].upper()
            if image_format == "JPG":
                image_format = "JPEG"

            # Modes other than RGB and L are converted to RGB before saving
            # as JPEG.
            needs_rgb = image_format == "JPEG" and image.mode not in ("RGB", "L")
            output_image = image.convert("RGB") if needs_rgb else image

            buffer = io.BytesIO()
            output_image.save(buffer, format=image_format)
    except Exception:
        # Best effort: send the original image rather than fail the request.
        logger.warning(
            "Failed to resize base64 data image for outgoing LLM request",
            exc_info=True,
        )
        return url

    resized_payload = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"data:{mime_type};base64,{resized_payload}"
1 change: 1 addition & 0 deletions openhands-sdk/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Documentation = "https://docs.openhands.dev/sdk"

[project.optional-dependencies]
boto3 = ["boto3>=1.35.0"]
pillow = ["pillow>=12.1.1"]
Comment thread
Zheng-Lu marked this conversation as resolved.
Outdated

[build-system]
requires = ["setuptools>=61.0", "wheel"]
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ openhands-agent-server = { workspace = true }
dev = [
"pre-commit>=4.3.0",
"packaging>=24.2",
"pillow>=12.1.1",
"psutil>=7.0.0",
"pyright[nodejs]>=1.1.405",
"pytest>=9.0.0",
Expand Down
173 changes: 173 additions & 0 deletions tests/sdk/llm/test_llm_image_resizing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import base64
import io
from unittest.mock import patch

from PIL import Image
from pydantic import SecretStr

from openhands.sdk.llm import LLM, ImageContent, Message, TextContent


def _make_png_data_url(width: int, height: int) -> str:
    """Build a base64 PNG data URL for a solid red RGB image of the given size."""
    png_bytes = io.BytesIO()
    Image.new("RGB", (width, height), color="red").save(png_bytes, format="PNG")
    payload = base64.b64encode(png_bytes.getvalue()).decode("ascii")
    return "data:image/png;base64," + payload


def _data_url_dimensions(url: str) -> tuple[int, int]:
    """Decode a base64 data URL and return the image's ``(width, height)``."""
    payload = url.partition(";base64,")[2]
    decoded = base64.b64decode(payload)
    with Image.open(io.BytesIO(decoded)) as decoded_image:
        return decoded_image.size


def _image_urls_from_chat_message(chat_message: dict) -> list[str]:
return [
item["image_url"]["url"]
for item in chat_message["content"]
if item.get("type") == "image_url"
]


def _format_for_provider(
    llm: LLM, messages: list[Message], *, provider: str
) -> list[dict]:
    """Format ``messages`` with vision forced on and the provider pinned.

    Both ``LLM.vision_is_active`` and ``LLM._infer_litellm_provider`` are
    patched so the test controls exactly which resize policy is exercised.
    """
    vision_patch = patch.object(LLM, "vision_is_active", return_value=True)
    provider_patch = patch.object(
        LLM, "_infer_litellm_provider", return_value=provider
    )
    with vision_patch, provider_patch:
        return llm.format_messages_for_llm(messages)
Comment thread
Zheng-Lu marked this conversation as resolved.

Comment thread
Zheng-Lu marked this conversation as resolved.

def test_anthropic_many_image_requests_resize_base64_images():
    """With 21 images, base64 images are capped at 2000px and the input is kept."""
    source_url = _make_png_data_url(2400, 1200)
    content = [
        TextContent(text="Describe these images."),
        ImageContent(image_urls=[source_url] * 21),
    ]
    user_message = Message(role="user", content=content)
    llm = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-many-image",
    )

    formatted = _format_for_provider(llm, [user_message], provider="anthropic")

    sent_urls = _image_urls_from_chat_message(formatted[0])
    assert len(sent_urls) == 21
    # 2400x1200 scaled to fit 2000 on the long side, aspect ratio preserved.
    assert _data_url_dimensions(sent_urls[0]) == (2000, 1000)

    # The caller's message must still hold the original, un-resized URL.
    untouched = user_message.content[1]
    assert isinstance(untouched, ImageContent)
    assert untouched.image_urls[0] == source_url


def test_anthropic_exactly_twenty_images_use_standard_limit():
    """Exactly 20 images still use the 8000px limit, not the many-image one."""
    source_url = _make_png_data_url(8001, 400)
    content = [
        TextContent(text="Describe these images."),
        ImageContent(image_urls=[source_url] * 20),
    ]
    user_message = Message(role="user", content=content)
    llm = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-twenty-images",
    )

    formatted = _format_for_provider(llm, [user_message], provider="anthropic")

    sent_urls = _image_urls_from_chat_message(formatted[0])
    assert len(sent_urls) == 20
    assert _data_url_dimensions(sent_urls[0]) == (8000, 400)


def test_anthropic_single_image_requests_do_not_resize():
    """A single 2400x2400 image is below the standard limit and passes through."""
    source_url = _make_png_data_url(2400, 2400)
    content = [
        TextContent(text="Describe this image."),
        ImageContent(image_urls=[source_url]),
    ]
    user_message = Message(role="user", content=content)
    llm = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-single-image",
    )

    formatted = _format_for_provider(llm, [user_message], provider="anthropic")

    sent_urls = _image_urls_from_chat_message(formatted[0])
    assert sent_urls == [source_url]
    assert _data_url_dimensions(sent_urls[0]) == (2400, 2400)


def test_anthropic_single_image_requests_resize_above_standard_limit():
    """Even a single image is downscaled when it exceeds the 8000px limit."""
    source_url = _make_png_data_url(8001, 400)
    content = [
        TextContent(text="Describe this image."),
        ImageContent(image_urls=[source_url]),
    ]
    user_message = Message(role="user", content=content)
    llm = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-single-image-large",
    )

    formatted = _format_for_provider(llm, [user_message], provider="anthropic")

    sent_urls = _image_urls_from_chat_message(formatted[0])
    assert _data_url_dimensions(sent_urls[0]) == (8000, 400)


def test_anthropic_many_image_requests_leave_url_images_unchanged():
    """Remote (non-data) image URLs are passed through untouched."""
    remote_url = "https://example.com/image.png"
    content = [
        TextContent(text="Describe these images."),
        ImageContent(image_urls=[remote_url] * 21),
    ]
    user_message = Message(role="user", content=content)
    llm = LLM(
        model="anthropic/claude-opus-4-6",
        api_key=SecretStr("test-key"),
        usage_id="test-anthropic-url-images",
    )

    formatted = _format_for_provider(llm, [user_message], provider="anthropic")

    sent_urls = _image_urls_from_chat_message(formatted[0])
    assert sent_urls == [remote_url] * 21


def test_non_anthropic_many_image_requests_do_not_resize():
    """Providers other than Anthropic never have their images resized."""
    source_url = _make_png_data_url(2400, 1200)
    content = [
        TextContent(text="Describe these images."),
        ImageContent(image_urls=[source_url] * 25),
    ]
    user_message = Message(role="user", content=content)
    llm = LLM(
        model="gpt-4o",
        api_key=SecretStr("test-key"),
        usage_id="test-openai-many-image",
    )

    formatted = _format_for_provider(llm, [user_message], provider="openai")

    sent_urls = _image_urls_from_chat_message(formatted[0])
    assert len(sent_urls) == 25
    assert _data_url_dimensions(sent_urls[0]) == (2400, 1200)
Loading