Skip to content

Commit bf0528a

Browse files
committed
Pass through cachePoint and other Bedrock-native content blocks
Fixes #1954.

The _to_bedrock_content_items function raised ValueError for Bedrock-native dict content blocks it did not recognize, such as cachePoint, guardContent, audio, and video. These are valid Bedrock Converse API content types and should be passed to the API unchanged, just like text, image, and document blocks.

Instead of maintaining an explicit allowlist that breaks whenever AWS adds a new content type, pass through any dict that does not match the OpenAI-style format (i.e., has no "type" key). The Bedrock API validates content blocks itself, so instructor does not need to be the gatekeeper here.

Signed-off-by: debu-sinha <debusinha2009@gmail.com>
1 parent 7206b98 commit bf0528a

File tree

2 files changed

+59
-3
lines changed

2 files changed

+59
-3
lines changed

instructor/providers/bedrock/utils.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,10 +225,14 @@ def _to_bedrock_content_items(content: Any) -> list[dict[str, Any]]:
225225
{"text":"..."}
226226
{"image":{"format":"jpeg|png|gif|webp","source":{"bytes": <raw bytes>}}}
227227
{"document":{"format":"pdf|csv|doc|docx|xls|xlsx|html|txt|md","name":"...","source":{"bytes": <raw bytes>}}}
228+
{"cachePoint":{"type":"default"}}
229+
Any other valid Bedrock ContentBlock dict (guardContent, toolUse,
230+
toolResult, audio, video, reasoningContent, etc.)
228231
229232
Note:
230-
- We do not validate or normalize Bedrock-native image/document blocks here.
231-
Caller is responsible for providing valid 'format' and raw 'bytes'.
233+
- We do not validate or normalize Bedrock-native blocks here.
234+
Caller is responsible for providing valid structure.
235+
The Bedrock API will reject malformed content blocks.
232236
"""
233237
# Plain string
234238
if isinstance(content, str):
@@ -269,7 +273,10 @@ def _to_bedrock_content_items(content: Any) -> list[dict[str, Any]]:
269273
items.append(p)
270274
continue
271275

272-
raise ValueError(f"Unsupported dict content for Bedrock: {p}")
276+
# Pass through any other Bedrock-native content block (cachePoint,
277+
# guardContent, toolUse, toolResult, audio, video, etc.)
278+
items.append(p)
279+
continue
273280

274281
# Plain string elements inside list
275282
if isinstance(p, str):

tests/llm/test_bedrock/test_bedrock_native_passthrough.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from __future__ import annotations
2+
3+
import pytest
4+
25
from instructor.providers.bedrock.utils import _to_bedrock_content_items
36

47

@@ -18,3 +21,49 @@ def test_bedrock_native_document_passthrough(tiny_pdf_bytes: bytes):
1821
native = {"document": {"format": "pdf", "source": {"bytes": tiny_pdf_bytes}}}
1922
items = _to_bedrock_content_items([native])
2023
assert items[0] == native
24+
25+
26+
def test_bedrock_native_cachepoint_passthrough():
    """Regression for #1954: a lone cachePoint block comes back unchanged."""
    block = {"cachePoint": {"type": "default"}}
    assert _to_bedrock_content_items([block]) == [block]
31+
32+
33+
def test_bedrock_native_cachepoint_with_ttl():
    """A cachePoint block that also carries a ``ttl`` entry is returned as-is."""
    block = {"cachePoint": {"type": "default", "ttl": "5m"}}
    converted = _to_bedrock_content_items([block])
    assert converted == [block]
37+
38+
39+
def test_bedrock_native_guard_content_passthrough():
    """A guardContent block is returned unchanged by the converter."""
    block = {"guardContent": {"text": {"text": "test content"}}}
    converted = _to_bedrock_content_items([block])
    assert converted == [block]
43+
44+
45+
@pytest.mark.parametrize(
    "block",
    [
        pytest.param({"cachePoint": {"type": "default"}}, id="cachePoint"),
        pytest.param(
            {"guardContent": {"text": {"text": "check this"}}},
            id="guardContent",
        ),
        pytest.param(
            {"video": {"format": "mp4", "source": {"bytes": b"fake"}}},
            id="video",
        ),
        pytest.param(
            {"audio": {"format": "mp3", "source": {"bytes": b"fake"}}},
            id="audio",
        ),
    ],
)
def test_bedrock_native_content_block_passthrough(block: dict):
    """Every Bedrock-native block in the matrix must be returned untouched."""
    # Each case wraps a single block in a list and expects the same list back.
    assert _to_bedrock_content_items([block]) == [block]
59+
60+
61+
def test_mixed_content_with_cachepoint():
    """Regression for #1954: a cachePoint placed between two text blocks."""
    leading_text = {"text": "Say hello world."}
    cache_marker = {"cachePoint": {"type": "default"}}
    trailing_text = {"text": "This is a test message."}
    content = [leading_text, cache_marker, trailing_text]

    # Order and contents must both be preserved.
    assert _to_bedrock_content_items(content) == content

0 commit comments

Comments
 (0)