Skip to content

Commit f8cd6d1

Browse files
jxnl and cursoragent authored
List object crashes fix (#2011)
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
1 parent 3b692a7 commit f8cd6d1

File tree

10 files changed

+220
-18
lines changed

10 files changed

+220
-18
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file. The format
88

99
### Fixed
1010
- Fixed Google GenAI `safety_settings` causing `400 INVALID_ARGUMENT` when requests include image content by using image-specific harm categories when needed (#1773)
11+
- Fixed `create_with_completion()` crashing when using `list[T]` response models by preserving `_raw_response` on list outputs (#1303)
1112

1213
## [1.14.3] - 2026-01-13
1314

docs/concepts/raw_response.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,36 @@ ChatCompletion(
7676
"""
7777
```
7878

79+
## Raw response with a list response model
80+
81+
If your response model is a list (for example, `list[UserExtract]`), you can still use `create_with_completion()`. The returned value behaves like a normal list, but it also keeps the raw response so `create_with_completion()` does not crash.
82+
83+
```python
84+
import instructor
85+
from pydantic import BaseModel
86+
87+
client = instructor.from_provider("openai/gpt-4.1-mini")
88+
89+
90+
class UserExtract(BaseModel):
91+
name: str
92+
age: int
93+
94+
95+
users, completion = client.create_with_completion(
96+
response_model=list[UserExtract],
97+
messages=[
98+
{"role": "user", "content": "Extract users: Jason is 25, Ivan is 30"},
99+
],
100+
)
101+
102+
print(users[0])
103+
#> name='Jason' age=25
104+
105+
raw = users.get_raw_response()
106+
assert raw == completion
107+
```
108+
79109
## See Also
80110

81111
- [Hooks](./hooks.md) - Monitor LLM interactions without accessing raw responses

instructor/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,14 @@
114114
__all__ += ["from_cohere"]
115115

116116
if all(importlib.util.find_spec(pkg) for pkg in ("vertexai", "jsonref")):
117-
from .providers.vertexai.client import from_vertexai
118-
119-
__all__ += ["from_vertexai"]
117+
try:
118+
from .providers.vertexai.client import from_vertexai
119+
except Exception:
120+
# Optional dependency may be present but broken/misconfigured at import time.
121+
# Avoid failing `import instructor` in that case.
122+
pass
123+
else:
124+
__all__ += ["from_vertexai"]
120125

121126
if importlib.util.find_spec("boto3") is not None:
122127
from .providers.bedrock.client import from_bedrock

instructor/dsl/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
from .maybe import Maybe
33
from .partial import Partial
44
from .citation import CitationMixin
5+
from .response_list import ListResponse
56
from .simple_type import is_simple_type, ModelAdapter
67
from . import validators # Backwards compatibility module
78

89
__all__ = [ # noqa: F405
910
"CitationMixin",
1011
"IterableModel",
12+
"ListResponse",
1113
"Maybe",
1214
"Partial",
1315
"is_simple_type",

instructor/dsl/iterable.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,17 @@ def from_streaming_response(cls, completion) -> Generator[User]:
636636
Returns:
637637
schema (OpenAISchema): A new class that can be used to segment multiple tasks
638638
"""
639-
task_name = subtask_class.__name__ if name is None else name
639+
if name is not None:
640+
task_name = name
641+
else:
642+
# Handle `Union[A, B]` / `A | B` task types.
643+
# `types.UnionType` does not have `__name__`, so fall back to a stable name.
644+
task_name = getattr(subtask_class, "__name__", None)
645+
if task_name is None and get_origin(subtask_class) is Union:
646+
members = get_args(subtask_class)
647+
task_name = "Or".join(getattr(m, "__name__", str(m)) for m in members)
648+
if task_name is None:
649+
task_name = str(subtask_class)
640650

641651
name = f"Iterable{task_name}"
642652

instructor/dsl/response_list.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from __future__ import annotations
2+
3+
from typing import Any, Generic, TypeVar
4+
5+
T = TypeVar("T")
6+
7+
8+
class ListResponse(list[T], Generic[T]):
9+
"""A list that preserves the underlying provider response.
10+
11+
This is used when a call returns a list of objects (e.g. `list[User]`), so
12+
`create_with_completion()` can still return `(result, raw_response)` without
13+
crashing on a plain `list`.
14+
"""
15+
16+
_raw_response: Any | None
17+
18+
def __init__(self, iterable=(), _raw_response: Any | None = None): # type: ignore[no-untyped-def]
19+
super().__init__(iterable)
20+
self._raw_response = _raw_response
21+
22+
@classmethod
23+
def from_list(cls, items: list[T], *, raw_response: Any | None) -> ListResponse[T]:
24+
return cls(items, _raw_response=raw_response)
25+
26+
def get_raw_response(self) -> Any | None:
27+
return self._raw_response
28+
29+
def __getitem__(self, key): # type: ignore[no-untyped-def]
30+
value = super().__getitem__(key)
31+
if isinstance(key, slice):
32+
return type(self)(value, _raw_response=self._raw_response)
33+
return value

instructor/processing/response.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class User(BaseModel):
4949
from ..dsl.iterable import IterableBase
5050
from ..dsl.parallel import ParallelBase
5151
from ..dsl.partial import PartialBase
52+
from ..dsl.response_list import ListResponse
5253
from ..dsl.simple_type import AdapterBase
5354

5455
if TYPE_CHECKING:
@@ -248,7 +249,10 @@ async def process_response_async(
248249
# ? attaching usage data and the raw response to the model we return.
249250
if isinstance(model, IterableBase):
250251
logger.debug(f"Returning takes from IterableBase")
251-
return [task for task in model.tasks] # type: ignore
252+
return ListResponse.from_list( # type: ignore[return-value]
253+
[task for task in model.tasks],
254+
raw_response=response,
255+
)
252256

253257
if isinstance(response_model, ParallelBase):
254258
logger.debug(f"Returning model from ParallelBase")
@@ -353,7 +357,10 @@ class to parse the response into. Special DSL types supported:
353357
# ? attaching usage data and the raw response to the model we return.
354358
if isinstance(model, IterableBase):
355359
logger.debug(f"Returning takes from IterableBase")
356-
return [task for task in model.tasks] # type: ignore
360+
return ListResponse.from_list( # type: ignore[return-value]
361+
[task for task in model.tasks],
362+
raw_response=response,
363+
)
357364

358365
if isinstance(response_model, ParallelBase):
359366
logger.debug(f"Returning model from ParallelBase")

instructor/providers/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,14 @@
6262
__all__.append("from_perplexity")
6363

6464
if all(importlib.util.find_spec(pkg) for pkg in ("vertexai", "jsonref")):
65-
from .vertexai.client import from_vertexai # noqa: F401
66-
67-
__all__.append("from_vertexai")
65+
try:
66+
from .vertexai.client import from_vertexai # noqa: F401
67+
except Exception:
68+
# Optional dependency may be present but broken/misconfigured at import time.
69+
# Avoid failing `import instructor` in that case.
70+
pass
71+
else:
72+
__all__.append("from_vertexai")
6873

6974
if importlib.util.find_spec("writerai") is not None:
7075
from .writer.client import from_writer # noqa: F401

instructor/utils/core.py

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Any,
1515
Callable,
1616
Generic,
17+
Union,
1718
TypeVar,
1819
cast,
1920
get_args,
@@ -595,22 +596,66 @@ def prepare_response_model(response_model: type[T] | None) -> type[T] | None:
595596
if response_model is None:
596597
return None
597598

598-
if is_simple_type(response_model):
599-
from instructor.dsl.simple_type import ModelAdapter
599+
# `list[int | str]` and similar scalar lists are treated as simple types and should
600+
# be adapted, not converted into an IterableModel.
601+
origin = get_origin(response_model)
602+
if origin is list and is_simple_type(response_model):
603+
args = get_args(response_model)
604+
605+
def _is_model_type(t: Any) -> bool:
606+
if inspect.isclass(t) and issubclass(t, BaseModel):
607+
return True
608+
return get_origin(t) is Union and all(
609+
inspect.isclass(m) and issubclass(m, BaseModel) for m in get_args(t)
610+
)
600611

601-
response_model = ModelAdapter[response_model]
612+
# If the list element is a Pydantic model (or union of models), this is a
613+
# structured "iterable extraction" response model, not a simple scalar list.
614+
if args and _is_model_type(args[0]):
615+
origin = None
616+
else:
617+
from instructor.dsl.simple_type import ModelAdapter
618+
619+
response_model = ModelAdapter[response_model] # type: ignore[invalid-type-form]
620+
origin = get_origin(response_model)
602621

603622
if is_typed_dict(response_model):
604-
response_model: BaseModel = create_model(
605-
response_model.__name__,
606-
**{k: (v, ...) for k, v in response_model.__annotations__.items()},
623+
response_model = cast(
624+
type[BaseModel],
625+
create_model(
626+
response_model.__name__,
627+
**{k: (v, ...) for k, v in response_model.__annotations__.items()},
628+
),
607629
)
608630

609-
if get_origin(response_model) is Iterable:
631+
# Recompute after potential wrapping/conversion above.
632+
origin = get_origin(response_model)
633+
if origin in {Iterable, list}:
610634
from instructor.dsl.iterable import IterableModel
611635

612-
iterable_element_class = get_args(response_model)[0]
613-
response_model = cast(BaseModel, IterableModel(iterable_element_class)) # type: ignore
636+
args = get_args(response_model)
637+
if not args or args[0] is None:
638+
raise ValueError(
639+
"response_model must be parameterized, e.g. list[User] or Iterable[User]"
640+
)
641+
iterable_element_class = args[0]
642+
if is_typed_dict(iterable_element_class):
643+
iterable_element_class = cast(
644+
type[BaseModel],
645+
create_model(
646+
iterable_element_class.__name__,
647+
**{
648+
k: (v, ...)
649+
for k, v in iterable_element_class.__annotations__.items()
650+
},
651+
),
652+
)
653+
response_model = IterableModel(cast(type[BaseModel], iterable_element_class))
654+
655+
if is_simple_type(response_model):
656+
from instructor.dsl.simple_type import ModelAdapter
657+
658+
response_model = ModelAdapter[response_model] # type: ignore[invalid-type-form]
614659

615660
# Import here to avoid circular dependency
616661
from ..processing.function_calls import OpenAISchema, openai_schema

tests/test_list_response.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import Iterable as ABCIterable
4+
from typing import Any
5+
6+
from pydantic import BaseModel
7+
8+
from instructor.dsl import ListResponse
9+
from instructor.dsl.iterable import IterableBase
10+
from instructor.mode import Mode
11+
from instructor.processing.response import process_response
12+
from instructor.utils.core import prepare_response_model
13+
14+
15+
class User(BaseModel):
16+
name: str
17+
18+
19+
def test_listresponse_preserves_raw_response_on_slice() -> None:
20+
raw: Any = {"provider": "test"}
21+
resp = ListResponse([User(name="a"), User(name="b")], _raw_response=raw)
22+
23+
assert resp.get_raw_response() is raw
24+
assert resp[0].name == "a"
25+
26+
sliced = resp[1:]
27+
assert isinstance(sliced, ListResponse)
28+
assert sliced.get_raw_response() is raw
29+
assert sliced[0].name == "b"
30+
31+
32+
def test_process_response_wraps_iterablebase_tasks_with_raw_response() -> None:
33+
class FakeIterableResponse(BaseModel, IterableBase):
34+
tasks: list[User]
35+
36+
@classmethod
37+
def from_response( # type: ignore[override]
38+
cls, _response: Any, **_kwargs: Any
39+
) -> FakeIterableResponse:
40+
return cls(tasks=[User(name="x"), User(name="y")])
41+
42+
# `process_response()` is typed with a BaseModel-bounded type variable for `response`,
43+
# so use a BaseModel instance here to keep `ty` happy.
44+
raw_response: Any = User(name="raw")
45+
out = process_response(
46+
raw_response,
47+
response_model=FakeIterableResponse,
48+
stream=False,
49+
mode=Mode.TOOLS,
50+
)
51+
52+
assert isinstance(out, ListResponse)
53+
assert [u.name for u in out] == ["x", "y"]
54+
assert out.get_raw_response() is raw_response
55+
56+
57+
def test_prepare_response_model_supports_list_and_iterable() -> None:
58+
prepared_list = prepare_response_model(list[User])
59+
assert prepared_list is not None
60+
assert issubclass(prepared_list, IterableBase)
61+
62+
prepared_iterable = prepare_response_model(ABCIterable[User]) # type: ignore[index]
63+
assert prepared_iterable is not None
64+
assert issubclass(prepared_iterable, IterableBase)

0 commit comments

Comments
 (0)