Merged
Changes from 3 commits
140 changes: 114 additions & 26 deletions docs/integrations/genai.md
@@ -54,8 +54,7 @@ class User(BaseModel):
age: int

# Initialize and patch the client
-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

# Extract structured data
response = client.chat.completions.create(
@@ -82,8 +81,7 @@ class User(BaseModel):
age: int

# Initialize and patch the client
-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

# Single string (converted to user message)
response = client.chat.completions.create(
@@ -135,8 +133,7 @@ class User(BaseModel):
age: int


-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

# As a parameter
response = client.chat.completions.create(
@@ -180,12 +177,11 @@ class User(BaseModel):


# Initialize and patch the client
-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

# Template variables are resolved from the `context` argument
response = client.chat.completions.create(
messages=["{{name}} is {{ age }} years old"],
messages=[{"role": "user", "content": "{{ name }} is {{ age }} years old"}],
response_model=User,
context={
"name": "Jason",
@@ -250,7 +246,7 @@ class UserDetail(BaseModel):
age: int


client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")

response = client.chat.completions.create(
messages=[{"role": "user", "content": "Extract: jason is 25 years old"}],
@@ -300,7 +296,7 @@ class ImageDescription(BaseModel):
colors: list[str] = Field(..., description="The colors in the image")


client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")
url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/image.jpg"
# Multiple ways to load an image:
response = client.chat.completions.create(
@@ -355,7 +351,7 @@ class AudioDescription(BaseModel):

url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/gettysburg.wav"

client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")

response = client.chat.completions.create(
response_model=AudioDescription,
@@ -397,7 +393,7 @@ class Receipt(BaseModel):
items: list[str]


client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")
url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/invoice.pdf"
# Multiple ways to load a PDF:
response = client.chat.completions.create(
@@ -450,7 +446,7 @@ class Receipt(BaseModel):
items: list[str]


client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")
url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/invoice.pdf"
# Multiple ways to load a PDF:
response = client.chat.completions.create(
@@ -480,7 +476,6 @@ If you'd like more fine-grained control over the files used, you can also use th
Our API integration also supports the use of files.

```python
-from google import genai
import instructor
from pydantic import BaseModel

@@ -489,18 +484,22 @@ class Summary(BaseModel):
summary: str


-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

file1 = client.files.upload(
file="./gettysburg.wav",
)

# As a parameter
response = client.chat.completions.create(
system="Summarise the audio file.",
messages=[
file1,
{
"role": "user",
"content": [
"Summarise the audio file.",
file1,
]
}
],
response_model=Summary,
)
@@ -511,7 +510,13 @@ print(response)

## Streaming Responses

-> **Note:** Streaming functionality is currently only available when using the `Mode.GENAI_STRUCTURED_OUTPUTS` mode with Gemini models. Other modes like `tools` do not support streaming at this time.
+!!! warning "Streaming Limitations"
+
+    **As of July 11, 2025, Google GenAI does not support streaming with tool/function calling or structured outputs for regular (non-`Partial`) models.**
+
+    - `Mode.GENAI_TOOLS` and `Mode.GENAI_STRUCTURED_OUTPUTS` do not support streaming with regular models
+    - To use streaming, you must use `Partial[YourModel]` explicitly (see the sketch below) or switch to other modes like `Mode.JSON`
+    - Alternatively, set `stream=False` to disable streaming
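
For example, the explicit `Partial` route might look like the following minimal sketch (the `User` model is assumed for illustration; `Partial` is imported from `instructor.dsl.partial`, as this PR's tests do):

```python
import instructor
from instructor.dsl.partial import Partial
from pydantic import BaseModel


class User(BaseModel):
    name: str
    age: int


client = instructor.from_provider("google/gemini-2.5-flash")

# Wrapping the response model in Partial makes the stream yield
# progressively populated User objects as tokens arrive
stream = client.chat.completions.create(
    response_model=Partial[User],
    stream=True,
    messages=[{"role": "user", "content": "Jason is 25 years old"}],
)

for partial_user in stream:
    print(partial_user)
```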

Streaming allows you to process responses incrementally rather than waiting for the complete result. This is extremely useful for making UI changes feel instant and responsive.

@@ -522,11 +527,10 @@ Receive a stream of complete, validated objects as they're generated:
```python
from pydantic import BaseModel
import instructor
-from google import genai


client = instructor.from_provider(
"genai/gemini-1.5-flash",
"google/gemini-2.5-flash",
mode=instructor.Mode.GENAI_STRUCTURED_OUTPUTS,
)

@@ -541,22 +545,107 @@ class PersonList(BaseModel):


stream = client.chat.completions.create_partial(
system="You are a helpful assistant. You must return a function call with the schema provided.",
model="gemini-2.5-flash",
response_model=PersonList,
stream=True,
messages=[
{
"role": "user",
"content": "Ivan is 20 years old, Jason is 25 years old, and John is 30 years old",
}
],
-    response_model=PersonList,
)

for extraction in stream:
print(extraction)
# > people=[PartialPerson(name='Ivan', age=None)]
# > people=[PartialPerson(name='Ivan', age=20), PartialPerson(name='Jason', age=25), PartialPerson(name='John', age=None)]
# > people=[PartialPerson(name='Ivan', age=20), PartialPerson(name='Jason', age=25), PartialPerson(name='John', age=30)]
```

+### Iterable Streaming
+
+For extracting multiple objects from a single response, use `create_iterable`:
+
+```python
+from pydantic import BaseModel
+import instructor
+
+client = instructor.from_provider("google/gemini-2.5-flash")
+
+class User(BaseModel):
+    name: str
+    age: int
+
+# Extract multiple users from a single response
+stream = client.chat.completions.create_iterable(
+    model="gemini-2.5-flash",
+    response_model=User,
+    stream=True,
+    messages=[
+        {
+            "role": "user",
+            "content": "Jason is 25 years old, Sarah is 30 years old, and Mike is 28 years old",
+        }
+    ],
+)
+
+for user in stream:
+    print(user)
+    # > User(name='Jason', age=25)
+    # > User(name='Sarah', age=30)
+    # > User(name='Mike', age=28)
+```
+
+### Async Streaming
+
+Both partial and iterable streaming work with async clients:
+
+```python
+import asyncio
+from pydantic import BaseModel
+import instructor
+
+class User(BaseModel):
+    name: str
+    age: int
+
+async def async_partial_example():
+    client = instructor.from_provider("google/gemini-2.5-flash", async_client=True)
+
+    stream = client.chat.completions.create_partial(
+        model="gemini-2.5-flash",
+        response_model=User,
+        stream=True,
+        messages=[
+            {"role": "user", "content": "Jason is 25 years old"}
+        ],
+    )
+
+    async for chunk in stream:
+        print(chunk)
+
+async def async_iterable_example():
+    client = instructor.from_provider("google/gemini-2.5-flash", async_client=True)
+
+    stream = client.chat.completions.create_iterable(
+        model="gemini-2.5-flash",
+        response_model=User,
+        stream=True,
+        messages=[
+            {
+                "role": "user",
+                "content": "Jason is 25, Sarah is 30, Mike is 28"
+            }
+        ],
+    )
+
+    async for user in stream:
+        print(user)
+
+# Run async examples
+asyncio.run(async_partial_example())
+asyncio.run(async_iterable_example())
+```

## Async Support
@@ -567,7 +656,6 @@ Instructor provides full async support for the genai SDK, allowing you to make n
import asyncio

import instructor
-from google import genai
from pydantic import BaseModel


@@ -578,7 +666,7 @@ class User(BaseModel):

async def extract_user():
client = instructor.from_provider(
"genai/gemini-1.5-flash",
"google/gemini-2.5-flash",
async_client=True,
)

10 changes: 9 additions & 1 deletion instructor/process_response.py
@@ -22,7 +22,7 @@
get_types_array,
handle_parallel_model,
)
-from instructor.dsl.partial import PartialBase
+from instructor.dsl.partial import PartialBase, Partial
from instructor.dsl.simple_type import (
AdapterBase,
ModelAdapter,
@@ -626,6 +626,10 @@ def handle_genai_structured_outputs(
) -> tuple[type[T], dict[str, Any]]:
from google.genai import types

+    # Automatically wrap regular models with Partial when streaming is enabled
> **Contributor review comment:** Duplicate partial wrapping logic in both `handle_genai_structured_outputs` and `handle_genai_tools`. Consider extracting a helper to DRY this pattern.
if new_kwargs.get("stream", False) and not issubclass(response_model, PartialBase):
response_model = Partial[response_model]

if new_kwargs.get("system"):
system_message = new_kwargs.pop("system")
elif new_kwargs.get("messages"):
@@ -660,6 +664,10 @@ def handle_genai_tools(
) -> tuple[type[T], dict[str, Any]]:
from google.genai import types

+    # Automatically wrap regular models with Partial when streaming is enabled
+    if new_kwargs.get("stream", False) and not issubclass(response_model, PartialBase):
+        response_model = Partial[response_model]

schema = map_to_gemini_function_schema(response_model.model_json_schema())
function_definition = types.FunctionDeclaration(
name=response_model.__name__,
1 change: 1 addition & 0 deletions pyproject.toml
@@ -115,6 +115,7 @@ dev = [
"pytest-examples>=0.0.15",
"python-dotenv>=1.0.1",
"pytest-xdist>=3.8.0",
"pre-commit>=4.2.0",
]
docs = [
"mkdocs<2.0.0,>=1.4.3",
5 changes: 2 additions & 3 deletions tests/llm/test_genai/test_stream.py
@@ -4,7 +4,6 @@
from pydantic import BaseModel

import instructor
-from instructor.dsl.partial import Partial

from .util import models, modes

@@ -17,9 +16,9 @@ class UserExtract(BaseModel):
@pytest.mark.parametrize("model,mode", product(models, modes))
def test_partial_model(model, mode, client):
client = instructor.from_provider(f"google/{model}", mode=mode, async_client=False)
-    model = client.chat.completions.create(
+    model = client.chat.completions.create_partial(
model=model,
-        response_model=Partial[UserExtract],
+        response_model=UserExtract,
max_retries=2,
stream=True,
messages=[
10 changes: 4 additions & 6 deletions uv.lock

(Generated lockfile; diff not rendered.)