Merged
Changes from 3 commits
140 changes: 114 additions & 26 deletions docs/integrations/genai.md
@@ -54,8 +54,7 @@ class User(BaseModel):
age: int

# Initialize and patch the client
-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

# Extract structured data
response = client.chat.completions.create(
@@ -82,8 +81,7 @@ class User(BaseModel):
age: int

# Initialize and patch the client
-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

# Single string (converted to user message)
response = client.chat.completions.create(
@@ -135,8 +133,7 @@ class User(BaseModel):
age: int


-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

# As a parameter
response = client.chat.completions.create(
@@ -180,12 +177,11 @@ class User(BaseModel):


# Initialize and patch the client
-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

# Template variables are resolved from the `context` argument
response = client.chat.completions.create(
messages=["{{name}} is {{ age }} years old"],
messages=[{"role": "user", "content": "{{ name }} is {{ age }} years old"}],
response_model=User,
context={
"name": "Jason",
@@ -250,7 +246,7 @@ class UserDetail(BaseModel):
age: int


client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")

response = client.chat.completions.create(
messages=[{"role": "user", "content": "Extract: jason is 25 years old"}],
@@ -300,7 +296,7 @@ class ImageDescription(BaseModel):
colors: list[str] = Field(..., description="The colors in the image")


client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")
url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/image.jpg"
# Multiple ways to load an image:
response = client.chat.completions.create(
@@ -355,7 +351,7 @@ class AudioDescription(BaseModel):

url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/gettysburg.wav"

client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")

response = client.chat.completions.create(
response_model=AudioDescription,
@@ -397,7 +393,7 @@ class Receipt(BaseModel):
items: list[str]


client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")
url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/invoice.pdf"
# Multiple ways to load a PDF:
response = client.chat.completions.create(
@@ -450,7 +446,7 @@ class Receipt(BaseModel):
items: list[str]


client = instructor.from_provider("genai/gemini-1.5-flash")
client = instructor.from_provider("google/gemini-2.5-flash")
url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/invoice.pdf"
# Multiple ways to load a PDF:
response = client.chat.completions.create(
@@ -480,7 +476,6 @@ If you'd like more fine-grained control over the files used, you can also use th
Our API integration also supports the use of files.

```python
-from google import genai
import instructor
from pydantic import BaseModel

@@ -489,18 +484,22 @@ class Summary(BaseModel):
summary: str


-client = genai.Client()
-client = instructor.from_provider("genai/gemini-1.5-flash")
+client = instructor.from_provider("google/gemini-2.5-flash")

file1 = client.files.upload(
file="./gettysburg.wav",
)

# As a parameter
response = client.chat.completions.create(
system="Summarise the audio file.",
messages=[
file1,
{
"role": "user",
"content": [
"Summarise the audio file.",
file1,
]
}
],
response_model=Summary,
)
@@ -511,7 +510,13 @@ print(response)

## Streaming Responses

-> **Note:** Streaming functionality is currently only available when using the `Mode.GENAI_STRUCTURED_OUTPUTS` mode with Gemini models. Other modes like `tools` do not support streaming at this time.
+!!! warning "Streaming Limitations"
+
+    **As of July 11, 2025, Google GenAI does not support streaming with tool/function calling or structured outputs for regular (non-`Partial`) models.**
+
+    - `Mode.GENAI_TOOLS` and `Mode.GENAI_STRUCTURED_OUTPUTS` do not support streaming with regular models
+    - To use streaming, you must use `Partial[YourModel]` explicitly (see the sketch below) or switch to other modes like `Mode.JSON`
+    - Alternatively, set `stream=False` to disable streaming
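
For example, the explicit `Partial` route might look like the following minimal sketch (the `User` model is assumed for illustration; `Partial` is imported from `instructor.dsl.partial`, as this PR's tests do):

```python
import instructor
from instructor.dsl.partial import Partial
from pydantic import BaseModel


class User(BaseModel):
    name: str
    age: int


client = instructor.from_provider("google/gemini-2.5-flash")

# Wrapping the response model in Partial makes the stream yield
# progressively populated User objects as tokens arrive
stream = client.chat.completions.create(
    response_model=Partial[User],
    stream=True,
    messages=[{"role": "user", "content": "Jason is 25 years old"}],
)

for partial_user in stream:
    print(partial_user)
```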

Streaming allows you to process responses incrementally rather than waiting for the complete result. This is extremely useful for making UI changes feel instant and responsive.

@@ -522,11 +527,10 @@ Receive a stream of complete, validated objects as they're generated:
```python
from pydantic import BaseModel
import instructor
-from google import genai


client = instructor.from_provider(
"genai/gemini-1.5-flash",
"google/gemini-2.5-flash",
mode=instructor.Mode.GENAI_STRUCTURED_OUTPUTS,
)

@@ -541,22 +545,107 @@ class PersonList(BaseModel):


stream = client.chat.completions.create_partial(
system="You are a helpful assistant. You must return a function call with the schema provided.",
model="gemini-2.5-flash",
response_model=PersonList,
stream=True,
messages=[
{
"role": "user",
"content": "Ivan is 20 years old, Jason is 25 years old, and John is 30 years old",
}
],
-    response_model=PersonList,
)

for extraction in stream:
print(extraction)
# > people=[PartialPerson(name='Ivan', age=None)]
# > people=[PartialPerson(name='Ivan', age=20), PartialPerson(name='Jason', age=25), PartialPerson(name='John', age=None)]
# > people=[PartialPerson(name='Ivan', age=20), PartialPerson(name='Jason', age=25), PartialPerson(name='John', age=30)]
```

+### Iterable Streaming
+
+For extracting multiple objects from a single response, use `create_iterable`:
+
+```python
+from pydantic import BaseModel
+import instructor
+
+client = instructor.from_provider("google/gemini-2.5-flash")
+
+class User(BaseModel):
+    name: str
+    age: int
+
+# Extract multiple users from a single response
+stream = client.chat.completions.create_iterable(
+    model="gemini-2.5-flash",
+    response_model=User,
+    stream=True,
+    messages=[
+        {
+            "role": "user",
+            "content": "Jason is 25 years old, Sarah is 30 years old, and Mike is 28 years old",
+        }
+    ],
+)
+
+for user in stream:
+    print(user)
+    # > User(name='Jason', age=25)
+    # > User(name='Sarah', age=30)
+    # > User(name='Mike', age=28)
+```
+
+### Async Streaming
+
+Both partial and iterable streaming work with async clients:
+
+```python
+import asyncio
+from pydantic import BaseModel
+import instructor
+
+class User(BaseModel):
+    name: str
+    age: int
+
+async def async_partial_example():
+    client = instructor.from_provider("google/gemini-2.5-flash", async_client=True)
+
+    stream = client.chat.completions.create_partial(
+        model="gemini-2.5-flash",
+        response_model=User,
+        stream=True,
+        messages=[
+            {"role": "user", "content": "Jason is 25 years old"}
+        ],
+    )
+
+    async for chunk in stream:
+        print(chunk)
+
+async def async_iterable_example():
+    client = instructor.from_provider("google/gemini-2.5-flash", async_client=True)
+
+    stream = client.chat.completions.create_iterable(
+        model="gemini-2.5-flash",
+        response_model=User,
+        stream=True,
+        messages=[
+            {
+                "role": "user",
+                "content": "Jason is 25, Sarah is 30, Mike is 28"
+            }
+        ],
+    )
+
+    async for user in stream:
+        print(user)
+
+# Run async examples
+asyncio.run(async_partial_example())
+asyncio.run(async_iterable_example())
+```

## Async Support
@@ -567,7 +656,6 @@ Instructor provides full async support for the genai SDK, allowing you to make n
import asyncio

import instructor
-from google import genai
from pydantic import BaseModel


@@ -578,7 +666,7 @@ class User(BaseModel):

async def extract_user():
client = instructor.from_provider(
"genai/gemini-1.5-flash",
"google/gemini-2.5-flash",
async_client=True,
)

10 changes: 9 additions & 1 deletion instructor/process_response.py
@@ -22,7 +22,7 @@
get_types_array,
handle_parallel_model,
)
-from instructor.dsl.partial import PartialBase
+from instructor.dsl.partial import PartialBase, Partial
from instructor.dsl.simple_type import (
AdapterBase,
ModelAdapter,
@@ -626,6 +626,10 @@ def handle_genai_structured_outputs(
) -> tuple[type[T], dict[str, Any]]:
from google.genai import types

+    # Automatically wrap regular models with Partial when streaming is enabled
> **Contributor review comment:** Duplicate partial wrapping logic in both `handle_genai_structured_outputs` and `handle_genai_tools`. Consider extracting a helper to DRY this pattern.
if new_kwargs.get("stream", False) and not issubclass(response_model, PartialBase):
response_model = Partial[response_model]

if new_kwargs.get("system"):
system_message = new_kwargs.pop("system")
elif new_kwargs.get("messages"):
@@ -660,6 +664,10 @@ def handle_genai_tools(
) -> tuple[type[T], dict[str, Any]]:
from google.genai import types

+    # Automatically wrap regular models with Partial when streaming is enabled
+    if new_kwargs.get("stream", False) and not issubclass(response_model, PartialBase):
+        response_model = Partial[response_model]

schema = map_to_gemini_function_schema(response_model.model_json_schema())
function_definition = types.FunctionDeclaration(
name=response_model.__name__,
1 change: 1 addition & 0 deletions pyproject.toml
@@ -115,6 +115,7 @@ dev = [
"pytest-examples>=0.0.15",
"python-dotenv>=1.0.1",
"pytest-xdist>=3.8.0",
"pre-commit>=4.2.0",
]
docs = [
"mkdocs<2.0.0,>=1.4.3",
5 changes: 2 additions & 3 deletions tests/llm/test_genai/test_stream.py
@@ -4,7 +4,6 @@
from pydantic import BaseModel

import instructor
-from instructor.dsl.partial import Partial

from .util import models, modes

@@ -17,9 +16,9 @@ class UserExtract(BaseModel):
@pytest.mark.parametrize("model,mode", product(models, modes))
def test_partial_model(model, mode, client):
client = instructor.from_provider(f"google/{model}", mode=mode, async_client=False)
-    model = client.chat.completions.create(
+    model = client.chat.completions.create_partial(
model=model,
-        response_model=Partial[UserExtract],
+        response_model=UserExtract,
max_retries=2,
stream=True,
messages=[
10 changes: 4 additions & 6 deletions uv.lock

(Generated lockfile; diff not rendered.)