
Commit a3ad1a5

jxnl and xtzie authored
docs(agents): note uv run --with (#2029)
Co-authored-by: ben lu <benlu.dev@outlook.com>
Co-authored-by: Ben Lu <58869945+xtzie@users.noreply.github.com>
1 parent 61c2663 commit a3ad1a5

File tree

7 files changed (+52, -45)


AGENT.md

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@
 - Run tests: `uv run pytest tests/`
 - Run single test: `uv run pytest tests/path_to_test.py::test_name`
 - Skip LLM tests: `uv run pytest tests/ -k 'not llm and not openai'`
+- Temp deps for a run: `uv run --with <pkg>[==version] <command>` (example: `uv run --with pytest-asyncio --with anthropic pytest tests/...`)
 - Type check: `uv run ty check`
 - Lint: `uv run ruff check instructor examples tests`
 - Format: `uv run ruff format instructor examples tests`

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
@@ -241,7 +241,7 @@ Documentation improvements are always welcome! Follow these guidelines:
 
 We encourage contributions to our evaluation tests:
 
-1. Explore existing evals in the [evals directory](https://github.com/instructor-ai/instructor/tree/main/tests/llm/test_openai/evals)
+1. Explore existing evals in the [evals directory](https://github.com/instructor-ai/instructor/tree/main/tests/llm)
 2. Contribute new evals as pytest tests
 3. Evals should test specific capabilities or edge cases of the library or models
 4. Follow the existing patterns for structuring eval tests
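
For orientation, here is a minimal sketch of what a new eval test could look like. It is illustrative only: `UserDetail`, the prompt, and the `gpt-4o-mini` model name are assumptions rather than code from the repository, and real evals under `tests/llm` should follow that directory's existing fixtures and parametrization.

```python
import os

import pytest
import instructor
from openai import OpenAI
from pydantic import BaseModel


class UserDetail(BaseModel):
    name: str
    age: int


@pytest.mark.skipif(not os.getenv("OPENAI_API_KEY"), reason="OPENAI_API_KEY not set")
def test_eval_extracts_user_detail():
    # Patch the OpenAI client so chat.completions.create accepts response_model
    client = instructor.from_openai(OpenAI())
    user = client.chat.completions.create(
        model="gpt-4o-mini",  # assumed model; real evals typically parametrize over models/modes
        response_model=UserDetail,
        messages=[{"role": "user", "content": "Jason is 25 years old"}],
    )
    # The eval targets one specific capability: faithful structured extraction
    assert user.name.lower() == "jason"
    assert user.age == 25
```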

instructor/dsl/partial.py

Lines changed: 15 additions & 13 deletions
@@ -445,21 +445,16 @@ def model_from_chunks(
         # Always use trailing-strings mode to preserve incomplete data during streaming
         # PartialLiteralMixin is deprecated - completeness-based validation handles Literals
         partial_mode = "trailing-strings"
-        chunk_buffer = []
         final_obj = None
         for chunk in json_chunks:
-            chunk_buffer += chunk
-            if len(chunk_buffer) < 2:
+            if chunk is None:
                 continue
-            potential_object += remove_control_chars("".join(chunk_buffer))
-            chunk_buffer = []
-            obj = process_potential_object(
-                potential_object, partial_mode, partial_model, **kwargs
-            )
-            final_obj = obj
-            yield obj
-        if chunk_buffer:
-            potential_object += remove_control_chars(chunk_buffer[0])
+            if not isinstance(chunk, str):
+                try:
+                    chunk = str(chunk)
+                except Exception:
+                    continue
+            potential_object += remove_control_chars(chunk)
             obj = process_potential_object(
                 potential_object, partial_mode, partial_model, **kwargs
             )
@@ -487,7 +482,14 @@ async def model_from_chunks_async(
         partial_mode = "trailing-strings"
         final_obj = None
         async for chunk in json_chunks:
-            potential_object += chunk
+            if chunk is None:
+                continue
+            if not isinstance(chunk, str):
+                try:
+                    chunk = str(chunk)
+                except Exception:
+                    continue
+            potential_object += remove_control_chars(chunk)
             obj = process_potential_object(
                 potential_object, partial_mode, partial_model, **kwargs
             )
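
The refactored loop boils down to a per-chunk normalization step before the accumulated JSON is re-parsed. A minimal standalone sketch of that step, assuming a simple control-character filter in place of the module's `remove_control_chars` helper:

```python
import re
from collections.abc import Iterable, Iterator

# Illustrative stand-in for the module's remove_control_chars helper (an assumption;
# the real helper lives in instructor/dsl/partial.py): strip ASCII control characters
# that would corrupt the accumulating JSON string, while keeping ordinary whitespace.
_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]")


def normalize_chunks(chunks: Iterable[object]) -> Iterator[str]:
    """Mirror the patched per-chunk handling: skip None, coerce non-str, strip controls."""
    for chunk in chunks:
        if chunk is None:
            continue  # missing chunks are ignored instead of raising
        if not isinstance(chunk, str):
            try:
                chunk = str(chunk)  # best-effort coercion, as in the new loop
            except Exception:
                continue  # unconvertible chunks are dropped
        yield _CONTROL_CHARS.sub("", chunk)


# Example: the cleaned chunks extend the growing JSON buffer one by one
potential_object = "".join(normalize_chunks([None, "\x00", '{"a": 4', 2, "}"]))
assert potential_object == '{"a": 42}'
```

Dropping the old `chunk_buffer` means every usable chunk immediately extends `potential_object` and yields a fresh partial model, instead of waiting for a second chunk before parsing.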

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -50,6 +50,8 @@ dev = [
     "python-dotenv>=1.0.1",
     "pytest-xdist>=3.8.0",
     "pre-commit>=4.2.0",
+    "anthropic==0.71.0",
+    "xmltodict>=0.13,<1.1",
 ]
 docs = [
     "mkdocs<2.0.0,>=1.6.1",

requirements.txt

Lines changed: 2 additions & 0 deletions
@@ -115,6 +115,8 @@ typer==0.20.0
     # via instructor (pyproject.toml)
 typing-extensions==4.15.0
     # via
+    #   aiosignal
+    #   anyio
     #   openai
     #   pydantic
     #   pydantic-core

tests/dsl/test_partial.py

Lines changed: 26 additions & 30 deletions
@@ -97,28 +97,32 @@ def test_partial():
     }, "Partial model JSON schema has changed"
 
 
+partial_chunks = ["\n", "\t", " ", "\x00", '{"a": 42, "b": {"b": 1}}']
+expected_sync_models = [
+    # First model has default values (nested models show their fields as None)
+    {"a": None, "b": {"b": None}},
+    {"a": None, "b": {"b": None}},
+    {"a": None, "b": {"b": None}},
+    {"a": None, "b": {"b": None}},
+    # Last model has all fields populated from JSON
+    {"a": 42, "b": {"b": 1}},
+]
+expected_async_models = [
+    {"a": None, "b": {"b": None}},
+    {"a": None, "b": {"b": None}},
+    {"a": None, "b": {"b": None}},
+    {"a": None, "b": {"b": None}},
+    {"a": 42, "b": {"b": 1}},
+]
+
+
 def test_partial_with_whitespace():
     partial = Partial[SamplePartial]
-
     # Get the actual models from chunks - must provide complete data for final validation
-    models = list(
-        partial.model_from_chunks(["\n", "\t", " ", '{"a": 42, "b": {"b": 1}}'])
-    )
-
-    # Print actual values for debugging
-    print(f"Number of models: {len(models)}")
+    models = list(partial.model_from_chunks(partial_chunks))
+    assert len(models) == len(expected_sync_models)
     for i, model in enumerate(models):
-        print(f"Model {i}: {model.model_dump()}")
-
-    # Actual behavior: When whitespace chunks are processed, we may get models
-    # First model has default values (nested models show their fields as None)
-    assert models[0].model_dump() == {"a": None, "b": {"b": None}}
-
-    # Last model has all fields populated from JSON
-    assert models[-1].model_dump() == {"a": 42, "b": {"b": 1}}
-
-    # Check we have the expected number of models (2 instead of 4)
-    assert len(models) == 2
+        assert model.model_dump() == expected_sync_models[i]
@@ -127,23 +131,15 @@ async def test_async_partial_with_whitespace():
 
     # Handle any leading whitespace from the model - must provide complete data for final validation
     async def async_generator():
-        for chunk in ["\n", "\t", " ", '{"a": 42, "b": {"b": 1}}']:
+        for chunk in partial_chunks:
            yield chunk
 
-    # With completeness-based validation, nested models are constructed with None fields
-    expected_model_dicts = [
-        {"a": None, "b": {"b": None}},
-        {"a": None, "b": {"b": None}},
-        {"a": None, "b": {"b": None}},
-        {"a": 42, "b": {"b": 1}},
-    ]
-
    i = 0
    async for model in partial.model_from_chunks_async(async_generator()):
-        assert model.model_dump() == expected_model_dicts[i]
+        # Expected behavior: When whitespace chunks are processed, we should always get a model
+        assert model.model_dump() == expected_async_models[i]
        i += 1
-
-    assert model.model_dump() == {"a": 42, "b": {"b": 1}}
+    assert i == len(expected_async_models)
 
 
 @pytest.mark.skipif(not os.getenv("OPENAI_API_KEY"), reason="OPENAI_API_KEY not set")
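
Viewed from the caller's side, the refactor means the streaming helper yields one partial model per chunk, whitespace and control characters included. A minimal sketch of that usage, with made-up `SampleNested`/`SampleUser` models standing in for the test's `SamplePartial` fixture:

```python
from pydantic import BaseModel

from instructor.dsl.partial import Partial


# Made-up models that mirror the shape implied by expected_sync_models;
# the real test uses its own SamplePartial fixture.
class SampleNested(BaseModel):
    b: int


class SampleUser(BaseModel):
    a: int
    b: SampleNested


chunks = ["\n", "\t", " ", "\x00", '{"a": 42, "b": {"b": 1}}']

# Per the expectations encoded in expected_sync_models, every chunk - even pure
# whitespace or a control character - should yield a partial model, with unfilled
# fields reported as None until the JSON completes.
for model in Partial[SampleUser].model_from_chunks(chunks):
    print(model.model_dump())
# Expected (per expected_sync_models): four {'a': None, 'b': {'b': None}} dicts,
# then {'a': 42, 'b': {'b': 1}}.
```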

uv.lock

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default.
