Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion litellm/litellm_core_utils/core_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,12 @@ def preserve_upstream_non_openai_attributes(
"""
# Access model_fields on the class, not the instance, to avoid Pydantic 2.11+ deprecation warnings
expected_keys = set(type(model_response).model_fields.keys()).union({"usage"})
for key, value in original_chunk.model_dump().items():
try:
obj_dict = original_chunk.model_dump()
except TypeError:
# Fallback for Pydantic MockValSer bug (issue #18801)
obj_dict = dict(original_chunk.__dict__) if hasattr(original_chunk, '__dict__') else {}
for key, value in obj_dict.items():
if key not in expected_keys:
setattr(model_response, key, value)

Expand Down
12 changes: 10 additions & 2 deletions litellm/litellm_core_utils/streaming_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,7 +1859,11 @@ def __next__(self) -> "ModelResponseStream": # noqa: PLR0915
response, "usage"
): # remove usage from chunk, only send on final chunk
# Convert the object to a dictionary
obj_dict = response.model_dump()
try:
obj_dict = response.model_dump()
except TypeError as e:
# Fallback: manually extract dict from __dict__ to bypass Pydantic serializer
obj_dict = dict(response.__dict__) if hasattr(response, '__dict__') else {}

# Remove an attribute (e.g., 'attr2')
if "usage" in obj_dict:
Expand Down Expand Up @@ -2047,7 +2051,11 @@ async def __anext__(self) -> "ModelResponseStream": # noqa: PLR0915

# Strip usage from the outgoing chunk so it's not sent twice
# (once in the chunk, once in _hidden_params).
obj_dict = processed_chunk.model_dump()
try:
obj_dict = processed_chunk.model_dump()
except TypeError as e:
# Fallback: manually extract dict from __dict__ to bypass Pydantic serializer
obj_dict = dict(processed_chunk.__dict__) if hasattr(processed_chunk, '__dict__') else {}
if "usage" in obj_dict:
del obj_dict["usage"]
processed_chunk = self.model_response_creator(
Expand Down
49 changes: 49 additions & 0 deletions tests/test_litellm/litellm_core_utils/test_streaming_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1679,3 +1679,52 @@ def test_tool_use_not_dropped_when_finish_reason_already_set(
)
assert tool_calls[0].id == "call_1"
assert tool_calls[0].function.name == "get_weather"


def test_model_dump_fallback_handles_pydantic_serializer_bug(
    initialized_custom_stream_wrapper: CustomStreamWrapper,
):
    """
    Regression test for #18801: MockValSer TypeError in streaming responses.

    Pydantic 2.11+ has a bug where the ``MockValSer`` sentinel is not converted
    to a ``SchemaSerializer`` in certain scenarios, so ``model_dump()`` raises
    ``TypeError``. The fix under test catches that TypeError and falls back to
    extracting the payload from ``__dict__``.
    """
    # Build a chunk that carries usage data; the streaming handler is expected
    # to strip the "usage" key from the serialized dict before re-emitting.
    chunk_with_usage = ModelResponseStream(
        id="test-chunk",
        created=1742056047,
        model="sap-ai-core/test-model",
        object="chat.completion.chunk",
        choices=[
            StreamingChoices(
                finish_reason=None,
                index=0,
                delta=Delta(content="test content", role="assistant"),
            )
        ],
        usage=Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15),
    )

    # Simulate the MockValSer bug: any call to model_dump raises TypeError.
    # NOTE(review): this assumes the model class permits instance-attribute
    # assignment (Pydantic v2 models reject it unless configured) — confirm.
    def mock_model_dump(*args, **kwargs):
        raise TypeError("'MockValSer' object cannot be converted to 'SchemaSerializer'")

    chunk_with_usage.model_dump = mock_model_dump

    # The wrapper should gracefully fall back to __dict__ and not crash.
    initialized_custom_stream_wrapper.chunks.append(chunk_with_usage)

    # return_processed_chunk_logic is the code path that invokes model_dump.
    result = initialized_custom_stream_wrapper.return_processed_chunk_logic(
        completion_obj={"content": "test content"},
        response_obj={"original_chunk": chunk_with_usage},
        model_response=chunk_with_usage,
    )

    # Should not raise TypeError and should successfully process the chunk.
    assert result is not None
    assert result.choices[0].delta.content == "test content"
Loading