fix(completions): preserve unknown chat delta fields end-to-end (#677)

Evrard-Nil · web-flow · commit 76c579d1dd4b · 2026-05-26T21:06:52.000+02:00
`ChatDelta` is a typed struct with no `#[serde(flatten)]` catch-all, so any field the upstream emits that's not in our explicit field list is silently dropped on deserialize and never reaches the client. This bites the inference-proxy's server-side agent loop (nearai/inference-proxy#144): the proxy emits a synthetic `delta.nearai_tool_result` chunk between iterations carrying the tool's grounded output. Empirically, against a freshly-deployed staging running #676, the model successfully called `web_context_search` (we saw the tool_calls chunks on the wire), but the proxy's `nearai_tool_result` chunks never made it through to the client — cloud-api stripped them on chunk re-serialization. Same root cause as #676 on the request side: typed structs with no catch-all. Fix: add a flattened `extra: HashMap<String, serde_json::Value>` to `ChatDelta`. Unknown delta fields are now preserved verbatim on the deserialize-then-re-serialize round trip. Derived `Default` on the struct so the empty catch-all is `HashMap::new()` by default, then updated the handful of explicit `ChatDelta { ... }` literals across the workspace to set `extra: Default::default()`. Regression test: `chat_delta_preserves_unknown_fields_round_trip` constructs a chunk with `delta.nearai_tool_result`, deserializes it, asserts the catch-all contains the expected payload, and round-trips the re-serialized JSON to confirm the synthetic field survives. `cargo test --workspace --lib` clean (315 + 174 + 106 + 16 + 11 passing, including the new test). `cargo clippy --all-targets -- -D warnings` clean.
diff --git a/crates/api/src/routes/completions.rs b/crates/api/src/routes/completions.rs
@@ -507,6 +507,7 @@ fn finalize_choice_in_place(
             tool_calls: None,
             reasoning_content: None,
             reasoning: None,
+            extra: Default::default(),
         });
 
     if let Some(s) = states.content.remove(&idx) {
@@ -709,6 +710,7 @@ fn build_flush_chunks(states: &mut StreamUnredactStates, template: &ChunkTemplat
                     tool_calls: None,
                     reasoning_content: if rc.is_empty() { None } else { Some(rc) },
                     reasoning: if r.is_empty() { None } else { Some(r) },
+                    extra: Default::default(),
                 }),
                 logprobs: None,
                 finish_reason: None,
@@ -769,6 +771,7 @@ fn build_flush_chunks(states: &mut StreamUnredactStates, template: &ChunkTemplat
                     }]),
                     reasoning_content: None,
                     reasoning: None,
+                    extra: Default::default(),
                 }),
                 logprobs: None,
                 finish_reason: None,
@@ -1563,6 +1566,7 @@ mod tests {
             tool_calls: None,
             reasoning_content: None,
             reasoning: None,
+            extra: Default::default(),
         }
     }
 
diff --git a/crates/inference_providers/src/chunk_builder.rs b/crates/inference_providers/src/chunk_builder.rs
@@ -59,6 +59,7 @@ impl ChunkContext {
                 tool_calls: None,
                 reasoning_content: None,
                 reasoning: None,
+                extra: Default::default(),
             },
             None,
             None,
@@ -76,6 +77,7 @@ impl ChunkContext {
                 tool_calls: None,
                 reasoning_content: None,
                 reasoning: None,
+                extra: Default::default(),
             },
             None,
             None,
@@ -107,6 +109,7 @@ impl ChunkContext {
                 }]),
                 reasoning_content: None,
                 reasoning: None,
+                extra: Default::default(),
             },
             None,
             None,
@@ -133,6 +136,7 @@ impl ChunkContext {
                 }]),
                 reasoning_content: None,
                 reasoning: None,
+                extra: Default::default(),
             },
             None,
             None,
@@ -170,6 +174,7 @@ impl ChunkContext {
                 tool_calls: Some(deltas),
                 reasoning_content: None,
                 reasoning: None,
+                extra: Default::default(),
             },
             finish_reason,
             usage,
@@ -191,6 +196,7 @@ impl ChunkContext {
                 tool_calls: None,
                 reasoning_content: None,
                 reasoning: None,
+                extra: Default::default(),
             },
             finish_reason,
             Some(usage),
diff --git a/crates/inference_providers/src/mock.rs b/crates/inference_providers/src/mock.rs
@@ -371,6 +371,7 @@ impl ResponseTemplate {
                             tool_calls: None,
                             reasoning_content: Some(word_with_space.clone()),
                             reasoning: Some(word_with_space),
+                            extra: Default::default(),
                         }),
                         logprobs: None,
                         finish_reason: None,
@@ -415,6 +416,7 @@ impl ResponseTemplate {
                             tool_calls: None,
                             reasoning_content: None,
                             reasoning: None,
+                            extra: Default::default(),
                         }),
                         logprobs: None,
                         finish_reason,
@@ -460,6 +462,7 @@ impl ResponseTemplate {
                             }]),
                             reasoning_content: None,
                             reasoning: None,
+                            extra: Default::default(),
                         }),
                         logprobs: None,
                         finish_reason: None,
@@ -510,6 +513,7 @@ impl ResponseTemplate {
                                 }]),
                                 reasoning_content: None,
                                 reasoning: None,
+                                extra: Default::default(),
                             }),
                             logprobs: None,
                             finish_reason,
diff --git a/crates/inference_providers/src/models.rs b/crates/inference_providers/src/models.rs
@@ -18,7 +18,7 @@ pub struct ChatMessage {
 
 /// Delta message in streaming chat completions
 /// All fields are optional as they may not be present in every chunk
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct ChatDelta {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub role: Option<MessageRole>,
@@ -34,6 +34,17 @@ pub struct ChatDelta {
     pub reasoning_content: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub reasoning: Option<String>,
+    /// Preserve any additional fields the upstream emits that we don't
+    /// have an explicit slot for. Without this, serde silently drops
+    /// unknown delta fields on deserialize and they never reach the
+    /// client. Specifically: the inference-proxy's server-side agent
+    /// loop (nearai/inference-proxy#144) emits a synthetic
+    /// `delta.nearai_tool_result` chunk between iterations carrying
+    /// the tool's grounded output — without flatten this chunk is
+    /// stripped before it leaves cloud-api, even though the
+    /// `tool_calls` and final `content` make it through.
+    #[serde(flatten)]
+    pub extra: std::collections::HashMap<String, serde_json::Value>,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@@ -1197,6 +1208,54 @@ mod tests {
         assert_eq!(choice.finish_reason.as_deref(), Some("stop"));
         assert!(choice.message.content.is_some());
     }
+
+    #[test]
+    fn chat_delta_preserves_unknown_fields_round_trip() {
+        // Regression for the inference-proxy agent-loop path
+        // (nearai/inference-proxy#144): the proxy emits a synthetic
+        // `delta.nearai_tool_result` chunk between iterations. Without
+        // the flattened `extra` catch-all, serde silently drops it on
+        // deserialize and clients never see the tool grounding.
+        let json_chunk = r#"{
+            "id": "chatcmpl-abc",
+            "object": "chat.completion.chunk",
+            "created": 1,
+            "model": "zai-org/GLM-5.1-FP8",
+            "choices": [{
+                "index": 0,
+                "delta": {
+                    "nearai_tool_result": {
+                        "tool_call_id": "call_1",
+                        "name": "web_context_search",
+                        "status": "ok",
+                        "output": "[1] result..."
+                    }
+                }
+            }]
+        }"#;
+
+        let chunk: ChatCompletionChunk = serde_json::from_str(json_chunk).unwrap();
+        let delta = chunk.choices[0]
+            .delta
+            .as_ref()
+            .expect("delta should deserialize");
+
+        // The synthetic field is preserved verbatim in the catch-all.
+        let tool_result = delta
+            .extra
+            .get("nearai_tool_result")
+            .expect("nearai_tool_result must survive deserialization");
+        assert_eq!(tool_result["tool_call_id"], "call_1");
+        assert_eq!(tool_result["name"], "web_context_search");
+        assert_eq!(tool_result["status"], "ok");
+        assert_eq!(tool_result["output"], "[1] result...");
+
+        // Re-parse the wire JSON: the payload must reappear intact at `choices[0].delta`.
+        let wire = serde_json::to_string(&chunk).unwrap();
+        let reparsed: serde_json::Value = serde_json::from_str(&wire).unwrap();
+        let echoed = &reparsed["choices"][0]["delta"]["nearai_tool_result"];
+        assert_eq!(echoed, tool_result);
+    }
 }
 
 // Score models for text similarity endpoint