Skip to content

Commit 76c579d

Browse files
authored
fix(completions): preserve unknown chat delta fields end-to-end (#677)
`ChatDelta` is a typed struct with no `#[serde(flatten)]` catch-all, so any field the upstream emits that is not in our explicit field list is silently dropped on deserialize and never reaches the client.

This bites the inference-proxy's server-side agent loop (nearai/inference-proxy#144): the proxy emits a synthetic `delta.nearai_tool_result` chunk between iterations carrying the tool's grounded output. Empirically, against a freshly deployed staging environment running #676, the model successfully called `web_context_search` (we saw the `tool_calls` chunks on the wire), but the proxy's `nearai_tool_result` chunks never made it through to the client — cloud-api dropped the field when it deserialized each chunk into `ChatDelta`, so it was missing from the re-serialized chunk. Same root cause as #676 on the request side: typed structs with no catch-all.

Fix: add a flattened `extra: HashMap<String, serde_json::Value>` to `ChatDelta`. Unknown delta fields are now preserved verbatim on the deserialize-then-re-serialize round trip. Derived `Default` on the struct so the empty catch-all is `HashMap::new()` by default, then updated the handful of explicit `ChatDelta { ... }` literals across the workspace to set `extra: Default::default()`.

Regression test: `chat_delta_preserves_unknown_fields_round_trip` constructs a chunk with `delta.nearai_tool_result`, deserializes it, asserts the catch-all contains the expected payload, and re-serializes the chunk to confirm the synthetic field survives in the output JSON.

`cargo test --workspace --lib` clean (315 + 174 + 106 + 16 + 11 passing, including the new test). `cargo clippy --all-targets -- -D warnings` clean.
1 parent f017653 commit 76c579d

4 files changed

Lines changed: 74 additions & 1 deletion

File tree

crates/api/src/routes/completions.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,7 @@ fn finalize_choice_in_place(
507507
tool_calls: None,
508508
reasoning_content: None,
509509
reasoning: None,
510+
extra: Default::default(),
510511
});
511512

512513
if let Some(s) = states.content.remove(&idx) {
@@ -709,6 +710,7 @@ fn build_flush_chunks(states: &mut StreamUnredactStates, template: &ChunkTemplat
709710
tool_calls: None,
710711
reasoning_content: if rc.is_empty() { None } else { Some(rc) },
711712
reasoning: if r.is_empty() { None } else { Some(r) },
713+
extra: Default::default(),
712714
}),
713715
logprobs: None,
714716
finish_reason: None,
@@ -769,6 +771,7 @@ fn build_flush_chunks(states: &mut StreamUnredactStates, template: &ChunkTemplat
769771
}]),
770772
reasoning_content: None,
771773
reasoning: None,
774+
extra: Default::default(),
772775
}),
773776
logprobs: None,
774777
finish_reason: None,
@@ -1563,6 +1566,7 @@ mod tests {
15631566
tool_calls: None,
15641567
reasoning_content: None,
15651568
reasoning: None,
1569+
extra: Default::default(),
15661570
}
15671571
}
15681572

crates/inference_providers/src/chunk_builder.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ impl ChunkContext {
5959
tool_calls: None,
6060
reasoning_content: None,
6161
reasoning: None,
62+
extra: Default::default(),
6263
},
6364
None,
6465
None,
@@ -76,6 +77,7 @@ impl ChunkContext {
7677
tool_calls: None,
7778
reasoning_content: None,
7879
reasoning: None,
80+
extra: Default::default(),
7981
},
8082
None,
8183
None,
@@ -107,6 +109,7 @@ impl ChunkContext {
107109
}]),
108110
reasoning_content: None,
109111
reasoning: None,
112+
extra: Default::default(),
110113
},
111114
None,
112115
None,
@@ -133,6 +136,7 @@ impl ChunkContext {
133136
}]),
134137
reasoning_content: None,
135138
reasoning: None,
139+
extra: Default::default(),
136140
},
137141
None,
138142
None,
@@ -170,6 +174,7 @@ impl ChunkContext {
170174
tool_calls: Some(deltas),
171175
reasoning_content: None,
172176
reasoning: None,
177+
extra: Default::default(),
173178
},
174179
finish_reason,
175180
usage,
@@ -191,6 +196,7 @@ impl ChunkContext {
191196
tool_calls: None,
192197
reasoning_content: None,
193198
reasoning: None,
199+
extra: Default::default(),
194200
},
195201
finish_reason,
196202
Some(usage),

crates/inference_providers/src/mock.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ impl ResponseTemplate {
371371
tool_calls: None,
372372
reasoning_content: Some(word_with_space.clone()),
373373
reasoning: Some(word_with_space),
374+
extra: Default::default(),
374375
}),
375376
logprobs: None,
376377
finish_reason: None,
@@ -415,6 +416,7 @@ impl ResponseTemplate {
415416
tool_calls: None,
416417
reasoning_content: None,
417418
reasoning: None,
419+
extra: Default::default(),
418420
}),
419421
logprobs: None,
420422
finish_reason,
@@ -460,6 +462,7 @@ impl ResponseTemplate {
460462
}]),
461463
reasoning_content: None,
462464
reasoning: None,
465+
extra: Default::default(),
463466
}),
464467
logprobs: None,
465468
finish_reason: None,
@@ -510,6 +513,7 @@ impl ResponseTemplate {
510513
}]),
511514
reasoning_content: None,
512515
reasoning: None,
516+
extra: Default::default(),
513517
}),
514518
logprobs: None,
515519
finish_reason,

crates/inference_providers/src/models.rs

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pub struct ChatMessage {
1818

1919
/// Delta message in streaming chat completions
2020
/// All fields are optional as they may not be present in every chunk
21-
#[derive(Debug, Clone, Serialize, Deserialize)]
21+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
2222
pub struct ChatDelta {
2323
#[serde(skip_serializing_if = "Option::is_none")]
2424
pub role: Option<MessageRole>,
@@ -34,6 +34,17 @@ pub struct ChatDelta {
3434
pub reasoning_content: Option<String>,
3535
#[serde(skip_serializing_if = "Option::is_none")]
3636
pub reasoning: Option<String>,
37+
/// Preserve any additional fields the upstream emits that we don't
38+
/// have an explicit slot for. Without this, serde silently drops
39+
/// unknown delta fields on deserialize and they never reach the
40+
/// client. Specifically: the inference-proxy's server-side agent
41+
/// loop (nearai/inference-proxy#144) emits a synthetic
42+
/// `delta.nearai_tool_result` chunk between iterations carrying
43+
/// the tool's grounded output — without `#[serde(flatten)]` this chunk is
44+
/// stripped before it leaves cloud-api, even though the
45+
/// `tool_calls` and final `content` make it through.
46+
#[serde(flatten)]
47+
pub extra: std::collections::HashMap<String, serde_json::Value>,
3748
}
3849

3950
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@@ -1197,6 +1208,54 @@ mod tests {
11971208
assert_eq!(choice.finish_reason.as_deref(), Some("stop"));
11981209
assert!(choice.message.content.is_some());
11991210
}
1211+
1212+
#[test]
1213+
fn chat_delta_preserves_unknown_fields_round_trip() {
1214+
// Regression for the inference-proxy agent-loop path
1215+
// (nearai/inference-proxy#144): the proxy emits a synthetic
1216+
// `delta.nearai_tool_result` chunk between iterations. Without
1217+
// the flattened `extra` catch-all, serde silently drops it on
1218+
// deserialize and clients never see the tool grounding.
1219+
let json_chunk = r#"{
1220+
"id": "chatcmpl-abc",
1221+
"object": "chat.completion.chunk",
1222+
"created": 1,
1223+
"model": "zai-org/GLM-5.1-FP8",
1224+
"choices": [{
1225+
"index": 0,
1226+
"delta": {
1227+
"nearai_tool_result": {
1228+
"tool_call_id": "call_1",
1229+
"name": "web_context_search",
1230+
"status": "ok",
1231+
"output": "[1] result..."
1232+
}
1233+
}
1234+
}]
1235+
}"#;
1236+
1237+
let chunk: ChatCompletionChunk = serde_json::from_str(json_chunk).unwrap();
1238+
let delta = chunk.choices[0]
1239+
.delta
1240+
.as_ref()
1241+
.expect("delta should deserialize");
1242+
1243+
// The synthetic field is preserved verbatim in the catch-all.
1244+
let tool_result = delta
1245+
.extra
1246+
.get("nearai_tool_result")
1247+
.expect("nearai_tool_result must survive deserialization");
1248+
assert_eq!(tool_result["tool_call_id"], "call_1");
1249+
assert_eq!(tool_result["name"], "web_context_search");
1250+
assert_eq!(tool_result["status"], "ok");
1251+
1252+
// And round-trips on re-serialization so clients see the
1253+
// same shape we got from upstream.
1254+
let reserialized = serde_json::to_string(&chunk).unwrap();
1255+
assert!(reserialized.contains("\"nearai_tool_result\""));
1256+
assert!(reserialized.contains("\"web_context_search\""));
1257+
assert!(reserialized.contains("\"call_1\""));
1258+
}
12001259
}
12011260

12021261
// Score models for text similarity endpoint

0 commit comments

Comments
 (0)