Skip to content

Commit 4d32675

Browse files
committed
fix(responseapi): inject stream flag into translated Response API body
The SDK's ChatCompletionNewParams struct does not expose a Stream field (the SDK sets streaming through request options internally). As a result, when a Response API request was translated to Chat Completions format, the "stream": true key was missing from the JSON body, which would prevent backends from streaming. Inject the key with sjson after marshaling. Also renumbers TD037 to TD038 after upstream claimed TD037, and adds a tracking link to follow-up issue #1685. Signed-off-by: Asaad Balum <asaad.balum@gmail.com> Signed-off-by: asaadbalum <asaad.balum@gmail.com>
1 parent 128f0d5 commit 4d32675

File tree

5 files changed

+49
-4
lines changed

5 files changed

+49
-4
lines changed

docs/agent/tech-debt/td-038-custom-chat-completions-structs.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,7 @@ explaining why the extension is necessary.
5454
- [ ] `pkg/memory` serialization types migrated or documented as intentional exceptions
5555
- [ ] Zero custom `ChatCompletion*` type definitions remain outside documented exceptions
5656
- [ ] Compatibility tests cover all conversion paths
57+
58+
## Tracking
59+
60+
Follow-up issue: #1685

src/semantic-router/pkg/extproc/extproc_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3727,6 +3727,39 @@ var _ = Describe("Response API Translation", func() {
37273727
Expect(firstMsg["role"]).To(Equal("user"))
37283728
})
37293729

3730+
It("should include stream flag in translated request when streaming", func() {
3731+
responseAPIReq := `{
3732+
"model": "gpt-4",
3733+
"input": "Hello",
3734+
"stream": true
3735+
}`
3736+
3737+
_, translatedBody, err := filter.TranslateRequest(context.Background(), []byte(responseAPIReq))
3738+
Expect(err).NotTo(HaveOccurred())
3739+
3740+
var chatReq map[string]interface{}
3741+
err = json.Unmarshal(translatedBody, &chatReq)
3742+
Expect(err).NotTo(HaveOccurred())
3743+
Expect(chatReq["stream"]).To(Equal(true))
3744+
Expect(chatReq).To(HaveKey("stream_options"))
3745+
})
3746+
3747+
It("should omit stream flag when not streaming", func() {
3748+
responseAPIReq := `{
3749+
"model": "gpt-4",
3750+
"input": "Hello"
3751+
}`
3752+
3753+
_, translatedBody, err := filter.TranslateRequest(context.Background(), []byte(responseAPIReq))
3754+
Expect(err).NotTo(HaveOccurred())
3755+
3756+
var chatReq map[string]interface{}
3757+
err = json.Unmarshal(translatedBody, &chatReq)
3758+
Expect(err).NotTo(HaveOccurred())
3759+
Expect(chatReq).NotTo(HaveKey("stream"))
3760+
Expect(chatReq).NotTo(HaveKey("stream_options"))
3761+
})
3762+
37303763
It("should include system instructions in translated request", func() {
37313764
responseAPIReq := `{
37323765
"model": "gpt-4",

src/semantic-router/pkg/extproc/req_filter_response_api.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
1010
typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
1111
"github.com/openai/openai-go"
12+
"github.com/tidwall/sjson"
1213

1314
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
1415
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/responseapi"
@@ -148,6 +149,15 @@ func (f *ResponseAPIFilter) TranslateRequest(ctx context.Context, body []byte) (
148149
return nil, nil, err
149150
}
150151

152+
// The SDK struct doesn't expose a Stream field (the SDK sets it via
153+
// request options internally). We inject it so the downstream pipeline
154+
// and the upstream backend see the correct "stream" flag.
155+
if req.Stream {
156+
if b, err := sjson.SetBytes(translatedBody, "stream", true); err == nil {
157+
translatedBody = b
158+
}
159+
}
160+
151161
// Store translated body in context for later use
152162
respCtx.TranslatedBody = translatedBody
153163

tools/agent/repo-manifest.yaml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,8 @@ docs:
180180
- docs/agent/tech-debt/td-034-runtime-and-dashboard-state-durability-and-telemetry-contract.md
181181
- docs/agent/tech-debt/td-035-signal-group-default-coverage-contract-gap.md
182182
- docs/agent/tech-debt/td-036-decision-tree-authoring-roundtrip-gap.md
183-
<<<<<<< HEAD
184183
- docs/agent/tech-debt/td-037-dev-integration-env-ownership-and-shared-suite-topology.md
185-
=======
186-
- docs/agent/tech-debt/td-037-custom-chat-completions-structs.md
187-
>>>>>>> 4e87dfdc ([Feat][Router] Migrate custom Chat Completions structs to official SDK types)
184+
- docs/agent/tech-debt/td-038-custom-chat-completions-structs.md
188185
- docs/agent/glossary.md
189186
- docs/agent/adr/README.md
190187
- docs/agent/adr/adr-0001-harness-layering.md

tools/agent/structure-rules.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ legacy_hotspots:
146146
interface_checks: relaxed
147147
- paths:
148148
- src/semantic-router/pkg/extproc/extproc_test.go
149+
file_checks: relaxed
149150
function_checks: relaxed
150151
- paths:
151152
- src/semantic-router/pkg/classification/hallucination_detector_test.go

0 commit comments

Comments
 (0)