Skip to content

Commit 4a2ca13

Browse files
committed
fix(responseapi): inject stream flag into translated Response API body
ChatCompletionNewParams does not expose a Stream field (the SDK sets streaming through request options internally). As a result, when a Response API request was translated to Chat Completions format, the "stream": true key was missing from the marshaled JSON body, so backends would not stream. Inject the key with sjson after marshaling. Also renumber the tech-debt entry from TD037 to TD038, because upstream had already claimed TD037, and add a tracking link to follow-up issue #1685. Signed-off-by: Asaad Balum <asaad.balum@gmail.com> Signed-off-by: asaadbalum <asaad.balum@gmail.com>
1 parent 128f0d5 commit 4a2ca13

File tree

5 files changed

+72
-6
lines changed

5 files changed

+72
-6
lines changed

docs/agent/tech-debt/td-038-custom-chat-completions-structs.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,7 @@ explaining why the extension is necessary.
5454
- [ ] `pkg/memory` serialization types migrated or documented as intentional exceptions
5555
- [ ] Zero custom `ChatCompletion*` type definitions remain outside documented exceptions
5656
- [ ] Compatibility tests cover all conversion paths
57+
58+
## Tracking
59+
60+
Follow-up issue: #1685

src/semantic-router/pkg/extproc/req_filter_response_api.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
1010
typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
1111
"github.com/openai/openai-go"
12+
"github.com/tidwall/sjson"
1213

1314
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
1415
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/responseapi"
@@ -148,6 +149,15 @@ func (f *ResponseAPIFilter) TranslateRequest(ctx context.Context, body []byte) (
148149
return nil, nil, err
149150
}
150151

152+
// The SDK struct doesn't expose a Stream field (the SDK sets it via
153+
// request options internally). We inject it so the downstream pipeline
154+
// and the upstream backend see the correct "stream" flag.
155+
if req.Stream {
156+
if b, err := sjson.SetBytes(translatedBody, "stream", true); err == nil {
157+
translatedBody = b
158+
}
159+
}
160+
151161
// Store translated body in context for later use
152162
respCtx.TranslatedBody = translatedBody
153163

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package extproc
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
7+
. "github.com/onsi/ginkgo/v2"
8+
. "github.com/onsi/gomega"
9+
)
10+
11+
var _ = Describe("Response API Stream Flag Injection", func() {
12+
var (
13+
filter *ResponseAPIFilter
14+
mockStore *MockResponseStore
15+
)
16+
17+
BeforeEach(func() {
18+
mockStore = NewMockResponseStore()
19+
filter = NewResponseAPIFilter(mockStore)
20+
})
21+
22+
It("should include stream flag in translated request when streaming", func() {
23+
responseAPIReq := `{
24+
"model": "gpt-4",
25+
"input": "Hello",
26+
"stream": true
27+
}`
28+
29+
_, translatedBody, err := filter.TranslateRequest(context.Background(), []byte(responseAPIReq))
30+
Expect(err).NotTo(HaveOccurred())
31+
32+
var chatReq map[string]interface{}
33+
err = json.Unmarshal(translatedBody, &chatReq)
34+
Expect(err).NotTo(HaveOccurred())
35+
Expect(chatReq["stream"]).To(Equal(true))
36+
Expect(chatReq).To(HaveKey("stream_options"))
37+
})
38+
39+
It("should omit stream flag when not streaming", func() {
40+
responseAPIReq := `{
41+
"model": "gpt-4",
42+
"input": "Hello"
43+
}`
44+
45+
_, translatedBody, err := filter.TranslateRequest(context.Background(), []byte(responseAPIReq))
46+
Expect(err).NotTo(HaveOccurred())
47+
48+
var chatReq map[string]interface{}
49+
err = json.Unmarshal(translatedBody, &chatReq)
50+
Expect(err).NotTo(HaveOccurred())
51+
Expect(chatReq).NotTo(HaveKey("stream"))
52+
Expect(chatReq).NotTo(HaveKey("stream_options"))
53+
})
54+
})

tools/agent/repo-manifest.yaml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,8 @@ docs:
180180
- docs/agent/tech-debt/td-034-runtime-and-dashboard-state-durability-and-telemetry-contract.md
181181
- docs/agent/tech-debt/td-035-signal-group-default-coverage-contract-gap.md
182182
- docs/agent/tech-debt/td-036-decision-tree-authoring-roundtrip-gap.md
183-
<<<<<<< HEAD
184183
- docs/agent/tech-debt/td-037-dev-integration-env-ownership-and-shared-suite-topology.md
185-
=======
186-
- docs/agent/tech-debt/td-037-custom-chat-completions-structs.md
187-
>>>>>>> 4e87dfdc ([Feat][Router] Migrate custom Chat Completions structs to official SDK types)
184+
- docs/agent/tech-debt/td-038-custom-chat-completions-structs.md
188185
- docs/agent/glossary.md
189186
- docs/agent/adr/README.md
190187
- docs/agent/adr/adr-0001-harness-layering.md

tools/agent/structure-rules.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,11 +153,12 @@ legacy_hotspots:
153153
- src/semantic-router/pkg/extproc/hallucination_test.go
154154
- src/semantic-router/pkg/extproc/req_filter_tools_test.go
155155
function_checks: relaxed
156-
- paths:
157-
- src/vllm-sr/cli/models.py
158156
- paths:
159157
- src/semantic-router/pkg/modelselection/benchmark_runner.go
158+
file_checks: relaxed
160159
function_checks: relaxed
160+
- paths:
161+
- src/vllm-sr/cli/models.py
161162
- paths:
162163
- src/semantic-router/pkg/extproc/processor_req_body.go
163164
- src/semantic-router/pkg/extproc/processor_res_body.go

0 commit comments

Comments
 (0)