fix(responseapi): inject stream flag into translated Response API body

asaadbalum · asaadbalum · commit 4a2ca13ed8cc · 2026-03-30T22:28:47.000+02:00
ChatCompletionNewParams does not expose a Stream field (the library uses request options internally). When translating Response API requests to Chat Completions format, the "stream": true key was missing from the JSON body, which would prevent backends from streaming. Inject it using sjson after marshaling. Also renumbers TD037 to TD038 after upstream claimed TD037, and adds follow-up issue #1685 tracking link. Signed-off-by: Asaad Balum <asaad.balum@gmail.com> Signed-off-by: asaadbalum <asaad.balum@gmail.com>
diff --git a/docs/agent/tech-debt/td-038-custom-chat-completions-structs.md b/docs/agent/tech-debt/td-038-custom-chat-completions-structs.md
@@ -54,3 +54,7 @@ explaining why the extension is necessary.
 - [ ] `pkg/memory` serialization types migrated or documented as intentional exceptions
 - [ ] Zero custom `ChatCompletion*` type definitions remain outside documented exceptions
 - [ ] Compatibility tests cover all conversion paths
+
+## Tracking
+
+Follow-up issue: #1685
diff --git a/src/semantic-router/pkg/extproc/req_filter_response_api.go b/src/semantic-router/pkg/extproc/req_filter_response_api.go
@@ -9,6 +9,7 @@ import (
 	ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
 	typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
 	"github.com/openai/openai-go"
+	"github.com/tidwall/sjson"
 
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/responseapi"
@@ -148,6 +149,15 @@ func (f *ResponseAPIFilter) TranslateRequest(ctx context.Context, body []byte) (
 		return nil, nil, err
 	}
 
+	// The SDK struct doesn't expose a Stream field (the SDK sets it via
+	// request options internally). We inject it so the downstream pipeline
+	// and the upstream backend see the correct "stream" flag.
+	if req.Stream {
+		if b, err := sjson.SetBytes(translatedBody, "stream", true); err == nil {
+			translatedBody = b
+		}
+	}
+
 	// Store translated body in context for later use
 	respCtx.TranslatedBody = translatedBody
 
diff --git a/src/semantic-router/pkg/extproc/req_filter_response_api_test.go b/src/semantic-router/pkg/extproc/req_filter_response_api_test.go
@@ -0,0 +1,54 @@
+package extproc
+
+import (
+	"context"
+	"encoding/json"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+// Specs for how TranslateRequest carries the Response API "stream" flag into the translated Chat Completions body.
+var _ = Describe("Response API Stream Flag Injection", func() {
+	var (
+		filter    *ResponseAPIFilter
+		mockStore *MockResponseStore
+	)
+
+	BeforeEach(func() {
+		mockStore = NewMockResponseStore()
+		filter = NewResponseAPIFilter(mockStore)
+	})
+	// Streaming case: a request with "stream": true must yield a body containing "stream": true and a "stream_options" key.
+	It("should include stream flag in translated request when streaming", func() {
+		responseAPIReq := `{
+			"model": "gpt-4",
+			"input": "Hello",
+			"stream": true
+		}`
+		// Only the second return value (the translated body) and the error are inspected here.
+		_, translatedBody, err := filter.TranslateRequest(context.Background(), []byte(responseAPIReq))
+		Expect(err).NotTo(HaveOccurred())
+		// Decode into a generic map so the presence of individual top-level keys can be asserted.
+		var chatReq map[string]interface{}
+		err = json.Unmarshal(translatedBody, &chatReq)
+		Expect(err).NotTo(HaveOccurred())
+		Expect(chatReq["stream"]).To(Equal(true))
+		Expect(chatReq).To(HaveKey("stream_options")) // NOTE(review): what sets stream_options is not visible in this diff — confirm
+	})
+	// Non-streaming case: a request without "stream" must yield a body with neither "stream" nor "stream_options".
+	It("should omit stream flag when not streaming", func() {
+		responseAPIReq := `{
+			"model": "gpt-4",
+			"input": "Hello"
+		}`
+
+		_, translatedBody, err := filter.TranslateRequest(context.Background(), []byte(responseAPIReq))
+		Expect(err).NotTo(HaveOccurred())
+
+		var chatReq map[string]interface{}
+		err = json.Unmarshal(translatedBody, &chatReq)
+		Expect(err).NotTo(HaveOccurred())
+		Expect(chatReq).NotTo(HaveKey("stream"))
+		Expect(chatReq).NotTo(HaveKey("stream_options"))
+	})
+})
diff --git a/tools/agent/repo-manifest.yaml b/tools/agent/repo-manifest.yaml
@@ -180,11 +180,8 @@ docs:
   - docs/agent/tech-debt/td-034-runtime-and-dashboard-state-durability-and-telemetry-contract.md
   - docs/agent/tech-debt/td-035-signal-group-default-coverage-contract-gap.md
   - docs/agent/tech-debt/td-036-decision-tree-authoring-roundtrip-gap.md
-<<<<<<< HEAD
   - docs/agent/tech-debt/td-037-dev-integration-env-ownership-and-shared-suite-topology.md
-=======
-  - docs/agent/tech-debt/td-037-custom-chat-completions-structs.md
->>>>>>> 4e87dfdc ([Feat][Router] Migrate custom Chat Completions structs to official SDK types)
+  - docs/agent/tech-debt/td-038-custom-chat-completions-structs.md
   - docs/agent/glossary.md
   - docs/agent/adr/README.md
   - docs/agent/adr/adr-0001-harness-layering.md
diff --git a/tools/agent/structure-rules.yaml b/tools/agent/structure-rules.yaml
@@ -153,11 +153,12 @@ legacy_hotspots:
       - src/semantic-router/pkg/extproc/hallucination_test.go
       - src/semantic-router/pkg/extproc/req_filter_tools_test.go
     function_checks: relaxed
-  - paths:
-      - src/vllm-sr/cli/models.py
   - paths:
       - src/semantic-router/pkg/modelselection/benchmark_runner.go
+    file_checks: relaxed
     function_checks: relaxed
+  - paths:
+      - src/vllm-sr/cli/models.py
   - paths:
       - src/semantic-router/pkg/extproc/processor_req_body.go
       - src/semantic-router/pkg/extproc/processor_res_body.go