Skip to content

Commit dc578a0

Browse files
committed
Optimized request prefill error messages
Signed-off-by: learner0810 <zhongjun.li@daocloud.io>
1 parent 1519a28 commit dc578a0

File tree

4 files changed

+29
-5
lines changed

4 files changed

+29
-5
lines changed

Dockerfile.sidecar

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ COPY go.mod go.mod
1111
COPY go.sum go.sum
1212
# cache deps before building and copying source so that we don't need to re-download as much
1313
# and so that source changes don't invalidate our downloaded layer
14-
RUN go mod download
14+
RUN go env -w GOPROXY=https://goproxy.cn && go mod download
1515

1616
# Copy the go source
1717
COPY cmd/pd-sidecar/main.go cmd/cmd.go

cmd/pd-sidecar/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ var (
4343

4444
func main() {
4545
port := flag.String("port", "8000", "the port the sidecar is listening on")
46-
vLLMPort := flag.String("vllm-port", "8001", "the port vLLM is listening on")
46+
vLLMPort := flag.String("vllm-port", "30611", "the port vLLM is listening on")
4747
vLLMDataParallelSize := flag.Int("data-parallel-size", 1, "the vLLM DATA-PARALLEL-SIZE value")
4848
connector := flag.String("connector", proxy.ConnectorNIXLV2, "the P/D connector being used. Supported: "+strings.Join(supportedConnectors, ", "))
4949
prefillerUseTLS := flag.Bool("prefiller-use-tls", false, "whether to use TLS when sending requests to prefillers")
@@ -120,7 +120,7 @@ func main() {
120120
if *decoderUseTLS {
121121
scheme = "https"
122122
}
123-
targetURL, err := url.Parse(scheme + "://localhost:" + *vLLMPort)
123+
targetURL, err := url.Parse(scheme + "://10.20.100.240:" + *vLLMPort)
124124
if err != nil {
125125
logger.Error(err, "failed to create targetURL")
126126
return

pkg/sidecar/proxy/connector_nixlv2.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,26 @@ func (s *Server) runNIXLProtocolV2(w http.ResponseWriter, r *http.Request, prefi
130130
)
131131

132132
if isHTTPError(pw.statusCode) {
133-
s.logger.Error(err, "request failed", "code", pw.statusCode)
133+
s.logger.Error(err, "request failed", "code", pw.statusCode, "body", pw.buffer.String())
134134
prefillSpan.SetStatus(codes.Error, "prefill request failed")
135135
prefillSpan.End()
136-
w.WriteHeader(pw.statusCode)
136+
137+
if shouldFallbackToDecode(pw) {
138+
s.logger.Info("fallback to decode", "request_id", uuidStr)
139+
r.Body = io.NopCloser(strings.NewReader(string(original)))
140+
s.decoderProxy.ServeHTTP(w, r)
141+
} else {
142+
for key, values := range pw.Header() {
143+
for _, v := range values {
144+
w.Header().Add(key, v)
145+
}
146+
}
147+
w.WriteHeader(pw.statusCode)
148+
_, err := w.Write([]byte(pw.buffer.String()))
149+
if err != nil {
150+
s.logger.Error(err, "failed to send error response to client")
151+
}
152+
}
137153
return
138154
}
139155
prefillSpan.End()

pkg/sidecar/proxy/proxy_helpers.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,3 +142,11 @@ func (s *Server) createDecoderProxyHandler(decoderURL *url.URL, decoderInsecureS
142142
// isHTTPError reports whether statusCode falls outside the 2xx range,
// i.e. it is below 200 or at least 300.
func isHTTPError(statusCode int) bool {
	success := statusCode >= http.StatusOK && statusCode < http.StatusMultipleChoices
	return !success
}
145+
146+
// shouldFallbackToDecode returns false for client error 4xx status codes (400–451). For all other status codes, it returns true.
147+
func shouldFallbackToDecode(pw *bufferedResponseWriter) bool {
148+
if pw.statusCode >= http.StatusBadRequest && pw.statusCode <= http.StatusUnavailableForLegalReasons {
149+
return false
150+
}
151+
return true
152+
}

0 commit comments

Comments
 (0)