Skip to content

Commit b43cfc0

Browse files
committed
remove request control hooks
1 parent 932b5e0 commit b43cfc0

File tree

4 files changed

+10
-220
lines changed

4 files changed

+10
-220
lines changed

pkg/plugins/scorer/pd_slo_aware_router_hooks.go

Lines changed: 0 additions & 111 deletions
This file was deleted.

pkg/plugins/scorer/pd_slo_aware_scorer.go

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,11 @@ func PDSLOAwareScorerFactory(name string, rawConfig json.RawMessage, handle plug
7474
return nil, fmt.Errorf("failed to start latency predictor: %w", err)
7575
}
7676

77-
// Create the SLO aware router and inject P/D-aware request builder
78-
// This builder will populate the PodType field based on llm-d.ai/role labels
79-
baseRouter := predictedlatency.NewPredictedLatency(cfg, predictor).WithName(name)
80-
baseRouter.SetRequestBuilder(NewPDPredictionRequestBuilder())
77+
// Create the SLO aware router with P/D-aware request builder
78+
// The builder populates the PodType field based on llm-d.ai/role labels
79+
// to distinguish prefill vs decode pods in training data
80+
router := predictedlatency.NewPredictedLatency(cfg, predictor).WithName(name)
81+
router.SetRequestBuilder(NewPDPredictionRequestBuilder())
8182

82-
// Wrap with PDSLOAwareRouter to add P/D-specific hook logic
83-
// The wrapper delegates to the base router while adding P/D-specific header extraction
84-
pdRouter := &PDSLOAwareRouter{
85-
PredictedLatency: baseRouter,
86-
}
87-
88-
return pdRouter, nil
83+
return router, nil
8984
}

pkg/sidecar/proxy/connector_nixlv2.go

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"io"
2222
"net/http"
2323
"strings"
24-
"time"
2524

2625
"github.com/google/uuid"
2726
)
@@ -102,19 +101,11 @@ func (s *Server) runNIXLProtocolV2(w http.ResponseWriter, r *http.Request, prefi
102101
return
103102
}
104103

105-
// 2. Forward request to prefiller with timing measurement
104+
// 2. Forward request to prefiller
106105
s.logger.V(4).Info("sending prefill request", "to", prefillPodHostPort)
107106
s.logger.V(5).Info("Prefill request", "body", string(pbody))
108107
pw := &bufferedResponseWriter{}
109-
110-
// Measure prefill latency for EPP training data
111-
prefillStart := time.Now()
112108
prefillHandler.ServeHTTP(pw, preq)
113-
prefillLatency := time.Since(prefillStart)
114-
115-
s.logger.V(4).Info("prefill completed",
116-
"latency_ms", prefillLatency.Milliseconds(),
117-
"pod", prefillPodHostPort)
118109

119110
if isHTTPError(pw.statusCode) {
120111
s.logger.Error(err, "request failed", "code", pw.statusCode)
@@ -174,19 +165,11 @@ func (s *Server) runNIXLProtocolV2(w http.ResponseWriter, r *http.Request, prefi
174165
dreq.Body = io.NopCloser(strings.NewReader(string(dbody)))
175166
dreq.ContentLength = int64(len(dbody))
176167

177-
// 2. Forward to local decoder with prefill timing headers
168+
// 2. Forward to local decoder.
178169

179170
s.logger.V(5).Info("sending request to decoder", "body", string(dbody))
180-
181-
// Wrap response writer to inject prefill timing headers for EPP training
182-
timingWriter := &timingResponseWriter{
183-
ResponseWriter: w,
184-
prefillLatencyMs: float64(prefillLatency.Milliseconds()),
185-
prefillPodHost: prefillPodHostPort,
186-
}
187-
188-
if !s.forwardDataParallel || !s.dataParallelHandler(timingWriter, dreq) {
171+
if !s.forwardDataParallel || !s.dataParallelHandler(w, dreq) {
189172
s.logger.V(4).Info("sending request to decoder", "to", s.decoderURL.Host)
190-
s.decoderProxy.ServeHTTP(timingWriter, dreq)
173+
s.decoderProxy.ServeHTTP(w, dreq)
191174
}
192175
}

pkg/sidecar/proxy/timing_writer.go

Lines changed: 0 additions & 77 deletions
This file was deleted.

0 commit comments

Comments
 (0)