@@ -65,7 +65,7 @@ func getLatestMetricsForProfile(predictedLatencyCtx *predictedLatencyCtx) (*fwkd
6565func processPreRequestForLatencyPrediction (
6666 ctx context.Context ,
6767 predictor latencypredictor.PredictorInterface ,
68- requestBuilder PredictionRequestBuilder ,
68+ endpointRoleLabel string ,
6969 predictedLatencyCtx * predictedLatencyCtx ,
7070) error {
7171 logger := log .FromContext (ctx )
@@ -83,9 +83,9 @@ func processPreRequestForLatencyPrediction(
8383 target_endpoint_metadata := predictedLatencyCtx .targetMetadata
8484 prefix_cache_score := predictedLatencyCtx .prefixCacheScoresForEndpoints [target_endpoint_metadata .NamespacedName .Name ]
8585
86- // Build prediction request using the builder (ensures pod type is included for P/D )
87- in := requestBuilder . BuildPredictionRequest (
88- ctx ,
86+ // Build prediction request (pod type is included if endpointRoleLabel is configured)
87+ in := buildPredictionRequest (
88+ endpointRoleLabel ,
8989 target_endpoint_metadata ,
9090 m ,
9191 predictedLatencyCtx .schedulingRequest .Body .Completions .Prompt ,
@@ -121,7 +121,7 @@ func processFirstTokenForLatencyPrediction(
121121 ctx context.Context ,
122122 predictor latencypredictor.PredictorInterface ,
123123 streamingMode bool ,
124- requestBuilder PredictionRequestBuilder ,
124+ endpointRoleLabel string ,
125125 predictedLatencyCtx * predictedLatencyCtx ,
126126 now time.Time ,
127127 samplingMean float64 ,
@@ -141,10 +141,10 @@ func processFirstTokenForLatencyPrediction(
141141 targetEndpointMetadata := predictedLatencyCtx .targetMetadata
142142 prefixCacheScore := predictedLatencyCtx .prefixCacheScoresForEndpoints [targetEndpointMetadata .NamespacedName .Name ]
143143 logger .V (logutil .DEBUG ).Info ("Recording TTFT training data" , "ttft_ms" , predictedLatencyCtx .ttft , "prefixCacheScore" , prefixCacheScore )
144- recordTTFTTrainingData (ctx , predictor , requestBuilder , predictedLatencyCtx , m , targetEndpointMetadata , now , prefixCacheScore )
144+ recordTTFTTrainingData (ctx , predictor , endpointRoleLabel , predictedLatencyCtx , m , targetEndpointMetadata , now , prefixCacheScore )
145145
146146 if streamingMode {
147- predictFirstTPOT (ctx , predictor , requestBuilder , predictedLatencyCtx , targetEndpointMetadata )
147+ predictFirstTPOT (ctx , predictor , endpointRoleLabel , predictedLatencyCtx , targetEndpointMetadata )
148148 }
149149
150150 // Advance timestamp
@@ -165,17 +165,16 @@ func initializeSampler(ctx context.Context, predictedLatencyCtx *predictedLatenc
165165func recordTTFTTrainingData (
166166 ctx context.Context ,
167167 predictor latencypredictor.PredictorInterface ,
168- requestBuilder PredictionRequestBuilder ,
168+ endpointRoleLabel string ,
169169 predictedLatencyCtx * predictedLatencyCtx ,
170170 m * fwkdl.Metrics ,
171171 targetEndpointMetadata * fwkdl.EndpointMetadata ,
172172 now time.Time ,
173173 prefixCacheScore float64 ,
174174) {
175175 logger := log .FromContext (ctx )
176- // Build training entry using the builder
177- entry := requestBuilder .BuildTrainingEntry (
178- ctx ,
176+ entry := buildTrainingEntry (
177+ endpointRoleLabel ,
179178 targetEndpointMetadata ,
180179 m ,
181180 predictedLatencyCtx .schedulingRequest .Body .Completions .Prompt ,
@@ -193,7 +192,7 @@ func recordTTFTTrainingData(
193192func predictFirstTPOT (
194193 ctx context.Context ,
195194 predictor latencypredictor.PredictorInterface ,
196- requestBuilder PredictionRequestBuilder ,
195+ endpointRoleLabel string ,
197196 predictedLatencyCtx * predictedLatencyCtx ,
198197 targetEndpointMetadata * fwkdl.EndpointMetadata ,
199198) {
@@ -205,9 +204,8 @@ func predictFirstTPOT(
205204 return
206205 }
207206
208- // Build prediction request using the builder (ensures pod type is included for P/D)
209- in := requestBuilder .BuildPredictionRequest (
210- ctx ,
207+ in := buildPredictionRequest (
208+ endpointRoleLabel ,
211209 targetEndpointMetadata ,
212210 m ,
213211 predictedLatencyCtx .schedulingRequest .Body .Completions .Prompt ,
@@ -233,7 +231,7 @@ func predictFirstTPOT(
233231func processTokenForLatencyPrediction (
234232 ctx context.Context ,
235233 predictor latencypredictor.PredictorInterface ,
236- requestBuilder PredictionRequestBuilder ,
234+ endpointRoleLabel string ,
237235 predictedLatencyCtx * predictedLatencyCtx ,
238236 targetEndpointMetadata * fwkdl.EndpointMetadata ,
239237 now time.Time ,
@@ -265,9 +263,8 @@ func processTokenForLatencyPrediction(
265263 "error" , err )
266264 return
267265 }
268- // Record actual TPOT using builder
269- entry := requestBuilder .BuildTrainingEntry (
270- ctx ,
266+ entry := buildTrainingEntry (
267+ endpointRoleLabel ,
271268 targetEndpointMetadata ,
272269 m ,
273270 predictedLatencyCtx .schedulingRequest .Body .Completions .Prompt ,
@@ -283,9 +280,8 @@ func processTokenForLatencyPrediction(
283280
284281 // Sampled predict
285282 if predictedLatencyCtx .tokenSampler .shouldPredict (predictedLatencyCtx .generatedTokenCount ) {
286- // Build prediction request using the builder (ensures pod type is included for P/D)
287- in := requestBuilder .BuildPredictionRequest (
288- ctx ,
283+ in := buildPredictionRequest (
284+ endpointRoleLabel ,
289285 targetEndpointMetadata ,
290286 m ,
291287 predictedLatencyCtx .schedulingRequest .Body .Completions .Prompt ,
@@ -321,7 +317,7 @@ func bulkPredictWithMetrics(
321317 ctx context.Context ,
322318 predictor latencypredictor.PredictorInterface ,
323319 metricsStates []* fwkdl.Metrics ,
324- requestBuilder PredictionRequestBuilder ,
320+ endpointRoleLabel string ,
325321 targetEndpointsMetadatas []* fwkdl.EndpointMetadata ,
326322 prompts []string ,
327323 generatedTokenCounts []int ,
@@ -353,11 +349,11 @@ func bulkPredictWithMetrics(
353349 }
354350 }
355351
356- // Build bulk prediction requests using the builder
352+ // Build bulk prediction requests
357353 bulkRequests := make ([]latencypredictor.PredictionRequest , len (metricsStates ))
358354 for i := range metricsStates {
359- bulkRequests [i ] = requestBuilder . BuildPredictionRequest (
360- ctx ,
355+ bulkRequests [i ] = buildPredictionRequest (
356+ endpointRoleLabel ,
361357 targetEndpointsMetadatas [i ],
362358 metricsStates [i ],
363359 prompts [i ],
0 commit comments