@@ -215,18 +215,18 @@ func (p *Plugin) Consumes() map[string]any {
215215 return map [string ]any {}
216216}
217217
218- // PrepareRequestData hashes prompt, finds longest prefix match and stores it in pod as attribute.
219- func (p * Plugin ) PrepareRequestData (ctx context.Context , request * types.LLMRequest , pods []types.Pod ) error {
220- hashes := hashPrompt (ctx , request , getBlockSize (pods , p .config ), p .config .MaxPrefixBlocksToMatch )
218+ // PrepareRequestData hashes the request prompt, finds the longest prefix match, and stores the resulting match info on every endpoint as an attribute; it always returns nil.
219+ func (p * Plugin ) PrepareRequestData (ctx context.Context , request * types.LLMRequest , endpoints []types.Endpoint ) error {
220+ hashes := hashPrompt (ctx , request , getBlockSize (endpoints , p .config ), p .config .MaxPrefixBlocksToMatch )
221221 state := & SchedulingContextState {
222222 PrefixHashes : hashes ,
223223 PrefixCacheServers : p .matchLongestPrefix (ctx , hashes ),
224224 }
225225 total := len (state .PrefixHashes )
226226
227- for _ , pod := range pods {
228- matchLen := state .PrefixCacheServers [ServerID (pod . GetPod ().NamespacedName )]
229- pod .Put (approximateprefix .PrefixCacheMatchInfoKey , approximateprefix .NewPrefixCacheMatchInfo (matchLen , total ))
227+ for _ , endpoint := range endpoints {
228+ matchLen := state .PrefixCacheServers [ServerID (endpoint . GetMetadata ().NamespacedName )] // match length recorded for this endpoint, keyed by its namespaced name
229+ endpoint .Put (approximateprefix .PrefixCacheMatchInfoKey , approximateprefix .NewPrefixCacheMatchInfo (matchLen , total )) // attach match length together with the total number of prompt hashes
230230 }
231231 return nil
232232}
0 commit comments