Skip to content

Commit 1e04f9d

Browse files
committed
Avoid usage of reflection.
Instead of inspecting the state of the LRU cache directly, it's possible to verify behavior via the usual scoring API. Signed-off-by: usize <mofoster@redhat.com>
1 parent f4e8f32 commit 1e04f9d

File tree

2 files changed

+32
-45
lines changed

2 files changed

+32
-45
lines changed

pkg/plugins/scorer/no_hit_lru.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ func NoHitLRUFactory(name string, rawParameters json.RawMessage, handle plugins.
6868
return NewNoHitLRU(handle.Context(), &parameters).WithName(name), nil
6969
}
7070

71-
7271
// NewNoHitLRU creates a new NoHitLRU scorer
7372
func NewNoHitLRU(ctx context.Context, params *NoHitLRUParameters) *NoHitLRU {
7473
prefixPluginName := prefix.PrefixCachePluginType

pkg/plugins/scorer/no_hit_lru_test.go

Lines changed: 32 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,10 @@ package scorer_test
33
import (
44
"context"
55
"encoding/json"
6-
"reflect"
76
"strings"
87
"testing"
9-
"unsafe"
108

119
"github.com/google/go-cmp/cmp"
12-
lru "github.com/hashicorp/golang-lru/v2"
1310

1411
k8stypes "k8s.io/apimachinery/pkg/types"
1512
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
@@ -307,19 +304,36 @@ func TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests(t *testing.T) {
307304
}
308305
}
309306

310-
assertLRU := func(expected []string) {
307+
// Test LRU behavior indirectly through scoring rather than internal state
308+
assertHighestScoredPod := func(expectedPod types.Pod, testName string) {
311309
t.Helper()
312-
actual := getLRUKeys(t, scorer)
313-
if !reflect.DeepEqual(expected, actual) {
314-
t.Fatalf("expected LRU keys %v, got %v", expected, actual)
310+
coldReq := &types.LLMRequest{RequestId: testName + "-scoring-check"}
311+
scores := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReq, pods)
312+
313+
highestScore := -1.0
314+
var highestPod types.Pod
315+
for pod, score := range scores {
316+
if score > highestScore {
317+
highestScore = score
318+
highestPod = pod
319+
}
320+
}
321+
322+
if highestPod.GetPod().NamespacedName.String() != expectedPod.GetPod().NamespacedName.String() {
323+
t.Fatalf("expected %s to have highest score for LRU behavior, but %s had highest score (%f). All scores: %+v",
324+
expectedPod.GetPod().NamespacedName.String(),
325+
highestPod.GetPod().NamespacedName.String(),
326+
highestScore,
327+
scores)
315328
}
316329
}
317330

318331
t.Run("initial cold request seeds cache", func(t *testing.T) {
319332
coldReqA := &types.LLMRequest{RequestId: "cold-1"}
320333
scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqA, pods)
321334
scorer.PreRequest(ctx, coldReqA, requestToPod(podA), 0)
322-
assertLRU([]string{"default/pod-a"})
335+
// After podA handles a cold request, other pods should score higher for new cold requests
336+
assertHighestScoredPod(podB, "after-podA-used")
323337
})
324338

325339
t.Run("unused pods rank above existing ones", func(t *testing.T) {
@@ -353,49 +367,23 @@ func TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests(t *testing.T) {
353367
if postWarmScores[podB] <= postWarmScores[podA] {
354368
t.Fatalf("expected warm request to leave ordering unchanged, scores=%+v", postWarmScores)
355369
}
356-
assertLRU([]string{"default/pod-a"})
357370
})
358371

359372
t.Run("second cold request rotates to podB", func(t *testing.T) {
360-
coldReqCheck := &types.LLMRequest{RequestId: "cold-check"}
361-
scorer.PreRequest(ctx, coldReqCheck, requestToPod(podB), 0)
362-
373+
// Simulate podB handling a cold request
363374
coldReqB := &types.LLMRequest{RequestId: "cold-2"}
364-
scoresAfterB := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqB, pods)
365-
if scoresAfterB[podC] <= scoresAfterB[podA] {
366-
t.Fatalf("expected pod-c to outrank pod-a after pod-a handled previous cold request, scores=%+v", scoresAfterB)
367-
}
368-
if scoresAfterB[podC] <= scoresAfterB[podB] {
369-
t.Fatalf("expected pod-c to outrank pod-b after pod-b handled previous cold request, scores=%+v", scoresAfterB)
370-
}
371-
scorer.PreRequest(ctx, coldReqB, requestToPod(podC), 0)
372-
assertLRU([]string{"default/pod-a", "default/pod-b", "default/pod-c"})
375+
scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqB, pods)
376+
scorer.PreRequest(ctx, coldReqB, requestToPod(podB), 0)
377+
// Now podC should score highest since both podA and podB have been used
378+
assertHighestScoredPod(podC, "after-podB-used")
373379
})
374380

375381
t.Run("third cold request rotates back to podA", func(t *testing.T) {
382+
// Simulate podC handling a cold request
376383
coldReqC := &types.LLMRequest{RequestId: "cold-3"}
377-
scoresAfterC := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqC, pods)
378-
if scoresAfterC[podA] <= scoresAfterC[podB] {
379-
t.Fatalf("expected pod-a to outrank pod-b after pod-a handled previous cold request, scores=%+v", scoresAfterC)
380-
}
381-
if scoresAfterC[podA] <= scoresAfterC[podC] {
382-
t.Fatalf("expected pod-a to outrank pod-c after pod-a handled previous cold request, scores=%+v", scoresAfterC)
383-
}
384-
scorer.PreRequest(ctx, coldReqC, requestToPod(podA), 0)
385-
assertLRU([]string{"default/pod-b", "default/pod-c", "default/pod-a"})
384+
scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqC, pods)
385+
scorer.PreRequest(ctx, coldReqC, requestToPod(podC), 0)
386+
// Now podA should score highest again (LRU rotation)
387+
assertHighestScoredPod(podA, "after-podC-used")
386388
})
387389
}
388-
389-
func getLRUKeys(t *testing.T, s *scorer.NoHitLRU) []string {
390-
t.Helper()
391-
v := reflect.ValueOf(s).Elem().FieldByName("lruCache")
392-
if !v.IsValid() {
393-
t.Fatal("lruCache field not found")
394-
}
395-
v = reflect.NewAt(v.Type(), unsafe.Pointer(v.UnsafeAddr())).Elem()
396-
cache, ok := v.Interface().(*lru.Cache[string, struct{}])
397-
if !ok {
398-
t.Fatalf("unexpected lru cache type %T", v.Interface())
399-
}
400-
return cache.Keys()
401-
}

0 commit comments

Comments
 (0)