Skip to content

Commit 4bde565

Browse files
committed
minor refactoring based on reviews
Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com>
1 parent eb12968 commit 4bde565

File tree

3 files changed

+19
-23
lines changed

3 files changed

+19
-23
lines changed

docs/architecture.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -361,10 +361,6 @@ Scores pods based on the number of active requests being served per pod. Each re
361361
individually with its own TTL to ensure accurate timeout handling. Pods with fewer active
362362
requests receive higher scores.
363363

364-
The scorer maintains two data structures for efficiency:
365-
- A cache of individual requests with TTL for automatic cleanup
366-
- A count map for fast O(1) lookup during scoring
367-
368364
Scores are normalized to a range of 0-1, where pods with fewer active requests get higher scores.
369365

370366
- **Type**: `active-request-scorer`

pkg/plugins/scorer/active_request.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,27 +48,23 @@ func (r *requestEntry) String() string {
4848
// compile-time type assertion
4949
var _ framework.Scorer = &ActiveRequestScorer{}
5050

51-
// ActiveRequestScorerFactory defines the factory function for the LoadAwareScorer
52-
func ActiveRequestScorerFactory(name string, rawParameters json.RawMessage,
53-
handle plugins.Handle) (plugins.Plugin, error) {
51+
// ActiveRequestScorerFactory defines the factory function for the ActiveRequestScorer.
52+
func ActiveRequestScorerFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) {
5453
parameters := ActiveRequestScorerParameters{RequestTimeout: defaultRequestTimeout}
5554
if rawParameters != nil {
5655
if err := json.Unmarshal(rawParameters, &parameters); err != nil {
57-
return nil, fmt.Errorf("failed to parse the parameters of the '%s' scorer - %w",
58-
ActiveRequestScorerType, err)
56+
return nil, fmt.Errorf("failed to parse the parameters of the '%s' scorer - %w", ActiveRequestScorerType, err)
5957
}
6058
}
6159

6260
return NewActiveRequestScorer(handle.Context(), parameters.RequestTimeout).WithName(name), nil
6361
}
6462

65-
// NewActiveRequestScorer creates a new local-serving-queue scorer.
63+
// NewActiveRequestScorer creates a new ActiveRequestScorer scorer.
6664
func NewActiveRequestScorer(ctx context.Context, requestTimeout int) *ActiveRequestScorer {
6765
if requestTimeout <= 0 {
6866
requestTimeout = defaultRequestTimeout
69-
log.FromContext(ctx).
70-
V(logutil.DEFAULT).
71-
Info("Request timeout should be positive, using default request timeout")
67+
log.FromContext(ctx).V(logutil.DEFAULT).Info("Request timeout should be positive, using default request timeout")
7268
}
7369

7470
// cache for individual requests with their own TTL
@@ -127,11 +123,13 @@ func (s *ActiveRequestScorer) WithName(name string) *ActiveRequestScorer {
127123
func (s *ActiveRequestScorer) Score(ctx context.Context, _ *types.CycleState, _ *types.LLMRequest,
128124
pods []types.Pod) map[types.Pod]float64 {
129125
scoredPods := make(map[string]int)
130-
sum := 0
126+
maxCount := 0
131127
s.mutex.RLock()
132128
for podName, count := range s.podCounts {
133129
scoredPods[podName] = count
134-
sum += count
130+
if count >= maxCount {
131+
maxCount = count
132+
}
135133
}
136134
s.mutex.RUnlock()
137135

@@ -142,7 +140,7 @@ func (s *ActiveRequestScorer) Score(ctx context.Context, _ *types.CycleState, _
142140
if count == 0 {
143141
scoredPodsMap[pod] = 1.0 // no requests means highest score
144142
} else {
145-
scoredPodsMap[pod] = float64(sum-count) / float64(sum)
143+
scoredPodsMap[pod] = float64(maxCount-count) / float64(maxCount)
146144
}
147145
} else {
148146
scoredPodsMap[pod] = 1.0
@@ -203,22 +201,24 @@ func (s *ActiveRequestScorer) PostResponse(ctx context.Context, request *types.L
203201
// incrementPodCount increments the request count for a pod.
204202
func (s *ActiveRequestScorer) incrementPodCount(podName string) {
205203
s.mutex.Lock()
204+
defer s.mutex.Unlock()
205+
206206
s.podCounts[podName]++
207-
s.mutex.Unlock()
208207
}
209208

210209
// decrementPodCount decrements the request count for a pod and removes
211210
// the entry if count reaches zero.
212211
func (s *ActiveRequestScorer) decrementPodCount(podName string) {
213212
s.mutex.Lock()
213+
defer s.mutex.Unlock()
214+
214215
if count, exists := s.podCounts[podName]; exists {
215216
if count <= 1 {
216217
delete(s.podCounts, podName)
217218
} else {
218219
s.podCounts[podName] = count - 1
219220
}
220221
}
221-
s.mutex.Unlock()
222222
}
223223

224224
func cleanCachePeriodically(ctx context.Context, cache *ttlcache.Cache[string, *requestEntry], requestTimeout int) {

pkg/plugins/scorer/active_request_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,30 +55,30 @@ func TestActiveRequestScorer_Score(t *testing.T) {
5555
name: "all pods in cache with different request counts",
5656
setupCache: func(s *ActiveRequestScorer) {
5757
s.mutex.Lock()
58-
s.podCounts["default/pod-a"] = 2
58+
s.podCounts["default/pod-a"] = 3
5959
s.podCounts["default/pod-b"] = 0
6060
s.podCounts["default/pod-c"] = 6
6161
s.mutex.Unlock()
6262
},
6363
input: []types.Pod{podA, podB, podC},
6464
wantScores: map[types.Pod]float64{
65-
podA: 0.75,
65+
podA: 0.5,
6666
podB: 1.0,
67-
podC: 0.25,
67+
podC: 0.0,
6868
},
6969
},
7070
{
7171
name: "some pods in cache",
7272
setupCache: func(s *ActiveRequestScorer) {
7373
s.mutex.Lock()
74-
s.podCounts["default/pod-a"] = 3
74+
s.podCounts["default/pod-a"] = 4
7575
s.podCounts["default/pod-c"] = 1
7676
// pod-b not in cache
7777
s.mutex.Unlock()
7878
},
7979
input: []types.Pod{podA, podB, podC},
8080
wantScores: map[types.Pod]float64{
81-
podA: 0.25,
81+
podA: 0.0,
8282
podB: 1.0,
8383
podC: 0.75,
8484
},

0 commit comments

Comments
 (0)