@@ -48,27 +48,23 @@ func (r *requestEntry) String() string {
4848// compile-time type assertion
4949var _ framework.Scorer = & ActiveRequestScorer {}
5050
51- // ActiveRequestScorerFactory defines the factory function for the LoadAwareScorer
52- func ActiveRequestScorerFactory (name string , rawParameters json.RawMessage ,
53- handle plugins.Handle ) (plugins.Plugin , error ) {
51+ // ActiveRequestScorerFactory defines the factory function for the ActiveRequestScorer.
52+ func ActiveRequestScorerFactory (name string , rawParameters json.RawMessage , handle plugins.Handle ) (plugins.Plugin , error ) {
5453 parameters := ActiveRequestScorerParameters {RequestTimeout : defaultRequestTimeout }
5554 if rawParameters != nil {
5655 if err := json .Unmarshal (rawParameters , & parameters ); err != nil {
57- return nil , fmt .Errorf ("failed to parse the parameters of the '%s' scorer - %w" ,
58- ActiveRequestScorerType , err )
56+ return nil , fmt .Errorf ("failed to parse the parameters of the '%s' scorer - %w" , ActiveRequestScorerType , err )
5957 }
6058 }
6159
6260 return NewActiveRequestScorer (handle .Context (), parameters .RequestTimeout ).WithName (name ), nil
6361}
6462
65- // NewActiveRequestScorer creates a new local-serving-queue scorer.
63+ // NewActiveRequestScorer creates a new ActiveRequestScorer scorer.
6664func NewActiveRequestScorer (ctx context.Context , requestTimeout int ) * ActiveRequestScorer {
6765 if requestTimeout <= 0 {
6866 requestTimeout = defaultRequestTimeout
69- log .FromContext (ctx ).
70- V (logutil .DEFAULT ).
71- Info ("Request timeout should be positive, using default request timeout" )
67+ log .FromContext (ctx ).V (logutil .DEFAULT ).Info ("Request timeout should be positive, using default request timeout" )
7268 }
7369
7470 // cache for individual requests with their own TTL
@@ -127,11 +123,13 @@ func (s *ActiveRequestScorer) WithName(name string) *ActiveRequestScorer {
127123func (s * ActiveRequestScorer ) Score (ctx context.Context , _ * types.CycleState , _ * types.LLMRequest ,
128124 pods []types.Pod ) map [types.Pod ]float64 {
129125 scoredPods := make (map [string ]int )
130- sum := 0
126+ maxCount := 0
131127 s .mutex .RLock ()
132128 for podName , count := range s .podCounts {
133129 scoredPods [podName ] = count
134- sum += count
130+ if count >= maxCount {
131+ maxCount = count
132+ }
135133 }
136134 s .mutex .RUnlock ()
137135
@@ -142,7 +140,7 @@ func (s *ActiveRequestScorer) Score(ctx context.Context, _ *types.CycleState, _
142140 if count == 0 {
143141 scoredPodsMap [pod ] = 1.0 // no requests means highest score
144142 } else {
145- scoredPodsMap [pod ] = float64 (sum - count ) / float64 (sum )
143+ scoredPodsMap [pod ] = float64 (maxCount - count ) / float64 (maxCount )
146144 }
147145 } else {
148146 scoredPodsMap [pod ] = 1.0
@@ -203,22 +201,24 @@ func (s *ActiveRequestScorer) PostResponse(ctx context.Context, request *types.L
203201// incrementPodCount increments the request count for a pod.
204202func (s * ActiveRequestScorer ) incrementPodCount (podName string ) {
205203 s .mutex .Lock ()
204+ defer s .mutex .Unlock ()
205+
206206 s .podCounts [podName ]++
207- s .mutex .Unlock ()
208207}
209208
210209// decrementPodCount decrements the request count for a pod and removes
211210// the entry if count reaches zero.
212211func (s * ActiveRequestScorer ) decrementPodCount (podName string ) {
213212 s .mutex .Lock ()
213+ defer s .mutex .Unlock ()
214+
214215 if count , exists := s .podCounts [podName ]; exists {
215216 if count <= 1 {
216217 delete (s .podCounts , podName )
217218 } else {
218219 s .podCounts [podName ] = count - 1
219220 }
220221 }
221- s .mutex .Unlock ()
222222}
223223
224224func cleanCachePeriodically (ctx context.Context , cache * ttlcache.Cache [string , * requestEntry ], requestTimeout int ) {
0 commit comments