Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ require (
k8s.io/client-go v0.33.1
sigs.k8s.io/controller-runtime v0.21.0
sigs.k8s.io/gateway-api v1.3.0
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250619193052-1362b1474677
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250623153357-7df5d3dfdafa
)

require (
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250618193051-f66be2d1e763 h
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250618193051-f66be2d1e763/go.mod h1:JYY3NYINfBa4ULLEowjqb/E+SkY1iqk4N2miCplP528=
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250619193052-1362b1474677 h1:ImKlQzbdg2USp/+w6kJbvrGQLVUe9ISb8n/yyJhSJ+4=
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250619193052-1362b1474677/go.mod h1:JYY3NYINfBa4ULLEowjqb/E+SkY1iqk4N2miCplP528=
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250623153357-7df5d3dfdafa h1:A9+jI1H50lG6tHMOaQfk6FiKx+1ehCbU1X947cVlCu4=
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250623153357-7df5d3dfdafa/go.mod h1:JYY3NYINfBa4ULLEowjqb/E+SkY1iqk4N2miCplP528=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
Expand Down
6 changes: 3 additions & 3 deletions pkg/plugins/filter/by_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ type ByLabels struct {
selector labels.Selector
}

// Name returns the name of the filter
func (blf *ByLabels) Name() string {
// Type returns the type of the filter
func (blf *ByLabels) Type() string {
return blf.name
}

// Filter filters out all pods that do not satisfy the label selector
func (blf *ByLabels) Filter(_ context.Context, _ *types.LLMRequest, _ *types.CycleState, pods []types.Pod) []types.Pod {
func (blf *ByLabels) Filter(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) []types.Pod {
filtered := []types.Pod{}

for _, pod := range pods {
Expand Down
12 changes: 6 additions & 6 deletions pkg/plugins/filter/pd_role_filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ var _ framework.Filter = &PrefillFilter{}
// PrefillFilter - filters out pods that are not marked with role Prefill
type PrefillFilter struct{}

// Name returns the name of the filter
func (pf *PrefillFilter) Name() string {
// Type returns the type of the filter
func (pf *PrefillFilter) Type() string {
return "prefill-filter"
}

// Filter filters out all pods that are not marked as "prefill"
func (pf *PrefillFilter) Filter(_ context.Context, _ *types.LLMRequest, _ *types.CycleState, pods []types.Pod) []types.Pod {
func (pf *PrefillFilter) Filter(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) []types.Pod {
filteredPods := []types.Pod{}

for _, pod := range pods {
Expand All @@ -48,13 +48,13 @@ var _ framework.Filter = &DecodeFilter{}
// DecodeFilter - filters out pods that are not marked with role Decode or Both
type DecodeFilter struct{}

// Name returns the name of the filter
func (df *DecodeFilter) Name() string {
// Type returns the type of the filter
func (df *DecodeFilter) Type() string {
return "decode-filter"
}

// Filter removes all pods that are not marked as "decode" or "both"
func (df *DecodeFilter) Filter(_ context.Context, _ *types.LLMRequest, _ *types.CycleState, pods []types.Pod) []types.Pod {
func (df *DecodeFilter) Filter(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) []types.Pod {
filteredPods := []types.Pod{}

for _, pod := range pods {
Expand Down
4 changes: 2 additions & 2 deletions pkg/plugins/pre-request/pd_prerequest.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ func NewPrefillHeaderHandler() *PrefillHeaderHandler {
// PrefillHeaderHandler PreRequest plugin
type PrefillHeaderHandler struct{}

// Name returns the PreRequest plugin name
func (p *PrefillHeaderHandler) Name() string {
// Type returns the type of the PreRequest plugin.
func (p *PrefillHeaderHandler) Type() string {
return "prefill-header"
}

Expand Down
8 changes: 4 additions & 4 deletions pkg/plugins/profile/pd-profile-handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ type PdProfileHandler struct {
prefixScorer *scorer.PrefixAwareScorer
}

// Name returns the name of the Profile Handler.
func (h *PdProfileHandler) Name() string {
// Type returns the type of the Profile Handler.
func (h *PdProfileHandler) Type() string {
return name
}

// Pick selects the SchedulingProfiles to run from the list of candidate profiles, while taking into consideration the request properties and the
// previously executed cycles along with their results.
func (h *PdProfileHandler) Pick(ctx context.Context, request *types.LLMRequest, profiles map[string]*framework.SchedulerProfile,
func (h *PdProfileHandler) Pick(ctx context.Context, _ *types.CycleState, request *types.LLMRequest, profiles map[string]*framework.SchedulerProfile,
profileResults map[string]*types.ProfileRunResult) map[string]*framework.SchedulerProfile {
if _, executed := profileResults[decode]; !executed {
// if decode profile was not executed yet, first let the scheduler run the decode profile
Expand Down Expand Up @@ -77,7 +77,7 @@ func (h *PdProfileHandler) Pick(ctx context.Context, request *types.LLMRequest,
// ProcessResults handles the outcome of the profile runs after the selected profiles ran.
// In case of an error in any of the profiles, the matching entry in the profileResults will contain nil, to indicate there was
// an error while running the profile.
func (h *PdProfileHandler) ProcessResults(_ context.Context, _ *types.LLMRequest,
func (h *PdProfileHandler) ProcessResults(_ context.Context, _ *types.CycleState, _ *types.LLMRequest,
profileResults map[string]*types.ProfileRunResult) (*types.SchedulingResult, error) {
if profileResults[decode] == nil { // if decode profile failed to run, we should fail
return nil, errors.New("failed to find available decode workers")
Expand Down
6 changes: 3 additions & 3 deletions pkg/plugins/scorer/kvcache-aware.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,14 @@ type KVCacheAwareScorer struct {
kvCacheIndexer *kvcache.Indexer
}

// Name returns the name of the scorer.
func (s *KVCacheAwareScorer) Name() string {
// Type returns the type of the scorer.
func (s *KVCacheAwareScorer) Type() string {
return kvCacheAwareScorerName
}

// Score scores the provided pod based on the KVCache index state.
// The returned scores are normalized to a range of 0-1.
func (s *KVCacheAwareScorer) Score(ctx context.Context, request *types.LLMRequest, _ *types.CycleState, pods []types.Pod) map[types.Pod]float64 {
func (s *KVCacheAwareScorer) Score(ctx context.Context, _ *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
loggerDebug := log.FromContext(ctx).WithName(kvCacheAwareScorerName).V(logutil.DEBUG)
if request == nil {
loggerDebug.Info("Request is nil, skipping scoring")
Expand Down
6 changes: 3 additions & 3 deletions pkg/plugins/scorer/load_aware_scorer.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ type LoadAwareScorer struct {
queueThreshold float64
}

// Name returns the scorer's name
func (s *LoadAwareScorer) Name() string {
// Type returns the type of the scorer.
func (s *LoadAwareScorer) Type() string {
return "load-aware-scorer"
}

Expand All @@ -41,7 +41,7 @@ func (s *LoadAwareScorer) Name() string {
// Pod with requests in the queue will get score between 0.5 and 0.
// Score 0 will get pod with number of requests in the queue equal to the threshold used in load-based filter (QueueingThresholdLoRA)
// In future pods with additional capacity will get score higher than 0.5
func (s *LoadAwareScorer) Score(_ context.Context, _ *types.LLMRequest, _ *types.CycleState, pods []types.Pod) map[types.Pod]float64 {
func (s *LoadAwareScorer) Score(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
scoredPods := make(map[types.Pod]float64)

for _, pod := range pods {
Expand Down
6 changes: 3 additions & 3 deletions pkg/plugins/scorer/prefix_aware.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ type PrefixAwareScorer struct {
podToPromptHits sync.Map
}

// Name returns the scorer's name
func (s *PrefixAwareScorer) Name() string {
// Type returns the type of the scorer.
func (s *PrefixAwareScorer) Type() string {
return "prefix-aware-scorer"
}

// Score scores the target pods based on the longest prefix match.
func (s *PrefixAwareScorer) Score(ctx context.Context, request *types.LLMRequest, _ *types.CycleState, pods []types.Pod) map[types.Pod]float64 {
func (s *PrefixAwareScorer) Score(ctx context.Context, _ *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
loggerDebug := log.FromContext(ctx).WithName(prefixAwareScorerName).V(logutil.DEBUG)
if request == nil {
loggerDebug.Info("Request is nil, skipping scoring")
Expand Down
4 changes: 2 additions & 2 deletions pkg/plugins/scorer/prefix_aware_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func TestPrefixAwareScorer(t *testing.T) {

// Score pods
pods := []types.Pod{pod1, pod2}
scores := s.Score(context.Background(), request, nil, pods)
scores := s.Score(context.Background(), nil, request, pods)

for p, score := range scores {
if score != test.expectedScores[p] {
Expand Down Expand Up @@ -168,7 +168,7 @@ func TestPrefixAwareScorerProfiling(t *testing.T) {
pods = append(pods, v)
}

scores := s.Score(context.Background(), request, nil, pods)
scores := s.Score(context.Background(), nil, request, pods)

highestScore := scores[name2Pod["pod"+strconv.Itoa(nPodsInStore-1)]]
if highestScore < 0.99 {
Expand Down
6 changes: 3 additions & 3 deletions pkg/plugins/scorer/session_affinity.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ func NewSessionAffinity() *SessionAffinity {
type SessionAffinity struct {
}

// Name returns the scorer's name
func (s *SessionAffinity) Name() string {
// Type returns the type of the scorer.
func (s *SessionAffinity) Type() string {
return "session-affinity-scorer"
}

// Score assign a high score to the pod used in previous requests and zero to others
func (s *SessionAffinity) Score(ctx context.Context, request *types.LLMRequest, _ *types.CycleState, pods []types.Pod) map[types.Pod]float64 {
func (s *SessionAffinity) Score(ctx context.Context, _ *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
scoredPods := make(map[types.Pod]float64)
sessionToken := request.Headers[sessionTokenHeader]
podName := ""
Expand Down
Loading