Skip to content

Commit 06ed85d

Browse files
authored
refactor: add early validation in DP profile handler (#554)
- validate number of schedulingProfiles in EPP to be 1 otherwise return empty map to reduce computation on filter and scores. - add unit test Signed-off-by: Wen Zhou <wenzhou@redhat.com>
1 parent 6b6a1fc commit 06ed85d

File tree

2 files changed

+80
-2
lines changed

2 files changed

+80
-2
lines changed

pkg/plugins/profile/dp_profile_handler.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"net"
99
"strconv"
1010

11+
"sigs.k8s.io/controller-runtime/pkg/log"
1112
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
1213
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
1314
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
@@ -74,12 +75,19 @@ func (h *DataParallelProfileHandler) WithName(name string) *DataParallelProfileH
7475

7576
// Pick selects the SchedulingProfiles to run from the list of candidate profiles, while taking into consideration the request properties and the
7677
// previously executed cycles along with their results.
77-
func (h *DataParallelProfileHandler) Pick(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, profiles map[string]*framework.SchedulerProfile,
78+
func (h *DataParallelProfileHandler) Pick(ctx context.Context, _ *types.CycleState, _ *types.LLMRequest, profiles map[string]*framework.SchedulerProfile,
7879
profileResults map[string]*types.ProfileRunResult) map[string]*framework.SchedulerProfile {
7980
if len(profiles) == len(profileResults) { // all profiles have been executed already in previous call
8081
return map[string]*framework.SchedulerProfile{}
8182
}
82-
// return all profiles
83+
// Validate that only one profile is configured for Data Parallel mode
84+
if len(profiles) != 1 {
85+
log.FromContext(ctx).Error(nil, "Data Parallel profile handler requires exactly one scheduling profile",
86+
"profileCount", len(profiles),
87+
)
88+
return map[string]*framework.SchedulerProfile{} // return empty map for fast exit in later steps
89+
}
90+
// return only one profile
8391
return profiles
8492
}
8593

pkg/plugins/profile/dp_profile_handler_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88

99
"github.com/stretchr/testify/assert"
1010
"github.com/stretchr/testify/require"
11+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
1112
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
1213

1314
"github.com/llm-d/llm-d-inference-scheduler/pkg/common"
@@ -120,6 +121,75 @@ func TestDataParallelProfileHandlerFactoryInvalidJSON(t *testing.T) {
120121
}
121122
}
122123

124+
func Test_DataParallelProfileHandler_Pick(t *testing.T) {
125+
tests := []struct {
126+
name string
127+
profiles map[string]*framework.SchedulerProfile
128+
profileResults map[string]*types.ProfileRunResult
129+
expectEmptyResult bool
130+
expectLogError bool
131+
description string
132+
}{
133+
{
134+
name: "success: single profile, first call",
135+
profiles: map[string]*framework.SchedulerProfile{
136+
"default": {},
137+
},
138+
profileResults: map[string]*types.ProfileRunResult{},
139+
expectEmptyResult: false,
140+
expectLogError: false,
141+
description: "Should return the single profile to run",
142+
},
143+
{
144+
name: "success: single profile, second call (all already executed)",
145+
profiles: map[string]*framework.SchedulerProfile{
146+
"default": {},
147+
},
148+
profileResults: map[string]*types.ProfileRunResult{
149+
"default": newMockProfileRunResult(DefaultTestPodPort, "pod1"),
150+
},
151+
expectEmptyResult: true,
152+
expectLogError: false,
153+
description: "Should return empty map since all profiles have been executed already in previous call",
154+
},
155+
{
156+
name: "error: multiple profiles configured in EPP",
157+
profiles: map[string]*framework.SchedulerProfile{
158+
"profile1": {},
159+
"profile2": {},
160+
},
161+
profileResults: map[string]*types.ProfileRunResult{},
162+
expectEmptyResult: true,
163+
expectLogError: true,
164+
description: "Should return empty map and log error for multiple profiles",
165+
},
166+
{
167+
name: "error: zero profiles configured in EPP",
168+
profiles: map[string]*framework.SchedulerProfile{},
169+
profileResults: map[string]*types.ProfileRunResult{},
170+
expectEmptyResult: true,
171+
expectLogError: true,
172+
description: "Should return empty map and log error for zero profiles",
173+
},
174+
}
175+
176+
for _, tt := range tests {
177+
t.Run(tt.name, func(t *testing.T) {
178+
handler := NewDataParallelProfileHandler(8000).WithName("test-handler")
179+
ctx := context.Background()
180+
181+
result := handler.Pick(ctx, &types.CycleState{}, &types.LLMRequest{}, tt.profiles, tt.profileResults)
182+
183+
if tt.expectEmptyResult {
184+
assert.Empty(t, result, tt.description)
185+
} else {
186+
assert.NotEmpty(t, result, tt.description)
187+
assert.Equal(t, len(tt.profiles), len(result), "Should return all profiles when valid")
188+
}
189+
})
190+
}
191+
}
192+
123193
func Test_DataParallelProfileHandler_ProcessResults(t *testing.T) {
124194
tests := []struct {
125195
name string

0 commit comments

Comments
 (0)