refactor: add early validation in DP profile handler (#554)

zdtsw · web-flow · commit 06ed85dd482c · 2026-01-21T10:46:49.000Z
- validate that the EPP configures exactly one scheduling profile; otherwise
  log an error and return an empty map so that later filter and scorer
  computation is skipped.
- add a table-driven unit test for Pick

Signed-off-by: Wen Zhou <wenzhou@redhat.com>
diff --git a/pkg/plugins/profile/dp_profile_handler.go b/pkg/plugins/profile/dp_profile_handler.go
@@ -8,6 +8,7 @@ import (
 	"net"
 	"strconv"
 
+	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
@@ -74,12 +75,19 @@ func (h *DataParallelProfileHandler) WithName(name string) *DataParallelProfileH
 
 // Pick selects the SchedulingProfiles to run from the list of candidate profiles, while taking into consideration the request properties and the
 // previously executed cycles along with their results.
-func (h *DataParallelProfileHandler) Pick(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, profiles map[string]*framework.SchedulerProfile,
+func (h *DataParallelProfileHandler) Pick(ctx context.Context, _ *types.CycleState, _ *types.LLMRequest, profiles map[string]*framework.SchedulerProfile,
 	profileResults map[string]*types.ProfileRunResult) map[string]*framework.SchedulerProfile {
 	if len(profiles) == len(profileResults) { // all profiles have been executed already in previous call
 		return map[string]*framework.SchedulerProfile{}
 	}
-	// return all profiles
+	// Validate that only one profile is configured for Data Parallel mode
+	if len(profiles) != 1 {
+		log.FromContext(ctx).Error(nil, "Data Parallel profile handler requires exactly one scheduling profile",
+			"profileCount", len(profiles),
+		)
+		return map[string]*framework.SchedulerProfile{} // return empty map for fast exit in later steps
+	}
+	// return only one profile
 	return profiles
 }
 
diff --git a/pkg/plugins/profile/dp_profile_handler_test.go b/pkg/plugins/profile/dp_profile_handler_test.go
@@ -8,6 +8,7 @@ import (
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
 
 	"github.com/llm-d/llm-d-inference-scheduler/pkg/common"
@@ -120,6 +121,75 @@ func TestDataParallelProfileHandlerFactoryInvalidJSON(t *testing.T) {
 	}
 }
 
+func Test_DataParallelProfileHandler_Pick(t *testing.T) {
+	tests := []struct {
+		name              string
+		profiles          map[string]*framework.SchedulerProfile
+		profileResults    map[string]*types.ProfileRunResult
+		expectEmptyResult bool
+		expectLogError    bool
+		description       string
+	}{
+		{
+			name: "success: single profile, first call",
+			profiles: map[string]*framework.SchedulerProfile{
+				"default": {},
+			},
+			profileResults:    map[string]*types.ProfileRunResult{},
+			expectEmptyResult: false,
+			expectLogError:    false,
+			description:       "Should return the single profile to run",
+		},
+		{
+			name: "success: single profile, second call (all already executed)",
+			profiles: map[string]*framework.SchedulerProfile{
+				"default": {},
+			},
+			profileResults: map[string]*types.ProfileRunResult{
+				"default": newMockProfileRunResult(DefaultTestPodPort, "pod1"),
+			},
+			expectEmptyResult: true,
+			expectLogError:    false,
+			description:       "Should return empty map since all profiles have been executed already in previous call",
+		},
+		{
+			name: "error: multiple profiles configured in EPP",
+			profiles: map[string]*framework.SchedulerProfile{
+				"profile1": {},
+				"profile2": {},
+			},
+			profileResults:    map[string]*types.ProfileRunResult{},
+			expectEmptyResult: true,
+			expectLogError:    true,
+			description:       "Should return empty map and log error for multiple profiles",
+		},
+		{
+			name:              "error: zero profiles configured in EPP",
+			profiles:          map[string]*framework.SchedulerProfile{},
+			profileResults:    map[string]*types.ProfileRunResult{},
+			expectEmptyResult: true,
+			expectLogError:    true,
+			description:       "Should return empty map and log error for zero profiles",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			handler := NewDataParallelProfileHandler(8000).WithName("test-handler")
+			ctx := context.Background()
+
+			result := handler.Pick(ctx, &types.CycleState{}, &types.LLMRequest{}, tt.profiles, tt.profileResults)
+
+			if tt.expectEmptyResult {
+				assert.Empty(t, result, tt.description)
+			} else {
+				assert.NotEmpty(t, result, tt.description)
+				assert.Equal(t, len(tt.profiles), len(result), "Should return all profiles when valid")
+			}
+		})
+	}
+}
+
 func Test_DataParallelProfileHandler_ProcessResults(t *testing.T) {
 	tests := []struct {
 		name           string