diff --git a/operator/internal/webhook/admission/pgs/validation/podgangset.go b/operator/internal/webhook/admission/pgs/validation/podgangset.go index a729fb40f..bf3f75b56 100644 --- a/operator/internal/webhook/admission/pgs/validation/podgangset.go +++ b/operator/internal/webhook/admission/pgs/validation/podgangset.go @@ -17,6 +17,7 @@ package validation import ( + "fmt" "reflect" "slices" "strings" @@ -33,6 +34,10 @@ import ( "k8s.io/apimachinery/pkg/util/validation/field" ) +const ( + maxCombinedResourceNameLength = 45 +) + var allowedStartupTypes = sets.New(grovecorev1alpha1.CliqueStartupTypeInOrder, grovecorev1alpha1.CliqueStartupTypeAnyOrder, grovecorev1alpha1.CliqueStartupTypeExplicit) type pgsValidator struct { @@ -53,7 +58,8 @@ func newPGSValidator(pgs *grovecorev1alpha1.PodGangSet, operation admissionv1.Op func (v *pgsValidator) validate() ([]string, error) { allErrs := field.ErrorList{} - allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&v.pgs.ObjectMeta, true, apivalidation.NameIsDNSSubdomain, field.NewPath("metadata"))...) + allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&v.pgs.ObjectMeta, true, + apivalidation.NameIsDNSSubdomain, field.NewPath("metadata"))...) fldPath := field.NewPath("spec") warnings, errs := v.validatePodGangSetSpec(fldPath) if len(errs) != 0 { @@ -101,19 +107,22 @@ func (v *pgsValidator) validatePodCliqueTemplates(fldPath *field.Path) ([]string allErrs = append(allErrs, field.Required(fldPath, "at least one PodClique must be defined")) } + // Get all clique names that belong to scaling groups + scalingGroupCliqueNames := v.getScalingGroupCliqueNames() + cliqueNames := make([]string, 0, len(cliqueTemplateSpecs)) cliqueRoles := make([]string, 0, len(cliqueTemplateSpecs)) schedulerNames := make([]string, 0, len(cliqueTemplateSpecs)) for _, cliqueTemplateSpec := range cliqueTemplateSpecs { - cliqueNames = append(cliqueNames, cliqueTemplateSpec.Name) - cliqueRoles = append(cliqueRoles, cliqueTemplateSpec.Spec.RoleName) - warns, errs := v.validatePodCliqueTemplateSpec(cliqueTemplateSpec, fldPath) + warns, errs := v.validatePodCliqueTemplateSpec(cliqueTemplateSpec, fldPath, scalingGroupCliqueNames) if len(errs) != 0 { allErrs = append(allErrs, errs...) } if len(warns) != 0 { warnings = append(warnings, warns...) } + cliqueNames = append(cliqueNames, cliqueTemplateSpec.Name) + cliqueRoles = append(cliqueRoles, cliqueTemplateSpec.Spec.RoleName) schedulerNames = append(schedulerNames, cliqueTemplateSpec.Spec.PodSpec.SchedulerName) } @@ -137,6 +146,31 @@ func (v *pgsValidator) validatePodCliqueTemplates(fldPath *field.Path) ([]string return warnings, allErrs } +func (v *pgsValidator) validatePodCliqueNameConstrains(fldPath *field.Path, cliqueTemplateSpec *grovecorev1alpha1.PodCliqueTemplateSpec, scalingGroupCliqueNames sets.Set[string]) field.ErrorList { + allErrs := field.ErrorList{} + if err := apivalidation.NameIsDNSSubdomain(cliqueTemplateSpec.Name, false); err != nil { + allErrs = append(allErrs, field.Invalid(fldPath.Child("name"), cliqueTemplateSpec.Name, + "invalid PodCliqueTemplateSpec name, must be a valid DNS subdomain")) + } + + // Only validate pod name constraints for PodCliques that are NOT part of any scaling group + // any pod clique that is part of scaling groups will be checked as part of scaling group pod name constraints. + if !scalingGroupCliqueNames.Has(cliqueTemplateSpec.Name) { + allErrs = append(allErrs, validateStandalonePodClique(fldPath, v, cliqueTemplateSpec)...) + } + return allErrs +} + +func validateStandalonePodClique(fldPath *field.Path, v *pgsValidator, cliqueTemplateSpec *grovecorev1alpha1.PodCliqueTemplateSpec) field.ErrorList { + allErrs := field.ErrorList{} + if err := validatePodNameConstraints(v.pgs.Name, "", cliqueTemplateSpec.Name); err != nil { + // add error to each of filed paths that compose the podName in case of a PodCliqueTemplateSpec + allErrs = append(allErrs, field.Invalid(fldPath.Child("name"), cliqueTemplateSpec.Name, err.Error())) + allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), v.pgs.Name, err.Error())) + } + return allErrs +} + func (v *pgsValidator) validatePodGangSchedulingPolicyConfig(schedulingPolicyConfig *grovecorev1alpha1.SchedulingPolicyConfig, fldPath *field.Path) field.ErrorList { allErrs := field.ErrorList{} if schedulingPolicyConfig == nil { @@ -156,12 +190,18 @@ func (v *pgsValidator) validatePodCliqueScalingGroupConfigs(fldPath *field.Path) }) pclqScalingGroupNames := make([]string, 0, len(v.pgs.Spec.Template.PodCliqueScalingGroupConfigs)) var cliqueNamesAcrossAllScalingGroups []string + groupNameFiledPath := fldPath.Child("name") for _, scalingGroupConfig := range v.pgs.Spec.Template.PodCliqueScalingGroupConfigs { + if err := apivalidation.NameIsDNSSubdomain(scalingGroupConfig.Name, false); err != nil { + allErrs = append(allErrs, field.Invalid(groupNameFiledPath, scalingGroupConfig.Name, + "invalid PodCliqueScalingGroupConfig name, must be a valid DNS subdomain")) + } pclqScalingGroupNames = append(pclqScalingGroupNames, scalingGroupConfig.Name) cliqueNamesAcrossAllScalingGroups = append(cliqueNamesAcrossAllScalingGroups, scalingGroupConfig.CliqueNames...) // validate that scaling groups only contains clique names that are defined in the PodGangSet. - allErrs = append(allErrs, validateScalingGroupPodCliqueNames(allPodGangSetCliqueNames, scalingGroupConfig.CliqueNames, fldPath.Child("cliqueNames"))...) + allErrs = append(allErrs, v.validateScalingGroupPodCliqueNames(scalingGroupConfig.Name, allPodGangSetCliqueNames, + scalingGroupConfig.CliqueNames, fldPath.Child("cliqueNames"), groupNameFiledPath)...) } // validate that the scaling group names are unique @@ -195,7 +235,8 @@ func (v *pgsValidator) validateTerminationDelay(fldPath *field.Path) field.Error return allErrs } -func (v *pgsValidator) validatePodCliqueTemplateSpec(cliqueTemplateSpec *grovecorev1alpha1.PodCliqueTemplateSpec, fldPath *field.Path) ([]string, field.ErrorList) { +func (v *pgsValidator) validatePodCliqueTemplateSpec(cliqueTemplateSpec *grovecorev1alpha1.PodCliqueTemplateSpec, + fldPath *field.Path, scalingGroupCliqueNames sets.Set[string]) ([]string, field.ErrorList) { allErrs := field.ErrorList{} allErrs = append(allErrs, validateNonEmptyStringField(cliqueTemplateSpec.Name, fldPath.Child("name"))...) @@ -206,6 +247,7 @@ func (v *pgsValidator) validatePodCliqueTemplateSpec(cliqueTemplateSpec *groveco if len(errs) != 0 { allErrs = append(allErrs, errs...) } + allErrs = append(allErrs, v.validatePodCliqueNameConstrains(fldPath, cliqueTemplateSpec, scalingGroupCliqueNames)...) return warnings, allErrs } @@ -280,8 +322,17 @@ func (v *pgsValidator) checkNetworkPackGroupConfigsForPartialPCSGInclusions(fldP return allErrs } +// getScalingGroupCliqueNames returns a set of all clique names that belong to scaling groups +func (v *pgsValidator) getScalingGroupCliqueNames() sets.Set[string] { + scalingGroupCliqueNames := sets.New[string]() + for _, scalingGroupConfig := range v.pgs.Spec.Template.PodCliqueScalingGroupConfigs { + scalingGroupCliqueNames.Insert(scalingGroupConfig.CliqueNames...) + } + return scalingGroupCliqueNames +} + // checks if the PodClique names specified in PodCliqueScalingGroupConfig refer to a defined clique in the PodGangSet. -func validateScalingGroupPodCliqueNames(allPclqNames, pclqNameInScalingGrp []string, fldPath *field.Path) field.ErrorList { +func (v *pgsValidator) validateScalingGroupPodCliqueNames(pcsgName string, allPclqNames, pclqNameInScalingGrp []string, fldPath, pcsgNameFieldPath *field.Path) field.ErrorList { allErrs := field.ErrorList{} _, unidentifiedPclqNames := lo.Difference(allPclqNames, lo.Uniq(pclqNameInScalingGrp)) @@ -289,6 +340,15 @@ func validateScalingGroupPodCliqueNames(allPclqNames, pclqNameInScalingGrp []str allErrs = append(allErrs, field.Invalid(fldPath, strings.Join(unidentifiedPclqNames, ","), "unidentified PodClique names found")) } + // validate scaling group PodClique pods names are valid. + for _, pclqName := range pclqNameInScalingGrp { + if err := validatePodNameConstraints(v.pgs.Name, pcsgName, pclqName); err != nil { + // add error to each of filed paths that compose the podName + allErrs = append(allErrs, field.Invalid(fldPath.Child("name"), pclqName, err.Error())) + allErrs = append(allErrs, field.Invalid(pcsgNameFieldPath, pclqName, err.Error())) + allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), v.pgs.Name, err.Error())) + } + } return allErrs } @@ -302,12 +362,14 @@ func (v *pgsValidator) validatePodCliqueSpec(name string, cliqueSpec grovecorev1 // Ideally this should never happen, the defaulting webhook will always set the default value for minAvailable. if cliqueSpec.MinAvailable == nil { allErrs = append(allErrs, field.Required(fldPath.Child("minAvailable"), "field is required")) - } - if *cliqueSpec.MinAvailable <= 0 { - allErrs = append(allErrs, field.Invalid(fldPath.Child("minAvailable"), *cliqueSpec.MinAvailable, "must be greater than 0")) - } - if *cliqueSpec.MinAvailable > cliqueSpec.Replicas { - allErrs = append(allErrs, field.Invalid(fldPath.Child("minAvailable"), *cliqueSpec.MinAvailable, "minAvailable must not be greater than replicas")) + } else { + // prevent nil pointer dereference, no point checking the value if it is nil + if *cliqueSpec.MinAvailable <= 0 { + allErrs = append(allErrs, field.Invalid(fldPath.Child("minAvailable"), *cliqueSpec.MinAvailable, "must be greater than 0")) + } + if *cliqueSpec.MinAvailable > cliqueSpec.Replicas { + allErrs = append(allErrs, field.Invalid(fldPath.Child("minAvailable"), *cliqueSpec.MinAvailable, "minAvailable must not be greater than replicas")) + } } if v.isStartupTypeExplicit() && len(cliqueSpec.StartsAfter) > 0 { @@ -469,3 +531,34 @@ func clearContainerImages(containers []corev1.Container) { containers[i].Image = "" } } + +// validatePodNameConstraints validates Grove pod name component constraints. +// This function validates the constraints for component names that will be used +// to construct pod names. +// +// Pod names that belong to a PCSG follow the format: +// ----- +// +// Pod names that do not belong to a PCSG follow the format: +// --- +// +// Constraints: +// - Random string + hyphens: 10 chars for PCSG pods, 8 chars for non-PCSG pods +// - Max sum of all resource name characters: 45 chars +func validatePodNameConstraints(pgsName, pcsgName, pclqName string) error { + // Check resource name constraints + resourceNameLength := len(pgsName) + len(pclqName) + if pcsgName != "" { + resourceNameLength += len(pcsgName) + } + + if resourceNameLength > maxCombinedResourceNameLength { + if pcsgName != "" { + return fmt.Errorf("combined resource name length %d exceeds 45-character limit required for pod naming. Consider shortening: PodGangSet '%s', PodCliqueScalingGroup '%s', or PodClique '%s'", + resourceNameLength, pgsName, pcsgName, pclqName) + } + return fmt.Errorf("combined resource name length %d exceeds 45-character limit required for pod naming. Consider shortening: PodGangSet '%s' or PodClique '%s'", + resourceNameLength, pgsName, pclqName) + } + return nil +} diff --git a/operator/internal/webhook/admission/pgs/validation/podgangset_test.go b/operator/internal/webhook/admission/pgs/validation/podgangset_test.go new file mode 100644 index 000000000..2b1558c8d --- /dev/null +++ b/operator/internal/webhook/admission/pgs/validation/podgangset_test.go @@ -0,0 +1,506 @@ +// /* +// Copyright 2025 The Grove Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// */ + +package validation + +import ( + "errors" + "fmt" + "testing" + "time" + + grovecorev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1" + + "github.com/stretchr/testify/assert" + admissionv1 "k8s.io/api/admission/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/validation/field" + "k8s.io/utils/ptr" +) + +// Helper function to create a dummy valid PodGangSet +// with a minimal configuration +// This is used to test the validation logic without needing a full setup. +func createDummyPodGangSet(name string) *grovecorev1alpha1.PodGangSet { + return &grovecorev1alpha1.PodGangSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "default", + }, + Spec: grovecorev1alpha1.PodGangSetSpec{ + Replicas: 1, + Template: grovecorev1alpha1.PodGangSetTemplateSpec{ + TerminationDelay: &metav1.Duration{Duration: 30 * time.Second}, + StartupType: ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder), + Cliques: []*grovecorev1alpha1.PodCliqueTemplateSpec{ + { + Name: "test", + Labels: nil, + Annotations: nil, + Spec: grovecorev1alpha1.PodCliqueSpec{ + Replicas: 1, + RoleName: "dummy-role", + MinAvailable: ptr.To[int32](1), + }, + }, + }, + }, + }, + } +} + +// createDummyPodCliqueTemplate Helper function to create a valid PodCliqueTemplateSpec with predefined values +// This is used to test the validation logic without needing a full setup. +// It creates a PodCliqueTemplateSpec with a single container and minimal configuration. +func createDummyPodCliqueTemplate(name string) *grovecorev1alpha1.PodCliqueTemplateSpec { + return &grovecorev1alpha1.PodCliqueTemplateSpec{ + Name: name, + Spec: grovecorev1alpha1.PodCliqueSpec{ + Replicas: 1, + MinAvailable: ptr.To(int32(1)), + PodSpec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Image: "test:latest", + }, + }, + }, + RoleName: fmt.Sprintf("dummy-%s-role", name), + }, + } +} + +// Helper function to create a PodCliqueScalingGroupConfig +func createScalingGroupConfig(name string, cliqueNames []string) grovecorev1alpha1.PodCliqueScalingGroupConfig { + return grovecorev1alpha1.PodCliqueScalingGroupConfig{ + Name: name, + CliqueNames: cliqueNames, + } +} + +func TestPodCliqueTemplateNameValidation(t *testing.T) { + testCases := []struct { + description string + pgsName string + cliqueNames []string + expectError bool + expectedErrMsg string + expectedErrPath string + }{ + { + description: "Valid PodClique template names", + pgsName: "inference", + cliqueNames: []string{"prefill", "decode"}, + expectError: false, + }, + { + description: "PodClique template name that exceeds character limit", + pgsName: "verylongpodgangsetnamethatisverylong", // 34 chars + cliqueNames: []string{"verylongpodcliquenamethatexceedslimit"}, // 37 chars, total 34+37=71 > 45 + expectError: true, + expectedErrMsg: "combined resource name length", + expectedErrPath: "spec.template.cliques.name", + }, + { + description: "Empty PodClique template name", + pgsName: "inference", + cliqueNames: []string{""}, + expectError: true, + expectedErrMsg: "field cannot be empty", + expectedErrPath: "spec.template.cliques.name", + }, + { + description: "PodClique template name with invalid characters", + pgsName: "inference", + cliqueNames: []string{"prefill_worker"}, + expectError: true, + expectedErrMsg: "invalid PodCliqueTemplateSpec name", + expectedErrPath: "spec.template.cliques.name", + }, + { + description: "Multiple PodClique templates with valid names", + pgsName: "inference", + cliqueNames: []string{"prefill", "decode", "embed"}, + expectError: false, + }, + { + description: "Multiple PodClique templates with one invalid name", + pgsName: "inference", + cliqueNames: []string{"prefill", "verylongpodcliquenamethatexceedslimit"}, + expectError: true, + expectedErrMsg: "combined resource name length", + expectedErrPath: "spec.template.cliques.name", + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + pgs := createDummyPodGangSet(tc.pgsName) + + // Add PodClique templates + for _, cliqueName := range tc.cliqueNames { + pgs.Spec.Template.Cliques = append(pgs.Spec.Template.Cliques, createDummyPodCliqueTemplate(cliqueName)) + } + + validator := newPGSValidator(pgs, admissionv1.Create) + warnings, err := validator.validate() + + if !tc.expectError { + assert.NoError(t, err, "Expected no validation error for test case: %s", tc.description) + } else { + assert.Error(t, err, "Expected validation error for test case: %s", tc.description) + assert.Contains(t, err.Error(), tc.expectedErrMsg, "Error message should contain expected text") + // Check if this is an aggregate error with field errors + var aggErr utilerrors.Aggregate + if errors.As(err, &aggErr) && len(aggErr.Errors()) > 0 { + var fieldErr *field.Error + if errors.As(aggErr.Errors()[0], &fieldErr) { + assert.Contains(t, fieldErr.Field, tc.expectedErrPath, "Error field path should match expected") + } + } + } + + // Warnings should be empty for these test cases + assert.Empty(t, warnings, "No warnings expected for these test cases") + }) + } +} + +func TestScalingGroupPodCliqueNameValidation(t *testing.T) { + testCases := []struct { + description string + pgsName string + scalingGroups []grovecorev1alpha1.PodCliqueScalingGroupConfig + cliqueTemplates []string + expectError bool + expectedErrMsg string + expectedErrPath string + }{ + { + description: "Valid scaling group with valid PodClique names", + pgsName: "inference", + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("workers", []string{"prefill", "decode"}), + }, + cliqueTemplates: []string{"prefill", "decode"}, + expectError: false, + }, + { + description: "Scaling group with PodClique names exceeding character limit", + pgsName: "verylongpodgangsetname", + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("verylongscalinggroup", []string{"verylongpodcliquename"}), + }, + cliqueTemplates: []string{"verylongpodcliquename"}, + expectError: true, + expectedErrMsg: "combined resource name length", + expectedErrPath: "spec.template.podCliqueScalingGroups.cliqueNames.name", + }, + { + description: "Multiple scaling groups with valid names", + pgsName: "inference", + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("workers", []string{"prefill"}), + createScalingGroupConfig("embedders", []string{"embed"}), + }, + cliqueTemplates: []string{"prefill", "embed"}, + expectError: false, + }, + { + description: "Scaling group with mixed valid/invalid PodClique names", + pgsName: "inference", + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("workers", []string{"prefill", "verylongpodcliquenamethatexceedslimit"}), + }, + cliqueTemplates: []string{"prefill", "verylongpodcliquenamethatexceedslimit"}, + expectError: true, + expectedErrMsg: "combined resource name length", + expectedErrPath: "spec.template.podCliqueScalingGroups.cliqueNames.name", + }, + { + description: "Scaling group referencing non-existent PodClique", + pgsName: "inference", + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("workers", []string{"nonexistent"}), + }, + cliqueTemplates: []string{"prefill"}, + expectError: true, + expectedErrMsg: "unidentified PodClique names found", + expectedErrPath: "spec.template.podCliqueScalingGroups.cliqueNames", + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + pgs := createDummyPodGangSet(tc.pgsName) + + // Add PodClique templates + for _, cliqueName := range tc.cliqueTemplates { + pgs.Spec.Template.Cliques = append(pgs.Spec.Template.Cliques, createDummyPodCliqueTemplate(cliqueName)) + } + + // Add scaling groups + pgs.Spec.Template.PodCliqueScalingGroupConfigs = tc.scalingGroups + + validator := newPGSValidator(pgs, admissionv1.Create) + warnings, err := validator.validate() + + if tc.expectError { + assert.Error(t, err, "Expected validation error for test case: %s", tc.description) + assert.Contains(t, err.Error(), tc.expectedErrMsg, "Error message should contain expected text") + } else { + assert.NoError(t, err, "Expected no validation error for test case: %s", tc.description) + } + + // Warnings should be empty for these test cases + assert.Empty(t, warnings, "No warnings expected for these test cases") + }) + } +} + +func TestPodGangSetIntegrationValidation(t *testing.T) { + testCases := []struct { + description string + pgsName string + cliqueTemplates []string + scalingGroups []grovecorev1alpha1.PodCliqueScalingGroupConfig + expectError bool + expectedErrCount int + expectedErrMsgs []string + }{ + { + description: "Valid PodGangSet with templates and scaling groups", + pgsName: "inference", + cliqueTemplates: []string{"prefill", "decode", "embed"}, + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("workers", []string{"prefill", "decode"}), + }, + expectError: false, + }, + { + description: "PodGangSet with invalid template and scaling group names", + pgsName: "verylongpodgangsetname", + cliqueTemplates: []string{"verylongpodcliquename"}, + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("verylongscalinggroup", []string{"verylongpodcliquename"}), + }, + expectError: true, + expectedErrCount: 3, // Multiple field paths get validation errors (name, scaling group name, metadata name) + expectedErrMsgs: []string{"combined resource name length"}, + }, + { + description: "PodGangSet with maximum valid character usage", + pgsName: "pgs", // 3 chars + cliqueTemplates: []string{"cliquename20charssss"}, // 20 chars total (3+20=23, well under 45) + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("sg", []string{"cliquename20charssss"}), // 3+2+20=25, under 45 + }, + expectError: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + pgs := createDummyPodGangSet(tc.pgsName) + + // Add PodClique templates + for _, cliqueName := range tc.cliqueTemplates { + pgs.Spec.Template.Cliques = append(pgs.Spec.Template.Cliques, createDummyPodCliqueTemplate(cliqueName)) + } + + // Add scaling groups + pgs.Spec.Template.PodCliqueScalingGroupConfigs = tc.scalingGroups + + validator := newPGSValidator(pgs, admissionv1.Create) + warnings, err := validator.validate() + + if !tc.expectError { + assert.NoError(t, err, "Expected no validation error for test case: %s", tc.description) + } else { + assert.Error(t, err, "Expected validation error for test case: %s", tc.description) + // Check error count if specified + if tc.expectedErrCount > 0 { + var aggErr utilerrors.Aggregate + if errors.As(err, &aggErr) { + assert.Len(t, aggErr.Errors(), tc.expectedErrCount, "Expected specific number of validation errors") + } + } + + // Check error messages if specified + for _, expectedMsg := range tc.expectedErrMsgs { + assert.Contains(t, err.Error(), expectedMsg, "Error should contain expected message") + } + } + + // Warnings should be empty for these test cases + assert.Empty(t, warnings, "No warnings expected for these test cases") + }) + } +} + +func TestErrorFieldPaths(t *testing.T) { + testCases := []struct { + description string + pgsName string + cliqueNames []string + scalingGroups []grovecorev1alpha1.PodCliqueScalingGroupConfig + expectedFieldErrs []string + }{ + { + description: "Template name validation error paths", + pgsName: "verylongpodgangsetnamethatisverylong", // 34 chars + cliqueNames: []string{"verylongpodcliquenamethatexceedslimit"}, // 37 chars, total > 45 + expectedFieldErrs: []string{ + "spec.template.cliques.name", + "metadata.name", + }, + }, + { + description: "Scaling group name validation error paths", + pgsName: "verylongpodgangsetname", + cliqueNames: []string{"validname"}, + scalingGroups: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + createScalingGroupConfig("verylongscalinggroup", []string{"verylongpodcliquename"}), + }, + expectedFieldErrs: []string{ + "spec.template.podCliqueScalingGroups.cliqueNames.name", + "spec.template.podCliqueScalingGroups.name", + "metadata.name", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + pgs := createDummyPodGangSet(tc.pgsName) + + // Add PodClique templates + for _, cliqueName := range tc.cliqueNames { + pgs.Spec.Template.Cliques = append(pgs.Spec.Template.Cliques, createDummyPodCliqueTemplate(cliqueName)) + } + + // Add scaling groups + pgs.Spec.Template.PodCliqueScalingGroupConfigs = tc.scalingGroups + + validator := newPGSValidator(pgs, admissionv1.Create) + _, err := validator.validate() + + assert.Error(t, err, "Expected validation error") + + var actualFieldPaths []string + if aggErr, ok := err.(utilerrors.Aggregate); ok { + for _, e := range aggErr.Errors() { + if fieldErr, ok := e.(*field.Error); ok { + actualFieldPaths = append(actualFieldPaths, fieldErr.Field) + } + } + } + + // Check that all expected field paths are present + for _, expectedPath := range tc.expectedFieldErrs { + found := false + for _, actualPath := range actualFieldPaths { + if actualPath == expectedPath { + found = true + break + } + } + assert.True(t, found, "Expected field path %s not found in actual errors: %v", expectedPath, actualFieldPaths) + } + }) + } +} + +func TestValidatePodNameConstraints(t *testing.T) { + testCases := []struct { + description string + pgsName string + pcsgName string + pclqName string + expectError bool + }{ + { + description: "Valid PCSG component names", + pgsName: "inference", + pcsgName: "workers", + pclqName: "prefill", + expectError: false, + }, + { + description: "Valid non-PCSG component names", + pgsName: "inference", + pcsgName: "", + pclqName: "prefill", + expectError: false, + }, + { + description: "Resource names exceed 45 characters", + pgsName: "verylongpgsname", + pcsgName: "verylongpcsgname", + pclqName: "verylongpclqname", + expectError: true, + }, + { + description: "Non-PCSG resource names exceed 45 characters", + pgsName: "verylongpodgangsetnamethatisverylong", + pcsgName: "", + pclqName: "verylongpodcliquename", + expectError: true, + }, + { + description: "Maximum valid character usage for PCSG", + pgsName: "pgs", + pcsgName: "sg", + pclqName: "cliquename20charssss", + expectError: false, + }, + { + description: "Maximum valid character usage for non-PCSG", + pgsName: "pgsnametwentychars123", + pcsgName: "", + pclqName: "cliquenametwentyfourchar", + expectError: false, + }, + { + description: "Edge case - exactly 45 characters", + pgsName: "twentycharspgsnames12", + pcsgName: "", + pclqName: "twentyfourcharspclqname1", + expectError: false, + }, + { + description: "Edge case - 46 characters (should fail)", + pgsName: "twentyonecharspgsnames", + pcsgName: "", + pclqName: "twentyfivecharspclqname12", + expectError: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + err := validatePodNameConstraints(tc.pgsName, tc.pcsgName, tc.pclqName) + if tc.expectError { + assert.Error(t, err, "Expected error for component names: pgs=%s, pcsg=%s, pclq=%s", tc.pgsName, tc.pcsgName, tc.pclqName) + } else { + assert.NoError(t, err, "Expected no error for component names: pgs=%s, pcsg=%s, pclq=%s", tc.pgsName, tc.pcsgName, tc.pclqName) + } + }) + } +}