1717package validation
1818
1919import (
20+ "fmt"
2021 "reflect"
2122 "slices"
2223 "strings"
2324
2425 grovecorev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1"
2526 "github.com/NVIDIA/grove/operator/internal/utils"
26-
2727 "github.com/samber/lo"
2828 admissionv1 "k8s.io/api/admission/v1"
2929 corev1 "k8s.io/api/core/v1"
@@ -33,6 +33,10 @@ import (
3333 "k8s.io/apimachinery/pkg/util/validation/field"
3434)
3535
// maxCombinedResourceNameLength is the upper bound on the summed lengths of
// the resource names (PodGangSet, optional PodCliqueScalingGroup and
// PodClique) that are combined to build a pod name.
const maxCombinedResourceNameLength = 45
39+
3640var allowedStartupTypes = sets .New (grovecorev1alpha1 .CliqueStartupTypeInOrder , grovecorev1alpha1 .CliqueStartupTypeAnyOrder , grovecorev1alpha1 .CliqueStartupTypeExplicit )
3741
3842type pgsValidator struct {
@@ -53,7 +57,8 @@ func newPGSValidator(pgs *grovecorev1alpha1.PodGangSet, operation admissionv1.Op
5357func (v * pgsValidator ) validate () ([]string , error ) {
5458 allErrs := field.ErrorList {}
5559
56- allErrs = append (allErrs , apivalidation .ValidateObjectMeta (& v .pgs .ObjectMeta , true , apivalidation .NameIsDNSSubdomain , field .NewPath ("metadata" ))... )
60+ allErrs = append (allErrs , apivalidation .ValidateObjectMeta (& v .pgs .ObjectMeta , true ,
61+ apivalidation .NameIsDNSSubdomain , field .NewPath ("metadata" ))... )
5762 fldPath := field .NewPath ("spec" )
5863 warnings , errs := v .validatePodGangSetSpec (fldPath )
5964 if len (errs ) != 0 {
@@ -101,10 +106,28 @@ func (v *pgsValidator) validatePodCliqueTemplates(fldPath *field.Path) ([]string
101106 allErrs = append (allErrs , field .Required (fldPath , "at least one PodClique must be defined" ))
102107 }
103108
109+ // Get all clique names that belong to scaling groups
110+ scalingGroupCliqueNames := v .getScalingGroupCliqueNames ()
111+
104112 cliqueNames := make ([]string , 0 , len (cliqueTemplateSpecs ))
105113 cliqueRoles := make ([]string , 0 , len (cliqueTemplateSpecs ))
106114 schedulerNames := make ([]string , 0 , len (cliqueTemplateSpecs ))
107115 for _ , cliqueTemplateSpec := range cliqueTemplateSpecs {
116+ if err := apivalidation .NameIsDNSSubdomain (cliqueTemplateSpec .Name , false ); err != nil {
117+ allErrs = append (allErrs , field .Invalid (fldPath .Child ("name" ), cliqueTemplateSpec .Name ,
118+ "invalid PodCliqueTemplateSpec name, must be a valid DNS subdomain" ))
119+ }
120+
121+ // Only validate pod name constraints for PodCliques that are NOT part of any scaling group
122+ // any pod clique that is part of scaling groups will be checked as part of scaling group pod name constraints.
123+ if ! scalingGroupCliqueNames .Has (cliqueTemplateSpec .Name ) {
124+ if err := validatePodNameConstraints (v .pgs .Name , "" , cliqueTemplateSpec .Name ); err != nil {
125+ // add error to each of filed paths that compose the podName in case of a PodCliqueTemplateSpec
126+ allErrs = append (allErrs , field .Invalid (fldPath .Child ("name" ), cliqueTemplateSpec .Name , err .Error ()))
127+ allErrs = append (allErrs , field .Invalid (field .NewPath ("metadata" ).Child ("name" ), v .pgs .Name , err .Error ()))
128+ }
129+ }
130+
108131 cliqueNames = append (cliqueNames , cliqueTemplateSpec .Name )
109132 cliqueRoles = append (cliqueRoles , cliqueTemplateSpec .Spec .RoleName )
110133 warns , errs := v .validatePodCliqueTemplateSpec (cliqueTemplateSpec , fldPath )
@@ -156,12 +179,19 @@ func (v *pgsValidator) validatePodCliqueScalingGroupConfigs(fldPath *field.Path)
156179 })
157180 pclqScalingGroupNames := make ([]string , 0 , len (v .pgs .Spec .Template .PodCliqueScalingGroupConfigs ))
158181 var cliqueNamesAcrossAllScalingGroups []string
182+ groupNameFiledPath := fldPath .Child ("name" )
159183
160184 for _ , scalingGroupConfig := range v .pgs .Spec .Template .PodCliqueScalingGroupConfigs {
185+ if err := apivalidation .NameIsDNSSubdomain (scalingGroupConfig .Name , false ); err != nil {
186+ allErrs = append (allErrs , field .Invalid (groupNameFiledPath , scalingGroupConfig .Name ,
187+ "invalid PodCliqueScalingGroupConfig name, must be a valid DNS subdomain" ))
188+ }
161189 pclqScalingGroupNames = append (pclqScalingGroupNames , scalingGroupConfig .Name )
162190 cliqueNamesAcrossAllScalingGroups = append (cliqueNamesAcrossAllScalingGroups , scalingGroupConfig .CliqueNames ... )
163191 // validate that scaling groups only contains clique names that are defined in the PodGangSet.
164- allErrs = append (allErrs , validateScalingGroupPodCliqueNames (allPodGangSetCliqueNames , scalingGroupConfig .CliqueNames , fldPath .Child ("cliqueNames" ))... )
192+ allErrs = append (allErrs , v .validateScalingGroupPodCliqueNames (scalingGroupConfig .Name , allPodGangSetCliqueNames ,
193+ scalingGroupConfig .CliqueNames , fldPath .Child ("cliqueNames" ), groupNameFiledPath )... )
194+
165195 }
166196
167197 // validate that the scaling group names are unique
@@ -280,15 +310,36 @@ func (v *pgsValidator) checkNetworkPackGroupConfigsForPartialPCSGInclusions(fldP
280310 return allErrs
281311}
282312
313+ // getScalingGroupCliqueNames returns a set of all clique names that belong to scaling groups
314+ func (v * pgsValidator ) getScalingGroupCliqueNames () sets.Set [string ] {
315+ scalingGroupCliqueNames := sets .New [string ]()
316+ for _ , scalingGroupConfig := range v .pgs .Spec .Template .PodCliqueScalingGroupConfigs {
317+ for _ , cliqueName := range scalingGroupConfig .CliqueNames {
318+ scalingGroupCliqueNames .Insert (cliqueName )
319+ }
320+ }
321+ return scalingGroupCliqueNames
322+
323+ }
324+
283325// checks if the PodClique names specified in PodCliqueScalingGroupConfig refer to a defined clique in the PodGangSet.
284- func validateScalingGroupPodCliqueNames (allPclqNames , pclqNameInScalingGrp []string , fldPath * field.Path ) field.ErrorList {
326+ func ( v * pgsValidator ) validateScalingGroupPodCliqueNames (pcsgName string , allPclqNames , pclqNameInScalingGrp []string , fldPath , pcsgNameFieldPath * field.Path ) field.ErrorList {
285327 allErrs := field.ErrorList {}
286328
287329 _ , unidentifiedPclqNames := lo .Difference (allPclqNames , lo .Uniq (pclqNameInScalingGrp ))
288330 if len (unidentifiedPclqNames ) > 0 {
289331 allErrs = append (allErrs , field .Invalid (fldPath , strings .Join (unidentifiedPclqNames , "," ), "unidentified PodClique names found" ))
290332 }
291333
334+ // validate scaling group PodClique pods names are valid.
335+ for _ , pclqName := range pclqNameInScalingGrp {
336+ if err := validatePodNameConstraints (v .pgs .Name , pcsgName , pclqName ); err != nil {
337+ // add error to each of filed paths that compose the podName
338+ allErrs = append (allErrs , field .Invalid (fldPath .Child ("name" ), pclqName , err .Error ()))
339+ allErrs = append (allErrs , field .Invalid (pcsgNameFieldPath , pclqName , err .Error ()))
340+ allErrs = append (allErrs , field .Invalid (field .NewPath ("metadata" ).Child ("name" ), v .pgs .Name , err .Error ()))
341+ }
342+ }
292343 return allErrs
293344}
294345
@@ -302,12 +353,14 @@ func (v *pgsValidator) validatePodCliqueSpec(name string, cliqueSpec grovecorev1
302353 // Ideally this should never happen, the defaulting webhook will always set the default value for minAvailable.
303354 if cliqueSpec .MinAvailable == nil {
304355 allErrs = append (allErrs , field .Required (fldPath .Child ("minAvailable" ), "field is required" ))
305- }
306- if * cliqueSpec .MinAvailable <= 0 {
307- allErrs = append (allErrs , field .Invalid (fldPath .Child ("minAvailable" ), * cliqueSpec .MinAvailable , "must be greater than 0" ))
308- }
309- if * cliqueSpec .MinAvailable > cliqueSpec .Replicas {
310- allErrs = append (allErrs , field .Invalid (fldPath .Child ("minAvailable" ), * cliqueSpec .MinAvailable , "minAvailable must not be greater than replicas" ))
356+ } else {
357+ // prevent nil pointer dereference, no point checking the value if it is nil
358+ if * cliqueSpec .MinAvailable <= 0 {
359+ allErrs = append (allErrs , field .Invalid (fldPath .Child ("minAvailable" ), * cliqueSpec .MinAvailable , "must be greater than 0" ))
360+ }
361+ if * cliqueSpec .MinAvailable > cliqueSpec .Replicas {
362+ allErrs = append (allErrs , field .Invalid (fldPath .Child ("minAvailable" ), * cliqueSpec .MinAvailable , "minAvailable must not be greater than replicas" ))
363+ }
311364 }
312365
313366 if v .isStartupTypeExplicit () && len (cliqueSpec .StartsAfter ) > 0 {
@@ -469,3 +522,34 @@ func clearContainerImages(containers []corev1.Container) {
469522 containers [i ].Image = ""
470523 }
471524}
525+
526+ // validatePodNameConstraints validates Grove pod name component constraints.
527+ // This function validates the constraints for component names that will be used
528+ // to construct pod names.
529+ //
530+ // Pod names that belong to a PCSG follow the format:
531+ // <pgs-name>-<pgs-index>-<pcsg-name>-<pcsg-index>-<pclq-name>-<random>
532+ //
533+ // Pod names that do not belong to a PCSG follow the format:
534+ // <pgs-name>-<pgs-index>-<pclq-name>-<random>
535+ //
536+ // Constraints:
537+ // - Random string + hyphens: 10 chars for PCSG pods, 8 chars for non-PCSG pods
538+ // - Max sum of all resource name characters: 45 chars
539+ func validatePodNameConstraints (pgsName , pcsgName , pclqName string ) error {
540+ // Check resource name constraints
541+ resourceNameLength := len (pgsName ) + len (pclqName )
542+ if pcsgName != "" {
543+ resourceNameLength += len (pcsgName )
544+ }
545+
546+ if resourceNameLength > maxCombinedResourceNameLength {
547+ if pcsgName != "" {
548+ return fmt .Errorf ("combined resource name length %d exceeds 45-character limit required for pod naming. Consider shortening: PodGangSet '%s', PodCliqueScalingGroup '%s', or PodClique '%s'" ,
549+ resourceNameLength , pgsName , pcsgName , pclqName )
550+ }
551+ return fmt .Errorf ("combined resource name length %d exceeds 45-character limit required for pod naming. Consider shortening: PodGangSet '%s' or PodClique '%s'" ,
552+ resourceNameLength , pgsName , pclqName )
553+ }
554+ return nil
555+ }
0 commit comments