Skip to content

Commit edc4901

Browse files
Rename the multi-node `size` field to `pipelineParallelism`
Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>
1 parent b0dfacf commit edc4901

File tree

9 files changed

+34
-34
lines changed

9 files changed

+34
-34
lines changed

api/apps/v1alpha1/nimservice_types.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ type NimServiceMultiNodeConfig struct {
142142
BackendType NIMBackendType `json:"backendType,omitempty"`
143143

144144
// +kubebuilder:default:=1
145-
// Size specifies the number of pods to create for the multi-node NIMService.
145+
// PipelineParallelism specifies the number of pods to create for the multi-node NIMService.
146146
// +kubebuilder:validation:Minimum=1
147-
Size int `json:"size,omitempty"`
147+
PipelineParallelism int `json:"pipelineParallelism,omitempty"`
148148

149149
// MPI config for NIMService using LeaderWorkerSet
150150
MPI *MultiNodeMPIConfig `json:"mpi,omitempty"`
@@ -339,7 +339,7 @@ func (n *NIMService) getLWSCommonEnv() []corev1.EnvVar {
339339
},
340340
{
341341
Name: "NIM_NUM_COMPUTE_NODES",
342-
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Size),
342+
Value: fmt.Sprintf("%d", n.Spec.MultiNode.PipelineParallelism),
343343
},
344344
{
345345
Name: "NIM_MULTI_NODE",
@@ -351,7 +351,7 @@ func (n *NIMService) getLWSCommonEnv() []corev1.EnvVar {
351351
},
352352
{
353353
Name: "NIM_PIPELINE_PARALLEL_SIZE",
354-
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Size),
354+
Value: fmt.Sprintf("%d", n.Spec.MultiNode.PipelineParallelism),
355355
},
356356
{
357357
Name: "NIM_NODE_RANK",
@@ -916,7 +916,7 @@ func (n *NIMService) GetLWSSize() int {
916916
if n.Spec.MultiNode == nil {
917917
return 0
918918
}
919-
return n.Spec.MultiNode.Size
919+
return n.Spec.MultiNode.PipelineParallelism
920920
}
921921

922922
// GetDeploymentKind returns the kind of deployment for NIMService.
@@ -1210,7 +1210,7 @@ func (n *NIMService) generateMPIConfigData() map[string]string {
12101210
data := make(map[string]string)
12111211
for i := 0; i < n.Spec.Replicas; i++ {
12121212
hostfile := fmt.Sprintf("localhost slots=%d\n", n.GetGPUCountPerPod())
1213-
for j := 1; j < n.Spec.MultiNode.Size; j++ {
1213+
for j := 1; j < n.Spec.MultiNode.PipelineParallelism; j++ {
12141214
workerHostname := fmt.Sprintf("%s-%d-%d.%s.%s.svc slots=%d",
12151215
n.GetLWSName(), i, j, n.GetLWSName(), n.GetNamespace(), n.GetGPUCountPerPod())
12161216
hostfile += workerHostname + "\n"

bundle/manifests/apps.nvidia.com_nimpipelines.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,10 +1304,10 @@ spec:
13041304
required:
13051305
- mpiStartTimeout
13061306
type: object
1307-
size:
1307+
pipelineParallelism:
13081308
default: 1
1309-
description: Size specifies the number of pods to create
1310-
for the multi-node NIMService.
1309+
description: PipelineParallelism specifies the number
1310+
of pods to create for the multi-node NIMService.
13111311
minimum: 1
13121312
type: integer
13131313
type: object

bundle/manifests/apps.nvidia.com_nimservices.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,10 +1237,10 @@ spec:
12371237
required:
12381238
- mpiStartTimeout
12391239
type: object
1240-
size:
1240+
pipelineParallelism:
12411241
default: 1
1242-
description: Size specifies the number of pods to create for the
1243-
multi-node NIMService.
1242+
description: PipelineParallelism specifies the number of pods
1243+
to create for the multi-node NIMService.
12441244
minimum: 1
12451245
type: integer
12461246
type: object

config/crd/bases/apps.nvidia.com_nimpipelines.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,10 +1304,10 @@ spec:
13041304
required:
13051305
- mpiStartTimeout
13061306
type: object
1307-
size:
1307+
pipelineParallelism:
13081308
default: 1
1309-
description: Size specifies the number of pods to create
1310-
for the multi-node NIMService.
1309+
description: PipelineParallelism specifies the number
1310+
of pods to create for the multi-node NIMService.
13111311
minimum: 1
13121312
type: integer
13131313
type: object

config/crd/bases/apps.nvidia.com_nimservices.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,10 +1237,10 @@ spec:
12371237
required:
12381238
- mpiStartTimeout
12391239
type: object
1240-
size:
1240+
pipelineParallelism:
12411241
default: 1
1242-
description: Size specifies the number of pods to create for the
1243-
multi-node NIMService.
1242+
description: PipelineParallelism specifies the number of pods
1243+
to create for the multi-node NIMService.
12441244
minimum: 1
12451245
type: integer
12461246
type: object

deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,10 +1304,10 @@ spec:
13041304
required:
13051305
- mpiStartTimeout
13061306
type: object
1307-
size:
1307+
pipelineParallelism:
13081308
default: 1
1309-
description: Size specifies the number of pods to create
1310-
for the multi-node NIMService.
1309+
description: PipelineParallelism specifies the number
1310+
of pods to create for the multi-node NIMService.
13111311
minimum: 1
13121312
type: integer
13131313
type: object

deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,10 +1237,10 @@ spec:
12371237
required:
12381238
- mpiStartTimeout
12391239
type: object
1240-
size:
1240+
pipelineParallelism:
12411241
default: 1
1242-
description: Size specifies the number of pods to create for the
1243-
multi-node NIMService.
1242+
description: PipelineParallelism specifies the number of pods
1243+
to create for the multi-node NIMService.
12441244
minimum: 1
12451245
type: integer
12461246
type: object

internal/controller/platform/standalone/nimservice_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,7 +1102,7 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
11021102
Service: appsv1alpha1.Service{Type: corev1.ServiceTypeLoadBalancer, Port: ptr.To[int32](8123), Annotations: map[string]string{"annotation-key-specific": "service"}},
11031103
},
11041104
MultiNode: &appsv1alpha1.NimServiceMultiNodeConfig{
1105-
Size: 2,
1105+
PipelineParallelism: 2,
11061106
},
11071107
},
11081108
}
@@ -1923,8 +1923,8 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
19231923
Service: appsv1alpha1.Service{Type: corev1.ServiceTypeLoadBalancer, Port: ptr.To[int32](8123)},
19241924
},
19251925
MultiNode: &appsv1alpha1.NimServiceMultiNodeConfig{
1926-
BackendType: appsv1alpha1.NIMBackendTypeLWS,
1927-
Size: 2,
1926+
BackendType: appsv1alpha1.NIMBackendTypeLWS,
1927+
PipelineParallelism: 2,
19281928
},
19291929
Resources: &corev1.ResourceRequirements{
19301930
Requests: corev1.ResourceList{
@@ -2150,8 +2150,8 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
21502150
Service: appsv1alpha1.Service{Type: corev1.ServiceTypeLoadBalancer, Port: ptr.To[int32](8123)},
21512151
},
21522152
MultiNode: &appsv1alpha1.NimServiceMultiNodeConfig{
2153-
BackendType: appsv1alpha1.NIMBackendTypeLWS,
2154-
Size: 2,
2153+
BackendType: appsv1alpha1.NIMBackendTypeLWS,
2154+
PipelineParallelism: 2,
21552155
},
21562156
Resources: &corev1.ResourceRequirements{
21572157
Requests: corev1.ResourceList{

internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -953,7 +953,7 @@ func TestValidateResourcesConfiguration(t *testing.T) {
953953
func TestValidateMultiNodeImmutability(t *testing.T) {
954954
fld := field.NewPath("spec").Child("multiNode")
955955
old := baseNIMService()
956-
old.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Size: 1}
956+
old.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{PipelineParallelism: 1}
957957

958958
cases := []struct {
959959
name string
@@ -962,12 +962,12 @@ func TestValidateMultiNodeImmutability(t *testing.T) {
962962
}{
963963
{"unchanged", func() *appsv1alpha1.NIMService {
964964
n := baseNIMService()
965-
n.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Size: 1}
965+
n.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{PipelineParallelism: 1}
966966
return n
967967
}(), 0},
968968
{"changed", func() *appsv1alpha1.NIMService {
969969
n := baseNIMService()
970-
n.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Size: 2}
970+
n.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{PipelineParallelism: 2}
971971
return n
972972
}(), 1},
973973
}
@@ -1087,15 +1087,15 @@ func TestValidateKServeConfiguration(t *testing.T) {
10871087
ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe
10881088
ns.Spec.Annotations = map[string]string{"serving.kserve.org/deploymentMode": "RawDeployment"}
10891089
ns.Spec.Scale.Enabled = &trueVal // should be fine
1090-
ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Size: 1}
1090+
ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{PipelineParallelism: 1}
10911091
},
10921092
wantErrs: 1, // only multiNode should trigger
10931093
},
10941094
{
10951095
name: "kserve – multidnode alone",
10961096
modify: func(ns *appsv1alpha1.NIMService) {
10971097
ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe
1098-
ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Size: 2}
1098+
ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{PipelineParallelism: 2}
10991099
},
11001100
wantErrs: 1,
11011101
},

0 commit comments

Comments
 (0)