Skip to content

Commit 1271327

Browse files
Updating pipelineparallelism field
Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>
1 parent 1188da9 commit 1271327

File tree

9 files changed

+32
-32
lines changed

9 files changed

+32
-32
lines changed

api/apps/v1alpha1/nimservice_types.go

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -155,9 +155,9 @@ type MultiNodeMPIConfig struct {
155155

156156
type Parallelism struct {
157157
// +kubebuilder:default:=1
158-
// PipelineParallelism specifies the number of pods to create for the multi-node NIMService.
158+
// PP specifies the number of pods to create for the multi-node NIMService.
159159
// +kubebuilder:validation:Minimum=1
160-
PipelineParallelism int `json:"pipelineParallelism,omitempty"`
160+
PP int `json:"pp,omitempty"`
161161
}
162162

163163
// NIMCacheVolSpec defines the spec to use NIMCache volume.
@@ -343,7 +343,7 @@ func (n *NIMService) getLWSCommonEnv() []corev1.EnvVar {
343343
},
344344
{
345345
Name: "NIM_NUM_COMPUTE_NODES",
346-
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Parallelism.PipelineParallelism),
346+
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Parallelism.PP),
347347
},
348348
{
349349
Name: "NIM_MULTI_NODE",
@@ -355,7 +355,7 @@ func (n *NIMService) getLWSCommonEnv() []corev1.EnvVar {
355355
},
356356
{
357357
Name: "NIM_PIPELINE_PARALLEL_SIZE",
358-
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Parallelism.PipelineParallelism),
358+
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Parallelism.PP),
359359
},
360360
{
361361
Name: "NIM_NODE_RANK",
@@ -920,7 +920,7 @@ func (n *NIMService) GetLWSSize() int {
920920
if n.Spec.MultiNode == nil {
921921
return 0
922922
}
923-
return n.Spec.MultiNode.Parallelism.PipelineParallelism
923+
return n.Spec.MultiNode.Parallelism.PP
924924
}
925925

926926
// GetDeploymentKind returns the kind of deployment for NIMService.
@@ -1206,7 +1206,7 @@ func (n *NIMService) generateMPIConfigData() map[string]string {
12061206
data := make(map[string]string)
12071207
for i := 0; i < n.Spec.Replicas; i++ {
12081208
hostfile := fmt.Sprintf("localhost slots=%d\n", n.GetGPUCountPerPod())
1209-
for j := 1; j < n.Spec.MultiNode.Parallelism.PipelineParallelism; j++ {
1209+
for j := 1; j < n.Spec.MultiNode.Parallelism.PP; j++ {
12101210
workerHostname := fmt.Sprintf("%s-%d-%d.%s.%s.svc slots=%d",
12111211
n.GetLWSName(), i, j, n.GetLWSName(), n.GetNamespace(), n.GetGPUCountPerPod())
12121212
hostfile += workerHostname + "\n"

bundle/manifests/apps.nvidia.com_nimpipelines.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1046,10 +1046,10 @@ spec:
10461046
description: Parallelism specifies the parallelism strategy
10471047
for the multi-node NIMService.
10481048
properties:
1049-
pipelineParallelism:
1049+
pp:
10501050
default: 1
1051-
description: PipelineParallelism specifies the number
1052-
of pods to create for the multi-node NIMService.
1051+
description: PP specifies the number of pods to
1052+
create for the multi-node NIMService.
10531053
minimum: 1
10541054
type: integer
10551055
type: object

bundle/manifests/apps.nvidia.com_nimservices.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -980,10 +980,10 @@ spec:
980980
description: Parallelism specifies the parallelism strategy for
981981
the multi-node NIMService.
982982
properties:
983-
pipelineParallelism:
983+
pp:
984984
default: 1
985-
description: PipelineParallelism specifies the number of pods
986-
to create for the multi-node NIMService.
985+
description: PP specifies the number of pods to create for
986+
the multi-node NIMService.
987987
minimum: 1
988988
type: integer
989989
type: object

config/crd/bases/apps.nvidia.com_nimpipelines.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1046,10 +1046,10 @@ spec:
10461046
description: Parallelism specifies the parallelism strategy
10471047
for the multi-node NIMService.
10481048
properties:
1049-
pipelineParallelism:
1049+
pp:
10501050
default: 1
1051-
description: PipelineParallelism specifies the number
1052-
of pods to create for the multi-node NIMService.
1051+
description: PP specifies the number of pods to
1052+
create for the multi-node NIMService.
10531053
minimum: 1
10541054
type: integer
10551055
type: object

config/crd/bases/apps.nvidia.com_nimservices.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -980,10 +980,10 @@ spec:
980980
description: Parallelism specifies the parallelism strategy for
981981
the multi-node NIMService.
982982
properties:
983-
pipelineParallelism:
983+
pp:
984984
default: 1
985-
description: PipelineParallelism specifies the number of pods
986-
to create for the multi-node NIMService.
985+
description: PP specifies the number of pods to create for
986+
the multi-node NIMService.
987987
minimum: 1
988988
type: integer
989989
type: object

deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1046,10 +1046,10 @@ spec:
10461046
description: Parallelism specifies the parallelism strategy
10471047
for the multi-node NIMService.
10481048
properties:
1049-
pipelineParallelism:
1049+
pp:
10501050
default: 1
1051-
description: PipelineParallelism specifies the number
1052-
of pods to create for the multi-node NIMService.
1051+
description: PP specifies the number of pods to
1052+
create for the multi-node NIMService.
10531053
minimum: 1
10541054
type: integer
10551055
type: object

deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -980,10 +980,10 @@ spec:
980980
description: Parallelism specifies the parallelism strategy for
981981
the multi-node NIMService.
982982
properties:
983-
pipelineParallelism:
983+
pp:
984984
default: 1
985-
description: PipelineParallelism specifies the number of pods
986-
to create for the multi-node NIMService.
985+
description: PP specifies the number of pods to create for
986+
the multi-node NIMService.
987987
minimum: 1
988988
type: integer
989989
type: object

internal/controller/platform/standalone/nimservice_test.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1184,7 +1184,7 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
11841184
},
11851185
MultiNode: &appsv1alpha1.NimServiceMultiNodeConfig{
11861186
Parallelism: appsv1alpha1.Parallelism{
1187-
PipelineParallelism: 2,
1187+
PP: 2,
11881188
},
11891189
},
11901190
},
@@ -2008,7 +2008,7 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
20082008
MultiNode: &appsv1alpha1.NimServiceMultiNodeConfig{
20092009
BackendType: appsv1alpha1.NIMBackendTypeLWS,
20102010
Parallelism: appsv1alpha1.Parallelism{
2011-
PipelineParallelism: 2,
2011+
PP: 2,
20122012
},
20132013
},
20142014
Resources: &corev1.ResourceRequirements{
@@ -2237,7 +2237,7 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
22372237
MultiNode: &appsv1alpha1.NimServiceMultiNodeConfig{
22382238
BackendType: appsv1alpha1.NIMBackendTypeLWS,
22392239
Parallelism: appsv1alpha1.Parallelism{
2240-
PipelineParallelism: 2,
2240+
PP: 2,
22412241
},
22422242
},
22432243
Resources: &corev1.ResourceRequirements{

internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper_test.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -953,7 +953,7 @@ func TestValidateResourcesConfiguration(t *testing.T) {
953953
func TestValidateMultiNodeImmutability(t *testing.T) {
954954
fld := field.NewPath("spec").Child("multiNode")
955955
old := baseNIMService()
956-
old.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PipelineParallelism: 1}}
956+
old.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PP: 1}}
957957

958958
cases := []struct {
959959
name string
@@ -962,12 +962,12 @@ func TestValidateMultiNodeImmutability(t *testing.T) {
962962
}{
963963
{"unchanged", func() *appsv1alpha1.NIMService {
964964
n := baseNIMService()
965-
n.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PipelineParallelism: 1}}
965+
n.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PP: 1}}
966966
return n
967967
}(), 0},
968968
{"changed", func() *appsv1alpha1.NIMService {
969969
n := baseNIMService()
970-
n.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PipelineParallelism: 2}}
970+
n.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PP: 2}}
971971
return n
972972
}(), 1},
973973
}
@@ -1087,15 +1087,15 @@ func TestValidateKServeConfiguration(t *testing.T) {
10871087
ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe
10881088
ns.Spec.Annotations = map[string]string{"serving.kserve.org/deploymentMode": "RawDeployment"}
10891089
ns.Spec.Scale.Enabled = &trueVal // should be fine
1090-
ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PipelineParallelism: 1}}
1090+
ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PP: 1}}
10911091
},
10921092
wantErrs: 1, // only multiNode should trigger
10931093
},
10941094
{
10951095
name: "kserve – multidnode alone",
10961096
modify: func(ns *appsv1alpha1.NIMService) {
10971097
ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe
1098-
ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PipelineParallelism: 2}}
1098+
ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: appsv1alpha1.Parallelism{PP: 2}}
10991099
},
11001100
wantErrs: 1,
11011101
},

0 commit comments

Comments
 (0)