Skip to content

Commit fa43dfb

Browse files
committed
Add unit test for multi node NIMService
Signed-off-by: Sheng Lin <shelin@nvidia.com>
1 parent 0d75b87 commit fa43dfb

3 files changed

Lines changed: 160 additions & 1 deletion

File tree

api/apps/v1alpha1/nimservice_types.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,9 @@ type NIMServiceSpec struct {
9999
// TODO: disable Scale when multi-node (NimServiceMultiNodeConfig) is enabled.
100100
type NimServiceMultiNodeConfig struct {
101101
// +kubebuilder:validation:Minimum=1
102-
// +kubebuilder:default:=1
102+
// +kubebuilder:default:=2
103103
// Workers specifies how many worker pods per multi-node replica to launch.
104+
// +kubebuilder:validation:Minimum=2
104105
Workers int `json:"workers,omitempty"`
105106

106107
// +kubebuilder:default:=1

internal/controller/platform/standalone/nimservice_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import (
4949
ctrl "sigs.k8s.io/controller-runtime"
5050
"sigs.k8s.io/controller-runtime/pkg/client"
5151
"sigs.k8s.io/controller-runtime/pkg/client/fake"
52+
lwsv1 "sigs.k8s.io/lws/api/leaderworkerset/v1"
5253

5354
appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
5455
"github.com/NVIDIA/k8s-nim-operator/internal/conditions"
@@ -135,6 +136,7 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
135136
Expect(networkingv1.AddToScheme(scheme)).To(Succeed())
136137
Expect(corev1.AddToScheme(scheme)).To(Succeed())
137138
Expect(monitoringv1.AddToScheme(scheme)).To(Succeed())
139+
Expect(lwsv1.AddToScheme(scheme)).To(Succeed())
138140

139141
client = fake.NewClientBuilder().WithScheme(scheme).
140142
WithStatusSubresource(&appsv1alpha1.NIMService{}).
@@ -817,6 +819,67 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
817819
})
818820
})
819821

822+
Describe("LWS deployment for multi-node inferencing NIMService", func() {
823+
AfterEach(func() {
824+
lws := &lwsv1.LeaderWorkerSet{
825+
ObjectMeta: metav1.ObjectMeta{
826+
Name: "test-nimservice-lws",
827+
Namespace: "default",
828+
},
829+
}
830+
err := client.Delete(context.TODO(), lws)
831+
Expect(err).NotTo(HaveOccurred())
832+
})
833+
834+
It("should report ready when LWS is available", func() {
835+
lws := &lwsv1.LeaderWorkerSet{
836+
ObjectMeta: metav1.ObjectMeta{
837+
Name: "test-nimservice-lws",
838+
Namespace: "default",
839+
},
840+
Status: lwsv1.LeaderWorkerSetStatus{
841+
Conditions: []metav1.Condition{
842+
{
843+
Type: string(lwsv1.LeaderWorkerSetAvailable),
844+
Status: metav1.ConditionTrue,
845+
},
846+
},
847+
},
848+
}
849+
err := client.Create(context.TODO(), lws)
850+
Expect(err).NotTo(HaveOccurred())
851+
msg, ready, err := reconciler.isLeaderWorkerSetReady(context.TODO(), nimService)
852+
Expect(err).ToNot(HaveOccurred())
853+
Expect(ready).To(Equal(true))
854+
Expect(msg).To(Equal(fmt.Sprintf("leaderworkerset %q is ready", lws.Name)))
855+
})
856+
It("should report not ready when LWS is not available", func() {
857+
lws := &lwsv1.LeaderWorkerSet{
858+
ObjectMeta: metav1.ObjectMeta{
859+
Name: "test-nimservice-lws",
860+
Namespace: "default",
861+
},
862+
Status: lwsv1.LeaderWorkerSetStatus{
863+
Conditions: []metav1.Condition{
864+
{
865+
Type: string(lwsv1.LeaderWorkerSetProgressing),
866+
Status: metav1.ConditionTrue,
867+
},
868+
{
869+
Type: string(lwsv1.LeaderWorkerSetAvailable),
870+
Status: metav1.ConditionFalse,
871+
},
872+
},
873+
},
874+
}
875+
err := client.Create(context.TODO(), lws)
876+
Expect(err).NotTo(HaveOccurred())
877+
msg, ready, err := reconciler.isLeaderWorkerSetReady(context.TODO(), nimService)
878+
Expect(err).ToNot(HaveOccurred())
879+
Expect(ready).To(Equal(false))
880+
Expect(msg).To(Equal(fmt.Sprintf("leaderworkerset %q is not ready", lws.Name)))
881+
})
882+
})
820883
Describe("update model status on NIMService", func() {
821884
BeforeEach(func() {
822885
ingress := &networkingv1.Ingress{

internal/render/render_test.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,101 @@ var _ = Describe("K8s Resources Rendering", func() {
129129
templatesDir := filepath.Join(path.Dir(path.Dir(cwd)), "manifests")
130130

131131
Context("Rendering templates", func() {
132+
// Renders the LeaderWorkerSet template from a fully populated
// LeaderWorkerSetParams and checks that metadata, replica count, container
// names/images, volumes and volume mounts appear on the rendered object.
It("should render LeaderWorkerSet template correctly", func() {
	params := types.LeaderWorkerSetParams{
		Name:        "test-lws",
		Namespace:   "default",
		Labels:      map[string]string{"app": "test-app"},
		Annotations: map[string]string{"annotation-key": "annotation-value"},
		Replicas:    3,
		Image:       "nim-llm:latest",
		LeaderVolumes: []corev1.Volume{
			{
				Name: "test-leader-volume",
				VolumeSource: corev1.VolumeSource{
					PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
						ClaimName: "test-leader-pvc",
					},
				},
			},
		},
		WorkerVolumes: []corev1.Volume{
			{
				Name: "test-worker-volume",
				VolumeSource: corev1.VolumeSource{
					PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
						ClaimName: "test-worker-pvc",
					},
				},
			},
		},
		// SubPath is set explicitly on both mounts so that the SubPath
		// assertions below have a source value to compare against.
		// NOTE(review): assumes the template copies mounts from params — confirm.
		LeaderVolumeMounts: []corev1.VolumeMount{
			{
				Name:      "test-leader-volume",
				MountPath: "/data",
				SubPath:   "subPath",
			},
		},
		WorkerVolumeMounts: []corev1.VolumeMount{
			{
				Name:      "test-worker-volume",
				MountPath: "/data",
				SubPath:   "subPath",
			},
		},
		LeaderEnvs: []corev1.EnvVar{
			{
				Name:  "LEADER_ENV_VAR",
				Value: "value",
			},
		},
		WorkerEnvs: []corev1.EnvVar{
			{
				Name:  "WORKER_ENV_VAR",
				Value: "value",
			},
		},
		Resources: &corev1.ResourceRequirements{
			Limits: corev1.ResourceList{
				corev1.ResourceCPU:    resource.MustParse("500m"),
				corev1.ResourceMemory: resource.MustParse("128Mi"),
			},
			Requests: corev1.ResourceList{
				corev1.ResourceCPU:    resource.MustParse("250m"),
				corev1.ResourceMemory: resource.MustParse("64Mi"),
			},
		},
		NodeSelector: map[string]string{"disktype": "ssd"},
		Tolerations: []corev1.Toleration{
			{
				Key:      "key1",
				Operator: corev1.TolerationOpExists,
				Effect:   corev1.TaintEffectNoSchedule,
			},
		},
	}

	r := render.NewRenderer(templatesDir)
	lws, err := r.LeaderWorkerSet(&params)
	Expect(err).NotTo(HaveOccurred())

	// Object metadata.
	Expect(lws.Name).To(Equal("test-lws"))
	Expect(lws.Namespace).To(Equal("default"))
	Expect(lws.Labels["app"]).To(Equal("test-app"))
	Expect(lws.Annotations["annotation-key"]).To(Equal("annotation-value"))

	// Guard the pointer before dereferencing so a missing field shows up as
	// an assertion failure instead of a nil-pointer panic.
	Expect(lws.Spec.Replicas).NotTo(BeNil())
	Expect(*lws.Spec.Replicas).To(Equal(int32(3)))

	Expect(lws.Spec.LeaderWorkerTemplate.LeaderTemplate).NotTo(BeNil())
	leader := lws.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec
	worker := lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec

	// Containers: guard against empty slices before indexing element 0.
	Expect(leader.Containers).NotTo(BeEmpty())
	Expect(worker.Containers).NotTo(BeEmpty())
	Expect(leader.Containers[0].Name).To(Equal("nim-leader"))
	Expect(leader.Containers[0].Image).To(Equal("nim-llm:latest"))
	Expect(worker.Containers[0].Name).To(Equal("nim-worker"))
	Expect(worker.Containers[0].Image).To(Equal("nim-llm:latest"))

	// Pod-level volumes.
	Expect(leader.Volumes).NotTo(BeEmpty())
	Expect(worker.Volumes).NotTo(BeEmpty())
	Expect(leader.Volumes[0].Name).To(Equal("test-leader-volume"))
	Expect(leader.Volumes[0].VolumeSource.PersistentVolumeClaim.ClaimName).To(Equal("test-leader-pvc"))
	Expect(worker.Volumes[0].Name).To(Equal("test-worker-volume"))
	Expect(worker.Volumes[0].VolumeSource.PersistentVolumeClaim.ClaimName).To(Equal("test-worker-pvc"))

	// Container-level volume mounts.
	Expect(leader.Containers[0].VolumeMounts).NotTo(BeEmpty())
	Expect(worker.Containers[0].VolumeMounts).NotTo(BeEmpty())
	Expect(leader.Containers[0].VolumeMounts[0].Name).To(Equal("test-leader-volume"))
	Expect(leader.Containers[0].VolumeMounts[0].MountPath).To(Equal("/data"))
	Expect(leader.Containers[0].VolumeMounts[0].SubPath).To(Equal("subPath"))
	Expect(worker.Containers[0].VolumeMounts[0].Name).To(Equal("test-worker-volume"))
	Expect(worker.Containers[0].VolumeMounts[0].MountPath).To(Equal("/data"))
	Expect(worker.Containers[0].VolumeMounts[0].SubPath).To(Equal("subPath"))
})
132227
It("should render Deployment template correctly", func() {
133228
params := types.DeploymentParams{
134229
Name: "test-deployment",

0 commit comments

Comments
 (0)