Skip to content

Commit bc9f02c

Browse files
committed
create computedomain for multi-node NIMServices
Signed-off-by: Varun Ramachandra Sekar <vsekar@nvidia.com>
1 parent 53173fd commit bc9f02c

File tree

575 files changed

+43619
-14128
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

575 files changed

+43619
-14128
lines changed

Makefile

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414

1515
# Image URL to use all building/pushing image targets
1616
IMG ?= ghcr.io/nvidia/k8s-nim-operator:${VERSION}
17-
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
18-
ENVTEST_K8S_VERSION = 1.30.0
1917

2018
GO_CMD ?= go
2119
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
@@ -135,12 +133,12 @@ coverage: test
135133

136134
.PHONY: test
137135
test: manifests generate fmt vet envtest ## Run tests.
138-
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v test/ | grep -v api/) -coverprofile $(COVERAGE_FILE)
136+
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test -ldflags "-X github.com/NVIDIA/k8s-dra-driver-gpu/internal/info.version=$(NVIDIA_DRA_DRIVER_GPU_VERSION)" $$(go list ./... | grep -v test/ | grep -v api/) -coverprofile $(COVERAGE_FILE)
139137

140138
# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors.
141139
.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up.
142140
test-e2e:
143-
go test ./test/e2e/ -v -ginkgo.v
141+
go test -ldflags "-X github.com/NVIDIA/k8s-dra-driver-gpu/internal/info.version=$(NVIDIA_DRA_DRIVER_GPU_VERSION)" ./test/e2e/ -v -ginkgo.v
144142

145143
.PHONY: lint
146144
lint: golangci-lint
@@ -155,7 +153,7 @@ lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes
155153

156154
.PHONY: build
157155
build: manifests generate fmt vet ## Build manager binary.
158-
go build -o bin/manager cmd/main.go
156+
go build -ldflags "-X github.com/NVIDIA/k8s-dra-driver-gpu/internal/info.version=$(NVIDIA_DRA_DRIVER_GPU_VERSION)" -o bin/manager cmd/main.go
159157

160158
.PHONY: run
161159
run: manifests generate fmt vet ## Run a controller from your host.

api/apps/v1alpha1/dra_types.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,3 +324,37 @@ type DRAResourceClaimTemplateStatusInfo struct {
324324
// ResourceClaimStatuses is the statuses of the generated resource claims from this resource claim template.
325325
ResourceClaimStatuses []DRAResourceClaimStatusInfo `json:"resourceClaimStatuses,omitempty"`
326326
}
327+
328+
// ComputeDomain defines the specification for the compute domain to use for a multi-node NIMService.
329+
// (Note: this will only work on NVLink-enabled nodes.)
330+
//
331+
// +kubebuilder:validation:XValidation:rule="(has(self.create) && self.create) ? !has(self.name) : (has(self.name) && size(self.name) > 0)",message="if create is true, name must not be set; otherwise name is required."
332+
type ComputeDomain struct {
333+
// Create specifies whether to create a new ComputeDomain or use an existing one.
334+
// If set to false or left unset, an existing ComputeDomain must be specified via the `Name` field.
335+
Create *bool `json:"create,omitempty"`
336+
// Name of the existing ComputeDomain to use. Required if `Create` is false or unset; must not be set if `Create` is true.
337+
Name string `json:"name,omitempty"`
338+
}
339+
340+
// ComputeDomainStatus defines the status of the ComputeDomain.
341+
type ComputeDomainStatus struct {
342+
Name string `json:"name"`
343+
Status string `json:"status,omitempty"`
344+
Nodes []ComputeDomainNodeStatus `json:"nodes,omitempty"`
345+
}
346+
347+
// ComputeDomainNodeStatus defines the status of a node in the ComputeDomain.
348+
type ComputeDomainNodeStatus struct {
349+
// Name is the name of the node.
350+
Name string `json:"name"`
351+
// CliqueID is the clique ID of the NVLink domain.
352+
CliqueID string `json:"cliqueID"`
353+
// Status tracks the readiness of the IMEX daemon running on this node.
354+
// * Ready: the IMEX daemon is ready to broker GPU memory exchanges.
355+
// * NotReady: the IMEX daemon is not ready to broker GPU memory exchanges.
356+
//
357+
// +kubebuilder:validation:Enum=Ready;NotReady
358+
// +kubebuilder:default=NotReady
359+
Status string `json:"status,omitempty"`
360+
}

api/apps/v1alpha1/nimservice_types.go

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ type NimServiceMultiNodeConfig struct {
149149

150150
// MPI config for NIMService using LeaderWorkerSet
151151
MPI *MultiNodeMPIConfig `json:"mpi,omitempty"`
152+
153+
// ComputeDomain specifies the compute domain to use for a
154+
// multi-node NIMService.
155+
ComputeDomain *ComputeDomain `json:"computeDomain,omitempty"`
152156
}
153157

154158
type ParallelismSpec struct {
@@ -183,6 +187,8 @@ type NIMServiceStatus struct {
183187
// +listType=map
184188
// +listMapKey=name
185189
DRAResourceStatuses []DRAResourceStatus `json:"draResourceStatuses,omitempty"`
190+
// ComputeDomainStatus is the status of the ComputeDomain for a multi-node NIMService.
191+
ComputeDomainStatus *ComputeDomainStatus `json:"computeDomainStatus,omitempty"`
186192
}
187193

188194
// ModelStatus defines the configuration of the NIMService model.
@@ -1898,20 +1904,60 @@ func (n *NIMService) GetInferenceServiceHPAParams() (*int32, int32, string, stri
18981904
return minReplicas, maxReplicas, metric, metricType, target
18991905
}
19001906

1907+
// IsMultiNode returns true if the NIMService is a multi-node NIMService, i.e. its pipeline parallelism is greater than 1.
1908+
func (n *NIMService) IsMultiNode() bool {
1909+
return n.GetMultiNodePipelineParallelism() > 1
1910+
}
1911+
1912+
// GetMultiNodeTensorParallelism returns the tensor parallelism size for the multi-node NIMService.
19011913
func (n *NIMService) GetMultiNodeTensorParallelism() uint32 {
19021914
if n.Spec.MultiNode != nil && n.Spec.MultiNode.Parallelism != nil && n.Spec.MultiNode.Parallelism.Tensor != nil {
19031915
return *n.Spec.MultiNode.Parallelism.Tensor
19041916
}
19051917
return 0
19061918
}
19071919

1920+
// GetMultiNodePipelineParallelism returns the pipeline parallelism size for the multi-node NIMService.
19081921
func (n *NIMService) GetMultiNodePipelineParallelism() uint32 {
1909-
if n.Spec.MultiNode != nil && n.Spec.MultiNode.Parallelism != nil && n.Spec.MultiNode.Parallelism.Tensor != nil {
1922+
if n.Spec.MultiNode != nil && n.Spec.MultiNode.Parallelism != nil && n.Spec.MultiNode.Parallelism.Pipeline != nil {
19101923
return *n.Spec.MultiNode.Parallelism.Pipeline
19111924
}
19121925
return 0
19131926
}
19141927

1928+
// IsComputeDomainEnabled returns true if the NIMService is a multi-node NIMService and a compute domain is requested.
1929+
func (n *NIMService) IsComputeDomainEnabled() bool {
1930+
if !n.IsMultiNode() {
1931+
return false
1932+
}
1933+
return n.Spec.MultiNode.ComputeDomain != nil
1934+
}
1935+
1936+
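// GetComputeDomainName returns the name of the ComputeDomain for the multi-node NIMService: the configured name when an existing ComputeDomain is referenced, the NIMService name when one is created, or "" when no compute domain is enabled.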
1937+
func (n *NIMService) GetComputeDomainName() string {
1938+
if n.IsComputeDomainEnabled() {
1939+
if n.Spec.MultiNode.ComputeDomain.Create == nil || !*n.Spec.MultiNode.ComputeDomain.Create {
1940+
return n.Spec.MultiNode.ComputeDomain.Name
1941+
}
1942+
1943+
return n.GetName()
1944+
}
1945+
1946+
return ""
1947+
}
1948+
1949+
// GetComputeDomainParams returns the parameters for rendering the ComputeDomain for the multi-node NIMService.
1950+
func (n *NIMService) GetComputeDomainParams(resourceClaimTemplateName string) *rendertypes.ComputeDomainParams {
1951+
return &rendertypes.ComputeDomainParams{
1952+
Name: n.GetName(),
1953+
Namespace: n.GetNamespace(),
1954+
Labels: n.GetServiceLabels(),
1955+
Annotations: n.GetNIMServiceAnnotations(),
1956+
NumNodes: n.GetMultiNodePipelineParallelism(),
1957+
ResourceClaimTemplateName: resourceClaimTemplateName,
1958+
}
1959+
}
1960+
19151961
func init() {
19161962
SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
19171963
}

api/apps/v1alpha1/zz_generated.deepcopy.go

Lines changed: 65 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bundle/manifests/apps.nvidia.com_nimpipelines.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2057,6 +2057,27 @@ spec:
20572057
enum:
20582058
- lws
20592059
type: string
2060+
computeDomain:
2061+
description: |-
2062+
ComputeDomain specifies the compute domain to use for a
2063+
multi-node NIMService.
2064+
properties:
2065+
create:
2066+
description: |-
2067+
Create specifies whether to create a new ComputeDomain or use an existing one.
2068+
If set to false, an existing ComputeDomain must be specified via the `Name` field.
2069+
type: boolean
2070+
name:
2071+
description: Name of the ComputeDomain to use. Required
2072+
if `Create` is false (i.e., using an existing
2073+
ComputeDomain).
2074+
type: string
2075+
type: object
2076+
x-kubernetes-validations:
2077+
- message: if create is true, name must not be set;
2078+
otherwise name is required.
2079+
rule: '(has(self.create) && self.create) ? !has(self.name)
2080+
: (has(self.name) && size(self.name) > 0)'
20602081
mpi:
20612082
description: MPI config for NIMService using LeaderWorkerSet
20622083
properties:

bundle/manifests/apps.nvidia.com_nimservices.yaml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,6 +1978,26 @@ spec:
19781978
enum:
19791979
- lws
19801980
type: string
1981+
computeDomain:
1982+
description: |-
1983+
ComputeDomain specifies the compute domain to use for a
1984+
multi-node NIMService.
1985+
properties:
1986+
create:
1987+
description: |-
1988+
Create specifies whether to create a new ComputeDomain or use an existing one.
1989+
If set to false, an existing ComputeDomain must be specified via the `Name` field.
1990+
type: boolean
1991+
name:
1992+
description: Name of the ComputeDomain to use. Required if
1993+
`Create` is false (i.e., using an existing ComputeDomain).
1994+
type: string
1995+
type: object
1996+
x-kubernetes-validations:
1997+
- message: if create is true, name must not be set; otherwise
1998+
name is required.
1999+
rule: '(has(self.create) && self.create) ? !has(self.name) :
2000+
(has(self.name) && size(self.name) > 0)'
19812001
mpi:
19822002
description: MPI config for NIMService using LeaderWorkerSet
19832003
properties:
@@ -3557,6 +3577,43 @@ spec:
35573577
availableReplicas:
35583578
format: int32
35593579
type: integer
3580+
computeDomainStatus:
3581+
description: ComputeDomainStatus is the status of the ComputeDomain
3582+
for a multi-node NIMService.
3583+
properties:
3584+
name:
3585+
type: string
3586+
nodes:
3587+
items:
3588+
description: ComputeDomainNodeStatus defines the status of a node
3589+
in the ComputeDomain.
3590+
properties:
3591+
cliqueID:
3592+
description: CliqueID is the clique ID of the NVLink domain.
3593+
type: string
3594+
name:
3595+
description: Name is the name of the node.
3596+
type: string
3597+
status:
3598+
default: NotReady
3599+
description: |-
3600+
Status tracks the readiness of the IMEX daemon running on this node.
3601+
* Ready: the IMEX daemon is ready to broker GPU memory exchanges.
3602+
* NotReady: the IMEX daemon is not ready to broker GPU memory exchanges.
3603+
enum:
3604+
- Ready
3605+
- NotReady
3606+
type: string
3607+
required:
3608+
- cliqueID
3609+
- name
3610+
type: object
3611+
type: array
3612+
status:
3613+
type: string
3614+
required:
3615+
- name
3616+
type: object
35603617
conditions:
35613618
items:
35623619
description: Condition contains details for one aspect of the current

cmd/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"os"
2323
"strconv"
2424

25+
nvidiaresourcev1beta1 "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1"
2526
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
2627
monitoring "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
2728

@@ -63,6 +64,7 @@ func init() {
6364
utilruntime.Must(lws.AddToScheme(scheme))
6465
utilruntime.Must(kservev1beta1.AddToScheme(scheme))
6566
utilruntime.Must(gatewayv1.Install(scheme))
67+
utilruntime.Must(nvidiaresourcev1beta1.AddToScheme(scheme))
6668
// +kubebuilder:scaffold:scheme
6769
}
6870

config/crd/bases/apps.nvidia.com_nimpipelines.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2057,6 +2057,27 @@ spec:
20572057
enum:
20582058
- lws
20592059
type: string
2060+
computeDomain:
2061+
description: |-
2062+
ComputeDomain specifies the compute domain to use for a
2063+
multi-node NIMService.
2064+
properties:
2065+
create:
2066+
description: |-
2067+
Create specifies whether to create a new ComputeDomain or use an existing one.
2068+
If set to false, an existing ComputeDomain must be specified via the `Name` field.
2069+
type: boolean
2070+
name:
2071+
description: Name of the ComputeDomain to use. Required
2072+
if `Create` is false (i.e., using an existing
2073+
ComputeDomain).
2074+
type: string
2075+
type: object
2076+
x-kubernetes-validations:
2077+
- message: if create is true, name must not be set;
2078+
otherwise name is required.
2079+
rule: '(has(self.create) && self.create) ? !has(self.name)
2080+
: (has(self.name) && size(self.name) > 0)'
20602081
mpi:
20612082
description: MPI config for NIMService using LeaderWorkerSet
20622083
properties:

0 commit comments

Comments
 (0)