Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions api/apps/v1alpha1/common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,34 @@ type PersistentVolumeClaim struct {
// Annotations for the PVC
Annotations map[string]string `json:"annotations,omitempty"`
}

// DRAResource references exactly one ResourceClaim, either directly
// or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim.
//
// When the NIMService pods are created, each DRAResource is given a name
// (`DNS_LABEL` format) that uniquely identifies it.
type DRAResource struct {
// NOTE(review): the "exactly one of ResourceClaimName and
// ResourceClaimTemplateName" rule is stated in the field documentation only;
// no validation marker in this declaration enforces it. Confirm that it is
// validated elsewhere (for example by a webhook or by the controller).

// ResourceClaimName is the name of a ResourceClaim object in the same
// namespace as the NIMService.
//
// Exactly one of ResourceClaimName and ResourceClaimTemplateName must
// be set.
ResourceClaimName *string `json:"resourceClaimName,omitempty"`

// ResourceClaimTemplateName is the name of a ResourceClaimTemplate
// object in the same namespace as the pods for this NIMService.
//
// The template will be used to create a new ResourceClaim, which will
// be bound to the pods created for this NIMService.
//
// Exactly one of ResourceClaimName and ResourceClaimTemplateName must
// be set.
ResourceClaimTemplateName *string `json:"resourceClaimTemplateName,omitempty"`

// Requests is the list of requests in the referenced ResourceClaim/ResourceClaimTemplate
// to be made available to the model container of the NIMService pods.
//
// If empty, everything from the claim is made available; otherwise
// only the results of the listed requests are made available.
Requests []string `json:"requests,omitempty"`
}
67 changes: 52 additions & 15 deletions api/apps/v1alpha1/nimservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,26 @@ type NIMServiceSpec struct {
// The name of an existing pull secret containing the NGC_API_KEY
AuthSecret string `json:"authSecret"`
// Storage is the target storage for caching NIM model if NIMCache is not provided
Storage NIMServiceStorage `json:"storage,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
Expose Expose `json:"expose,omitempty"`
LivenessProbe Probe `json:"livenessProbe,omitempty"`
ReadinessProbe Probe `json:"readinessProbe,omitempty"`
StartupProbe Probe `json:"startupProbe,omitempty"`
Scale Autoscaling `json:"scale,omitempty"`
SchedulerName string `json:"schedulerName,omitempty"`
Metrics Metrics `json:"metrics,omitempty"`
Storage NIMServiceStorage `json:"storage,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
// Resources is the resource requirements for the NIMService deployment.
//
// Note: Only traditional resources like cpu/memory and custom device plugin resources are supported here.
// Any DRA claim references are ignored. Use DRAResources instead for those.
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
// DRAResources is the list of DRA resource claims to be used for the NIMService deployment.
DRAResources []DRAResource `json:"draResources,omitempty"`
Expose Expose `json:"expose,omitempty"`
LivenessProbe Probe `json:"livenessProbe,omitempty"`
ReadinessProbe Probe `json:"readinessProbe,omitempty"`
StartupProbe Probe `json:"startupProbe,omitempty"`
Scale Autoscaling `json:"scale,omitempty"`
SchedulerName string `json:"schedulerName,omitempty"`
Metrics Metrics `json:"metrics,omitempty"`
// +kubebuilder:validation:Minimum=1
// +kubebuilder:default:=1
Replicas int `json:"replicas,omitempty"`
Expand All @@ -99,6 +105,10 @@ type NIMServiceStatus struct {
AvailableReplicas int32 `json:"availableReplicas,omitempty"`
State string `json:"state,omitempty"`
Model *ModelStatus `json:"model,omitempty"`
// DRAResourceStatuses is the status of the DRA resources.
// +listType=map
// +listMapKey=name
DRAResourceStatuses []DRAResourceStatus `json:"draResourceStatuses,omitempty"`
}

// ModelStatus defines the configuration of the NIMService model.
Expand All @@ -108,6 +118,26 @@ type ModelStatus struct {
ExternalEndpoint string `json:"externalEndpoint"`
}

// DRAResourceStatus defines the observed status of a single DRAResource
// declared on a NIMService.
type DRAResourceStatus struct {
// Name is the generated name of the DRAResource referenced in the NIMService
// pod template as `spec.resourceClaims[].name`.
//
// It is a required field (no omitempty) and is the list-map key of
// NIMServiceStatus.DRAResourceStatuses.
Name string `json:"name"`
// ResourceClaimTemplateName is the name of the ResourceClaimTemplate that was
// used to generate the ResourceClaim for an instance of NIMService.
ResourceClaimTemplateName *string `json:"resourceClaimTemplateName,omitempty"`
// ResourceClaims is the status of the generated resource claims.
//
// This list is empty if ResourceClaimTemplateName is not set.
ResourceClaims []DRAResourceClaimStatus `json:"resourceClaims,omitempty"`
}

// DRAResourceClaimStatus defines the status of a ResourceClaim that was
// generated for a NIMService pod.
type DRAResourceClaimStatus struct {
// Name is the name of the ResourceClaim that was generated for a NIMService pod.
Name string `json:"name"`
}

// +genclient
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
Expand Down Expand Up @@ -364,7 +394,14 @@ func (n *NIMService) GetImagePullPolicy() string {

// GetResources returns the resources to allocate to the NIMService container.
//
// It returns nil when Spec.Resources is unset. Otherwise it returns a new
// ResourceRequirements carrying only the Requests and Limits of
// Spec.Resources. Claims are deliberately left out: DRA claim references
// placed under Spec.Resources are ignored, and DRA claims are declared
// through Spec.DRAResources instead.
//
// The Requests and Limits values are assigned as-is, not deep-copied.
func (n *NIMService) GetResources() *corev1.ResourceRequirements {
	res := n.Spec.Resources
	if res == nil {
		return nil
	}

	// Build a fresh value instead of returning Spec.Resources directly so
	// that any Claims set by the user never reach the container spec.
	return &corev1.ResourceRequirements{
		Requests: res.Requests,
		Limits:   res.Limits,
	}
}

// IsProbeEnabled returns true if a given liveness/readiness/startup probe is enabled.
Expand Down
84 changes: 84 additions & 0 deletions api/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 47 additions & 2 deletions bundle/manifests/apps.nvidia.com_nimpipelines.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,48 @@ spec:
items:
type: string
type: array
draResources:
description: DRAResources is the list of DRA resource claims
to be used for the NIMService deployment.
items:
description: |-
DRAResource references exactly one ResourceClaim, either directly
or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim.

When creating the NIMService pods, it adds a name (`DNS_LABEL` format) to it
that uniquely identifies the DRA resource.
properties:
requests:
description: |-
Requests is the list of requests in the referenced ResourceClaim/ResourceClaimTemplate
to be made available to the model container of the NIMService pods.

If empty, everything from the claim is made available, otherwise
only the result of this subset of requests.
items:
type: string
type: array
resourceClaimName:
description: |-
ResourceClaimName is the name of a ResourceClaim object in the same
namespace as the NIMService.

Exactly one of ResourceClaimName and ResourceClaimTemplateName must
be set.
type: string
resourceClaimTemplateName:
description: |-
ResourceClaimTemplateName is the name of a ResourceClaimTemplate
object in the same namespace as the pods for this NIMService.

The template will be used to create a new ResourceClaim, which will
be bound to the pods created for this NIMService.

Exactly one of ResourceClaimName and ResourceClaimTemplateName must
be set.
type: string
type: object
type: array
env:
items:
description: EnvVar represents an environment variable
Expand Down Expand Up @@ -1322,8 +1364,11 @@ spec:
minimum: 1
type: integer
resources:
description: ResourceRequirements describes the compute
resource requirements.
description: |-
Resources is the resource requirements for the NIMService deployment.

Note: Only traditional resources like cpu/memory and custom device plugin resources are supported here.
Any DRA claim references are ignored. Use DRAResources instead for those.
properties:
claims:
description: |-
Expand Down
Loading
Loading