Skip to content

Commit bffce49

Browse files
committed
generate unique pod claim name
Signed-off-by: Varun Ramachandra Sekar <vsekar@nvidia.com>
1 parent 54e1131 commit bffce49

File tree

13 files changed

+452
-192
lines changed

13 files changed

+452
-192
lines changed

api/apps/v1alpha1/common_types.go

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -243,13 +243,9 @@ type PersistentVolumeClaim struct {
243243
// DRAResource references exactly one ResourceClaim, either directly
244244
// or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim.
245245
//
246-
// It adds a name to it that uniquely identifies the ResourceClaim.
247-
// NIMService containers that need access to the ResourceClaim will automatically reference it with this name.
246+
// When the NIMService pods are created, each DRAResource is given a generated name
247+
// (in `DNS_LABEL` format) that uniquely identifies the DRA resource.
248248
type DRAResource struct {
249-
// Name uniquely identifies this resource claim.
250-
// This must be a DNS_LABEL.
251-
Name string `json:"name"`
252-
253249
// ResourceClaimName is the name of a ResourceClaim object in the same
254250
// namespace as the NIMService.
255251
//
@@ -261,12 +257,7 @@ type DRAResource struct {
261257
// object in the same namespace as the pods for this NIMService.
262258
//
263259
// The template will be used to create a new ResourceClaim, which will
264-
// be bound to the pods created for this NIMService. When the pod is deleted,
265-
// the ResourceClaim will also be deleted. The pod name and resource name, along
266-
// with a generated component, will be used to form a unique name for the
267-
// ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses.
268-
//
269-
// Modifying this field will result in the NIMService going to Failed state.
260+
// be bound to the pods created for this NIMService.
270261
//
271262
// Exactly one of ResourceClaimName and ResourceClaimTemplateName must
272263
// be set.

api/apps/v1alpha1/nimservice_types.go

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,16 @@ type NIMServiceSpec struct {
7474
//
7575
// Note: Only traditional resources like cpu/memory and custom device plugin resources are supported here.
7676
// Any DRA claim references are ignored. Use DRAResources instead for those.
77-
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
78-
DRAResources []DRAResource `json:"draResources,omitempty"`
79-
Expose Expose `json:"expose,omitempty"`
80-
LivenessProbe Probe `json:"livenessProbe,omitempty"`
81-
ReadinessProbe Probe `json:"readinessProbe,omitempty"`
82-
StartupProbe Probe `json:"startupProbe,omitempty"`
83-
Scale Autoscaling `json:"scale,omitempty"`
84-
SchedulerName string `json:"schedulerName,omitempty"`
85-
Metrics Metrics `json:"metrics,omitempty"`
77+
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
78+
// DRAResources is the list of DRA resource claims to be used for the NIMService deployment.
79+
DRAResources []DRAResource `json:"draResources,omitempty"`
80+
Expose Expose `json:"expose,omitempty"`
81+
LivenessProbe Probe `json:"livenessProbe,omitempty"`
82+
ReadinessProbe Probe `json:"readinessProbe,omitempty"`
83+
StartupProbe Probe `json:"startupProbe,omitempty"`
84+
Scale Autoscaling `json:"scale,omitempty"`
85+
SchedulerName string `json:"schedulerName,omitempty"`
86+
Metrics Metrics `json:"metrics,omitempty"`
8687
// +kubebuilder:validation:Minimum=1
8788
// +kubebuilder:default:=1
8889
Replicas int `json:"replicas,omitempty"`
@@ -104,6 +105,10 @@ type NIMServiceStatus struct {
104105
AvailableReplicas int32 `json:"availableReplicas,omitempty"`
105106
State string `json:"state,omitempty"`
106107
Model *ModelStatus `json:"model,omitempty"`
108+
// DRAResourceStatuses is the status of the DRA resources.
109+
// +listType=map
110+
// +listMapKey=name
111+
DRAResourceStatuses []DRAResourceStatus `json:"draResourceStatuses,omitempty"`
107112
}
108113

109114
// ModelStatus defines the configuration of the NIMService model.
@@ -113,6 +118,26 @@ type ModelStatus struct {
113118
ExternalEndpoint string `json:"externalEndpoint"`
114119
}
115120

121+
// DRAResourceStatus defines the status of the DRAResource.
122+
type DRAResourceStatus struct {
123+
// Name is the generated name of the DRAResource referenced in the NIMService
124+
// pod template as `spec.resourceClaims[].name`.
125+
Name string `json:"name"`
126+
// ResourceClaimTemplateName is the name of the ResourceClaimTemplate that was
127+
// used to generate the ResourceClaim for an instance of NIMService.
128+
ResourceClaimTemplateName *string `json:"resourceClaimTemplateName,omitempty"`
129+
// ResourceClaims is the status of generated resource claims.
130+
//
131+
// This list is empty if ResourceClaimTemplateName is not set.
132+
ResourceClaims []DRAResourceClaimStatus `json:"resourceClaims,omitempty"`
133+
}
134+
135+
// DRAResourceClaimStatus defines the status of the DRAResourceClaim.
136+
type DRAResourceClaimStatus struct {
137+
// Name is the name of the ResourceClaim that was generated for a NIMService pod.
138+
Name string `json:"name"`
139+
}
140+
116141
// +genclient
117142
// +kubebuilder:object:root=true
118143
// +kubebuilder:subresource:status
@@ -731,8 +756,6 @@ func (n *NIMService) GetDeploymentParams() *rendertypes.DeploymentParams {
731756
ContainerPort: *n.Spec.Expose.Service.MetricsPort,
732757
})
733758
}
734-
735-
params.PodResourceClaims = n.GetPodResourceClaims()
736759
return params
737760
}
738761

@@ -1008,18 +1031,6 @@ func (n *NIMService) GetProxySpec() *ProxySpec {
10081031
return n.Spec.Proxy
10091032
}
10101033

1011-
func (n *NIMService) GetPodResourceClaims() []corev1.PodResourceClaim {
1012-
claims := make([]corev1.PodResourceClaim, len(n.Spec.DRAResources))
1013-
for idx, resource := range n.Spec.DRAResources {
1014-
claims[idx] = corev1.PodResourceClaim{
1015-
Name: resource.Name,
1016-
ResourceClaimName: resource.ResourceClaimName,
1017-
ResourceClaimTemplateName: resource.ResourceClaimTemplateName,
1018-
}
1019-
}
1020-
return claims
1021-
}
1022-
10231034
func init() {
10241035
SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
10251036
}

api/apps/v1alpha1/zz_generated.deepcopy.go

Lines changed: 47 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bundle/manifests/apps.nvidia.com_nimpipelines.yaml

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -102,19 +102,16 @@ spec:
102102
type: string
103103
type: array
104104
draResources:
105+
description: DRAResources is the list of DRA resource claims
106+
to be used for the NIMService deployment.
105107
items:
106108
description: |-
107109
DRAResource references exactly one ResourceClaim, either directly
108110
or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim.
109111
110-
It adds a name to it that uniquely identifies the ResourceClaim.
111-
NIMService containers that need access to the ResourceClaim will automatically reference it with this name.
112+
When the NIMService pods are created, each DRAResource is given a generated name
113+
(in `DNS_LABEL` format) that uniquely identifies the DRA resource.
112114
properties:
113-
name:
114-
description: |-
115-
Name uniquely identifies this resource claim.
116-
This must be a DNS_LABEL.
117-
type: string
118115
requests:
119116
description: |-
120117
Requests is the list of requests in the referenced ResourceClaim/ResourceClaimTemplate
@@ -139,18 +136,11 @@ spec:
139136
object in the same namespace as the pods for this NIMService.
140137
141138
The template will be used to create a new ResourceClaim, which will
142-
be bound to the pods created for this NIMService. When the pod is deleted,
143-
the ResourceClaim will also be deleted. The pod name and resource name, along
144-
with a generated component, will be used to form a unique name for the
145-
ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses.
146-
147-
Modifying this field will result in the NIMService going to Failed state.
139+
be bound to the pods created for this NIMService.
148140
149141
Exactly one of ResourceClaimName and ResourceClaimTemplateName must
150142
be set.
151143
type: string
152-
required:
153-
- name
154144
type: object
155145
type: array
156146
env:

bundle/manifests/apps.nvidia.com_nimservices.yaml

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,16 @@ spec:
6363
type: string
6464
type: array
6565
draResources:
66+
description: DRAResources is the list of DRA resource claims to be
67+
used for the NIMService deployment.
6668
items:
6769
description: |-
6870
DRAResource references exactly one ResourceClaim, either directly
6971
or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim.
7072
71-
It adds a name to it that uniquely identifies the ResourceClaim.
72-
NIMService containers that need access to the ResourceClaim will automatically reference it with this name.
73+
When the NIMService pods are created, each DRAResource is given a generated name
74+
(in `DNS_LABEL` format) that uniquely identifies the DRA resource.
7375
properties:
74-
name:
75-
description: |-
76-
Name uniquely identifies this resource claim.
77-
This must be a DNS_LABEL.
78-
type: string
7976
requests:
8077
description: |-
8178
Requests is the list of requests in the referenced ResourceClaim/ResourceClaimTemplate
@@ -100,18 +97,11 @@ spec:
10097
object in the same namespace as the pods for this NIMService.
10198
10299
The template will be used to create a new ResourceClaim, which will
103-
be bound to the pods created for this NIMService. When the pod is deleted,
104-
the ResourceClaim will also be deleted. The pod name and resource name, along
105-
with a generated component, will be used to form a unique name for the
106-
ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses.
107-
108-
Modifying this field will result in the NIMService going to Failed state.
100+
be bound to the pods created for this NIMService.
109101
110102
Exactly one of ResourceClaimName and ResourceClaimTemplateName must
111103
be set.
112104
type: string
113-
required:
114-
- name
115105
type: object
116106
type: array
117107
env:
@@ -2326,6 +2316,45 @@ spec:
23262316
- type
23272317
type: object
23282318
type: array
2319+
draResourceStatuses:
2320+
description: DRAResourceStatuses is the status of the DRA resources.
2321+
items:
2322+
description: DRAResourceStatus defines the status of the DRAResource.
2323+
properties:
2324+
name:
2325+
description: |-
2326+
Name is the generated name of the DRAResource referenced in the NIMService
2327+
pod template as `spec.resourceClaims[].name`.
2328+
type: string
2329+
resourceClaimTemplateName:
2330+
description: |-
2331+
ResourceClaimTemplateName is the name of the ResourceClaimTemplate that was
2332+
used to generate the ResourceClaim for an instance of NIMService.
2333+
type: string
2334+
resourceClaims:
2335+
description: |-
2336+
ResourceClaims is the status of generated resource claims.
2337+
2338+
This list is empty if ResourceClaimTemplateName is not set.
2339+
items:
2340+
description: DRAResourceClaimStatus defines the status of
2341+
the DRAResourceClaim.
2342+
properties:
2343+
name:
2344+
description: Name is the name of the ResourceClaim that
2345+
was generated for a NIMService pod.
2346+
type: string
2347+
required:
2348+
- name
2349+
type: object
2350+
type: array
2351+
required:
2352+
- name
2353+
type: object
2354+
type: array
2355+
x-kubernetes-list-map-keys:
2356+
- name
2357+
x-kubernetes-list-type: map
23292358
model:
23302359
description: ModelStatus defines the configuration of the NIMService
23312360
model.

config/crd/bases/apps.nvidia.com_nimpipelines.yaml

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -102,19 +102,16 @@ spec:
102102
type: string
103103
type: array
104104
draResources:
105+
description: DRAResources is the list of DRA resource claims
106+
to be used for the NIMService deployment.
105107
items:
106108
description: |-
107109
DRAResource references exactly one ResourceClaim, either directly
108110
or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim.
109111
110-
It adds a name to it that uniquely identifies the ResourceClaim.
111-
NIMService containers that need access to the ResourceClaim will automatically reference it with this name.
112+
When the NIMService pods are created, each DRAResource is given a generated name
113+
(in `DNS_LABEL` format) that uniquely identifies the DRA resource.
112114
properties:
113-
name:
114-
description: |-
115-
Name uniquely identifies this resource claim.
116-
This must be a DNS_LABEL.
117-
type: string
118115
requests:
119116
description: |-
120117
Requests is the list of requests in the referenced ResourceClaim/ResourceClaimTemplate
@@ -139,18 +136,11 @@ spec:
139136
object in the same namespace as the pods for this NIMService.
140137
141138
The template will be used to create a new ResourceClaim, which will
142-
be bound to the pods created for this NIMService. When the pod is deleted,
143-
the ResourceClaim will also be deleted. The pod name and resource name, along
144-
with a generated component, will be used to form a unique name for the
145-
ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses.
146-
147-
Modifying this field will result in the NIMService going to Failed state.
139+
be bound to the pods created for this NIMService.
148140
149141
Exactly one of ResourceClaimName and ResourceClaimTemplateName must
150142
be set.
151143
type: string
152-
required:
153-
- name
154144
type: object
155145
type: array
156146
env:

0 commit comments

Comments
 (0)