Skip to content

Commit 5ec8359

Browse files
committed
add optional grpc,metrics ports to nimservice (#490)
Signed-off-by: Varun Ramachandra Sekar <vsekar@nvidia.com>

new port validations for nemo microservice CRs

Signed-off-by: Varun Ramachandra Sekar <vsekar@nvidia.com>

update rag pipeline sample with additional ports

Signed-off-by: Varun Ramachandra Sekar <vsekar@nvidia.com>
1 parent 1c6172f commit 5ec8359

31 files changed

Lines changed: 555 additions & 44 deletions

api/apps/v1alpha1/common_types.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,14 @@ import (
2525
)
2626

2727
const (
28-
// DefaultAPIPort is the default api port.
28+
// DefaultAPIPort is the default api port.
2929
DefaultAPIPort = 8000
3030
// DefaultNamedPortAPI is the default name for api port.
3131
DefaultNamedPortAPI = "api"
32+
// DefaultNamedPortGRPC is the default name for grpc port.
33+
DefaultNamedPortGRPC = "grpc"
34+
// DefaultNamedPortMetrics is the default name for metrics port.
35+
DefaultNamedPortMetrics = "metrics"
3236
)
3337

3438
// Expose defines attributes to expose the service.
@@ -40,13 +44,26 @@ type Expose struct {
4044
// Service defines attributes to create a service.
4145
type Service struct {
4246
Type corev1.ServiceType `json:"type,omitempty"`
43-
// override the default service name
47+
// Override the default service name
4448
Name string `json:"name,omitempty"`
4549
// Port is the main api serving port (default: 8000)
50+
//
4651
// +kubebuilder:validation:Minimum=1
4752
// +kubebuilder:validation:Maximum=65535
4853
// +kubebuilder:default:=8000
49-
Port *int32 `json:"port,omitempty"`
54+
Port *int32 `json:"port,omitempty"`
55+
// GRPCPort is the GRPC serving port
56+
// Note: This port is only applicable for NIMs that run a Triton GRPC Inference Server.
57+
//
58+
// +kubebuilder:validation:Minimum=1
59+
// +kubebuilder:validation:Maximum=65535
60+
GRPCPort *int32 `json:"grpcPort,omitempty"`
61+
// MetricsPort is the port for metrics
62+
// Note: This port is only applicable for NIMs that run a separate metrics endpoint on Triton Inference Server.
63+
//
64+
// +kubebuilder:validation:Minimum=1
65+
// +kubebuilder:validation:Maximum=65535
66+
MetricsPort *int32 `json:"metricsPort,omitempty"`
5067
Annotations map[string]string `json:"annotations,omitempty"`
5168
}
5269

api/apps/v1alpha1/nemo_customizer_types.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,11 @@ type NemoCustomizerSpec struct {
7777
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
7878
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
7979
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
80-
Expose ExposeV1 `json:"expose,omitempty"`
81-
Scale Autoscaling `json:"scale,omitempty"`
82-
Metrics Metrics `json:"metrics,omitempty"`
80+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.grpcPort))", message="unsupported field: spec.expose.service.grpcPort"
81+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.metricsPort))", message="unsupported field: spec.expose.service.metricsPort"
82+
Expose ExposeV1 `json:"expose,omitempty"`
83+
Scale Autoscaling `json:"scale,omitempty"`
84+
Metrics Metrics `json:"metrics,omitempty"`
8385

8486
// +kubebuilder:validation:Minimum=1
8587
// +kubebuilder:default:=1

api/apps/v1alpha1/nemo_datastore_types.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,11 @@ type NemoDatastoreSpec struct {
6868
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
6969
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
7070
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
71-
Expose ExposeV1 `json:"expose,omitempty"`
72-
Scale Autoscaling `json:"scale,omitempty"`
73-
Metrics Metrics `json:"metrics,omitempty"`
71+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.grpcPort))", message="unsupported field: spec.expose.service.grpcPort"
72+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.metricsPort))", message="unsupported field: spec.expose.service.metricsPort"
73+
Expose ExposeV1 `json:"expose,omitempty"`
74+
Scale Autoscaling `json:"scale,omitempty"`
75+
Metrics Metrics `json:"metrics,omitempty"`
7476
// +kubebuilder:validation:Minimum=1
7577
// +kubebuilder:default:=1
7678
Replicas int `json:"replicas,omitempty"`

api/apps/v1alpha1/nemo_entitystore_types.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,11 @@ type NemoEntitystoreSpec struct {
6969
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
7070
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
7171
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
72-
Expose ExposeV1 `json:"expose,omitempty"`
73-
Scale Autoscaling `json:"scale,omitempty"`
74-
Metrics Metrics `json:"metrics,omitempty"`
72+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.grpcPort))", message="unsupported field: spec.expose.service.grpcPort"
73+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.metricsPort))", message="unsupported field: spec.expose.service.metricsPort"
74+
Expose ExposeV1 `json:"expose,omitempty"`
75+
Scale Autoscaling `json:"scale,omitempty"`
76+
Metrics Metrics `json:"metrics,omitempty"`
7577
// +kubebuilder:validation:Minimum=1
7678
// +kubebuilder:default:=1
7779
Replicas int `json:"replicas,omitempty"`

api/apps/v1alpha1/nemo_evaluator_types.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,11 @@ type NemoEvaluatorSpec struct {
6868
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
6969
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
7070
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
71-
Expose ExposeV1 `json:"expose,omitempty"`
72-
Scale Autoscaling `json:"scale,omitempty"`
73-
Metrics Metrics `json:"metrics,omitempty"`
71+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.grpcPort))", message="unsupported field: spec.expose.service.grpcPort"
72+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.metricsPort))", message="unsupported field: spec.expose.service.metricsPort"
73+
Expose ExposeV1 `json:"expose,omitempty"`
74+
Scale Autoscaling `json:"scale,omitempty"`
75+
Metrics Metrics `json:"metrics,omitempty"`
7476
// +kubebuilder:validation:Minimum=1
7577
// +kubebuilder:default:=1
7678
Replicas int `json:"replicas,omitempty"`

api/apps/v1alpha1/nemo_guardrails_types.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,11 @@ type NemoGuardrailSpec struct {
7070
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
7171
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
7272
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
73-
Expose ExposeV1 `json:"expose,omitempty"`
74-
Scale Autoscaling `json:"scale,omitempty"`
75-
Metrics Metrics `json:"metrics,omitempty"`
73+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.grpcPort))", message="unsupported field: spec.expose.service.grpcPort"
74+
// +kubebuilder:validation:XValidation:rule="!(has(self.service.metricsPort))", message="unsupported field: spec.expose.service.metricsPort"
75+
Expose ExposeV1 `json:"expose,omitempty"`
76+
Scale Autoscaling `json:"scale,omitempty"`
77+
Metrics Metrics `json:"metrics,omitempty"`
7678
// +kubebuilder:validation:Minimum=1
7779
// +kubebuilder:default:=1
7880
Replicas int `json:"replicas,omitempty"`

api/apps/v1alpha1/nimservice_types.go

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@ import (
4040
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
4141

4242
const (
43-
// NIMAPIPort is the default port that the NIM serves on.
44-
NIMAPIPort = 8000
4543
// NIMServiceConditionReady indicates that the NIM deployment is ready.
4644
NIMServiceConditionReady = "NIM_SERVICE_READY"
4745
// NIMServiceConditionFailed indicates that the NIM deployment has failed.
@@ -199,7 +197,11 @@ func (n *NIMService) GetStandardEnv() []corev1.EnvVar {
199197
},
200198
{
201199
Name: "NIM_SERVER_PORT",
202-
Value: fmt.Sprintf("%d", NIMAPIPort),
200+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.Port),
201+
},
202+
{
203+
Name: "NIM_HTTP_API_PORT",
204+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.Port),
203205
},
204206
{
205207
Name: "NIM_JSONL_LOGGING",
@@ -210,6 +212,21 @@ func (n *NIMService) GetStandardEnv() []corev1.EnvVar {
210212
Value: "INFO",
211213
},
212214
}
215+
if n.Spec.Expose.Service.GRPCPort != nil {
216+
envVars = append(envVars, corev1.EnvVar{
217+
Name: "NIM_GRPC_API_PORT",
218+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.GRPCPort),
219+
}, corev1.EnvVar{
220+
Name: "NIM_TRITON_GRPC_PORT",
221+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.GRPCPort),
222+
})
223+
}
224+
if n.Spec.Expose.Service.MetricsPort != nil {
225+
envVars = append(envVars, corev1.EnvVar{
226+
Name: "NIM_TRITON_METRICS_PORT",
227+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.MetricsPort),
228+
})
229+
}
213230

214231
return envVars
215232
}
@@ -681,10 +698,23 @@ func (n *NIMService) GetDeploymentParams() *rendertypes.DeploymentParams {
681698
{
682699
Name: DefaultNamedPortAPI,
683700
Protocol: corev1.ProtocolTCP,
684-
ContainerPort: NIMAPIPort,
701+
ContainerPort: *n.Spec.Expose.Service.Port,
685702
},
686703
}
687-
704+
if n.Spec.Expose.Service.GRPCPort != nil {
705+
params.Ports = append(params.Ports, corev1.ContainerPort{
706+
Name: DefaultNamedPortGRPC,
707+
Protocol: corev1.ProtocolTCP,
708+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
709+
})
710+
}
711+
if n.Spec.Expose.Service.MetricsPort != nil {
712+
params.Ports = append(params.Ports, corev1.ContainerPort{
713+
Name: DefaultNamedPortMetrics,
714+
Protocol: corev1.ProtocolTCP,
715+
ContainerPort: *n.Spec.Expose.Service.MetricsPort,
716+
})
717+
}
688718
return params
689719
}
690720

@@ -759,6 +789,23 @@ func (n *NIMService) GetServiceParams() *rendertypes.ServiceParams {
759789
Protocol: corev1.ProtocolTCP,
760790
},
761791
}
792+
if n.Spec.Expose.Service.GRPCPort != nil {
793+
params.Ports = append(params.Ports, corev1.ServicePort{
794+
Name: DefaultNamedPortGRPC,
795+
Port: *n.Spec.Expose.Service.GRPCPort,
796+
TargetPort: intstr.FromString(DefaultNamedPortGRPC),
797+
Protocol: corev1.ProtocolTCP,
798+
})
799+
}
800+
if n.Spec.Expose.Service.MetricsPort != nil {
801+
params.Ports = append(params.Ports, corev1.ServicePort{
802+
Name: DefaultNamedPortMetrics,
803+
Port: *n.Spec.Expose.Service.MetricsPort,
804+
TargetPort: intstr.FromString(DefaultNamedPortMetrics),
805+
Protocol: corev1.ProtocolTCP,
806+
})
807+
}
808+
762809
return params
763810
}
764811

@@ -885,6 +932,14 @@ func (n *NIMService) GetServiceMonitorParams() *rendertypes.ServiceMonitorParams
885932
},
886933
},
887934
}
935+
if n.Spec.Expose.Service.MetricsPort != nil {
936+
smSpec.Endpoints = append(smSpec.Endpoints, monitoringv1.Endpoint{
937+
Path: "/metrics",
938+
Port: DefaultNamedPortMetrics,
939+
ScrapeTimeout: serviceMonitor.ScrapeTimeout,
940+
Interval: serviceMonitor.Interval,
941+
})
942+
}
888943
params.SMSpec = smSpec
889944
return params
890945
}

api/apps/v1alpha1/zz_generated.deepcopy.go

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bundle/manifests/apps.nvidia.com_nemocustomizers.yaml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,24 @@ spec:
302302
additionalProperties:
303303
type: string
304304
type: object
305+
grpcPort:
306+
description: |-
307+
GRPCPort is the GRPC serving port
308+
Note: This port is only applicable for NIMs that run a Triton GRPC Inference Server.
309+
format: int32
310+
maximum: 65535
311+
minimum: 1
312+
type: integer
313+
metricsPort:
314+
description: |-
315+
MetricsPort is the port for metrics
316+
Note: This port is only applicable for NIMs that run a separate metrics endpoint on Triton Inference Server.
317+
format: int32
318+
maximum: 65535
319+
minimum: 1
320+
type: integer
305321
name:
306-
description: override the default service name
322+
description: Override the default service name
307323
type: string
308324
port:
309325
default: 8000
@@ -319,6 +335,11 @@ spec:
319335
type: string
320336
type: object
321337
type: object
338+
x-kubernetes-validations:
339+
- message: 'unsupported field: spec.expose.service.grpcPort'
340+
rule: '!(has(self.service.grpcPort))'
341+
- message: 'unsupported field: spec.expose.service.metricsPort'
342+
rule: '!(has(self.service.metricsPort))'
322343
groupID:
323344
format: int64
324345
type: integer

bundle/manifests/apps.nvidia.com_nemodatastores.yaml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,24 @@ spec:
282282
additionalProperties:
283283
type: string
284284
type: object
285+
grpcPort:
286+
description: |-
287+
GRPCPort is the GRPC serving port
288+
Note: This port is only applicable for NIMs that run a Triton GRPC Inference Server.
289+
format: int32
290+
maximum: 65535
291+
minimum: 1
292+
type: integer
293+
metricsPort:
294+
description: |-
295+
MetricsPort is the port for metrics
296+
Note: This port is only applicable for NIMs that run a separate metrics endpoint on Triton Inference Server.
297+
format: int32
298+
maximum: 65535
299+
minimum: 1
300+
type: integer
285301
name:
286-
description: override the default service name
302+
description: Override the default service name
287303
type: string
288304
port:
289305
default: 8000
@@ -299,6 +315,11 @@ spec:
299315
type: string
300316
type: object
301317
type: object
318+
x-kubernetes-validations:
319+
- message: 'unsupported field: spec.expose.service.grpcPort'
320+
rule: '!(has(self.service.grpcPort))'
321+
- message: 'unsupported field: spec.expose.service.metricsPort'
322+
rule: '!(has(self.service.metricsPort))'
302323
groupID:
303324
format: int64
304325
type: integer

0 commit comments

Comments
 (0)