Skip to content

Commit d89fca0

Browse files
committed
add optional grpc,metrics ports to nimservice
Signed-off-by: Varun Ramachandra Sekar <vsekar@nvidia.com>
1 parent 5c31e6f commit d89fca0

25 files changed

Lines changed: 451 additions & 29 deletions

api/apps/v1alpha1/common_types.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,14 @@ import (
2525
)
2626

2727
const (
28-
// DefaultAPIPort is the default api port.
28+
// DefaultAPIPort is the default api port.
2929
DefaultAPIPort = 8000
3030
// DefaultNamedPortAPI is the default name for api port.
3131
DefaultNamedPortAPI = "api"
32+
// DefaultNamedPortGRPC is the default name for grpc port.
33+
DefaultNamedPortGRPC = "grpc"
34+
// DefaultNamedPortMetrics is the default name for metrics port.
35+
DefaultNamedPortMetrics = "metrics"
3236
)
3337

3438
// Expose defines attributes to expose the service.
@@ -40,13 +44,26 @@ type Expose struct {
4044
// Service defines attributes to create a service.
4145
type Service struct {
4246
Type corev1.ServiceType `json:"type,omitempty"`
43-
// override the default service name
47+
// Override the default service name
4448
Name string `json:"name,omitempty"`
4549
// Port is the main api serving port (default: 8000)
50+
//
4651
// +kubebuilder:validation:Minimum=1
4752
// +kubebuilder:validation:Maximum=65535
4853
// +kubebuilder:default:=8000
49-
Port *int32 `json:"port,omitempty"`
54+
Port *int32 `json:"port,omitempty"`
55+
// GRPCPort is the GRPC serving port
56+
// Note: This port is only applicable for NIMs that run a Triton GRPC Inference Server.
57+
//
58+
// +kubebuilder:validation:Minimum=1
59+
// +kubebuilder:validation:Maximum=65535
60+
GRPCPort *int32 `json:"grpcPort,omitempty"`
61+
// MetricsPort is the port for metrics
62+
// Note: This port is only applicable for NIMs that run a separate metrics endpoint on Triton Inference Server.
63+
//
64+
// +kubebuilder:validation:Minimum=1
65+
// +kubebuilder:validation:Maximum=65535
66+
MetricsPort *int32 `json:"metricsPort,omitempty"`
5067
Annotations map[string]string `json:"annotations,omitempty"`
5168
}
5269

api/apps/v1alpha1/nimservice_types.go

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@ import (
4040
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
4141

4242
const (
43-
// NIMAPIPort is the default port that the NIM serves on.
44-
NIMAPIPort = 8000
4543
// NIMServiceConditionReady indicates that the NIM deployment is ready.
4644
NIMServiceConditionReady = "NIM_SERVICE_READY"
4745
// NIMServiceConditionFailed indicates that the NIM deployment has failed.
@@ -199,7 +197,11 @@ func (n *NIMService) GetStandardEnv() []corev1.EnvVar {
199197
},
200198
{
201199
Name: "NIM_SERVER_PORT",
202-
Value: fmt.Sprintf("%d", NIMAPIPort),
200+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.Port),
201+
},
202+
{
203+
Name: "NIM_HTTP_API_PORT",
204+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.Port),
203205
},
204206
{
205207
Name: "NIM_JSONL_LOGGING",
@@ -210,6 +212,21 @@ func (n *NIMService) GetStandardEnv() []corev1.EnvVar {
210212
Value: "INFO",
211213
},
212214
}
215+
if n.Spec.Expose.Service.GRPCPort != nil {
216+
envVars = append(envVars, corev1.EnvVar{
217+
Name: "NIM_GRPC_API_PORT",
218+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.GRPCPort),
219+
}, corev1.EnvVar{
220+
Name: "NIM_TRITON_GRPC_PORT",
221+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.GRPCPort),
222+
})
223+
}
224+
if n.Spec.Expose.Service.MetricsPort != nil {
225+
envVars = append(envVars, corev1.EnvVar{
226+
Name: "NIM_TRITON_METRICS_PORT",
227+
Value: fmt.Sprintf("%d", *n.Spec.Expose.Service.MetricsPort),
228+
})
229+
}
213230

214231
return envVars
215232
}
@@ -681,10 +698,23 @@ func (n *NIMService) GetDeploymentParams() *rendertypes.DeploymentParams {
681698
{
682699
Name: DefaultNamedPortAPI,
683700
Protocol: corev1.ProtocolTCP,
684-
ContainerPort: NIMAPIPort,
701+
ContainerPort: *n.Spec.Expose.Service.Port,
685702
},
686703
}
687-
704+
if n.Spec.Expose.Service.GRPCPort != nil {
705+
params.Ports = append(params.Ports, corev1.ContainerPort{
706+
Name: DefaultNamedPortGRPC,
707+
Protocol: corev1.ProtocolTCP,
708+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
709+
})
710+
}
711+
if n.Spec.Expose.Service.MetricsPort != nil {
712+
params.Ports = append(params.Ports, corev1.ContainerPort{
713+
Name: DefaultNamedPortMetrics,
714+
Protocol: corev1.ProtocolTCP,
715+
ContainerPort: *n.Spec.Expose.Service.MetricsPort,
716+
})
717+
}
688718
return params
689719
}
690720

@@ -759,6 +789,23 @@ func (n *NIMService) GetServiceParams() *rendertypes.ServiceParams {
759789
Protocol: corev1.ProtocolTCP,
760790
},
761791
}
792+
if n.Spec.Expose.Service.GRPCPort != nil {
793+
params.Ports = append(params.Ports, corev1.ServicePort{
794+
Name: DefaultNamedPortGRPC,
795+
Port: *n.Spec.Expose.Service.GRPCPort,
796+
TargetPort: intstr.FromString(DefaultNamedPortGRPC),
797+
Protocol: corev1.ProtocolTCP,
798+
})
799+
}
800+
if n.Spec.Expose.Service.MetricsPort != nil {
801+
params.Ports = append(params.Ports, corev1.ServicePort{
802+
Name: DefaultNamedPortMetrics,
803+
Port: *n.Spec.Expose.Service.MetricsPort,
804+
TargetPort: intstr.FromString(DefaultNamedPortMetrics),
805+
Protocol: corev1.ProtocolTCP,
806+
})
807+
}
808+
762809
return params
763810
}
764811

@@ -885,6 +932,14 @@ func (n *NIMService) GetServiceMonitorParams() *rendertypes.ServiceMonitorParams
885932
},
886933
},
887934
}
935+
if n.Spec.Expose.Service.MetricsPort != nil {
936+
smSpec.Endpoints = append(smSpec.Endpoints, monitoringv1.Endpoint{
937+
Path: "/metrics",
938+
Port: DefaultNamedPortMetrics,
939+
ScrapeTimeout: serviceMonitor.ScrapeTimeout,
940+
Interval: serviceMonitor.Interval,
941+
})
942+
}
888943
params.SMSpec = smSpec
889944
return params
890945
}

api/apps/v1alpha1/zz_generated.deepcopy.go

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bundle/manifests/apps.nvidia.com_nemocustomizers.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,24 @@ spec:
302302
additionalProperties:
303303
type: string
304304
type: object
305+
grpcPort:
306+
description: |-
307+
GRPCPort is the GRPC serving port
308+
Note: This port is only applicable for NIMs that runs a Triton GRPC Inference Server.
309+
format: int32
310+
maximum: 65535
311+
minimum: 1
312+
type: integer
313+
metricsPort:
314+
description: |-
315+
MetricsPort is the port for metrics
316+
Note: This port is only applicable for NIMs that runs a separate metrics endpoint on Triton Inference Server.
317+
format: int32
318+
maximum: 65535
319+
minimum: 1
320+
type: integer
305321
name:
306-
description: override the default service name
322+
description: Override the default service name
307323
type: string
308324
port:
309325
default: 8000

bundle/manifests/apps.nvidia.com_nemodatastores.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,24 @@ spec:
282282
additionalProperties:
283283
type: string
284284
type: object
285+
grpcPort:
286+
description: |-
287+
GRPCPort is the GRPC serving port
288+
Note: This port is only applicable for NIMs that runs a Triton GRPC Inference Server.
289+
format: int32
290+
maximum: 65535
291+
minimum: 1
292+
type: integer
293+
metricsPort:
294+
description: |-
295+
MetricsPort is the port for metrics
296+
Note: This port is only applicable for NIMs that runs a separate metrics endpoint on Triton Inference Server.
297+
format: int32
298+
maximum: 65535
299+
minimum: 1
300+
type: integer
285301
name:
286-
description: override the default service name
302+
description: Override the default service name
287303
type: string
288304
port:
289305
default: 8000

bundle/manifests/apps.nvidia.com_nemoentitystores.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,24 @@ spec:
294294
additionalProperties:
295295
type: string
296296
type: object
297+
grpcPort:
298+
description: |-
299+
GRPCPort is the GRPC serving port
300+
Note: This port is only applicable for NIMs that runs a Triton GRPC Inference Server.
301+
format: int32
302+
maximum: 65535
303+
minimum: 1
304+
type: integer
305+
metricsPort:
306+
description: |-
307+
MetricsPort is the port for metrics
308+
Note: This port is only applicable for NIMs that runs a separate metrics endpoint on Triton Inference Server.
309+
format: int32
310+
maximum: 65535
311+
minimum: 1
312+
type: integer
297313
name:
298-
description: override the default service name
314+
description: Override the default service name
299315
type: string
300316
port:
301317
default: 8000

bundle/manifests/apps.nvidia.com_nemoevaluators.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,8 +370,24 @@ spec:
370370
additionalProperties:
371371
type: string
372372
type: object
373+
grpcPort:
374+
description: |-
375+
GRPCPort is the GRPC serving port
376+
Note: This port is only applicable for NIMs that runs a Triton GRPC Inference Server.
377+
format: int32
378+
maximum: 65535
379+
minimum: 1
380+
type: integer
381+
metricsPort:
382+
description: |-
383+
MetricsPort is the port for metrics
384+
Note: This port is only applicable for NIMs that runs a separate metrics endpoint on Triton Inference Server.
385+
format: int32
386+
maximum: 65535
387+
minimum: 1
388+
type: integer
373389
name:
374-
description: override the default service name
390+
description: Override the default service name
375391
type: string
376392
port:
377393
default: 8000

bundle/manifests/apps.nvidia.com_nemoguardrails.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,24 @@ spec:
269269
additionalProperties:
270270
type: string
271271
type: object
272+
grpcPort:
273+
description: |-
274+
GRPCPort is the GRPC serving port
275+
Note: This port is only applicable for NIMs that runs a Triton GRPC Inference Server.
276+
format: int32
277+
maximum: 65535
278+
minimum: 1
279+
type: integer
280+
metricsPort:
281+
description: |-
282+
MetricsPort is the port for metrics
283+
Note: This port is only applicable for NIMs that runs a separate metrics endpoint on Triton Inference Server.
284+
format: int32
285+
maximum: 65535
286+
minimum: 1
287+
type: integer
272288
name:
273-
description: override the default service name
289+
description: Override the default service name
274290
type: string
275291
port:
276292
default: 8000

bundle/manifests/apps.nvidia.com_nimpipelines.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,8 +522,24 @@ spec:
522522
additionalProperties:
523523
type: string
524524
type: object
525+
grpcPort:
526+
description: |-
527+
GRPCPort is the GRPC serving port
528+
Note: This port is only applicable for NIMs that runs a Triton GRPC Inference Server.
529+
format: int32
530+
maximum: 65535
531+
minimum: 1
532+
type: integer
533+
metricsPort:
534+
description: |-
535+
MetricsPort is the port for metrics
536+
Note: This port is only applicable for NIMs that runs a separate metrics endpoint on Triton Inference Server.
537+
format: int32
538+
maximum: 65535
539+
minimum: 1
540+
type: integer
525541
name:
526-
description: override the default service name
542+
description: Override the default service name
527543
type: string
528544
port:
529545
default: 8000

bundle/manifests/apps.nvidia.com_nimservices.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,24 @@ spec:
472472
additionalProperties:
473473
type: string
474474
type: object
475+
grpcPort:
476+
description: |-
477+
GRPCPort is the GRPC serving port
478+
Note: This port is only applicable for NIMs that runs a Triton GRPC Inference Server.
479+
format: int32
480+
maximum: 65535
481+
minimum: 1
482+
type: integer
483+
metricsPort:
484+
description: |-
485+
MetricsPort is the port for metrics
486+
Note: This port is only applicable for NIMs that runs a separate metrics endpoint on Triton Inference Server.
487+
format: int32
488+
maximum: 65535
489+
minimum: 1
490+
type: integer
475491
name:
476-
description: override the default service name
492+
description: Override the default service name
477493
type: string
478494
port:
479495
default: 8000

0 commit comments

Comments
 (0)