Skip to content

Commit 4d4944e

Browse files
committed
Add support to configure multiple service ports
* Default named port of "api" will be added with legacy .spec.port attribute * Ability to setup multiple named ports "api", "metrics" etc. * auto detection of metrics and api ports for probes and metrics collection Signed-off-by: Shiva Krishna, Merla <smerla@nvidia.com>
1 parent 65d8272 commit 4d4944e

38 files changed

Lines changed: 696 additions & 257 deletions

api/apps/v1alpha1/common_types.go

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,21 @@ import (
2121
autoscalingv2 "k8s.io/api/autoscaling/v2"
2222
corev1 "k8s.io/api/core/v1"
2323
networkingv1 "k8s.io/api/networking/v1"
24+
"k8s.io/apimachinery/pkg/util/intstr"
25+
)
26+
27+
const (
28+
// DefaultAPIPort is the default API port
29+
DefaultAPIPort = 8000
30+
// DefaultNamedPortAPI is the default named API port
31+
DefaultNamedPortAPI = "api"
32+
// DefaultNamedPortMetrics is the default named Metrics port
33+
DefaultNamedPortMetrics = "metrics"
2434
)
2535

2636
// Expose defines attributes to expose the service
2737
type Expose struct {
28-
Service Service `json:"service,omitempty"`
38+
Service Service `json:"service"`
2939
Ingress Ingress `json:"ingress,omitempty"`
3040
}
3141

@@ -36,7 +46,6 @@ type Service struct {
3646
Name string `json:"name,omitempty"`
3747
// Deprecated: Use Ports instead.
3848
// +kubebuilder:deprecatedversion
39-
// +kubebuilder:default=8000
4049
Port int32 `json:"port,omitempty"`
4150
// Defines multiple ports for the service
4251
Ports []corev1.ServicePort `json:"ports,omitempty"`
@@ -75,9 +84,9 @@ type HorizontalPodAutoscalerSpec struct {
7584

7685
// Image defines image attributes
7786
type Image struct {
78-
Repository string `json:"repository,omitempty"`
87+
Repository string `json:"repository"`
7988
PullPolicy string `json:"pullPolicy,omitempty"`
80-
Tag string `json:"tag,omitempty"`
89+
Tag string `json:"tag"`
8190
PullSecrets []string `json:"pullSecrets,omitempty"`
8291
}
8392

@@ -115,3 +124,59 @@ type CertConfig struct {
115124
// MountPath is the path where the certificates should be mounted in the container.
116125
MountPath string `json:"mountPath"`
117126
}
127+
128+
// selectNamedPort returns the first occurrence of a given named port, or an empty string if not found.
129+
func selectNamedPort(serviceSpec Service, portNames ...string) string {
130+
for _, name := range portNames {
131+
for _, port := range serviceSpec.Ports {
132+
if port.Name == name {
133+
return name
134+
}
135+
}
136+
}
137+
return ""
138+
}
139+
140+
// getProbePort determines the appropriate port for probes based on the service spec.
141+
func getProbePort(serviceSpec Service) intstr.IntOrString {
142+
switch len(serviceSpec.Ports) {
143+
case 1:
144+
port := serviceSpec.Ports[0]
145+
if port.Name != "" {
146+
return intstr.FromString(port.Name)
147+
}
148+
return intstr.FromInt(int(port.Port))
149+
case 0:
150+
// Default to "api" as the operator always adds a default named port with 8000
151+
return intstr.FromString(DefaultNamedPortAPI)
152+
default:
153+
// Multiple ports: Prefer "api"
154+
if portName := selectNamedPort(serviceSpec, DefaultNamedPortAPI); portName != "" {
155+
return intstr.FromString(portName)
156+
}
157+
// Default when multiple ports exist
158+
return intstr.FromString(DefaultNamedPortAPI)
159+
}
160+
}
161+
162+
// getMetricsPort determines the appropriate port for metrics based on the service spec.
163+
func getMetricsPort(serviceSpec Service) intstr.IntOrString {
164+
switch len(serviceSpec.Ports) {
165+
case 1:
166+
port := serviceSpec.Ports[0]
167+
if port.Name != "" {
168+
return intstr.FromString(port.Name)
169+
}
170+
return intstr.FromInt(int(port.Port))
171+
case 0:
172+
// Default to "api" as the operator always adds a default named port with 8000
173+
return intstr.FromString(DefaultNamedPortAPI)
174+
default:
175+
// Multiple ports: Prefer "metrics", fallback to "api"
176+
if portName := selectNamedPort(serviceSpec, DefaultNamedPortMetrics, DefaultNamedPortAPI); portName != "" {
177+
return intstr.FromString(portName)
178+
}
179+
// Default when multiple ports exist
180+
return intstr.FromString(DefaultNamedPortMetrics)
181+
}
182+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
Copyright 2025.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import (
20+
"testing"
21+
22+
corev1 "k8s.io/api/core/v1"
23+
"k8s.io/apimachinery/pkg/util/intstr"
24+
)
25+
26+
// assertEqual checks if the actual result matches the expected result.
27+
func assertEqual(t *testing.T, actual, expected intstr.IntOrString) {
28+
if actual != expected {
29+
t.Errorf("Got %v, expected %v", actual, expected)
30+
}
31+
}
32+
33+
func TestGetProbePort(t *testing.T) {
34+
tests := []struct {
35+
name string
36+
serviceSpec Service
37+
expected intstr.IntOrString
38+
}{
39+
{"Single named port", Service{Ports: []corev1.ServicePort{{Name: "api", Port: 8080}}}, intstr.FromString("api")},
40+
{"Single unnamed port", Service{Ports: []corev1.ServicePort{{Port: 9090}}}, intstr.FromInt(9090)},
41+
{"Multiple ports - prefers 'api'", Service{Ports: []corev1.ServicePort{{Name: "metrics", Port: 9090}, {Name: "api", Port: 8080}}}, intstr.FromString("api")},
42+
{"No ports - uses legacy Port field", Service{Port: 7070}, intstr.FromString("api")},
43+
{"No ports at all - defaults to 'api'", Service{}, intstr.FromString("api")},
44+
}
45+
46+
for _, tt := range tests {
47+
t.Run(tt.name, func(t *testing.T) {
48+
assertEqual(t, getProbePort(tt.serviceSpec), tt.expected)
49+
})
50+
}
51+
}
52+
53+
func TestGetMetricsPort(t *testing.T) {
54+
tests := []struct {
55+
name string
56+
serviceSpec Service
57+
expected intstr.IntOrString
58+
}{
59+
{"Single named port", Service{Ports: []corev1.ServicePort{{Name: "metrics", Port: 8081}}}, intstr.FromString("metrics")},
60+
{"Single unnamed port", Service{Ports: []corev1.ServicePort{{Port: 8181}}}, intstr.FromInt(8181)},
61+
{"Multiple ports - prefers 'metrics'", Service{Ports: []corev1.ServicePort{{Name: "api", Port: 8080}, {Name: "metrics", Port: 9090}}}, intstr.FromString("metrics")},
62+
{"Multiple ports - no 'metrics', uses 'api'", Service{Ports: []corev1.ServicePort{{Name: "grpc", Port: 5050}, {Name: "api", Port: 8080}}}, intstr.FromString("api")},
63+
{"No ports - uses legacy Port field", Service{Port: 6060}, intstr.FromString("api")},
64+
{"No ports at all - defaults to 'metrics'", Service{}, intstr.FromString("api")},
65+
}
66+
67+
for _, tt := range tests {
68+
t.Run(tt.name, func(t *testing.T) {
69+
assertEqual(t, getMetricsPort(tt.serviceSpec), tt.expected)
70+
})
71+
}
72+
}

api/apps/v1alpha1/nemo_customizer_types.go

Lines changed: 49 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,13 @@ import (
3131
networkingv1 "k8s.io/api/networking/v1"
3232
rbacv1 "k8s.io/api/rbac/v1"
3333
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34-
"k8s.io/apimachinery/pkg/util/intstr"
3534
)
3635

3736
const (
37+
// CustomizerAPIPort is the default port that customizer serves on
38+
CustomizerAPIPort = 8000
39+
// CustomizerInternalPort is the default port used for syncing training progress
40+
CustomizerInternalPort = 9009
3841
// NemoCustomizerConditionReady indicates that the NEMO CustomizerService is ready.
3942
NemoCustomizerConditionReady = "Ready"
4043
// NemoCustomizerConditionFailed indicates that the NEMO CustomizerService has failed.
@@ -57,7 +60,7 @@ const (
5760

5861
// NemoCustomizerSpec defines the desired state of NemoCustomizer
5962
type NemoCustomizerSpec struct {
60-
Image Image `json:"image,omitempty"`
63+
Image Image `json:"image"`
6164
Command []string `json:"command,omitempty"`
6265
Args []string `json:"args,omitempty"`
6366
Env []corev1.EnvVar `json:"env,omitempty"`
@@ -67,7 +70,7 @@ type NemoCustomizerSpec struct {
6770
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
6871
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
6972
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
70-
Expose Expose `json:"expose,omitempty"`
73+
Expose Expose `json:"expose"`
7174
LivenessProbe Probe `json:"livenessProbe,omitempty"`
7275
ReadinessProbe Probe `json:"readinessProbe,omitempty"`
7376
StartupProbe Probe `json:"startupProbe,omitempty"`
@@ -428,18 +431,15 @@ func (n *NemoCustomizer) GetStartupProbe() *corev1.Probe {
428431
// GetDefaultStartupProbe returns the default startup probe for the NemoCustomizer container
429432
func (n *NemoCustomizer) GetDefaultStartupProbe() *corev1.Probe {
430433
probe := corev1.Probe{
431-
FailureThreshold: 5,
432-
InitialDelaySeconds: 10,
434+
FailureThreshold: 30,
435+
InitialDelaySeconds: 30,
433436
PeriodSeconds: 10,
434437
SuccessThreshold: 1,
435438
TimeoutSeconds: 1,
436439
ProbeHandler: corev1.ProbeHandler{
437440
HTTPGet: &corev1.HTTPGetAction{
438441
Path: "/v1/health/ready",
439-
Port: intstr.IntOrString{
440-
Type: intstr.Type(1),
441-
StrVal: "api",
442-
},
442+
Port: getProbePort(n.Spec.Expose.Service),
443443
},
444444
},
445445
}
@@ -466,11 +466,7 @@ func (n *NemoCustomizer) GetDefaultLivenessProbe() *corev1.Probe {
466466
ProbeHandler: corev1.ProbeHandler{
467467
HTTPGet: &corev1.HTTPGetAction{
468468
Path: "/v1/health/live",
469-
Port: intstr.IntOrString{
470-
Type: intstr.Type(1),
471-
StrVal: "api",
472-
},
473-
Scheme: "HTTP",
469+
Port: getProbePort(n.Spec.Expose.Service),
474470
},
475471
},
476472
}
@@ -496,10 +492,7 @@ func (n *NemoCustomizer) GetDefaultReadinessProbe() *corev1.Probe {
496492
ProbeHandler: corev1.ProbeHandler{
497493
HTTPGet: &corev1.HTTPGetAction{
498494
Path: "/v1/health/ready",
499-
Port: intstr.IntOrString{
500-
Type: intstr.Type(1),
501-
StrVal: "api",
502-
},
495+
Port: getProbePort(n.Spec.Expose.Service),
503496
},
504497
},
505498
}
@@ -569,19 +562,14 @@ func (n *NemoCustomizer) GetInternalServicePort() int32 {
569562
}
570563

571564
// Default to 9009 if no internal port is found
572-
return 9009
565+
return CustomizerInternalPort
573566
}
574567

575568
// GetServicePorts returns the service ports for the NemoCustomizer deployment
576569
func (n *NemoCustomizer) GetServicePorts() []corev1.ServicePort {
577570
return n.Spec.Expose.Service.Ports
578571
}
579572

580-
// GetServicePort returns the service port for the NemoCustomizer deployment
581-
func (n *NemoCustomizer) GetServicePort() int32 {
582-
return n.Spec.Expose.Service.Port
583-
}
584-
585573
// GetServiceType returns the service type for the NemoCustomizer deployment
586574
func (n *NemoCustomizer) GetServiceType() string {
587575
return string(n.Spec.Expose.Service.Type)
@@ -661,24 +649,22 @@ func (n *NemoCustomizer) GetDeploymentParams() *rendertypes.DeploymentParams {
661649
// Set runtime class
662650
params.RuntimeClassName = n.GetRuntimeClass()
663651

664-
// Extract ports from spec and update rendering params
665-
if len(n.GetServicePorts()) > 0 {
666-
var containerPorts []corev1.ContainerPort
667-
for _, svcPort := range n.GetServicePorts() {
668-
containerPorts = append(containerPorts, corev1.ContainerPort{
669-
Name: svcPort.Name,
670-
Protocol: svcPort.Protocol,
671-
ContainerPort: svcPort.Port,
672-
})
673-
}
674-
params.Ports = containerPorts
675-
} else {
676-
params.Ports = []corev1.ContainerPort{{
677-
Name: "api",
652+
// Setup container ports for customizer
653+
// TODO: set these to use defined values in the service spec
654+
// once that is allowed by the customizer through env variables
655+
containerPorts := []corev1.ContainerPort{
656+
{
657+
Name: DefaultNamedPortAPI,
678658
Protocol: corev1.ProtocolTCP,
679-
ContainerPort: n.GetServicePort(),
680-
}}
659+
ContainerPort: CustomizerAPIPort,
660+
},
661+
{
662+
Name: "internal",
663+
Protocol: corev1.ProtocolTCP,
664+
ContainerPort: CustomizerInternalPort,
665+
},
681666
}
667+
params.Ports = containerPorts
682668

683669
return params
684670
}
@@ -749,13 +735,19 @@ func (n *NemoCustomizer) GetServiceParams() *rendertypes.ServiceParams {
749735
if len(servicePorts) != 0 {
750736
params.Ports = servicePorts
751737
} else {
752-
// Use corev1.ServicePort instead of deprecated params.Port
753-
params.Ports = []corev1.ServicePort{{
754-
Name: "api",
755-
Port: 8000,
756-
TargetPort: intstr.FromInt(8000),
757-
Protocol: corev1.ProtocolTCP,
758-
}}
738+
// Set default ports
739+
params.Ports = []corev1.ServicePort{
740+
{
741+
Name: DefaultNamedPortAPI,
742+
Port: CustomizerAPIPort,
743+
Protocol: corev1.ProtocolTCP,
744+
},
745+
{
746+
Name: "internal",
747+
Port: CustomizerInternalPort,
748+
Protocol: corev1.ProtocolTCP,
749+
},
750+
}
759751
}
760752

761753
return params
@@ -892,11 +884,20 @@ func (n *NemoCustomizer) GetServiceMonitorParams() *rendertypes.ServiceMonitorPa
892884
params.Labels = svcLabels
893885
params.Annotations = n.GetServiceMonitorAnnotations()
894886

887+
// Determine the appropriate port for monitoring
888+
metricsPort := getMetricsPort(n.Spec.Expose.Service)
889+
895890
// Set Service Monitor spec
896891
smSpec := monitoringv1.ServiceMonitorSpec{
897892
NamespaceSelector: monitoringv1.NamespaceSelector{MatchNames: []string{n.Namespace}},
898893
Selector: metav1.LabelSelector{MatchLabels: n.GetServiceLabels()},
899-
Endpoints: []monitoringv1.Endpoint{{Port: "service-port", ScrapeTimeout: serviceMonitor.ScrapeTimeout, Interval: serviceMonitor.Interval}},
894+
Endpoints: []monitoringv1.Endpoint{
895+
{
896+
Port: metricsPort.StrVal,
897+
ScrapeTimeout: serviceMonitor.ScrapeTimeout,
898+
Interval: serviceMonitor.Interval,
899+
},
900+
},
900901
}
901902
params.SMSpec = smSpec
902903
return params

0 commit comments

Comments
 (0)