Skip to content

Commit b268ea7

Browse files
committed
Support KServe platform for NIMService
Signed-off-by: Xieshen Zhang <xiezhang@redhat.com>
1 parent be6a9ca commit b268ea7

1,851 files changed

Lines changed: 731665 additions & 4757 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

api/apps/v1alpha1/nimservice_types.go

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"maps"
2222
"os"
2323

24+
kserveconstants "github.com/kserve/kserve/pkg/constants"
2425
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
2526
autoscalingv2 "k8s.io/api/autoscaling/v2"
2627
corev1 "k8s.io/api/core/v1"
@@ -1011,6 +1012,279 @@ func (n *NIMService) GetProxySpec() *ProxySpec {
10111012
return n.Spec.Proxy
10121013
}
10131014

1015+
// GetInferenceServiceParams returns params to render InferenceService from templates.
1016+
func (n *NIMService) GetInferenceServiceParams(
1017+
deploymentMode kserveconstants.DeploymentModeType) *rendertypes.InferenceServiceParams {
1018+
1019+
params := &rendertypes.InferenceServiceParams{}
1020+
1021+
// Set metadata
1022+
params.Name = n.GetName()
1023+
params.Namespace = n.GetNamespace()
1024+
params.Labels = n.GetServiceLabels()
1025+
params.Annotations = n.GetNIMServiceAnnotations()
1026+
params.PodAnnotations = n.GetNIMServiceAnnotations()
1027+
delete(params.PodAnnotations, utils.NvidiaAnnotationParentSpecHashKey)
1028+
1029+
// Set template spec
1030+
if !n.IsAutoScalingEnabled() || deploymentMode != kserveconstants.RawDeployment {
1031+
params.MinReplicas = int32(n.GetReplicas())
1032+
} else {
1033+
minReplicas, maxReplicas, metric, metricType, target := n.GetInferenceServiceHPAParams()
1034+
if minReplicas != nil {
1035+
params.MinReplicas = *minReplicas
1036+
}
1037+
if maxReplicas > 0 {
1038+
params.MaxReplicas = maxReplicas
1039+
}
1040+
if metric != "" {
1041+
params.ScaleMetric = metric
1042+
}
1043+
if metricType != "" {
1044+
params.ScaleMetricType = metricType
1045+
}
1046+
if target > 0 {
1047+
params.ScaleTarget = target
1048+
}
1049+
}
1050+
1051+
params.NodeSelector = n.GetNodeSelector()
1052+
params.Tolerations = n.GetTolerations()
1053+
params.Affinity = n.GetPodAffinity()
1054+
params.ImagePullSecrets = n.GetImagePullSecrets()
1055+
params.ImagePullPolicy = n.GetImagePullPolicy()
1056+
1057+
// Set labels and selectors
1058+
params.SelectorLabels = n.GetSelectorLabels()
1059+
1060+
// Set container spec
1061+
params.ContainerName = n.GetContainerName()
1062+
params.Env = n.GetEnv()
1063+
params.Args = n.GetArgs()
1064+
params.Command = n.GetCommand()
1065+
params.Resources = n.GetResources()
1066+
params.Image = n.GetImage()
1067+
1068+
// Set container probes
1069+
if IsProbeEnabled(n.Spec.LivenessProbe) {
1070+
params.LivenessProbe = n.GetInferenceServiceLivenessProbe(deploymentMode)
1071+
}
1072+
if IsProbeEnabled(n.Spec.ReadinessProbe) {
1073+
params.ReadinessProbe = n.GetInferenceServiceReadinessProbe(deploymentMode)
1074+
}
1075+
if IsProbeEnabled(n.Spec.StartupProbe) {
1076+
params.StartupProbe = n.GetInferenceServiceStartupProbe(deploymentMode)
1077+
}
1078+
1079+
params.UserID = n.GetUserID()
1080+
params.GroupID = n.GetGroupID()
1081+
1082+
// Set service account
1083+
params.ServiceAccountName = n.GetServiceAccountName()
1084+
1085+
// Set runtime class
1086+
params.RuntimeClassName = n.GetRuntimeClassName()
1087+
1088+
// Set scheduler
1089+
params.SchedulerName = n.GetSchedulerName()
1090+
1091+
params.Ports = n.GetInferenceServicePorts(deploymentMode)
1092+
1093+
return params
1094+
}
1095+
1096+
// GetInferenceServiceLivenessProbe returns liveness probe for the NIMService container.
1097+
func (n *NIMService) GetInferenceServiceLivenessProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1098+
if modeType == kserveconstants.RawDeployment {
1099+
if n.Spec.LivenessProbe.Probe == nil {
1100+
return n.GetDefaultLivenessProbe()
1101+
}
1102+
} else {
1103+
if n.Spec.LivenessProbe.Probe == nil {
1104+
probe := &corev1.Probe{
1105+
InitialDelaySeconds: 15,
1106+
TimeoutSeconds: 1,
1107+
PeriodSeconds: 10,
1108+
SuccessThreshold: 1,
1109+
FailureThreshold: 3,
1110+
}
1111+
if n.Spec.Expose.Service.GRPCPort != nil {
1112+
probe.ProbeHandler = corev1.ProbeHandler{
1113+
GRPC: &corev1.GRPCAction{
1114+
Port: *n.Spec.Expose.Service.GRPCPort,
1115+
},
1116+
}
1117+
} else {
1118+
probe.ProbeHandler = corev1.ProbeHandler{
1119+
HTTPGet: &corev1.HTTPGetAction{
1120+
Path: "/v1/health/live",
1121+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1122+
},
1123+
}
1124+
}
1125+
return probe
1126+
}
1127+
}
1128+
1129+
return n.Spec.LivenessProbe.Probe
1130+
}
1131+
1132+
// GetInferenceServiceReadinessProbe returns readiness probe for the NIMService container.
1133+
func (n *NIMService) GetInferenceServiceReadinessProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1134+
if modeType == kserveconstants.RawDeployment {
1135+
if n.Spec.ReadinessProbe.Probe == nil {
1136+
return n.GetDefaultReadinessProbe()
1137+
}
1138+
} else {
1139+
if n.Spec.ReadinessProbe.Probe == nil {
1140+
probe := &corev1.Probe{
1141+
InitialDelaySeconds: 15,
1142+
TimeoutSeconds: 1,
1143+
PeriodSeconds: 10,
1144+
SuccessThreshold: 1,
1145+
FailureThreshold: 3,
1146+
}
1147+
if n.Spec.Expose.Service.GRPCPort != nil {
1148+
probe.ProbeHandler = corev1.ProbeHandler{
1149+
GRPC: &corev1.GRPCAction{
1150+
Port: *n.Spec.Expose.Service.GRPCPort,
1151+
},
1152+
}
1153+
} else {
1154+
probe.ProbeHandler = corev1.ProbeHandler{
1155+
HTTPGet: &corev1.HTTPGetAction{
1156+
Path: "/v1/health/ready",
1157+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1158+
},
1159+
}
1160+
}
1161+
return probe
1162+
}
1163+
}
1164+
1165+
return n.Spec.ReadinessProbe.Probe
1166+
}
1167+
1168+
// GetInferenceServiceStartupProbe returns startup probe for the NIMService container.
1169+
func (n *NIMService) GetInferenceServiceStartupProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1170+
if modeType == kserveconstants.RawDeployment {
1171+
if n.Spec.StartupProbe.Probe == nil {
1172+
return n.GetDefaultStartupProbe()
1173+
}
1174+
} else {
1175+
if n.Spec.StartupProbe.Probe == nil {
1176+
probe := &corev1.Probe{
1177+
InitialDelaySeconds: 30,
1178+
TimeoutSeconds: 1,
1179+
PeriodSeconds: 10,
1180+
SuccessThreshold: 1,
1181+
FailureThreshold: 30,
1182+
}
1183+
if n.Spec.Expose.Service.GRPCPort != nil {
1184+
probe.ProbeHandler = corev1.ProbeHandler{
1185+
GRPC: &corev1.GRPCAction{
1186+
Port: *n.Spec.Expose.Service.GRPCPort,
1187+
},
1188+
}
1189+
} else {
1190+
probe.ProbeHandler = corev1.ProbeHandler{
1191+
HTTPGet: &corev1.HTTPGetAction{
1192+
Path: "/v1/health/ready",
1193+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1194+
},
1195+
}
1196+
}
1197+
return probe
1198+
}
1199+
}
1200+
1201+
return n.Spec.StartupProbe.Probe
1202+
}
1203+
1204+
// GetInferenceServicePorts returns ports for the NIMService container.
1205+
func (n *NIMService) GetInferenceServicePorts(modeType kserveconstants.DeploymentModeType) []corev1.ContainerPort {
1206+
ports := []corev1.ContainerPort{}
1207+
1208+
// Setup container ports for nimservice
1209+
if modeType == kserveconstants.RawDeployment {
1210+
ports = append(ports, corev1.ContainerPort{
1211+
Name: DefaultNamedPortAPI,
1212+
Protocol: corev1.ProtocolTCP,
1213+
ContainerPort: *n.Spec.Expose.Service.Port,
1214+
})
1215+
if n.Spec.Expose.Service.GRPCPort != nil {
1216+
ports = append(ports, corev1.ContainerPort{
1217+
Name: DefaultNamedPortGRPC,
1218+
Protocol: corev1.ProtocolTCP,
1219+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
1220+
})
1221+
}
1222+
if n.Spec.Expose.Service.MetricsPort != nil {
1223+
ports = append(ports, corev1.ContainerPort{
1224+
Name: DefaultNamedPortMetrics,
1225+
Protocol: corev1.ProtocolTCP,
1226+
ContainerPort: *n.Spec.Expose.Service.MetricsPort,
1227+
})
1228+
}
1229+
} else {
1230+
ports = append(ports, corev1.ContainerPort{
1231+
Protocol: corev1.ProtocolTCP,
1232+
ContainerPort: *n.Spec.Expose.Service.Port,
1233+
})
1234+
if n.Spec.Expose.Service.GRPCPort != nil {
1235+
ports = append(ports, corev1.ContainerPort{
1236+
Protocol: corev1.ProtocolTCP,
1237+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
1238+
})
1239+
}
1240+
}
1241+
1242+
return ports
1243+
}
1244+
1245+
// GetInferenceServiceHPAParams returns the HPA spec for the NIMService deployment.
1246+
func (n *NIMService) GetInferenceServiceHPAParams() (*int32, int32, string, string, int32) {
1247+
hpa := n.GetHPA()
1248+
1249+
var minReplicas *int32
1250+
var maxReplicas int32
1251+
var metric string
1252+
var metricType string
1253+
var target int32
1254+
1255+
if hpa.MinReplicas != nil {
1256+
minReplicas = hpa.MinReplicas
1257+
}
1258+
maxReplicas = hpa.MaxReplicas
1259+
1260+
for _, m := range hpa.Metrics {
1261+
if m.Type == autoscalingv2.ResourceMetricSourceType && m.Resource != nil {
1262+
if m.Resource.Name == corev1.ResourceCPU || m.Resource.Name == corev1.ResourceMemory {
1263+
metric = string(m.Resource.Name)
1264+
metricType = string(m.Resource.Target.Type)
1265+
1266+
switch m.Resource.Target.Type {
1267+
case autoscalingv2.UtilizationMetricType:
1268+
if m.Resource.Target.AverageUtilization != nil {
1269+
target = *m.Resource.Target.AverageUtilization
1270+
}
1271+
case autoscalingv2.ValueMetricType:
1272+
if m.Resource.Target.Value != nil {
1273+
target = int32((*m.Resource.Target.Value).Value())
1274+
}
1275+
case autoscalingv2.AverageValueMetricType:
1276+
if m.Resource.Target.AverageValue != nil {
1277+
target = int32((*m.Resource.Target.AverageValue).Value())
1278+
}
1279+
}
1280+
break
1281+
}
1282+
}
1283+
}
1284+
1285+
return minReplicas, maxReplicas, metric, metricType, target
1286+
}
1287+
10141288
// init registers the NIMService types with the scheme builder so they can be
// recognized and (de)serialized by controller-runtime clients.
func init() {
	SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
}

cmd/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ import (
2121
"flag"
2222
"os"
2323

24+
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
2425
monitoring "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
26+
2527
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
2628
// to ensure that exec-entrypoint and run can make use of them.
2729
"k8s.io/apimachinery/pkg/runtime"
@@ -55,6 +57,7 @@ func init() {
5557
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
5658
utilruntime.Must(appsv1alpha1.AddToScheme(scheme))
5759
utilruntime.Must(monitoring.AddToScheme(scheme))
60+
utilruntime.Must(kservev1beta1.AddToScheme(scheme))
5861
// +kubebuilder:scaffold:scheme
5962
}
6063

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ spec:
9090
resources:
9191
limits:
9292
cpu: 500m
93-
memory: 128Mi
93+
memory: 512Mi
9494
requests:
9595
cpu: 10m
9696
memory: 64Mi

config/rbac/role.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,18 @@ rules:
286286
- securitycontextconstraints
287287
verbs:
288288
- use
289+
- apiGroups:
290+
- serving.kserve.io
291+
resources:
292+
- inferenceservices
293+
verbs:
294+
- create
295+
- delete
296+
- get
297+
- list
298+
- patch
299+
- update
300+
- watch
289301
- apiGroups:
290302
- storage.k8s.io
291303
resources:

0 commit comments

Comments
 (0)