Skip to content

Commit 380a408

Browse files
committed
Support KServe platform for NIMService
Signed-off-by: Xieshen Zhang <xiezhang@redhat.com>
1 parent ccff6e0 commit 380a408

1,855 files changed

Lines changed: 731856 additions & 4757 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

api/apps/v1alpha1/nimservice_types.go

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"maps"
2222
"os"
2323

24+
kserveconstants "github.com/kserve/kserve/pkg/constants"
2425
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
2526
autoscalingv2 "k8s.io/api/autoscaling/v2"
2627
corev1 "k8s.io/api/core/v1"
@@ -1011,6 +1012,281 @@ func (n *NIMService) GetProxySpec() *ProxySpec {
10111012
return n.Spec.Proxy
10121013
}
10131014

1015+
// GetInferenceServiceParams returns params to render InferenceService from templates.
1016+
func (n *NIMService) GetInferenceServiceParams(
1017+
deploymentMode kserveconstants.DeploymentModeType) *rendertypes.InferenceServiceParams {
1018+
1019+
params := &rendertypes.InferenceServiceParams{}
1020+
1021+
// Set metadata
1022+
params.Name = n.GetName()
1023+
params.Namespace = n.GetNamespace()
1024+
params.Labels = n.GetServiceLabels()
1025+
params.Annotations = n.GetNIMServiceAnnotations()
1026+
params.PodAnnotations = n.GetNIMServiceAnnotations()
1027+
delete(params.PodAnnotations, utils.NvidiaAnnotationParentSpecHashKey)
1028+
1029+
// Set template spec
1030+
if !n.IsAutoScalingEnabled() || deploymentMode != kserveconstants.RawDeployment {
1031+
params.MinReplicas = ptr.To[int32](int32(n.GetReplicas()))
1032+
} else {
1033+
params.Annotations[kserveconstants.AutoscalerClass] = string(kserveconstants.AutoscalerClassHPA)
1034+
1035+
minReplicas, maxReplicas, metric, metricType, target := n.GetInferenceServiceHPAParams()
1036+
if minReplicas != nil {
1037+
params.MinReplicas = minReplicas
1038+
}
1039+
if maxReplicas > 0 {
1040+
params.MaxReplicas = ptr.To[int32](maxReplicas)
1041+
}
1042+
if metric != "" {
1043+
params.ScaleMetric = metric
1044+
}
1045+
if metricType != "" {
1046+
params.ScaleMetricType = metricType
1047+
}
1048+
if target > 0 {
1049+
params.ScaleTarget = ptr.To(target)
1050+
}
1051+
}
1052+
1053+
params.NodeSelector = n.GetNodeSelector()
1054+
params.Tolerations = n.GetTolerations()
1055+
params.Affinity = n.GetPodAffinity()
1056+
params.ImagePullSecrets = n.GetImagePullSecrets()
1057+
params.ImagePullPolicy = n.GetImagePullPolicy()
1058+
1059+
// Set labels and selectors
1060+
params.SelectorLabels = n.GetSelectorLabels()
1061+
1062+
// Set container spec
1063+
params.ContainerName = n.GetContainerName()
1064+
params.Env = n.GetEnv()
1065+
params.Args = n.GetArgs()
1066+
params.Command = n.GetCommand()
1067+
params.Resources = n.GetResources()
1068+
params.Image = n.GetImage()
1069+
1070+
// Set container probes
1071+
if IsProbeEnabled(n.Spec.LivenessProbe) {
1072+
params.LivenessProbe = n.GetInferenceServiceLivenessProbe(deploymentMode)
1073+
}
1074+
if IsProbeEnabled(n.Spec.ReadinessProbe) {
1075+
params.ReadinessProbe = n.GetInferenceServiceReadinessProbe(deploymentMode)
1076+
}
1077+
if IsProbeEnabled(n.Spec.StartupProbe) {
1078+
params.StartupProbe = n.GetInferenceServiceStartupProbe(deploymentMode)
1079+
}
1080+
1081+
params.UserID = n.GetUserID()
1082+
params.GroupID = n.GetGroupID()
1083+
1084+
// Set service account
1085+
params.ServiceAccountName = n.GetServiceAccountName()
1086+
1087+
// Set runtime class
1088+
params.RuntimeClassName = n.GetRuntimeClassName()
1089+
1090+
// Set scheduler
1091+
params.SchedulerName = n.GetSchedulerName()
1092+
1093+
params.Ports = n.GetInferenceServicePorts(deploymentMode)
1094+
1095+
return params
1096+
}
1097+
1098+
// GetInferenceServiceLivenessProbe returns liveness probe for the NIMService container.
1099+
func (n *NIMService) GetInferenceServiceLivenessProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1100+
if modeType == kserveconstants.RawDeployment {
1101+
if n.Spec.LivenessProbe.Probe == nil {
1102+
return n.GetDefaultLivenessProbe()
1103+
}
1104+
} else {
1105+
if n.Spec.LivenessProbe.Probe == nil {
1106+
probe := &corev1.Probe{
1107+
InitialDelaySeconds: 15,
1108+
TimeoutSeconds: 1,
1109+
PeriodSeconds: 10,
1110+
SuccessThreshold: 1,
1111+
FailureThreshold: 3,
1112+
}
1113+
if n.Spec.Expose.Service.GRPCPort != nil {
1114+
probe.ProbeHandler = corev1.ProbeHandler{
1115+
GRPC: &corev1.GRPCAction{
1116+
Port: *n.Spec.Expose.Service.GRPCPort,
1117+
},
1118+
}
1119+
} else {
1120+
probe.ProbeHandler = corev1.ProbeHandler{
1121+
HTTPGet: &corev1.HTTPGetAction{
1122+
Path: "/v1/health/live",
1123+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1124+
},
1125+
}
1126+
}
1127+
return probe
1128+
}
1129+
}
1130+
1131+
return n.Spec.LivenessProbe.Probe
1132+
}
1133+
1134+
// GetInferenceServiceReadinessProbe returns readiness probe for the NIMService container.
1135+
func (n *NIMService) GetInferenceServiceReadinessProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1136+
if modeType == kserveconstants.RawDeployment {
1137+
if n.Spec.ReadinessProbe.Probe == nil {
1138+
return n.GetDefaultReadinessProbe()
1139+
}
1140+
} else {
1141+
if n.Spec.ReadinessProbe.Probe == nil {
1142+
probe := &corev1.Probe{
1143+
InitialDelaySeconds: 15,
1144+
TimeoutSeconds: 1,
1145+
PeriodSeconds: 10,
1146+
SuccessThreshold: 1,
1147+
FailureThreshold: 3,
1148+
}
1149+
if n.Spec.Expose.Service.GRPCPort != nil {
1150+
probe.ProbeHandler = corev1.ProbeHandler{
1151+
GRPC: &corev1.GRPCAction{
1152+
Port: *n.Spec.Expose.Service.GRPCPort,
1153+
},
1154+
}
1155+
} else {
1156+
probe.ProbeHandler = corev1.ProbeHandler{
1157+
HTTPGet: &corev1.HTTPGetAction{
1158+
Path: "/v1/health/ready",
1159+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1160+
},
1161+
}
1162+
}
1163+
return probe
1164+
}
1165+
}
1166+
1167+
return n.Spec.ReadinessProbe.Probe
1168+
}
1169+
1170+
// GetInferenceServiceStartupProbe returns startup probe for the NIMService container.
1171+
func (n *NIMService) GetInferenceServiceStartupProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1172+
if modeType == kserveconstants.RawDeployment {
1173+
if n.Spec.StartupProbe.Probe == nil {
1174+
return n.GetDefaultStartupProbe()
1175+
}
1176+
} else {
1177+
if n.Spec.StartupProbe.Probe == nil {
1178+
probe := &corev1.Probe{
1179+
InitialDelaySeconds: 30,
1180+
TimeoutSeconds: 1,
1181+
PeriodSeconds: 10,
1182+
SuccessThreshold: 1,
1183+
FailureThreshold: 30,
1184+
}
1185+
if n.Spec.Expose.Service.GRPCPort != nil {
1186+
probe.ProbeHandler = corev1.ProbeHandler{
1187+
GRPC: &corev1.GRPCAction{
1188+
Port: *n.Spec.Expose.Service.GRPCPort,
1189+
},
1190+
}
1191+
} else {
1192+
probe.ProbeHandler = corev1.ProbeHandler{
1193+
HTTPGet: &corev1.HTTPGetAction{
1194+
Path: "/v1/health/ready",
1195+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1196+
},
1197+
}
1198+
}
1199+
return probe
1200+
}
1201+
}
1202+
1203+
return n.Spec.StartupProbe.Probe
1204+
}
1205+
1206+
// GetInferenceServicePorts returns ports for the NIMService container.
1207+
func (n *NIMService) GetInferenceServicePorts(modeType kserveconstants.DeploymentModeType) []corev1.ContainerPort {
1208+
ports := []corev1.ContainerPort{}
1209+
1210+
// Setup container ports for nimservice
1211+
if modeType == kserveconstants.RawDeployment {
1212+
ports = append(ports, corev1.ContainerPort{
1213+
Name: DefaultNamedPortAPI,
1214+
Protocol: corev1.ProtocolTCP,
1215+
ContainerPort: *n.Spec.Expose.Service.Port,
1216+
})
1217+
if n.Spec.Expose.Service.GRPCPort != nil {
1218+
ports = append(ports, corev1.ContainerPort{
1219+
Name: DefaultNamedPortGRPC,
1220+
Protocol: corev1.ProtocolTCP,
1221+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
1222+
})
1223+
}
1224+
if n.Spec.Expose.Service.MetricsPort != nil {
1225+
ports = append(ports, corev1.ContainerPort{
1226+
Name: DefaultNamedPortMetrics,
1227+
Protocol: corev1.ProtocolTCP,
1228+
ContainerPort: *n.Spec.Expose.Service.MetricsPort,
1229+
})
1230+
}
1231+
} else {
1232+
ports = append(ports, corev1.ContainerPort{
1233+
Protocol: corev1.ProtocolTCP,
1234+
ContainerPort: *n.Spec.Expose.Service.Port,
1235+
})
1236+
if n.Spec.Expose.Service.GRPCPort != nil {
1237+
ports = append(ports, corev1.ContainerPort{
1238+
Protocol: corev1.ProtocolTCP,
1239+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
1240+
})
1241+
}
1242+
}
1243+
1244+
return ports
1245+
}
1246+
1247+
// GetInferenceServiceHPAParams returns the HPA spec for the NIMService deployment.
1248+
func (n *NIMService) GetInferenceServiceHPAParams() (*int32, int32, string, string, int32) {
1249+
hpa := n.GetHPA()
1250+
1251+
var minReplicas *int32
1252+
var maxReplicas int32
1253+
var metric string
1254+
var metricType string
1255+
var target int32
1256+
1257+
if hpa.MinReplicas != nil {
1258+
minReplicas = hpa.MinReplicas
1259+
}
1260+
maxReplicas = hpa.MaxReplicas
1261+
1262+
for _, m := range hpa.Metrics {
1263+
if m.Type == autoscalingv2.ResourceMetricSourceType && m.Resource != nil {
1264+
if m.Resource.Name == corev1.ResourceCPU || m.Resource.Name == corev1.ResourceMemory {
1265+
metric = string(m.Resource.Name)
1266+
metricType = string(m.Resource.Target.Type)
1267+
1268+
switch m.Resource.Target.Type {
1269+
case autoscalingv2.UtilizationMetricType:
1270+
if m.Resource.Target.AverageUtilization != nil {
1271+
target = *m.Resource.Target.AverageUtilization
1272+
}
1273+
case autoscalingv2.ValueMetricType:
1274+
if m.Resource.Target.Value != nil {
1275+
target = int32((*m.Resource.Target.Value).Value())
1276+
}
1277+
case autoscalingv2.AverageValueMetricType:
1278+
if m.Resource.Target.AverageValue != nil {
1279+
target = int32((*m.Resource.Target.AverageValue).Value())
1280+
}
1281+
}
1282+
break
1283+
}
1284+
}
1285+
}
1286+
1287+
return minReplicas, maxReplicas, metric, metricType, target
1288+
}
1289+
10141290
// init registers the NIMService CRD types with the scheme builder so the
// controller-runtime client can encode and decode them.
func init() {
	SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
}

cmd/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ import (
2121
"flag"
2222
"os"
2323

24+
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
2425
monitoring "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
26+
2527
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
2628
// to ensure that exec-entrypoint and run can make use of them.
2729
"k8s.io/apimachinery/pkg/runtime"
@@ -55,6 +57,7 @@ func init() {
5557
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
5658
utilruntime.Must(appsv1alpha1.AddToScheme(scheme))
5759
utilruntime.Must(monitoring.AddToScheme(scheme))
60+
utilruntime.Must(kservev1beta1.AddToScheme(scheme))
5861
// +kubebuilder:scaffold:scheme
5962
}
6063

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ spec:
9090
resources:
9191
limits:
9292
cpu: 500m
93-
memory: 128Mi
93+
memory: 512Mi
9494
requests:
9595
cpu: 10m
9696
memory: 64Mi

config/rbac/role.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,18 @@ rules:
289289
- securitycontextconstraints
290290
verbs:
291291
- use
292+
- apiGroups:
293+
- serving.kserve.io
294+
resources:
295+
- inferenceservices
296+
verbs:
297+
- create
298+
- delete
299+
- get
300+
- list
301+
- patch
302+
- update
303+
- watch
292304
- apiGroups:
293305
- storage.k8s.io
294306
resources:

0 commit comments

Comments
 (0)