@@ -21,6 +21,7 @@ import (
2121 "maps"
2222 "os"
2323
24+ kserveconstants "github.com/kserve/kserve/pkg/constants"
2425 monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
2526 autoscalingv2 "k8s.io/api/autoscaling/v2"
2627 corev1 "k8s.io/api/core/v1"
@@ -1011,6 +1012,281 @@ func (n *NIMService) GetProxySpec() *ProxySpec {
10111012 return n .Spec .Proxy
10121013}
10131014
1015+ // GetInferenceServiceParams returns params to render InferenceService from templates.
1016+ func (n * NIMService ) GetInferenceServiceParams (
1017+ deploymentMode kserveconstants.DeploymentModeType ) * rendertypes.InferenceServiceParams {
1018+
1019+ params := & rendertypes.InferenceServiceParams {}
1020+
1021+ // Set metadata
1022+ params .Name = n .GetName ()
1023+ params .Namespace = n .GetNamespace ()
1024+ params .Labels = n .GetServiceLabels ()
1025+ params .Annotations = n .GetNIMServiceAnnotations ()
1026+ params .PodAnnotations = n .GetNIMServiceAnnotations ()
1027+ delete (params .PodAnnotations , utils .NvidiaAnnotationParentSpecHashKey )
1028+
1029+ // Set template spec
1030+ if ! n .IsAutoScalingEnabled () || deploymentMode != kserveconstants .RawDeployment {
1031+ params .MinReplicas = ptr.To [int32 ](int32 (n .GetReplicas ()))
1032+ } else {
1033+ params .Annotations [kserveconstants .AutoscalerClass ] = string (kserveconstants .AutoscalerClassHPA )
1034+
1035+ minReplicas , maxReplicas , metric , metricType , target := n .GetInferenceServiceHPAParams ()
1036+ if minReplicas != nil {
1037+ params .MinReplicas = minReplicas
1038+ }
1039+ if maxReplicas > 0 {
1040+ params .MaxReplicas = ptr.To [int32 ](maxReplicas )
1041+ }
1042+ if metric != "" {
1043+ params .ScaleMetric = metric
1044+ }
1045+ if metricType != "" {
1046+ params .ScaleMetricType = metricType
1047+ }
1048+ if target > 0 {
1049+ params .ScaleTarget = ptr .To (target )
1050+ }
1051+ }
1052+
1053+ params .NodeSelector = n .GetNodeSelector ()
1054+ params .Tolerations = n .GetTolerations ()
1055+ params .Affinity = n .GetPodAffinity ()
1056+ params .ImagePullSecrets = n .GetImagePullSecrets ()
1057+ params .ImagePullPolicy = n .GetImagePullPolicy ()
1058+
1059+ // Set labels and selectors
1060+ params .SelectorLabels = n .GetSelectorLabels ()
1061+
1062+ // Set container spec
1063+ params .ContainerName = n .GetContainerName ()
1064+ params .Env = n .GetEnv ()
1065+ params .Args = n .GetArgs ()
1066+ params .Command = n .GetCommand ()
1067+ params .Resources = n .GetResources ()
1068+ params .Image = n .GetImage ()
1069+
1070+ // Set container probes
1071+ if IsProbeEnabled (n .Spec .LivenessProbe ) {
1072+ params .LivenessProbe = n .GetInferenceServiceLivenessProbe (deploymentMode )
1073+ }
1074+ if IsProbeEnabled (n .Spec .ReadinessProbe ) {
1075+ params .ReadinessProbe = n .GetInferenceServiceReadinessProbe (deploymentMode )
1076+ }
1077+ if IsProbeEnabled (n .Spec .StartupProbe ) {
1078+ params .StartupProbe = n .GetInferenceServiceStartupProbe (deploymentMode )
1079+ }
1080+
1081+ params .UserID = n .GetUserID ()
1082+ params .GroupID = n .GetGroupID ()
1083+
1084+ // Set service account
1085+ params .ServiceAccountName = n .GetServiceAccountName ()
1086+
1087+ // Set runtime class
1088+ params .RuntimeClassName = n .GetRuntimeClassName ()
1089+
1090+ // Set scheduler
1091+ params .SchedulerName = n .GetSchedulerName ()
1092+
1093+ params .Ports = n .GetInferenceServicePorts (deploymentMode )
1094+
1095+ return params
1096+ }
1097+
1098+ // GetInferenceServiceLivenessProbe returns liveness probe for the NIMService container.
1099+ func (n * NIMService ) GetInferenceServiceLivenessProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1100+ if modeType == kserveconstants .RawDeployment {
1101+ if n .Spec .LivenessProbe .Probe == nil {
1102+ return n .GetDefaultLivenessProbe ()
1103+ }
1104+ } else {
1105+ if n .Spec .LivenessProbe .Probe == nil {
1106+ probe := & corev1.Probe {
1107+ InitialDelaySeconds : 15 ,
1108+ TimeoutSeconds : 1 ,
1109+ PeriodSeconds : 10 ,
1110+ SuccessThreshold : 1 ,
1111+ FailureThreshold : 3 ,
1112+ }
1113+ if n .Spec .Expose .Service .GRPCPort != nil {
1114+ probe .ProbeHandler = corev1.ProbeHandler {
1115+ GRPC : & corev1.GRPCAction {
1116+ Port : * n .Spec .Expose .Service .GRPCPort ,
1117+ },
1118+ }
1119+ } else {
1120+ probe .ProbeHandler = corev1.ProbeHandler {
1121+ HTTPGet : & corev1.HTTPGetAction {
1122+ Path : "/v1/health/live" ,
1123+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1124+ },
1125+ }
1126+ }
1127+ return probe
1128+ }
1129+ }
1130+
1131+ return n .Spec .LivenessProbe .Probe
1132+ }
1133+
1134+ // GetInferenceServiceReadinessProbe returns readiness probe for the NIMService container.
1135+ func (n * NIMService ) GetInferenceServiceReadinessProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1136+ if modeType == kserveconstants .RawDeployment {
1137+ if n .Spec .ReadinessProbe .Probe == nil {
1138+ return n .GetDefaultReadinessProbe ()
1139+ }
1140+ } else {
1141+ if n .Spec .ReadinessProbe .Probe == nil {
1142+ probe := & corev1.Probe {
1143+ InitialDelaySeconds : 15 ,
1144+ TimeoutSeconds : 1 ,
1145+ PeriodSeconds : 10 ,
1146+ SuccessThreshold : 1 ,
1147+ FailureThreshold : 3 ,
1148+ }
1149+ if n .Spec .Expose .Service .GRPCPort != nil {
1150+ probe .ProbeHandler = corev1.ProbeHandler {
1151+ GRPC : & corev1.GRPCAction {
1152+ Port : * n .Spec .Expose .Service .GRPCPort ,
1153+ },
1154+ }
1155+ } else {
1156+ probe .ProbeHandler = corev1.ProbeHandler {
1157+ HTTPGet : & corev1.HTTPGetAction {
1158+ Path : "/v1/health/ready" ,
1159+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1160+ },
1161+ }
1162+ }
1163+ return probe
1164+ }
1165+ }
1166+
1167+ return n .Spec .ReadinessProbe .Probe
1168+ }
1169+
1170+ // GetInferenceServiceStartupProbe returns startup probe for the NIMService container.
1171+ func (n * NIMService ) GetInferenceServiceStartupProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1172+ if modeType == kserveconstants .RawDeployment {
1173+ if n .Spec .StartupProbe .Probe == nil {
1174+ return n .GetDefaultStartupProbe ()
1175+ }
1176+ } else {
1177+ if n .Spec .StartupProbe .Probe == nil {
1178+ probe := & corev1.Probe {
1179+ InitialDelaySeconds : 30 ,
1180+ TimeoutSeconds : 1 ,
1181+ PeriodSeconds : 10 ,
1182+ SuccessThreshold : 1 ,
1183+ FailureThreshold : 30 ,
1184+ }
1185+ if n .Spec .Expose .Service .GRPCPort != nil {
1186+ probe .ProbeHandler = corev1.ProbeHandler {
1187+ GRPC : & corev1.GRPCAction {
1188+ Port : * n .Spec .Expose .Service .GRPCPort ,
1189+ },
1190+ }
1191+ } else {
1192+ probe .ProbeHandler = corev1.ProbeHandler {
1193+ HTTPGet : & corev1.HTTPGetAction {
1194+ Path : "/v1/health/ready" ,
1195+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1196+ },
1197+ }
1198+ }
1199+ return probe
1200+ }
1201+ }
1202+
1203+ return n .Spec .StartupProbe .Probe
1204+ }
1205+
1206+ // GetInferenceServicePorts returns ports for the NIMService container.
1207+ func (n * NIMService ) GetInferenceServicePorts (modeType kserveconstants.DeploymentModeType ) []corev1.ContainerPort {
1208+ ports := []corev1.ContainerPort {}
1209+
1210+ // Setup container ports for nimservice
1211+ if modeType == kserveconstants .RawDeployment {
1212+ ports = append (ports , corev1.ContainerPort {
1213+ Name : DefaultNamedPortAPI ,
1214+ Protocol : corev1 .ProtocolTCP ,
1215+ ContainerPort : * n .Spec .Expose .Service .Port ,
1216+ })
1217+ if n .Spec .Expose .Service .GRPCPort != nil {
1218+ ports = append (ports , corev1.ContainerPort {
1219+ Name : DefaultNamedPortGRPC ,
1220+ Protocol : corev1 .ProtocolTCP ,
1221+ ContainerPort : * n .Spec .Expose .Service .GRPCPort ,
1222+ })
1223+ }
1224+ if n .Spec .Expose .Service .MetricsPort != nil {
1225+ ports = append (ports , corev1.ContainerPort {
1226+ Name : DefaultNamedPortMetrics ,
1227+ Protocol : corev1 .ProtocolTCP ,
1228+ ContainerPort : * n .Spec .Expose .Service .MetricsPort ,
1229+ })
1230+ }
1231+ } else {
1232+ ports = append (ports , corev1.ContainerPort {
1233+ Protocol : corev1 .ProtocolTCP ,
1234+ ContainerPort : * n .Spec .Expose .Service .Port ,
1235+ })
1236+ if n .Spec .Expose .Service .GRPCPort != nil {
1237+ ports = append (ports , corev1.ContainerPort {
1238+ Protocol : corev1 .ProtocolTCP ,
1239+ ContainerPort : * n .Spec .Expose .Service .GRPCPort ,
1240+ })
1241+ }
1242+ }
1243+
1244+ return ports
1245+ }
1246+
1247+ // GetInferenceServiceHPAParams returns the HPA spec for the NIMService deployment.
1248+ func (n * NIMService ) GetInferenceServiceHPAParams () (* int32 , int32 , string , string , int32 ) {
1249+ hpa := n .GetHPA ()
1250+
1251+ var minReplicas * int32
1252+ var maxReplicas int32
1253+ var metric string
1254+ var metricType string
1255+ var target int32
1256+
1257+ if hpa .MinReplicas != nil {
1258+ minReplicas = hpa .MinReplicas
1259+ }
1260+ maxReplicas = hpa .MaxReplicas
1261+
1262+ for _ , m := range hpa .Metrics {
1263+ if m .Type == autoscalingv2 .ResourceMetricSourceType && m .Resource != nil {
1264+ if m .Resource .Name == corev1 .ResourceCPU || m .Resource .Name == corev1 .ResourceMemory {
1265+ metric = string (m .Resource .Name )
1266+ metricType = string (m .Resource .Target .Type )
1267+
1268+ switch m .Resource .Target .Type {
1269+ case autoscalingv2 .UtilizationMetricType :
1270+ if m .Resource .Target .AverageUtilization != nil {
1271+ target = * m .Resource .Target .AverageUtilization
1272+ }
1273+ case autoscalingv2 .ValueMetricType :
1274+ if m .Resource .Target .Value != nil {
1275+ target = int32 ((* m .Resource .Target .Value ).Value ())
1276+ }
1277+ case autoscalingv2 .AverageValueMetricType :
1278+ if m .Resource .Target .AverageValue != nil {
1279+ target = int32 ((* m .Resource .Target .AverageValue ).Value ())
1280+ }
1281+ }
1282+ break
1283+ }
1284+ }
1285+ }
1286+
1287+ return minReplicas , maxReplicas , metric , metricType , target
1288+ }
1289+
// init registers the NIMService and NIMServiceList types with the SchemeBuilder.
func init() {
	SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
}
0 commit comments