@@ -21,6 +21,7 @@ import (
2121 "maps"
2222 "os"
2323
24+ kserveconstants "github.com/kserve/kserve/pkg/constants"
2425 monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
2526 autoscalingv2 "k8s.io/api/autoscaling/v2"
2627 corev1 "k8s.io/api/core/v1"
@@ -1011,6 +1012,279 @@ func (n *NIMService) GetProxySpec() *ProxySpec {
10111012 return n .Spec .Proxy
10121013}
10131014
1015+ // GetInferenceServiceParams returns params to render InferenceService from templates.
1016+ func (n * NIMService ) GetInferenceServiceParams (
1017+ deploymentMode kserveconstants.DeploymentModeType ) * rendertypes.InferenceServiceParams {
1018+
1019+ params := & rendertypes.InferenceServiceParams {}
1020+
1021+ // Set metadata
1022+ params .Name = n .GetName ()
1023+ params .Namespace = n .GetNamespace ()
1024+ params .Labels = n .GetServiceLabels ()
1025+ params .Annotations = n .GetNIMServiceAnnotations ()
1026+ params .PodAnnotations = n .GetNIMServiceAnnotations ()
1027+ delete (params .PodAnnotations , utils .NvidiaAnnotationParentSpecHashKey )
1028+
1029+ // Set template spec
1030+ if ! n .IsAutoScalingEnabled () || deploymentMode != kserveconstants .RawDeployment {
1031+ params .MinReplicas = int32 (n .GetReplicas ())
1032+ } else {
1033+ minReplicas , maxReplicas , metric , metricType , target := n .GetInferenceServiceHPAParams ()
1034+ if minReplicas != nil {
1035+ params .MinReplicas = * minReplicas
1036+ }
1037+ if maxReplicas > 0 {
1038+ params .MaxReplicas = maxReplicas
1039+ }
1040+ if metric != "" {
1041+ params .ScaleMetric = metric
1042+ }
1043+ if metricType != "" {
1044+ params .ScaleMetricType = metricType
1045+ }
1046+ if target > 0 {
1047+ params .ScaleTarget = target
1048+ }
1049+ }
1050+
1051+ params .NodeSelector = n .GetNodeSelector ()
1052+ params .Tolerations = n .GetTolerations ()
1053+ params .Affinity = n .GetPodAffinity ()
1054+ params .ImagePullSecrets = n .GetImagePullSecrets ()
1055+ params .ImagePullPolicy = n .GetImagePullPolicy ()
1056+
1057+ // Set labels and selectors
1058+ params .SelectorLabels = n .GetSelectorLabels ()
1059+
1060+ // Set container spec
1061+ params .ContainerName = n .GetContainerName ()
1062+ params .Env = n .GetEnv ()
1063+ params .Args = n .GetArgs ()
1064+ params .Command = n .GetCommand ()
1065+ params .Resources = n .GetResources ()
1066+ params .Image = n .GetImage ()
1067+
1068+ // Set container probes
1069+ if IsProbeEnabled (n .Spec .LivenessProbe ) {
1070+ params .LivenessProbe = n .GetInferenceServiceLivenessProbe (deploymentMode )
1071+ }
1072+ if IsProbeEnabled (n .Spec .ReadinessProbe ) {
1073+ params .ReadinessProbe = n .GetInferenceServiceReadinessProbe (deploymentMode )
1074+ }
1075+ if IsProbeEnabled (n .Spec .StartupProbe ) {
1076+ params .StartupProbe = n .GetInferenceServiceStartupProbe (deploymentMode )
1077+ }
1078+
1079+ params .UserID = n .GetUserID ()
1080+ params .GroupID = n .GetGroupID ()
1081+
1082+ // Set service account
1083+ params .ServiceAccountName = n .GetServiceAccountName ()
1084+
1085+ // Set runtime class
1086+ params .RuntimeClassName = n .GetRuntimeClassName ()
1087+
1088+ // Set scheduler
1089+ params .SchedulerName = n .GetSchedulerName ()
1090+
1091+ params .Ports = n .GetInferenceServicePorts (deploymentMode )
1092+
1093+ return params
1094+ }
1095+
1096+ // GetInferenceServiceLivenessProbe returns liveness probe for the NIMService container.
1097+ func (n * NIMService ) GetInferenceServiceLivenessProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1098+ if modeType == kserveconstants .RawDeployment {
1099+ if n .Spec .LivenessProbe .Probe == nil {
1100+ return n .GetDefaultLivenessProbe ()
1101+ }
1102+ } else {
1103+ if n .Spec .LivenessProbe .Probe == nil {
1104+ probe := & corev1.Probe {
1105+ InitialDelaySeconds : 15 ,
1106+ TimeoutSeconds : 1 ,
1107+ PeriodSeconds : 10 ,
1108+ SuccessThreshold : 1 ,
1109+ FailureThreshold : 3 ,
1110+ }
1111+ if n .Spec .Expose .Service .GRPCPort != nil {
1112+ probe .ProbeHandler = corev1.ProbeHandler {
1113+ GRPC : & corev1.GRPCAction {
1114+ Port : * n .Spec .Expose .Service .GRPCPort ,
1115+ },
1116+ }
1117+ } else {
1118+ probe .ProbeHandler = corev1.ProbeHandler {
1119+ HTTPGet : & corev1.HTTPGetAction {
1120+ Path : "/v1/health/live" ,
1121+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1122+ },
1123+ }
1124+ }
1125+ return probe
1126+ }
1127+ }
1128+
1129+ return n .Spec .LivenessProbe .Probe
1130+ }
1131+
1132+ // GetInferenceServiceReadinessProbe returns readiness probe for the NIMService container.
1133+ func (n * NIMService ) GetInferenceServiceReadinessProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1134+ if modeType == kserveconstants .RawDeployment {
1135+ if n .Spec .ReadinessProbe .Probe == nil {
1136+ return n .GetDefaultReadinessProbe ()
1137+ }
1138+ } else {
1139+ if n .Spec .ReadinessProbe .Probe == nil {
1140+ probe := & corev1.Probe {
1141+ InitialDelaySeconds : 15 ,
1142+ TimeoutSeconds : 1 ,
1143+ PeriodSeconds : 10 ,
1144+ SuccessThreshold : 1 ,
1145+ FailureThreshold : 3 ,
1146+ }
1147+ if n .Spec .Expose .Service .GRPCPort != nil {
1148+ probe .ProbeHandler = corev1.ProbeHandler {
1149+ GRPC : & corev1.GRPCAction {
1150+ Port : * n .Spec .Expose .Service .GRPCPort ,
1151+ },
1152+ }
1153+ } else {
1154+ probe .ProbeHandler = corev1.ProbeHandler {
1155+ HTTPGet : & corev1.HTTPGetAction {
1156+ Path : "/v1/health/ready" ,
1157+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1158+ },
1159+ }
1160+ }
1161+ return probe
1162+ }
1163+ }
1164+
1165+ return n .Spec .ReadinessProbe .Probe
1166+ }
1167+
1168+ // GetInferenceServiceStartupProbe returns startup probe for the NIMService container.
1169+ func (n * NIMService ) GetInferenceServiceStartupProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1170+ if modeType == kserveconstants .RawDeployment {
1171+ if n .Spec .StartupProbe .Probe == nil {
1172+ return n .GetDefaultStartupProbe ()
1173+ }
1174+ } else {
1175+ if n .Spec .StartupProbe .Probe == nil {
1176+ probe := & corev1.Probe {
1177+ InitialDelaySeconds : 30 ,
1178+ TimeoutSeconds : 1 ,
1179+ PeriodSeconds : 10 ,
1180+ SuccessThreshold : 1 ,
1181+ FailureThreshold : 30 ,
1182+ }
1183+ if n .Spec .Expose .Service .GRPCPort != nil {
1184+ probe .ProbeHandler = corev1.ProbeHandler {
1185+ GRPC : & corev1.GRPCAction {
1186+ Port : * n .Spec .Expose .Service .GRPCPort ,
1187+ },
1188+ }
1189+ } else {
1190+ probe .ProbeHandler = corev1.ProbeHandler {
1191+ HTTPGet : & corev1.HTTPGetAction {
1192+ Path : "/v1/health/ready" ,
1193+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1194+ },
1195+ }
1196+ }
1197+ return probe
1198+ }
1199+ }
1200+
1201+ return n .Spec .StartupProbe .Probe
1202+ }
1203+
1204+ // GetInferenceServicePorts returns ports for the NIMService container.
1205+ func (n * NIMService ) GetInferenceServicePorts (modeType kserveconstants.DeploymentModeType ) []corev1.ContainerPort {
1206+ ports := []corev1.ContainerPort {}
1207+
1208+ // Setup container ports for nimservice
1209+ if modeType == kserveconstants .RawDeployment {
1210+ ports = append (ports , corev1.ContainerPort {
1211+ Name : DefaultNamedPortAPI ,
1212+ Protocol : corev1 .ProtocolTCP ,
1213+ ContainerPort : * n .Spec .Expose .Service .Port ,
1214+ })
1215+ if n .Spec .Expose .Service .GRPCPort != nil {
1216+ ports = append (ports , corev1.ContainerPort {
1217+ Name : DefaultNamedPortGRPC ,
1218+ Protocol : corev1 .ProtocolTCP ,
1219+ ContainerPort : * n .Spec .Expose .Service .GRPCPort ,
1220+ })
1221+ }
1222+ if n .Spec .Expose .Service .MetricsPort != nil {
1223+ ports = append (ports , corev1.ContainerPort {
1224+ Name : DefaultNamedPortMetrics ,
1225+ Protocol : corev1 .ProtocolTCP ,
1226+ ContainerPort : * n .Spec .Expose .Service .MetricsPort ,
1227+ })
1228+ }
1229+ } else {
1230+ ports = append (ports , corev1.ContainerPort {
1231+ Protocol : corev1 .ProtocolTCP ,
1232+ ContainerPort : * n .Spec .Expose .Service .Port ,
1233+ })
1234+ if n .Spec .Expose .Service .GRPCPort != nil {
1235+ ports = append (ports , corev1.ContainerPort {
1236+ Protocol : corev1 .ProtocolTCP ,
1237+ ContainerPort : * n .Spec .Expose .Service .GRPCPort ,
1238+ })
1239+ }
1240+ }
1241+
1242+ return ports
1243+ }
1244+
1245+ // GetInferenceServiceHPAParams returns the HPA spec for the NIMService deployment.
1246+ func (n * NIMService ) GetInferenceServiceHPAParams () (* int32 , int32 , string , string , int32 ) {
1247+ hpa := n .GetHPA ()
1248+
1249+ var minReplicas * int32
1250+ var maxReplicas int32
1251+ var metric string
1252+ var metricType string
1253+ var target int32
1254+
1255+ if hpa .MinReplicas != nil {
1256+ minReplicas = hpa .MinReplicas
1257+ }
1258+ maxReplicas = hpa .MaxReplicas
1259+
1260+ for _ , m := range hpa .Metrics {
1261+ if m .Type == autoscalingv2 .ResourceMetricSourceType && m .Resource != nil {
1262+ if m .Resource .Name == corev1 .ResourceCPU || m .Resource .Name == corev1 .ResourceMemory {
1263+ metric = string (m .Resource .Name )
1264+ metricType = string (m .Resource .Target .Type )
1265+
1266+ switch m .Resource .Target .Type {
1267+ case autoscalingv2 .UtilizationMetricType :
1268+ if m .Resource .Target .AverageUtilization != nil {
1269+ target = * m .Resource .Target .AverageUtilization
1270+ }
1271+ case autoscalingv2 .ValueMetricType :
1272+ if m .Resource .Target .Value != nil {
1273+ target = int32 ((* m .Resource .Target .Value ).Value ())
1274+ }
1275+ case autoscalingv2 .AverageValueMetricType :
1276+ if m .Resource .Target .AverageValue != nil {
1277+ target = int32 ((* m .Resource .Target .AverageValue ).Value ())
1278+ }
1279+ }
1280+ break
1281+ }
1282+ }
1283+ }
1284+
1285+ return minReplicas , maxReplicas , metric , metricType , target
1286+ }
1287+
// init registers the NIMService and NIMServiceList types with the scheme
// builder so the API machinery can recognize them.
func init() {
	SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
}
0 commit comments