@@ -26,6 +26,7 @@ import (
2626 "maps"
2727 "os"
2828
29+ kserveconstants "github.com/kserve/kserve/pkg/constants"
2930 monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
3031 autoscalingv2 "k8s.io/api/autoscaling/v2"
3132 corev1 "k8s.io/api/core/v1"
@@ -1441,6 +1442,281 @@ func (n *NIMService) GetProxySpec() *ProxySpec {
14411442 return n .Spec .Proxy
14421443}
14431444
1445+ // GetInferenceServiceParams returns params to render InferenceService from templates.
1446+ func (n * NIMService ) GetInferenceServiceParams (
1447+ deploymentMode kserveconstants.DeploymentModeType ) * rendertypes.InferenceServiceParams {
1448+
1449+ params := & rendertypes.InferenceServiceParams {}
1450+
1451+ // Set metadata
1452+ params .Name = n .GetName ()
1453+ params .Namespace = n .GetNamespace ()
1454+ params .Labels = n .GetServiceLabels ()
1455+ params .Annotations = n .GetNIMServiceAnnotations ()
1456+ params .PodAnnotations = n .GetNIMServiceAnnotations ()
1457+ delete (params .PodAnnotations , utils .NvidiaAnnotationParentSpecHashKey )
1458+
1459+ // Set template spec
1460+ if ! n .IsAutoScalingEnabled () || deploymentMode != kserveconstants .RawDeployment {
1461+ params .MinReplicas = ptr.To [int32 ](int32 (n .GetReplicas ()))
1462+ } else {
1463+ params .Annotations [kserveconstants .AutoscalerClass ] = string (kserveconstants .AutoscalerClassHPA )
1464+
1465+ minReplicas , maxReplicas , metric , metricType , target := n .GetInferenceServiceHPAParams ()
1466+ if minReplicas != nil {
1467+ params .MinReplicas = minReplicas
1468+ }
1469+ if maxReplicas > 0 {
1470+ params .MaxReplicas = ptr.To [int32 ](maxReplicas )
1471+ }
1472+ if metric != "" {
1473+ params .ScaleMetric = metric
1474+ }
1475+ if metricType != "" {
1476+ params .ScaleMetricType = metricType
1477+ }
1478+ if target > 0 {
1479+ params .ScaleTarget = ptr .To (target )
1480+ }
1481+ }
1482+
1483+ params .NodeSelector = n .GetNodeSelector ()
1484+ params .Tolerations = n .GetTolerations ()
1485+ params .Affinity = n .GetPodAffinity ()
1486+ params .ImagePullSecrets = n .GetImagePullSecrets ()
1487+ params .ImagePullPolicy = n .GetImagePullPolicy ()
1488+
1489+ // Set labels and selectors
1490+ params .SelectorLabels = n .GetSelectorLabels ()
1491+
1492+ // Set container spec
1493+ params .ContainerName = n .GetContainerName ()
1494+ params .Env = n .GetEnv ()
1495+ params .Args = n .GetArgs ()
1496+ params .Command = n .GetCommand ()
1497+ params .Resources = n .GetResources ()
1498+ params .Image = n .GetImage ()
1499+
1500+ // Set container probes
1501+ if IsProbeEnabled (n .Spec .LivenessProbe ) {
1502+ params .LivenessProbe = n .GetInferenceServiceLivenessProbe (deploymentMode )
1503+ }
1504+ if IsProbeEnabled (n .Spec .ReadinessProbe ) {
1505+ params .ReadinessProbe = n .GetInferenceServiceReadinessProbe (deploymentMode )
1506+ }
1507+ if IsProbeEnabled (n .Spec .StartupProbe ) {
1508+ params .StartupProbe = n .GetInferenceServiceStartupProbe (deploymentMode )
1509+ }
1510+
1511+ params .UserID = n .GetUserID ()
1512+ params .GroupID = n .GetGroupID ()
1513+
1514+ // Set service account
1515+ params .ServiceAccountName = n .GetServiceAccountName ()
1516+
1517+ // Set runtime class
1518+ params .RuntimeClassName = n .GetRuntimeClassName ()
1519+
1520+ // Set scheduler
1521+ params .SchedulerName = n .GetSchedulerName ()
1522+
1523+ params .Ports = n .GetInferenceServicePorts (deploymentMode )
1524+
1525+ return params
1526+ }
1527+
1528+ // GetInferenceServiceLivenessProbe returns liveness probe for the NIMService container.
1529+ func (n * NIMService ) GetInferenceServiceLivenessProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1530+ if modeType == kserveconstants .RawDeployment {
1531+ if n .Spec .LivenessProbe .Probe == nil {
1532+ return n .GetDefaultLivenessProbe ()
1533+ }
1534+ } else {
1535+ if n .Spec .LivenessProbe .Probe == nil {
1536+ probe := & corev1.Probe {
1537+ InitialDelaySeconds : 15 ,
1538+ TimeoutSeconds : 1 ,
1539+ PeriodSeconds : 10 ,
1540+ SuccessThreshold : 1 ,
1541+ FailureThreshold : 3 ,
1542+ }
1543+ if n .Spec .Expose .Service .GRPCPort != nil {
1544+ probe .ProbeHandler = corev1.ProbeHandler {
1545+ GRPC : & corev1.GRPCAction {
1546+ Port : * n .Spec .Expose .Service .GRPCPort ,
1547+ },
1548+ }
1549+ } else {
1550+ probe .ProbeHandler = corev1.ProbeHandler {
1551+ HTTPGet : & corev1.HTTPGetAction {
1552+ Path : "/v1/health/live" ,
1553+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1554+ },
1555+ }
1556+ }
1557+ return probe
1558+ }
1559+ }
1560+
1561+ return n .Spec .LivenessProbe .Probe
1562+ }
1563+
1564+ // GetInferenceServiceReadinessProbe returns readiness probe for the NIMService container.
1565+ func (n * NIMService ) GetInferenceServiceReadinessProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1566+ if modeType == kserveconstants .RawDeployment {
1567+ if n .Spec .ReadinessProbe .Probe == nil {
1568+ return n .GetDefaultReadinessProbe ()
1569+ }
1570+ } else {
1571+ if n .Spec .ReadinessProbe .Probe == nil {
1572+ probe := & corev1.Probe {
1573+ InitialDelaySeconds : 15 ,
1574+ TimeoutSeconds : 1 ,
1575+ PeriodSeconds : 10 ,
1576+ SuccessThreshold : 1 ,
1577+ FailureThreshold : 3 ,
1578+ }
1579+ if n .Spec .Expose .Service .GRPCPort != nil {
1580+ probe .ProbeHandler = corev1.ProbeHandler {
1581+ GRPC : & corev1.GRPCAction {
1582+ Port : * n .Spec .Expose .Service .GRPCPort ,
1583+ },
1584+ }
1585+ } else {
1586+ probe .ProbeHandler = corev1.ProbeHandler {
1587+ HTTPGet : & corev1.HTTPGetAction {
1588+ Path : "/v1/health/ready" ,
1589+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1590+ },
1591+ }
1592+ }
1593+ return probe
1594+ }
1595+ }
1596+
1597+ return n .Spec .ReadinessProbe .Probe
1598+ }
1599+
1600+ // GetInferenceServiceStartupProbe returns startup probe for the NIMService container.
1601+ func (n * NIMService ) GetInferenceServiceStartupProbe (modeType kserveconstants.DeploymentModeType ) * corev1.Probe {
1602+ if modeType == kserveconstants .RawDeployment {
1603+ if n .Spec .StartupProbe .Probe == nil {
1604+ return n .GetDefaultStartupProbe ()
1605+ }
1606+ } else {
1607+ if n .Spec .StartupProbe .Probe == nil {
1608+ probe := & corev1.Probe {
1609+ InitialDelaySeconds : 30 ,
1610+ TimeoutSeconds : 1 ,
1611+ PeriodSeconds : 10 ,
1612+ SuccessThreshold : 1 ,
1613+ FailureThreshold : 30 ,
1614+ }
1615+ if n .Spec .Expose .Service .GRPCPort != nil {
1616+ probe .ProbeHandler = corev1.ProbeHandler {
1617+ GRPC : & corev1.GRPCAction {
1618+ Port : * n .Spec .Expose .Service .GRPCPort ,
1619+ },
1620+ }
1621+ } else {
1622+ probe .ProbeHandler = corev1.ProbeHandler {
1623+ HTTPGet : & corev1.HTTPGetAction {
1624+ Path : "/v1/health/ready" ,
1625+ Port : intstr .FromInt32 (* n .Spec .Expose .Service .Port ),
1626+ },
1627+ }
1628+ }
1629+ return probe
1630+ }
1631+ }
1632+
1633+ return n .Spec .StartupProbe .Probe
1634+ }
1635+
1636+ // GetInferenceServicePorts returns ports for the NIMService container.
1637+ func (n * NIMService ) GetInferenceServicePorts (modeType kserveconstants.DeploymentModeType ) []corev1.ContainerPort {
1638+ ports := []corev1.ContainerPort {}
1639+
1640+ // Setup container ports for nimservice
1641+ if modeType == kserveconstants .RawDeployment {
1642+ ports = append (ports , corev1.ContainerPort {
1643+ Name : DefaultNamedPortAPI ,
1644+ Protocol : corev1 .ProtocolTCP ,
1645+ ContainerPort : * n .Spec .Expose .Service .Port ,
1646+ })
1647+ if n .Spec .Expose .Service .GRPCPort != nil {
1648+ ports = append (ports , corev1.ContainerPort {
1649+ Name : DefaultNamedPortGRPC ,
1650+ Protocol : corev1 .ProtocolTCP ,
1651+ ContainerPort : * n .Spec .Expose .Service .GRPCPort ,
1652+ })
1653+ }
1654+ if n .Spec .Expose .Service .MetricsPort != nil {
1655+ ports = append (ports , corev1.ContainerPort {
1656+ Name : DefaultNamedPortMetrics ,
1657+ Protocol : corev1 .ProtocolTCP ,
1658+ ContainerPort : * n .Spec .Expose .Service .MetricsPort ,
1659+ })
1660+ }
1661+ } else {
1662+ ports = append (ports , corev1.ContainerPort {
1663+ Protocol : corev1 .ProtocolTCP ,
1664+ ContainerPort : * n .Spec .Expose .Service .Port ,
1665+ })
1666+ if n .Spec .Expose .Service .GRPCPort != nil {
1667+ ports = append (ports , corev1.ContainerPort {
1668+ Protocol : corev1 .ProtocolTCP ,
1669+ ContainerPort : * n .Spec .Expose .Service .GRPCPort ,
1670+ })
1671+ }
1672+ }
1673+
1674+ return ports
1675+ }
1676+
1677+ // GetInferenceServiceHPAParams returns the HPA spec for the NIMService deployment.
1678+ func (n * NIMService ) GetInferenceServiceHPAParams () (* int32 , int32 , string , string , int32 ) {
1679+ hpa := n .GetHPA ()
1680+
1681+ var minReplicas * int32
1682+ var maxReplicas int32
1683+ var metric string
1684+ var metricType string
1685+ var target int32
1686+
1687+ if hpa .MinReplicas != nil {
1688+ minReplicas = hpa .MinReplicas
1689+ }
1690+ maxReplicas = hpa .MaxReplicas
1691+
1692+ for _ , m := range hpa .Metrics {
1693+ if m .Type == autoscalingv2 .ResourceMetricSourceType && m .Resource != nil {
1694+ if m .Resource .Name == corev1 .ResourceCPU || m .Resource .Name == corev1 .ResourceMemory {
1695+ metric = string (m .Resource .Name )
1696+ metricType = string (m .Resource .Target .Type )
1697+
1698+ switch m .Resource .Target .Type {
1699+ case autoscalingv2 .UtilizationMetricType :
1700+ if m .Resource .Target .AverageUtilization != nil {
1701+ target = * m .Resource .Target .AverageUtilization
1702+ }
1703+ case autoscalingv2 .ValueMetricType :
1704+ if m .Resource .Target .Value != nil {
1705+ target = int32 ((* m .Resource .Target .Value ).Value ())
1706+ }
1707+ case autoscalingv2 .AverageValueMetricType :
1708+ if m .Resource .Target .AverageValue != nil {
1709+ target = int32 ((* m .Resource .Target .AverageValue ).Value ())
1710+ }
1711+ }
1712+ break
1713+ }
1714+ }
1715+ }
1716+
1717+ return minReplicas , maxReplicas , metric , metricType , target
1718+ }
1719+
// init registers the NIMService API types with the scheme builder so they can
// be recognized by the controller-runtime client.
func init() {
	SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
}
0 commit comments