Skip to content

Commit 4dcedf0

Browse files
xieshenzh and shivamerla
authored and committed
Support KServe platform for NIMService
Signed-off-by: Xieshen Zhang <xiezhang@redhat.com>
1 parent 0685bfd commit 4dcedf0

File tree

1,786 files changed

+728043
-3714
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,786 files changed

+728043
-3714
lines changed

api/apps/v1alpha1/nimservice_types.go

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"maps"
2727
"os"
2828

29+
kserveconstants "github.com/kserve/kserve/pkg/constants"
2930
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
3031
autoscalingv2 "k8s.io/api/autoscaling/v2"
3132
corev1 "k8s.io/api/core/v1"
@@ -1441,6 +1442,281 @@ func (n *NIMService) GetProxySpec() *ProxySpec {
14411442
return n.Spec.Proxy
14421443
}
14431444

1445+
// GetInferenceServiceParams returns params to render InferenceService from templates.
1446+
func (n *NIMService) GetInferenceServiceParams(
1447+
deploymentMode kserveconstants.DeploymentModeType) *rendertypes.InferenceServiceParams {
1448+
1449+
params := &rendertypes.InferenceServiceParams{}
1450+
1451+
// Set metadata
1452+
params.Name = n.GetName()
1453+
params.Namespace = n.GetNamespace()
1454+
params.Labels = n.GetServiceLabels()
1455+
params.Annotations = n.GetNIMServiceAnnotations()
1456+
params.PodAnnotations = n.GetNIMServiceAnnotations()
1457+
delete(params.PodAnnotations, utils.NvidiaAnnotationParentSpecHashKey)
1458+
1459+
// Set template spec
1460+
if !n.IsAutoScalingEnabled() || deploymentMode != kserveconstants.RawDeployment {
1461+
params.MinReplicas = ptr.To[int32](int32(n.GetReplicas()))
1462+
} else {
1463+
params.Annotations[kserveconstants.AutoscalerClass] = string(kserveconstants.AutoscalerClassHPA)
1464+
1465+
minReplicas, maxReplicas, metric, metricType, target := n.GetInferenceServiceHPAParams()
1466+
if minReplicas != nil {
1467+
params.MinReplicas = minReplicas
1468+
}
1469+
if maxReplicas > 0 {
1470+
params.MaxReplicas = ptr.To[int32](maxReplicas)
1471+
}
1472+
if metric != "" {
1473+
params.ScaleMetric = metric
1474+
}
1475+
if metricType != "" {
1476+
params.ScaleMetricType = metricType
1477+
}
1478+
if target > 0 {
1479+
params.ScaleTarget = ptr.To(target)
1480+
}
1481+
}
1482+
1483+
params.NodeSelector = n.GetNodeSelector()
1484+
params.Tolerations = n.GetTolerations()
1485+
params.Affinity = n.GetPodAffinity()
1486+
params.ImagePullSecrets = n.GetImagePullSecrets()
1487+
params.ImagePullPolicy = n.GetImagePullPolicy()
1488+
1489+
// Set labels and selectors
1490+
params.SelectorLabels = n.GetSelectorLabels()
1491+
1492+
// Set container spec
1493+
params.ContainerName = n.GetContainerName()
1494+
params.Env = n.GetEnv()
1495+
params.Args = n.GetArgs()
1496+
params.Command = n.GetCommand()
1497+
params.Resources = n.GetResources()
1498+
params.Image = n.GetImage()
1499+
1500+
// Set container probes
1501+
if IsProbeEnabled(n.Spec.LivenessProbe) {
1502+
params.LivenessProbe = n.GetInferenceServiceLivenessProbe(deploymentMode)
1503+
}
1504+
if IsProbeEnabled(n.Spec.ReadinessProbe) {
1505+
params.ReadinessProbe = n.GetInferenceServiceReadinessProbe(deploymentMode)
1506+
}
1507+
if IsProbeEnabled(n.Spec.StartupProbe) {
1508+
params.StartupProbe = n.GetInferenceServiceStartupProbe(deploymentMode)
1509+
}
1510+
1511+
params.UserID = n.GetUserID()
1512+
params.GroupID = n.GetGroupID()
1513+
1514+
// Set service account
1515+
params.ServiceAccountName = n.GetServiceAccountName()
1516+
1517+
// Set runtime class
1518+
params.RuntimeClassName = n.GetRuntimeClassName()
1519+
1520+
// Set scheduler
1521+
params.SchedulerName = n.GetSchedulerName()
1522+
1523+
params.Ports = n.GetInferenceServicePorts(deploymentMode)
1524+
1525+
return params
1526+
}
1527+
1528+
// GetInferenceServiceLivenessProbe returns liveness probe for the NIMService container.
1529+
func (n *NIMService) GetInferenceServiceLivenessProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1530+
if modeType == kserveconstants.RawDeployment {
1531+
if n.Spec.LivenessProbe.Probe == nil {
1532+
return n.GetDefaultLivenessProbe()
1533+
}
1534+
} else {
1535+
if n.Spec.LivenessProbe.Probe == nil {
1536+
probe := &corev1.Probe{
1537+
InitialDelaySeconds: 15,
1538+
TimeoutSeconds: 1,
1539+
PeriodSeconds: 10,
1540+
SuccessThreshold: 1,
1541+
FailureThreshold: 3,
1542+
}
1543+
if n.Spec.Expose.Service.GRPCPort != nil {
1544+
probe.ProbeHandler = corev1.ProbeHandler{
1545+
GRPC: &corev1.GRPCAction{
1546+
Port: *n.Spec.Expose.Service.GRPCPort,
1547+
},
1548+
}
1549+
} else {
1550+
probe.ProbeHandler = corev1.ProbeHandler{
1551+
HTTPGet: &corev1.HTTPGetAction{
1552+
Path: "/v1/health/live",
1553+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1554+
},
1555+
}
1556+
}
1557+
return probe
1558+
}
1559+
}
1560+
1561+
return n.Spec.LivenessProbe.Probe
1562+
}
1563+
1564+
// GetInferenceServiceReadinessProbe returns readiness probe for the NIMService container.
1565+
func (n *NIMService) GetInferenceServiceReadinessProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1566+
if modeType == kserveconstants.RawDeployment {
1567+
if n.Spec.ReadinessProbe.Probe == nil {
1568+
return n.GetDefaultReadinessProbe()
1569+
}
1570+
} else {
1571+
if n.Spec.ReadinessProbe.Probe == nil {
1572+
probe := &corev1.Probe{
1573+
InitialDelaySeconds: 15,
1574+
TimeoutSeconds: 1,
1575+
PeriodSeconds: 10,
1576+
SuccessThreshold: 1,
1577+
FailureThreshold: 3,
1578+
}
1579+
if n.Spec.Expose.Service.GRPCPort != nil {
1580+
probe.ProbeHandler = corev1.ProbeHandler{
1581+
GRPC: &corev1.GRPCAction{
1582+
Port: *n.Spec.Expose.Service.GRPCPort,
1583+
},
1584+
}
1585+
} else {
1586+
probe.ProbeHandler = corev1.ProbeHandler{
1587+
HTTPGet: &corev1.HTTPGetAction{
1588+
Path: "/v1/health/ready",
1589+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1590+
},
1591+
}
1592+
}
1593+
return probe
1594+
}
1595+
}
1596+
1597+
return n.Spec.ReadinessProbe.Probe
1598+
}
1599+
1600+
// GetInferenceServiceStartupProbe returns startup probe for the NIMService container.
1601+
func (n *NIMService) GetInferenceServiceStartupProbe(modeType kserveconstants.DeploymentModeType) *corev1.Probe {
1602+
if modeType == kserveconstants.RawDeployment {
1603+
if n.Spec.StartupProbe.Probe == nil {
1604+
return n.GetDefaultStartupProbe()
1605+
}
1606+
} else {
1607+
if n.Spec.StartupProbe.Probe == nil {
1608+
probe := &corev1.Probe{
1609+
InitialDelaySeconds: 30,
1610+
TimeoutSeconds: 1,
1611+
PeriodSeconds: 10,
1612+
SuccessThreshold: 1,
1613+
FailureThreshold: 30,
1614+
}
1615+
if n.Spec.Expose.Service.GRPCPort != nil {
1616+
probe.ProbeHandler = corev1.ProbeHandler{
1617+
GRPC: &corev1.GRPCAction{
1618+
Port: *n.Spec.Expose.Service.GRPCPort,
1619+
},
1620+
}
1621+
} else {
1622+
probe.ProbeHandler = corev1.ProbeHandler{
1623+
HTTPGet: &corev1.HTTPGetAction{
1624+
Path: "/v1/health/ready",
1625+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1626+
},
1627+
}
1628+
}
1629+
return probe
1630+
}
1631+
}
1632+
1633+
return n.Spec.StartupProbe.Probe
1634+
}
1635+
1636+
// GetInferenceServicePorts returns ports for the NIMService container.
1637+
func (n *NIMService) GetInferenceServicePorts(modeType kserveconstants.DeploymentModeType) []corev1.ContainerPort {
1638+
ports := []corev1.ContainerPort{}
1639+
1640+
// Setup container ports for nimservice
1641+
if modeType == kserveconstants.RawDeployment {
1642+
ports = append(ports, corev1.ContainerPort{
1643+
Name: DefaultNamedPortAPI,
1644+
Protocol: corev1.ProtocolTCP,
1645+
ContainerPort: *n.Spec.Expose.Service.Port,
1646+
})
1647+
if n.Spec.Expose.Service.GRPCPort != nil {
1648+
ports = append(ports, corev1.ContainerPort{
1649+
Name: DefaultNamedPortGRPC,
1650+
Protocol: corev1.ProtocolTCP,
1651+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
1652+
})
1653+
}
1654+
if n.Spec.Expose.Service.MetricsPort != nil {
1655+
ports = append(ports, corev1.ContainerPort{
1656+
Name: DefaultNamedPortMetrics,
1657+
Protocol: corev1.ProtocolTCP,
1658+
ContainerPort: *n.Spec.Expose.Service.MetricsPort,
1659+
})
1660+
}
1661+
} else {
1662+
ports = append(ports, corev1.ContainerPort{
1663+
Protocol: corev1.ProtocolTCP,
1664+
ContainerPort: *n.Spec.Expose.Service.Port,
1665+
})
1666+
if n.Spec.Expose.Service.GRPCPort != nil {
1667+
ports = append(ports, corev1.ContainerPort{
1668+
Protocol: corev1.ProtocolTCP,
1669+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
1670+
})
1671+
}
1672+
}
1673+
1674+
return ports
1675+
}
1676+
1677+
// GetInferenceServiceHPAParams returns the HPA spec for the NIMService deployment.
1678+
func (n *NIMService) GetInferenceServiceHPAParams() (*int32, int32, string, string, int32) {
1679+
hpa := n.GetHPA()
1680+
1681+
var minReplicas *int32
1682+
var maxReplicas int32
1683+
var metric string
1684+
var metricType string
1685+
var target int32
1686+
1687+
if hpa.MinReplicas != nil {
1688+
minReplicas = hpa.MinReplicas
1689+
}
1690+
maxReplicas = hpa.MaxReplicas
1691+
1692+
for _, m := range hpa.Metrics {
1693+
if m.Type == autoscalingv2.ResourceMetricSourceType && m.Resource != nil {
1694+
if m.Resource.Name == corev1.ResourceCPU || m.Resource.Name == corev1.ResourceMemory {
1695+
metric = string(m.Resource.Name)
1696+
metricType = string(m.Resource.Target.Type)
1697+
1698+
switch m.Resource.Target.Type {
1699+
case autoscalingv2.UtilizationMetricType:
1700+
if m.Resource.Target.AverageUtilization != nil {
1701+
target = *m.Resource.Target.AverageUtilization
1702+
}
1703+
case autoscalingv2.ValueMetricType:
1704+
if m.Resource.Target.Value != nil {
1705+
target = int32((*m.Resource.Target.Value).Value())
1706+
}
1707+
case autoscalingv2.AverageValueMetricType:
1708+
if m.Resource.Target.AverageValue != nil {
1709+
target = int32((*m.Resource.Target.AverageValue).Value())
1710+
}
1711+
}
1712+
break
1713+
}
1714+
}
1715+
}
1716+
1717+
return minReplicas, maxReplicas, metric, metricType, target
1718+
}
1719+
14441720
// init registers the NIMService API types with the scheme builder so
// clients built from this scheme can encode and decode them.
func init() {
	SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
}

cmd/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ import (
2121
"flag"
2222
"os"
2323

24+
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
2425
monitoring "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
26+
2527
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
2628
// to ensure that exec-entrypoint and run can make use of them.
2729

@@ -58,6 +60,7 @@ func init() {
5860
utilruntime.Must(appsv1alpha1.AddToScheme(scheme))
5961
utilruntime.Must(monitoring.AddToScheme(scheme))
6062
utilruntime.Must(lws.AddToScheme(scheme))
63+
utilruntime.Must(kservev1beta1.AddToScheme(scheme))
6164
// +kubebuilder:scaffold:scheme
6265
}
6366

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ spec:
9090
resources:
9191
limits:
9292
cpu: 500m
93-
memory: 128Mi
93+
memory: 512Mi
9494
requests:
9595
cpu: 10m
9696
memory: 64Mi

config/rbac/role.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,18 @@ rules:
308308
- securitycontextconstraints
309309
verbs:
310310
- use
311+
- apiGroups:
312+
- serving.kserve.io
313+
resources:
314+
- inferenceservices
315+
verbs:
316+
- create
317+
- delete
318+
- get
319+
- list
320+
- patch
321+
- update
322+
- watch
311323
- apiGroups:
312324
- storage.k8s.io
313325
resources:

0 commit comments

Comments
 (0)