Skip to content

Commit 1f78b10

Browse files
committed
Support KServe platform for NIMService
Signed-off-by: Xieshen Zhang <xiezhang@redhat.com>
1 parent be6a9ca commit 1f78b10

1,851 files changed

Lines changed: 731584 additions & 4757 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

api/apps/v1alpha1/nimservice_types.go

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,184 @@ func (n *NIMService) GetProxySpec() *ProxySpec {
10111011
return n.Spec.Proxy
10121012
}
10131013

1014+
// GetInferenceServiceParams returns params to render InferenceService from templates.
1015+
func (n *NIMService) GetInferenceServiceParams() *rendertypes.InferenceServiceParams {
1016+
params := &rendertypes.InferenceServiceParams{}
1017+
1018+
// Set metadata
1019+
params.Name = n.GetName()
1020+
params.Namespace = n.GetNamespace()
1021+
params.Labels = n.GetServiceLabels()
1022+
params.Annotations = n.GetNIMServiceAnnotations()
1023+
params.PodAnnotations = n.GetNIMServiceAnnotations()
1024+
delete(params.PodAnnotations, utils.NvidiaAnnotationParentSpecHashKey)
1025+
1026+
// Set template spec
1027+
if !n.IsAutoScalingEnabled() {
1028+
params.MinReplicas = int32(n.GetReplicas())
1029+
} else {
1030+
hpa := n.GetHPA()
1031+
1032+
if hpa.MinReplicas != nil {
1033+
params.MinReplicas = *hpa.MinReplicas
1034+
}
1035+
params.MaxReplicas = hpa.MaxReplicas
1036+
1037+
for _, m := range hpa.Metrics {
1038+
if m.Type == autoscalingv2.ResourceMetricSourceType && m.Resource != nil {
1039+
if m.Resource.Name == corev1.ResourceCPU || m.Resource.Name == corev1.ResourceMemory {
1040+
params.ScaleMetric = string(m.Resource.Name)
1041+
params.ScaleMetricType = string(m.Resource.Target.Type)
1042+
1043+
switch m.Resource.Target.Type {
1044+
case autoscalingv2.UtilizationMetricType:
1045+
if m.Resource.Target.AverageUtilization != nil {
1046+
params.ScaleTarget = *m.Resource.Target.AverageUtilization
1047+
}
1048+
case autoscalingv2.ValueMetricType:
1049+
if m.Resource.Target.Value != nil {
1050+
params.ScaleTarget = int32((*m.Resource.Target.Value).Value())
1051+
}
1052+
case autoscalingv2.AverageValueMetricType:
1053+
if m.Resource.Target.AverageValue != nil {
1054+
params.ScaleTarget = int32((*m.Resource.Target.AverageValue).Value())
1055+
}
1056+
}
1057+
break
1058+
}
1059+
}
1060+
}
1061+
}
1062+
1063+
params.NodeSelector = n.GetNodeSelector()
1064+
params.Tolerations = n.GetTolerations()
1065+
params.Affinity = n.GetPodAffinity()
1066+
params.ImagePullSecrets = n.GetImagePullSecrets()
1067+
params.ImagePullPolicy = n.GetImagePullPolicy()
1068+
1069+
// Set labels and selectors
1070+
params.SelectorLabels = n.GetSelectorLabels()
1071+
1072+
// Set container spec
1073+
params.ContainerName = n.GetContainerName()
1074+
params.Env = n.GetEnv()
1075+
params.Args = n.GetArgs()
1076+
params.Command = n.GetCommand()
1077+
params.Resources = n.GetResources()
1078+
params.Image = n.GetImage()
1079+
1080+
// Set container probes
1081+
if IsProbeEnabled(n.Spec.LivenessProbe) {
1082+
if n.Spec.LivenessProbe.Probe == nil {
1083+
params.LivenessProbe = &corev1.Probe{
1084+
InitialDelaySeconds: 15,
1085+
TimeoutSeconds: 1,
1086+
PeriodSeconds: 10,
1087+
SuccessThreshold: 1,
1088+
FailureThreshold: 3,
1089+
}
1090+
if n.Spec.Expose.Service.GRPCPort != nil {
1091+
params.LivenessProbe.ProbeHandler = corev1.ProbeHandler{
1092+
GRPC: &corev1.GRPCAction{
1093+
Port: *n.Spec.Expose.Service.GRPCPort,
1094+
},
1095+
}
1096+
} else {
1097+
params.LivenessProbe.ProbeHandler = corev1.ProbeHandler{
1098+
HTTPGet: &corev1.HTTPGetAction{
1099+
Path: "/v1/health/live",
1100+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1101+
},
1102+
}
1103+
}
1104+
} else {
1105+
params.LivenessProbe = n.Spec.LivenessProbe.Probe
1106+
}
1107+
}
1108+
if IsProbeEnabled(n.Spec.ReadinessProbe) {
1109+
if n.Spec.ReadinessProbe.Probe == nil {
1110+
params.ReadinessProbe = &corev1.Probe{
1111+
InitialDelaySeconds: 15,
1112+
TimeoutSeconds: 1,
1113+
PeriodSeconds: 10,
1114+
SuccessThreshold: 1,
1115+
FailureThreshold: 3,
1116+
}
1117+
if n.Spec.Expose.Service.GRPCPort != nil {
1118+
params.ReadinessProbe.ProbeHandler = corev1.ProbeHandler{
1119+
GRPC: &corev1.GRPCAction{
1120+
Port: *n.Spec.Expose.Service.GRPCPort,
1121+
},
1122+
}
1123+
} else {
1124+
params.ReadinessProbe.ProbeHandler = corev1.ProbeHandler{
1125+
HTTPGet: &corev1.HTTPGetAction{
1126+
Path: "/v1/health/ready",
1127+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1128+
},
1129+
}
1130+
}
1131+
} else {
1132+
params.ReadinessProbe = n.Spec.ReadinessProbe.Probe
1133+
}
1134+
}
1135+
if IsProbeEnabled(n.Spec.StartupProbe) {
1136+
if n.Spec.StartupProbe.Probe == nil {
1137+
params.StartupProbe = &corev1.Probe{
1138+
InitialDelaySeconds: 30,
1139+
TimeoutSeconds: 1,
1140+
PeriodSeconds: 10,
1141+
SuccessThreshold: 1,
1142+
FailureThreshold: 30,
1143+
}
1144+
if n.Spec.Expose.Service.GRPCPort != nil {
1145+
params.StartupProbe.ProbeHandler = corev1.ProbeHandler{
1146+
GRPC: &corev1.GRPCAction{
1147+
Port: *n.Spec.Expose.Service.GRPCPort,
1148+
},
1149+
}
1150+
} else {
1151+
params.StartupProbe.ProbeHandler = corev1.ProbeHandler{
1152+
HTTPGet: &corev1.HTTPGetAction{
1153+
Path: "/v1/health/ready",
1154+
Port: intstr.FromInt32(*n.Spec.Expose.Service.Port),
1155+
},
1156+
}
1157+
}
1158+
} else {
1159+
params.StartupProbe = n.Spec.StartupProbe.Probe
1160+
}
1161+
}
1162+
1163+
params.UserID = n.GetUserID()
1164+
params.GroupID = n.GetGroupID()
1165+
1166+
// Set service account
1167+
params.ServiceAccountName = n.GetServiceAccountName()
1168+
1169+
// Set runtime class
1170+
params.RuntimeClassName = n.GetRuntimeClassName()
1171+
1172+
// Set scheduler
1173+
params.SchedulerName = n.GetSchedulerName()
1174+
1175+
// Setup container ports for nimservice
1176+
if n.Spec.Expose.Service.GRPCPort != nil {
1177+
params.Ports = append(params.Ports, corev1.ContainerPort{
1178+
Protocol: corev1.ProtocolTCP,
1179+
ContainerPort: *n.Spec.Expose.Service.GRPCPort,
1180+
})
1181+
} else {
1182+
params.Ports = []corev1.ContainerPort{
1183+
{
1184+
Protocol: corev1.ProtocolTCP,
1185+
ContainerPort: *n.Spec.Expose.Service.Port,
1186+
},
1187+
}
1188+
}
1189+
return params
1190+
}
1191+
10141192
// init registers the NIMService and NIMServiceList types with SchemeBuilder.
func init() {
10151193
SchemeBuilder.Register(&NIMService{}, &NIMServiceList{})
10161194
}

cmd/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ import (
2121
"flag"
2222
"os"
2323

24+
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
2425
monitoring "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
26+
2527
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
2628
// to ensure that exec-entrypoint and run can make use of them.
2729
"k8s.io/apimachinery/pkg/runtime"
@@ -55,6 +57,7 @@ func init() {
5557
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
5658
utilruntime.Must(appsv1alpha1.AddToScheme(scheme))
5759
utilruntime.Must(monitoring.AddToScheme(scheme))
60+
utilruntime.Must(kservev1beta1.AddToScheme(scheme))
5861
// +kubebuilder:scaffold:scheme
5962
}
6063

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ spec:
9090
resources:
9191
limits:
9292
cpu: 500m
93-
memory: 128Mi
93+
memory: 512Mi
9494
requests:
9595
cpu: 10m
9696
memory: 64Mi

config/rbac/role.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,18 @@ rules:
286286
- securitycontextconstraints
287287
verbs:
288288
- use
289+
- apiGroups:
290+
- serving.kserve.io
291+
resources:
292+
- inferenceservices
293+
verbs:
294+
- create
295+
- delete
296+
- get
297+
- list
298+
- patch
299+
- update
300+
- watch
289301
- apiGroups:
290302
- storage.k8s.io
291303
resources:

0 commit comments

Comments
 (0)