Skip to content

Commit 3a8153c

Browse files
hdefazio and Jooho authored
Stop and resume a model by adding a new annotation [Serverless] (kserve#4337)
Signed-off-by: Hannah DeFazio <h2defazio@gmail.com> Signed-off-by: Jooho Lee <jlee@redhat.com> Co-authored-by: Jooho Lee <ljhiyh@gmail.com>
1 parent 4df41b7 commit 3a8153c

File tree

8 files changed

+795
-79
lines changed

8 files changed

+795
-79
lines changed

pkg/apis/serving/v1beta1/inference_service.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ limitations under the License.
1717
package v1beta1
1818

1919
import (
20+
"strings"
21+
2022
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+
24+
"github.com/kserve/kserve/pkg/constants"
2125
)
2226

2327
// InferenceServiceSpec is the top level type for this resource
@@ -136,3 +140,15 @@ type InferenceServiceList struct {
136140
func init() {
137141
SchemeBuilder.Register(&InferenceService{}, &InferenceServiceList{})
138142
}
143+
144+
func (isvc *InferenceService) GetForceStopRuntime() bool {
145+
forceStopRuntime := false
146+
if isvc == nil || isvc.Annotations == nil {
147+
return forceStopRuntime
148+
}
149+
if val, exist := isvc.Annotations[constants.StopAnnotationKey]; exist {
150+
forceStopRuntime = strings.EqualFold(val, "true")
151+
}
152+
153+
return forceStopRuntime
154+
}

pkg/apis/serving/v1beta1/inference_service_status.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ const (
124124
RoutesReady apis.ConditionType = "RoutesReady"
125125
// LatestDeploymentReady is set when underlying configurations for all components have reported readiness.
126126
LatestDeploymentReady apis.ConditionType = "LatestDeploymentReady"
127+
// Stopped is set when the inference service has been stopped and all related objects are deleted
128+
Stopped apis.ConditionType = "Stopped"
127129
)
128130

129131
type ModelStatus struct {

pkg/constants/constants.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ var (
9494
TargetUtilizationPercentage = KServeAPIGroupName + "/targetUtilizationPercentage"
9595
MinScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/min-scale"
9696
MaxScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/max-scale"
97+
StopAnnotationKey = KServeAPIGroupName + "/stop"
9798
RollOutDurationAnnotationKey = KnativeServingAPIGroupName + "/rollout-duration"
9899
KnativeOpenshiftEnablePassthroughKey = "serving.knative.openshift.io/enablePassthrough"
99100
EnableMetricAggregation = KServeAPIGroupName + "/enable-metric-aggregation"

pkg/controller/v1beta1/inferenceservice/components/predictor.go

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"k8s.io/apimachinery/pkg/types"
3232
"k8s.io/client-go/kubernetes"
3333
"k8s.io/utils/ptr"
34+
"knative.dev/pkg/apis"
3435
knservingv1 "knative.dev/serving/pkg/apis/serving/v1"
3536

3637
ctrl "sigs.k8s.io/controller-runtime"
@@ -189,9 +190,47 @@ func (p *Predictor) Reconcile(ctx context.Context, isvc *v1beta1.InferenceServic
189190
} else {
190191
var err error
191192
podLabelKey = constants.RevisionLabel
193+
192194
if kstatus, err = p.reconcileKnativeDeployment(ctx, isvc, &objectMeta, &podSpec); err != nil {
193195
return ctrl.Result{}, err
194196
}
197+
if isvc.GetForceStopRuntime() {
198+
// Exit early if we have already set the status to stopped
199+
existing_stopped_condition := isvc.Status.GetCondition(v1beta1.Stopped)
200+
if existing_stopped_condition != nil && existing_stopped_condition.Status == corev1.ConditionTrue {
201+
return ctrl.Result{}, nil
202+
}
203+
204+
deployMode := isvc.Status.DeploymentMode
205+
206+
// Clear all statuses
207+
isvc.Status = v1beta1.InferenceServiceStatus{}
208+
209+
// Preserve the deployment mode value
210+
isvc.Status.DeploymentMode = deployMode
211+
212+
// Set the ready condition
213+
predictor_ready_condition := &apis.Condition{
214+
Type: v1beta1.PredictorReady,
215+
Status: corev1.ConditionFalse,
216+
}
217+
isvc.Status.SetCondition(v1beta1.PredictorReady, predictor_ready_condition)
218+
219+
// Add the stopped condition
220+
stopped_condition := &apis.Condition{
221+
Type: v1beta1.Stopped,
222+
Status: corev1.ConditionTrue,
223+
}
224+
isvc.Status.SetCondition(v1beta1.Stopped, stopped_condition)
225+
226+
return ctrl.Result{}, nil
227+
} else {
228+
resume_condition := &apis.Condition{
229+
Type: v1beta1.Stopped,
230+
Status: corev1.ConditionFalse,
231+
}
232+
isvc.Status.SetCondition(v1beta1.Stopped, resume_condition)
233+
}
195234
}
196235

197236
statusSpec := isvc.Status.Components[v1beta1.PredictorComponent]
@@ -204,6 +243,7 @@ func (p *Predictor) Reconcile(ctx context.Context, isvc *v1beta1.InferenceServic
204243
if err != nil {
205244
return ctrl.Result{}, errors.Wrapf(err, "fails to list inferenceservice pods by label")
206245
}
246+
207247
if isvc.Status.PropagateModelStatus(statusSpec, predictorPods, rawDeployment, kstatus) {
208248
return ctrl.Result{}, nil
209249
} else {
@@ -693,6 +733,8 @@ func (p *Predictor) reconcileKnativeDeployment(ctx context.Context, isvc *v1beta
693733
if err != nil {
694734
return nil, errors.Wrapf(err, "fails to reconcile predictor")
695735
}
696-
isvc.Status.PropagateStatus(v1beta1.PredictorComponent, kstatus)
736+
if !isvc.GetForceStopRuntime() {
737+
isvc.Status.PropagateStatus(v1beta1.PredictorComponent, kstatus)
738+
}
697739
return kstatus, nil
698740
}

pkg/controller/v1beta1/inferenceservice/controller.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,10 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
254254
if isvc.Spec.Explainer != nil {
255255
componentList = append(componentList, v1beta1.ExplainerComponent)
256256
}
257-
isvc.Status.PropagateCrossComponentStatus(componentList, v1beta1.RoutesReady)
258-
isvc.Status.PropagateCrossComponentStatus(componentList, v1beta1.LatestDeploymentReady)
257+
if !isvc.GetForceStopRuntime() {
258+
isvc.Status.PropagateCrossComponentStatus(componentList, v1beta1.RoutesReady)
259+
isvc.Status.PropagateCrossComponentStatus(componentList, v1beta1.LatestDeploymentReady)
260+
}
259261
}
260262
// Reconcile ingress
261263
ingressConfig, err := v1beta1.NewIngressConfig(isvcConfigMap)

0 commit comments

Comments
 (0)