Skip to content

Commit ce43040

Browse files
hdefazio and Jooho authored
Stop and Resume a transformer (kserve#4534)
Signed-off-by: Hannah DeFazio <h2defazio@gmail.com> Co-authored-by: Jooho Lee <jlee@redhat.com>
1 parent 40806a7 commit ce43040

File tree

7 files changed

+1842
-1187
lines changed

7 files changed

+1842
-1187
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ clean:
161161

162162
# Run tests
163163
test: fmt vet manifests envtest test-qpext
164-
KUBEBUILDER_ASSETS="$$($(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test $$(go list ./pkg/...) ./cmd/... -coverprofile coverage.out -coverpkg ./pkg/... ./cmd...
164+
KUBEBUILDER_ASSETS="$$($(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test --timeout 20m $$(go list ./pkg/...) ./cmd/... -coverprofile coverage.out -coverpkg ./pkg/... ./cmd...
165165

166166
test-qpext:
167167
cd qpext && go test -v ./... -cover

pkg/controller/v1beta1/inferenceservice/components/predictor.go

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -198,42 +198,27 @@ func (p *Predictor) Reconcile(ctx context.Context, isvc *v1beta1.InferenceServic
198198
// Handle InferenceService status updates based on the force stop annotation.
199199
// If true, transition the service to a stopped and unready state; otherwise, ensure it's not marked as stopped.
200200
if utils.GetForceStopRuntime(isvc) {
201-
// Exit early if we have already set the status to stopped
202-
existingStoppedCondition := isvc.Status.GetCondition(v1beta1.Stopped)
203-
if existingStoppedCondition != nil && existingStoppedCondition.Status == corev1.ConditionTrue {
201+
// Exit early if we have already set the predictor's status to stopped
202+
existingPredictorCondition := isvc.Status.GetCondition(v1beta1.PredictorReady)
203+
if existingPredictorCondition != nil && existingPredictorCondition.Status == corev1.ConditionFalse && existingPredictorCondition.Reason == v1beta1.StoppedISVCReason {
204204
return ctrl.Result{}, nil
205205
}
206206

207+
// Preserve the deployment mode value
207208
deployMode := isvc.Status.DeploymentMode
208209

209210
// Clear all statuses
210211
isvc.Status = v1beta1.InferenceServiceStatus{}
211-
212-
// Preserve the deployment mode value
213212
isvc.Status.DeploymentMode = deployMode
214213

215-
// Set the ready condition
216-
predictorReadyCondition := &apis.Condition{
214+
// Set the predictor's ready condition to false
215+
isvc.Status.SetCondition(v1beta1.PredictorReady, &apis.Condition{
217216
Type: v1beta1.PredictorReady,
218217
Status: corev1.ConditionFalse,
219218
Reason: v1beta1.StoppedISVCReason,
220-
}
221-
isvc.Status.SetCondition(v1beta1.PredictorReady, predictorReadyCondition)
222-
223-
// Add the stopped condition
224-
stoppedCondition := &apis.Condition{
225-
Type: v1beta1.Stopped,
226-
Status: corev1.ConditionTrue,
227-
}
228-
isvc.Status.SetCondition(v1beta1.Stopped, stoppedCondition)
219+
})
229220

230221
return ctrl.Result{}, nil
231-
} else {
232-
resumeCondition := &apis.Condition{
233-
Type: v1beta1.Stopped,
234-
Status: corev1.ConditionFalse,
235-
}
236-
isvc.Status.SetCondition(v1beta1.Stopped, resumeCondition)
237222
}
238223

239224
statusSpec := isvc.Status.Components[v1beta1.PredictorComponent]

pkg/controller/v1beta1/inferenceservice/components/transformer.go

Lines changed: 67 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/runtime"
3030
"k8s.io/client-go/kubernetes"
31+
"knative.dev/pkg/apis"
3132
ctrl "sigs.k8s.io/controller-runtime"
3233
"sigs.k8s.io/controller-runtime/pkg/client"
3334
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
@@ -152,45 +153,78 @@ func (p *Transformer) Reconcile(ctx context.Context, isvc *v1beta1.InferenceServ
152153

153154
// Here we allow switch between knative and vanilla deployment
154155
if p.deploymentMode == constants.RawDeployment {
155-
r, err := raw.NewRawKubeReconciler(ctx, p.client, p.clientset, p.scheme, objectMeta, metav1.ObjectMeta{},
156-
&isvc.Spec.Transformer.ComponentExtensionSpec, &podSpec, nil)
157-
if err != nil {
158-
return ctrl.Result{}, errors.Wrapf(err, "fails to create NewRawKubeReconciler for transformer")
159-
}
160-
// set Deployment Controller
161-
for _, deployment := range r.Deployment.DeploymentList {
162-
if err := controllerutil.SetControllerReference(isvc, deployment, p.scheme); err != nil {
163-
return ctrl.Result{}, errors.Wrapf(err, "fails to set deployment owner reference for transformer")
164-
}
156+
if err := p.reconcileTransformerRawDeployment(ctx, isvc, &objectMeta, &podSpec); err != nil {
157+
return ctrl.Result{}, err
165158
}
166-
// set Service Controller
167-
for _, svc := range r.Service.ServiceList {
168-
if err := controllerutil.SetControllerReference(isvc, svc, p.scheme); err != nil {
169-
return ctrl.Result{}, errors.Wrapf(err, "fails to set service owner reference for transformer")
170-
}
171-
}
172-
// set autoscaler Controller
173-
if err := r.Scaler.Autoscaler.SetControllerReferences(isvc, p.scheme); err != nil {
174-
return ctrl.Result{}, errors.Wrapf(err, "fails to set autoscaler owner references for transformer")
159+
} else {
160+
if err := p.reconcileTransformerKnativeDeployment(ctx, isvc, &objectMeta, &podSpec); err != nil {
161+
return ctrl.Result{}, err
175162
}
163+
}
176164

177-
deployment, err := r.Reconcile(ctx)
178-
if err != nil {
179-
return ctrl.Result{}, errors.Wrapf(err, "fails to reconcile transformer")
165+
if utils.GetForceStopRuntime(isvc) {
166+
// Exit early if we have already set the transformer's status to stopped
167+
existingTransformerCondition := isvc.Status.GetCondition(v1beta1.TransformerReady)
168+
if existingTransformerCondition != nil && existingTransformerCondition.Status == corev1.ConditionFalse && existingTransformerCondition.Reason == v1beta1.StoppedISVCReason {
169+
return ctrl.Result{}, nil
180170
}
181-
isvc.Status.PropagateRawStatus(v1beta1.TransformerComponent, deployment, r.URL)
182-
} else {
183-
r := knative.NewKsvcReconciler(p.client, p.scheme, objectMeta, &isvc.Spec.Transformer.ComponentExtensionSpec,
184-
&podSpec, isvc.Status.Components[v1beta1.TransformerComponent], p.inferenceServiceConfig.ServiceLabelDisallowedList)
185171

186-
if err := controllerutil.SetControllerReference(isvc, r.Service, p.scheme); err != nil {
187-
return ctrl.Result{}, errors.Wrapf(err, "fails to set owner reference for transformer")
172+
// Set the ready condition to false
173+
isvc.Status.SetCondition(v1beta1.TransformerReady, &apis.Condition{
174+
Type: v1beta1.TransformerReady,
175+
Status: corev1.ConditionFalse,
176+
Reason: v1beta1.StoppedISVCReason,
177+
})
178+
}
179+
return ctrl.Result{}, nil
180+
}
181+
182+
func (p *Transformer) reconcileTransformerRawDeployment(ctx context.Context, isvc *v1beta1.InferenceService, objectMeta *metav1.ObjectMeta, podSpec *corev1.PodSpec) error {
183+
r, err := raw.NewRawKubeReconciler(ctx, p.client, p.clientset, p.scheme, *objectMeta, metav1.ObjectMeta{},
184+
&isvc.Spec.Transformer.ComponentExtensionSpec, podSpec, nil)
185+
if err != nil {
186+
return errors.Wrapf(err, "fails to create NewRawKubeReconciler for transformer")
187+
}
188+
// set Deployment Controller
189+
for _, deployment := range r.Deployment.DeploymentList {
190+
if err := controllerutil.SetControllerReference(isvc, deployment, p.scheme); err != nil {
191+
return errors.Wrapf(err, "fails to set deployment owner reference for transformer")
188192
}
189-
status, err := r.Reconcile(ctx)
190-
if err != nil {
191-
return ctrl.Result{}, errors.Wrapf(err, "fails to reconcile transformer")
193+
}
194+
// set Service Controller
195+
for _, svc := range r.Service.ServiceList {
196+
if err := controllerutil.SetControllerReference(isvc, svc, p.scheme); err != nil {
197+
return errors.Wrapf(err, "fails to set service owner reference for transformer")
192198
}
193-
isvc.Status.PropagateStatus(v1beta1.TransformerComponent, status)
194199
}
195-
return ctrl.Result{}, nil
200+
// set autoscaler Controller
201+
if err := r.Scaler.Autoscaler.SetControllerReferences(isvc, p.scheme); err != nil {
202+
return errors.Wrapf(err, "fails to set autoscaler owner references for transformer")
203+
}
204+
205+
deployment, err := r.Reconcile(ctx)
206+
if err != nil {
207+
return errors.Wrapf(err, "fails to reconcile transformer")
208+
}
209+
if !utils.GetForceStopRuntime(isvc) {
210+
isvc.Status.PropagateRawStatus(v1beta1.TransformerComponent, deployment, r.URL)
211+
}
212+
return nil
213+
}
214+
215+
func (p *Transformer) reconcileTransformerKnativeDeployment(ctx context.Context, isvc *v1beta1.InferenceService, objectMeta *metav1.ObjectMeta, podSpec *corev1.PodSpec) error {
216+
r := knative.NewKsvcReconciler(p.client, p.scheme, *objectMeta, &isvc.Spec.Transformer.ComponentExtensionSpec,
217+
podSpec, isvc.Status.Components[v1beta1.TransformerComponent], p.inferenceServiceConfig.ServiceLabelDisallowedList)
218+
219+
if err := controllerutil.SetControllerReference(isvc, r.Service, p.scheme); err != nil {
220+
return errors.Wrapf(err, "fails to set owner reference for transformer")
221+
}
222+
kstatus, err := r.Reconcile(ctx)
223+
if err != nil {
224+
return errors.Wrapf(err, "fails to reconcile transformer")
225+
}
226+
if !utils.GetForceStopRuntime(isvc) {
227+
isvc.Status.PropagateStatus(v1beta1.TransformerComponent, kstatus)
228+
}
229+
return nil
196230
}

pkg/controller/v1beta1/inferenceservice/controller.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
139139
annotations := utils.Filter(isvc.Annotations, func(key string) bool {
140140
return !utils.Includes(isvcConfig.ServiceAnnotationDisallowedList, key)
141141
})
142+
forceStopRuntime := utils.GetForceStopRuntime(isvc)
142143

143144
deployConfig, err := v1beta1.NewDeployConfig(isvcConfigMap)
144145
if err != nil {
@@ -254,6 +255,28 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
254255
return result, nil
255256
}
256257
}
258+
// Handle InferenceService status updates based on the force stop annotation.
259+
// If true, transition the service to a stopped and unready state; otherwise, ensure it's not marked as stopped.
260+
if forceStopRuntime {
261+
// Leave the Stopped condition untouched if it is already set to true (no early return here)
262+
existingStoppedCondition := isvc.Status.GetCondition(v1beta1.Stopped)
263+
if existingStoppedCondition != nil && existingStoppedCondition.Status == corev1.ConditionTrue {
264+
// TODO: Set condition to stoppING
265+
} else {
266+
// Add the stopped condition
267+
stoppedCondition := &apis.Condition{
268+
Type: v1beta1.Stopped,
269+
Status: corev1.ConditionTrue,
270+
}
271+
isvc.Status.SetCondition(v1beta1.Stopped, stoppedCondition)
272+
}
273+
} else {
274+
resumeCondition := &apis.Condition{
275+
Type: v1beta1.Stopped,
276+
Status: corev1.ConditionFalse,
277+
}
278+
isvc.Status.SetCondition(v1beta1.Stopped, resumeCondition)
279+
}
257280
// reconcile RoutesReady and LatestDeploymentReady conditions for serverless deployment
258281
if deploymentMode == constants.Serverless {
259282
componentList := []v1beta1.ComponentType{v1beta1.PredictorComponent}
@@ -263,7 +286,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
263286
if isvc.Spec.Explainer != nil {
264287
componentList = append(componentList, v1beta1.ExplainerComponent)
265288
}
266-
if !utils.GetForceStopRuntime(isvc) {
289+
if !forceStopRuntime {
267290
isvc.Status.PropagateCrossComponentStatus(componentList, v1beta1.RoutesReady)
268291
isvc.Status.PropagateCrossComponentStatus(componentList, v1beta1.LatestDeploymentReady)
269292
}

0 commit comments

Comments
 (0)