Skip to content

Commit 61663b2

Browse files
recognize better transient error (#303)
* recognize better transient error Co-authored-by: I501080 <[email protected]>
1 parent 571f018 commit 61663b2

File tree

8 files changed

+262
-98
lines changed

8 files changed

+262
-98
lines changed

api/common.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package api
22

33
import (
4+
"fmt"
5+
46
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
57
"k8s.io/apimachinery/pkg/runtime"
68
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -17,6 +19,30 @@ const (
1719
ForceRotateAnnotation string = "services.cloud.sap.com/forceRotate"
1820
)
1921

22+
type HTTPStatusCodeError struct {
23+
// StatusCode is the HTTP status code returned by the broker.
24+
StatusCode int
25+
// ErrorMessage is a machine-readable error string that may be returned by the broker.
26+
ErrorMessage *string
27+
// Description is a human-readable description of the error that may be returned by the broker.
28+
Description *string
29+
// ResponseError is set to the error that occurred when unmarshalling a response body from the broker.
30+
ResponseError error
31+
}
32+
33+
func (e HTTPStatusCodeError) Error() string {
34+
errorMessage := "<nil>"
35+
description := "<nil>"
36+
37+
if e.ErrorMessage != nil {
38+
errorMessage = *e.ErrorMessage
39+
}
40+
if e.Description != nil {
41+
description = *e.Description
42+
}
43+
return fmt.Sprintf("Status: %v; ErrorMessage: %v; Description: %v; ResponseError: %v", e.StatusCode, errorMessage, description, e.ResponseError)
44+
}
45+
2046
const (
2147
// ConditionSucceeded represents whether the last operation CREATE/UPDATE/DELETE was successful.
2248
ConditionSucceeded = "Succeeded"

client/sm/client.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"strconv"
2929
"strings"
3030

31+
"github.com/SAP/sap-btp-service-operator/api"
3132
"github.com/SAP/sap-btp-service-operator/client/sm/types"
3233
"github.com/SAP/sap-btp-service-operator/internal/auth"
3334
"github.com/SAP/sap-btp-service-operator/internal/httputil"
@@ -67,8 +68,9 @@ type Client interface {
6768
Call(method string, smpath string, body io.Reader, q *Parameters) (*http.Response, error)
6869
}
6970
type ServiceManagerError struct {
70-
Message string
71-
StatusCode int
71+
Message string
72+
StatusCode int
73+
BrokerError *api.HTTPStatusCodeError `json:"broker_error,omitempty"`
7274
}
7375

7476
func (e *ServiceManagerError) Error() string {

controllers/base_controller.go

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,7 @@ func (r *BaseReconciler) updateStatus(ctx context.Context, object api.SAPBTPReso
137137
func (r *BaseReconciler) init(ctx context.Context, obj api.SAPBTPResource) error {
138138
obj.SetReady(metav1.ConditionFalse)
139139
setInProgressConditions(smClientTypes.CREATE, "Pending", obj)
140-
if err := r.updateStatus(ctx, obj); err != nil {
141-
return err
142-
}
143-
return nil
140+
return r.updateStatus(ctx, obj)
144141
}
145142

146143
func getConditionReason(opType smClientTypes.OperationCategory, state smClientTypes.OperationState) string {
@@ -302,43 +299,72 @@ func isDelete(object metav1.ObjectMeta) bool {
302299
return !object.DeletionTimestamp.IsZero()
303300
}
304301

305-
func isTransientError(ctx context.Context, err error) bool {
302+
func isTransientError(ctx context.Context, err error) (bool, string) {
306303
log := GetLogger(ctx)
307-
if smError, ok := err.(*sm.ServiceManagerError); ok {
304+
smError, ok := err.(*sm.ServiceManagerError)
305+
if !ok {
306+
return false, err.Error()
307+
}
308+
309+
statusCode := smError.StatusCode
310+
errMsg := smError.Error()
311+
if isBrokerErrorExist(smError) {
312+
log.Info(fmt.Sprintf("Broker returned error status code %d", smError.BrokerError.StatusCode))
313+
statusCode = smError.BrokerError.StatusCode
314+
errMsg = smError.BrokerError.Error()
315+
} else {
308316
log.Info(fmt.Sprintf("SM returned error status code %d", smError.StatusCode))
309-
return smError.StatusCode == http.StatusTooManyRequests || smError.StatusCode == http.StatusServiceUnavailable ||
310-
smError.StatusCode == http.StatusGatewayTimeout || smError.StatusCode == http.StatusNotFound || smError.StatusCode == http.StatusBadGateway
311317
}
312-
return false
318+
return isTransientStatusCode(statusCode), errMsg
319+
}
320+
321+
func isTransientStatusCode(StatusCode int) bool {
322+
return StatusCode == http.StatusTooManyRequests || StatusCode == http.StatusServiceUnavailable ||
323+
StatusCode == http.StatusGatewayTimeout || StatusCode == http.StatusNotFound || StatusCode == http.StatusBadGateway
324+
}
325+
326+
func isBrokerErrorExist(smError *sm.ServiceManagerError) bool {
327+
return smError.BrokerError != nil && smError.BrokerError.StatusCode != 0
328+
}
329+
330+
func (r *BaseReconciler) handleError(ctx context.Context, operationType smClientTypes.OperationCategory, err error, resource api.SAPBTPResource) (ctrl.Result, error) {
331+
var (
332+
isTransient bool
333+
errMsg string
334+
)
335+
336+
if isTransient, errMsg = isTransientError(ctx, err); isTransient {
337+
return r.markAsTransientError(ctx, operationType, errMsg, resource)
338+
}
339+
return r.markAsNonTransientError(ctx, operationType, errMsg, resource)
313340
}
314341

315-
func (r *BaseReconciler) markAsNonTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, nonTransientErr error, object api.SAPBTPResource) (ctrl.Result, error) {
342+
func (r *BaseReconciler) markAsNonTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, errMsg string, object api.SAPBTPResource) (ctrl.Result, error) {
316343
log := GetLogger(ctx)
317-
setFailureConditions(operationType, nonTransientErr.Error(), object)
344+
setFailureConditions(operationType, errMsg, object)
318345
if operationType != smClientTypes.DELETE {
319-
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), nonTransientErr.Error()))
346+
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg))
320347
}
321348
object.SetObservedGeneration(object.GetGeneration())
322349
err := r.updateStatus(ctx, object)
323350
if err != nil {
324351
return ctrl.Result{}, err
325352
}
326353
if operationType == smClientTypes.DELETE {
327-
return ctrl.Result{}, nonTransientErr
354+
return ctrl.Result{}, fmt.Errorf(errMsg)
328355
}
329356
return ctrl.Result{}, nil
330357
}
331358

332-
func (r *BaseReconciler) markAsTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, transientErr error, object api.SAPBTPResource) (ctrl.Result, error) {
359+
func (r *BaseReconciler) markAsTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, errMsg string, object api.SAPBTPResource) (ctrl.Result, error) {
333360
log := GetLogger(ctx)
334-
if smError, ok := transientErr.(*sm.ServiceManagerError); ok && smError.StatusCode != http.StatusTooManyRequests {
335-
setInProgressConditions(operationType, transientErr.Error(), object)
336-
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), transientErr.Error()))
337-
if err := r.updateStatus(ctx, object); err != nil {
338-
return ctrl.Result{}, err
339-
}
361+
setInProgressConditions(operationType, errMsg, object)
362+
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), errMsg))
363+
if err := r.updateStatus(ctx, object); err != nil {
364+
return ctrl.Result{}, err
340365
}
341-
return ctrl.Result{}, transientErr
366+
367+
return ctrl.Result{}, fmt.Errorf(errMsg)
342368
}
343369

344370
func isInProgress(object api.SAPBTPResource) bool {

controllers/servicebinding_controller.go

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func (r *ServiceBindingReconciler) Reconcile(ctx context.Context, req ctrl.Reque
8585

8686
smClient, err := r.getSMClient(ctx, serviceBinding)
8787
if err != nil {
88-
return r.markAsTransientError(ctx, Unknown, err, serviceBinding)
88+
return r.markAsTransientError(ctx, Unknown, err.Error(), serviceBinding)
8989
}
9090

9191
if len(serviceBinding.Status.OperationURL) > 0 {
@@ -173,7 +173,7 @@ func (r *ServiceBindingReconciler) Reconcile(ctx context.Context, req ctrl.Reque
173173
binding, err := r.getBindingForRecovery(ctx, smClient, serviceBinding)
174174
if err != nil {
175175
log.Error(err, "failed to check binding recovery")
176-
return r.markAsTransientError(ctx, smClientTypes.CREATE, err, serviceBinding)
176+
return r.markAsTransientError(ctx, smClientTypes.CREATE, err.Error(), serviceBinding)
177177
}
178178
if binding != nil {
179179
// Recovery - restore binding from SM
@@ -209,7 +209,7 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s
209209
_, bindingParameters, err := buildParameters(r.Client, serviceBinding.Namespace, serviceBinding.Spec.ParametersFrom, serviceBinding.Spec.Parameters)
210210
if err != nil {
211211
log.Error(err, "failed to parse smBinding parameters")
212-
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, err, serviceBinding)
212+
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, err.Error(), serviceBinding)
213213
}
214214

215215
smBinding, operationURL, bindErr := smClient.Bind(&smClientTypes.ServiceBinding{
@@ -225,16 +225,13 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s
225225

226226
if bindErr != nil {
227227
log.Error(err, "failed to create service binding", "serviceInstanceID", serviceInstance.Status.InstanceID)
228-
if isTransientError(ctx, bindErr) {
229-
return r.markAsTransientError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
230-
}
231-
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
228+
return r.handleError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
232229
}
233230

234231
if operationURL != "" {
235232
var bindingID string
236233
if bindingID = sm.ExtractBindingID(operationURL); len(bindingID) == 0 {
237-
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, fmt.Errorf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
234+
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, fmt.Sprintf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
238235
}
239236
serviceBinding.Status.BindingID = bindingID
240237

@@ -295,7 +292,7 @@ func (r *ServiceBindingReconciler) delete(ctx context.Context, smClient sm.Clien
295292
operationURL, unbindErr := smClient.Unbind(serviceBinding.Status.BindingID, nil, buildUserInfo(ctx, serviceBinding.Spec.UserInfo))
296293
if unbindErr != nil {
297294
// delete will proceed anyway
298-
return r.markAsNonTransientError(ctx, smClientTypes.DELETE, unbindErr, serviceBinding)
295+
return r.markAsNonTransientError(ctx, smClientTypes.DELETE, unbindErr.Error(), serviceBinding)
299296
}
300297

301298
if operationURL != "" {
@@ -336,8 +333,7 @@ func (r *ServiceBindingReconciler) poll(ctx context.Context, smClient sm.Client,
336333
}
337334

338335
if status == nil {
339-
err := fmt.Errorf("failed to get last operation status of %s", serviceBinding.Name)
340-
return r.markAsTransientError(ctx, serviceBinding.Status.OperationType, err, serviceBinding)
336+
return r.markAsTransientError(ctx, serviceBinding.Status.OperationType, fmt.Sprintf("failed to get last operation status of %s", serviceBinding.Name), serviceBinding)
341337
}
342338
switch status.State {
343339
case smClientTypes.INPROGRESS:
@@ -689,9 +685,9 @@ func (r *ServiceBindingReconciler) handleSecretError(ctx context.Context, op smC
689685
log := GetLogger(ctx)
690686
log.Error(err, fmt.Sprintf("failed to store secret %s for binding %s", binding.Spec.SecretName, binding.Name))
691687
if apierrors.ReasonForError(err) == metav1.StatusReasonUnknown {
692-
return r.markAsNonTransientError(ctx, op, err, binding)
688+
return r.markAsNonTransientError(ctx, op, err.Error(), binding)
693689
}
694-
return r.markAsTransientError(ctx, op, err, binding)
690+
return r.markAsTransientError(ctx, op, err.Error(), binding)
695691
}
696692

697693
func (r *ServiceBindingReconciler) addInstanceInfo(ctx context.Context, binding *servicesv1.ServiceBinding, credentialsMap map[string][]byte) ([]SecretMetadataProperty, error) {

controllers/servicebinding_controller_test.go

Lines changed: 64 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,20 +44,7 @@ var _ = Describe("ServiceBinding controller", func() {
4444
var guid string
4545

4646
createBindingWithoutAssertionsAndWait := func(ctx context.Context, name string, namespace string, instanceName string, externalName string, wait bool) (*v1.ServiceBinding, error) {
47-
binding := newBindingObject(name, namespace)
48-
binding.Spec.ServiceInstanceName = instanceName
49-
binding.Spec.ExternalName = externalName
50-
binding.Spec.Parameters = &runtime.RawExtension{
51-
Raw: []byte(`{"key": "value"}`),
52-
}
53-
binding.Spec.ParametersFrom = []v1.ParametersFromSource{
54-
{
55-
SecretKeyRef: &v1.SecretKeyReference{
56-
Name: "param-secret",
57-
Key: "secret-parameter",
58-
},
59-
},
60-
}
47+
binding := generateBasicBindingTemplate(name, namespace, instanceName, externalName)
6148

6249
if err := k8sClient.Create(ctx, binding); err != nil {
6350
return nil, err
@@ -538,6 +525,34 @@ var _ = Describe("ServiceBinding controller", func() {
538525
})
539526
})
540527

528+
When("SM returned error 502 and broker returned 429", func() {
529+
BeforeEach(func() {
530+
errorMessage = "too many requests from broker"
531+
fakeClient.BindReturns(nil, "", getTransientBrokerError(errorMessage))
532+
})
533+
534+
It("should detect the error as transient and eventually succeed", func() {
535+
createdBinding, _ := createBindingWithoutAssertionsAndWait(context.Background(),
536+
bindingName, bindingTestNamespace, instanceName, "binding-external-name", false)
537+
expectBindingToBeInFailedStateWithMsg(createdBinding, errorMessage)
538+
539+
fakeClient.BindReturns(&smClientTypes.ServiceBinding{ID: fakeBindingID,
540+
Credentials: json.RawMessage("{\"secret_key\": \"secret_value\"}")}, "", nil)
541+
validateBindingIsReady(createdBinding, bindingName)
542+
})
543+
})
544+
545+
When("SM returned 502 and broker returned 400", func() {
546+
BeforeEach(func() {
547+
errorMessage = "very bad request"
548+
fakeClient.BindReturnsOnCall(0, nil, "", getNonTransientBrokerError(errorMessage))
549+
})
550+
551+
It("should detect the error as non-transient and fail", func() {
552+
createBindingWithError(context.Background(), bindingName, bindingTestNamespace, instanceName, "binding-external-name", errorMessage)
553+
})
554+
})
555+
541556
})
542557

543558
When("SM returned invalid credentials json", func() {
@@ -1167,6 +1182,41 @@ var _ = Describe("ServiceBinding controller", func() {
11671182
})
11681183
})
11691184

1185+
func expectBindingToBeInFailedStateWithMsg(binding *v1.ServiceBinding, message string) {
1186+
cond := meta.FindStatusCondition(binding.GetConditions(), api.ConditionSucceeded)
1187+
Expect(cond).To(Not(BeNil()))
1188+
Expect(cond.Message).To(ContainSubstring(message))
1189+
Expect(cond.Status).To(Equal(metav1.ConditionFalse))
1190+
}
1191+
1192+
func validateBindingIsReady(createdBinding *v1.ServiceBinding, bindingName string) {
1193+
Eventually(func() bool {
1194+
err := k8sClient.Get(context.Background(), types.NamespacedName{Name: bindingName, Namespace: bindingTestNamespace}, createdBinding)
1195+
if err != nil {
1196+
return false
1197+
}
1198+
return isReady(createdBinding)
1199+
}, timeout, interval).Should(BeTrue())
1200+
}
1201+
1202+
func generateBasicBindingTemplate(name, namespace, instanceName, externalName string) *v1.ServiceBinding {
1203+
binding := newBindingObject(name, namespace)
1204+
binding.Spec.ServiceInstanceName = instanceName
1205+
binding.Spec.ExternalName = externalName
1206+
binding.Spec.Parameters = &runtime.RawExtension{
1207+
Raw: []byte(`{"key": "value"}`),
1208+
}
1209+
binding.Spec.ParametersFrom = []v1.ParametersFromSource{
1210+
{
1211+
SecretKeyRef: &v1.SecretKeyReference{
1212+
Name: "param-secret",
1213+
Key: "secret-parameter",
1214+
},
1215+
},
1216+
}
1217+
return binding
1218+
}
1219+
11701220
func validateSecretData(secret *corev1.Secret, expectedKey string, expectedValue string) {
11711221
Expect(secret.Data).ToNot(BeNil())
11721222
Expect(secret.Data).To(HaveKey(expectedKey))

0 commit comments

Comments
 (0)