Skip to content

Add retry for IBM Cloud API rate limiting (HTTP 429) #502

@meomnzak

Description

@meomnzak

Description

Summary:
IBM Cloud APIs return HTTP 429 (Too Many Requests) when a caller is rate limited, along with a Retry-After header indicating how long to wait. Currently, the codebase neither detects this response nor retries the request.

Goal:

Introduce a shared retry helper.

pkg/cloudprovider/ibm/retry.go

// DoWithRetry invokes fn and retries it while the response reports HTTP 429
// (Too Many Requests).
//
// fn is called at most 5 times. Any outcome other than a 429 response,
// including a nil response, is returned to the caller unchanged. Between
// attempts the helper waits for the current backoff, which starts at 100ms and
// doubles after every wait up to a 30s ceiling. A Retry-After header holding a
// positive integer number of seconds replaces the current backoff before the
// wait; any other header value (for example an HTTP-date) is ignored.
//
// If ctx is done before a wait elapses, the zero value and ctx.Err() are
// returned. If every attempt is rate limited, the zero value is returned with
// an error that wraps the error from the final attempt when there is one; no
// wait is performed after the final attempt.
func DoWithRetry[T any](ctx context.Context, fn func() (T, *core.DetailedResponse, error)) (T, error) {
	const (
		maxAttempts     = 5
		initialBackoff  = 100 * time.Millisecond
		maxBackoff      = 30 * time.Second
		tooManyRequests = 429
	)

	var (
		zero    T
		lastErr error
	)
	backoff := initialBackoff

	for attempt := 1; attempt <= maxAttempts; attempt++ {
		result, response, err := fn()

		// Success or non-rate-limit error: hand it straight back.
		if response == nil || response.StatusCode != tooManyRequests {
			return result, err
		}
		lastErr = err

		// Nothing left to retry, so do not make the caller sit through
		// another backoff just to receive the failure.
		if attempt == maxAttempts {
			break
		}

		// Prefer the server-provided delay when it is a usable number of seconds.
		if ra := response.Headers.Get("Retry-After"); ra != "" {
			if secs, convErr := strconv.Atoi(ra); convErr == nil && secs > 0 {
				backoff = time.Duration(secs) * time.Second
			}
		}

		// An explicit timer can be stopped if the context ends the wait early.
		timer := time.NewTimer(backoff)
		select {
		case <-ctx.Done():
			timer.Stop()
			return zero, ctx.Err()
		case <-timer.C:
			backoff = min(backoff*2, maxBackoff)
		}
	}

	if lastErr != nil {
		return zero, fmt.Errorf("rate limited after retries: %w", lastErr)
	}
	return zero, fmt.Errorf("rate limited after retries")
}

Possible Solutions:

Option 1

Wrap at the client level. I think this is the better approach, but the retry logic will then run on every call, whether or not the call goes through a batcher.

Before:

// GetPricing looks up pricing for catalogEntryID through the underlying
// Global Catalog SDK client.
//
// It returns the error from ensureClient unchanged, an error if the held
// client is not a *globalcatalogv1.GlobalCatalogV1, or the SDK error wrapped
// with call context.
func (c *GlobalCatalogClient) GetPricing(ctx context.Context, catalogEntryID string) (*globalcatalogv1.PricingGet, error) {
	if err := c.ensureClient(ctx); err != nil {
		return nil, err
	}

	// Reject anything that is not the concrete SDK client up front.
	sdk, isSDK := c.client.(*globalcatalogv1.GlobalCatalogV1)
	if !isSDK {
		return nil, fmt.Errorf("invalid client type for GetPricing")
	}

	// The detailed response is not needed here, only the payload and error.
	pricing, _, err := sdk.GetPricing(&globalcatalogv1.GetPricingOptions{
		ID: &catalogEntryID,
	})
	if err != nil {
		return nil, fmt.Errorf("calling GetPricing API: %w", err)
	}
	return pricing, nil
}

After:

// GetPricing looks up pricing for catalogEntryID through the underlying
// Global Catalog SDK client, routing the SDK call through DoWithRetry.
//
// It returns the error from ensureClient unchanged, an error if the held
// client is not a *globalcatalogv1.GlobalCatalogV1, or the error from the
// retried call wrapped with call context.
func (c *GlobalCatalogClient) GetPricing(ctx context.Context, catalogEntryID string) (*globalcatalogv1.PricingGet, error) {
	if err := c.ensureClient(ctx); err != nil {
		return nil, err
	}

	// Reject anything that is not the concrete SDK client up front.
	sdk, isSDK := c.client.(*globalcatalogv1.GlobalCatalogV1)
	if !isSDK {
		return nil, fmt.Errorf("invalid client type for GetPricing")
	}

	opts := &globalcatalogv1.GetPricingOptions{ID: &catalogEntryID}

	// DoWithRetry needs the detailed response alongside the payload and
	// error, so the SDK's three return values are passed through as-is.
	call := func() (*globalcatalogv1.PricingGet, *core.DetailedResponse, error) {
		return sdk.GetPricing(opts)
	}

	pricing, err := DoWithRetry(ctx, call)
	if err != nil {
		return nil, fmt.Errorf("calling GetPricing API: %w", err)
	}
	return pricing, nil
}

Option 2

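Retry at the executor/batcher level. The drawback is that every client interface signature used by the batchers must change to also return the *core.DetailedResponse, so that the retry helper can inspect the status code and headers.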

Before:

// pricingClient declares the single pricing lookup call.
// NOTE(review): presumably the type of PricingBatcher's client field — confirm.
type pricingClient interface {
	// GetPricing returns the pricing for catalogEntryID, or an error.
	GetPricing(ctx context.Context, catalogEntryID string) (*globalcatalogv1.PricingGet, error)
}

// execPricingBatch returns the batch executor for pricing queries.
//
// The executor issues one GetPricing call, using the CatalogEntryID of the
// first input, and gives every input in the batch that same output and error.
// An empty batch yields an empty result slice without calling the client.
// NOTE(review): assumes all inputs in a batch share one CatalogEntryID —
// confirm against the batcher's grouping logic.
func (p *PricingBatcher) execPricingBatch() BatchExecutor[PricingQueryInput, globalcatalogv1.PricingGet] {
	return func(ctx context.Context, inputs []*PricingQueryInput) []Result[globalcatalogv1.PricingGet] {
		out := make([]Result[globalcatalogv1.PricingGet], len(inputs))
		if len(inputs) == 0 {
			return out
		}

		pricing, err := p.client.GetPricing(ctx, inputs[0].CatalogEntryID)

		// One lookup serves the whole batch, so fan the same result out.
		shared := Result[globalcatalogv1.PricingGet]{Output: pricing, Err: err}
		for i := range out {
			out[i] = shared
		}
		return out
	}
}

After:

// pricingClient declares the single pricing lookup call.
// NOTE(review): presumably the type of PricingBatcher's client field — confirm.
type pricingClient interface {
	// GetPricing returns the pricing for catalogEntryID together with the
	// SDK's detailed response and an error.
	GetPricing(ctx context.Context, catalogEntryID string) (*globalcatalogv1.PricingGet, *core.DetailedResponse, error)
}

// execPricingBatch returns the batch executor for pricing queries.
//
// The executor performs one GetPricing lookup through ibm.DoWithRetry, using
// the CatalogEntryID of the first input, and gives every input in the batch
// that same output and error. An empty batch yields an empty result slice
// without calling the client.
// NOTE(review): assumes all inputs in a batch share one CatalogEntryID —
// confirm against the batcher's grouping logic.
func (p *PricingBatcher) execPricingBatch() BatchExecutor[PricingQueryInput, globalcatalogv1.PricingGet] {
	return func(ctx context.Context, inputs []*PricingQueryInput) []Result[globalcatalogv1.PricingGet] {
		out := make([]Result[globalcatalogv1.PricingGet], len(inputs))
		if len(inputs) == 0 {
			return out
		}

		entryID := inputs[0].CatalogEntryID
		lookup := func() (*globalcatalogv1.PricingGet, *core.DetailedResponse, error) {
			return p.client.GetPricing(ctx, entryID)
		}
		pricing, err := ibm.DoWithRetry(ctx, lookup)

		// One lookup serves the whole batch, so fan the same result out.
		shared := Result[globalcatalogv1.PricingGet]{Output: pricing, Err: err}
		for i := range out {
			out[i] = shared
		}
		return out
	}
}

Metadata

Metadata

Assignees

Labels

kind/feature: Categorizes issue or PR as related to a new feature.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions