Skip to content

Commit eb1a26c

Browse files
mvicknrclaude
andauthored
chore: Centralize retry and add to missing places (#34)
* chore: Centralize retry and add to missing places Co-Authored-By: Claude <noreply@anthropic.com> * chore: trigger workflows * chore: Trigger tests on pull request as well * chore: Add missing needs * Revert "chore: Trigger tests on pull request as well" This reverts commit 5a82419. --------- Co-authored-by: Claude <noreply@anthropic.com>
1 parent d61458d commit eb1a26c

File tree

11 files changed

+529
-123
lines changed

11 files changed

+529
-123
lines changed

.github/workflows/test_action.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ jobs:
223223
fi
224224
225225
test-docs-flow:
226+
needs: setup-test-tag
226227
name: Test Documentation Flow (MDX Parsing)
227228
runs-on: ubuntu-latest
228229
steps:

cmd/agent-metadata-action/main_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -813,9 +813,9 @@ func TestRunAgentFlow_SigningError_ServiceFailure(t *testing.T) {
813813

814814
outputStr := getStdout()
815815

816-
// Verify retries occurred (MaxRetries = 3 from sign package)
816+
// Verify retries occurred (3 attempts from retry package)
817817
assert.Equal(t, 3, requestCount, "Should have made 3 signing requests (retries)")
818818
assert.Contains(t, outputStr, "Signing attempt 1 failed")
819819
assert.Contains(t, outputStr, "Signing attempt 2 failed")
820-
assert.Contains(t, outputStr, "Failed to sign manifest index after 3 attempts")
820+
assert.Contains(t, outputStr, "Failed to sign manifest index")
821821
}

internal/client/instrumentation.go

Lines changed: 90 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"agent-metadata-action/internal/logging"
1313
"agent-metadata-action/internal/models"
14+
"agent-metadata-action/internal/retry"
1415
)
1516

1617
// InstrumentationClient handles instrumentation metadata operations
@@ -69,91 +70,112 @@ func (c *InstrumentationClient) SendMetadata(ctx context.Context, agentType stri
6970
"agent.version": agentVersion,
7071
})
7172
logging.Errorf(ctx, "Failed to marshal metadata: %v", err)
72-
return fmt.Errorf("failed to marshal metadata: %w", err)
73+
return retry.NewNonRetryableError(fmt.Errorf("failed to marshal metadata: %w", err))
7374
}
7475
logging.Debugf(ctx, "JSON payload size: %d bytes", len(jsonBody))
7576
logging.Debugf(ctx, "Configuration definitions count: %d", len(metadata.ConfigurationDefinitions))
7677
logging.Debugf(ctx, "Agent control entries: %d", len(metadata.AgentControlDefinitions))
7778

78-
// Create HTTP request
79-
logging.Debug(ctx, "Creating HTTP POST request...")
80-
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonBody))
81-
if err != nil {
82-
logging.NoticeErrorWithCategory(ctx, err, "metadata.send", map[string]interface{}{
83-
"error.operation": "create_http_request",
84-
"http.url": url,
85-
"agent.type": agentType,
86-
"agent.version": agentVersion,
87-
})
88-
logging.Errorf(ctx, "Failed to create request: %v", err)
89-
return fmt.Errorf("failed to create request: %w", err)
79+
// Execute request with retry logic
80+
retryConfig := retry.Config{
81+
MaxAttempts: 3,
82+
BaseDelay: 2 * time.Second,
83+
Operation: "Metadata submission",
9084
}
9185

92-
// Set headers
93-
logging.Debug(ctx, "Setting request headers...")
94-
req.Header.Set("Content-Type", "application/json")
95-
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.token))
96-
// SECURITY: Token is in header but not logged
86+
err = retry.Do(ctx, retryConfig, func() error {
87+
// Create HTTP request (must be recreated for each retry)
88+
logging.Debug(ctx, "Creating HTTP POST request...")
89+
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonBody))
90+
if err != nil {
91+
logging.NoticeErrorWithCategory(ctx, err, "metadata.send", map[string]interface{}{
92+
"error.operation": "create_http_request",
93+
"http.url": url,
94+
"agent.type": agentType,
95+
"agent.version": agentVersion,
96+
})
97+
logging.Errorf(ctx, "Failed to create request: %v", err)
98+
return retry.NewNonRetryableError(fmt.Errorf("failed to create request: %w", err))
99+
}
97100

98-
// Execute request
99-
logging.Debug(ctx, "Sending HTTP request...")
100-
startTime := time.Now()
101-
resp, err := c.httpClient.Do(req)
102-
duration := time.Since(startTime)
101+
// Set headers
102+
logging.Debug(ctx, "Setting request headers...")
103+
req.Header.Set("Content-Type", "application/json")
104+
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.token))
105+
106+
// Execute request
107+
logging.Debug(ctx, "Sending HTTP request...")
108+
startTime := time.Now()
109+
resp, err := c.httpClient.Do(req)
110+
duration := time.Since(startTime)
111+
112+
if err != nil {
113+
logging.NoticeErrorWithCategory(ctx, err, "metadata.send", map[string]interface{}{
114+
"error.operation": "execute_http_request",
115+
"http.url": url,
116+
"http.duration": duration.String(),
117+
"agent.type": agentType,
118+
"agent.version": agentVersion,
119+
})
120+
logging.Errorf(ctx, "HTTP request failed after %s: %v", duration, err)
121+
return fmt.Errorf("failed to send metadata: %w", err)
122+
}
123+
defer resp.Body.Close()
103124

104-
if err != nil {
105-
logging.NoticeErrorWithCategory(ctx, err, "metadata.send", map[string]interface{}{
106-
"error.operation": "execute_http_request",
107-
"http.url": url,
108-
"http.duration": duration.String(),
109-
"agent.type": agentType,
110-
"agent.version": agentVersion,
111-
})
112-
logging.Errorf(ctx, "HTTP request failed after %s: %v", duration, err)
113-
return fmt.Errorf("failed to send metadata: %w", err)
114-
}
115-
defer resp.Body.Close()
125+
logging.Debugf(ctx, "Response received in %s", duration)
126+
logging.Debugf(ctx, "HTTP status code: %d %s", resp.StatusCode, resp.Status)
116127

117-
logging.Debugf(ctx, "Response received in %s", duration)
118-
logging.Debugf(ctx, "HTTP status code: %d %s", resp.StatusCode, resp.Status)
128+
// Read response body for error details (with size limit)
129+
logging.Debug(ctx, "Reading response body...")
130+
body, err := io.ReadAll(resp.Body)
131+
if err != nil {
132+
logging.Errorf(ctx, "Failed to read response body: %v", err)
133+
return fmt.Errorf("failed to read response: %w", err)
134+
}
135+
logging.Debugf(ctx, "Response body size: %d bytes", len(body))
136+
137+
// Check for non-2xx status codes
138+
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
139+
// Log response body for debugging, but truncate if too large
140+
responsePreview := string(body)
141+
if len(responsePreview) > 500 {
142+
responsePreview = responsePreview[:500] + "... (truncated)"
143+
}
144+
145+
err := fmt.Errorf("metadata submission failed with status %d: %s", resp.StatusCode, string(body))
146+
logging.NoticeErrorWithCategory(ctx, err, "metadata.send", map[string]interface{}{
147+
"error.operation": "http_non_2xx_response",
148+
"http.status_code": resp.StatusCode,
149+
"http.url": url,
150+
"http.response_body": responsePreview,
151+
"agent.type": agentType,
152+
"agent.version": agentVersion,
153+
})
154+
logging.Errorf(ctx, "Metadata submission failed with status %d", resp.StatusCode)
155+
logging.Debugf(ctx, "Error response body: %s", responsePreview)
156+
157+
// Determine if error is retryable
158+
// Retry on: 5xx (server errors), 408 (timeout), 429 (rate limit)
159+
// Don't retry: 4xx (client errors, except 408 and 429)
160+
isRetryable := resp.StatusCode >= 500 || resp.StatusCode == 408 || resp.StatusCode == 429
161+
if !isRetryable {
162+
return retry.NewNonRetryableError(err)
163+
}
164+
return err
165+
}
119166

120-
// Read response body for error details (with size limit)
121-
logging.Debug(ctx, "Reading response body...")
122-
body, err := io.ReadAll(resp.Body)
123-
if err != nil {
124-
logging.Errorf(ctx, "Failed to read response body: %v", err)
125-
return fmt.Errorf("failed to read response: %w", err)
126-
}
127-
logging.Debugf(ctx, "Response body size: %d bytes", len(body))
128-
129-
// Check for non-2xx status codes
130-
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
131-
// Log response body for debugging, but truncate if too large
132-
responsePreview := string(body)
133-
if len(responsePreview) > 500 {
134-
responsePreview = responsePreview[:500] + "... (truncated)"
167+
// Success logging
168+
if len(body) > 0 {
169+
logging.Debugf(ctx, "Success response: %s", string(body))
135170
}
136171

137-
err := fmt.Errorf("metadata submission failed with status %d: %s", resp.StatusCode, string(body))
138-
logging.NoticeErrorWithCategory(ctx, err, "metadata.send", map[string]interface{}{
139-
"error.operation": "http_non_2xx_response",
140-
"http.status_code": resp.StatusCode,
141-
"http.url": url,
142-
"http.response_body": responsePreview,
143-
"agent.type": agentType,
144-
"agent.version": agentVersion,
145-
})
146-
logging.Errorf(ctx, "Metadata submission failed with status %d", resp.StatusCode)
147-
logging.Debugf(ctx, "Error response body: %s", responsePreview)
172+
return nil
173+
})
148174

175+
if err != nil {
149176
return err
150177
}
151178

152-
// Success logging
153179
logging.Notice(ctx, "Metadata successfully submitted to instrumentation service")
154-
if len(body) > 0 {
155-
logging.Debugf(ctx, "Success response: %s", string(body))
156-
}
157-
158180
return nil
159181
}

internal/client/instrumentation_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ func TestSendMetadata_ContextCancellation(t *testing.T) {
312312
_ = getStdout()
313313

314314
require.Error(t, err)
315-
assert.Contains(t, err.Error(), "failed to send metadata")
315+
assert.Contains(t, err.Error(), "retry cancelled")
316316
}
317317

318318
func TestSendMetadata_SuccessWithResponseBody(t *testing.T) {

internal/oci/client.go

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package oci
33
import (
44
"agent-metadata-action/internal/logging"
55
"agent-metadata-action/internal/models"
6+
"agent-metadata-action/internal/retry"
67
"bytes"
78
"context"
89
"encoding/json"
@@ -58,21 +59,21 @@ func NewClient(ctx context.Context, registry, username, password string) (*Clien
5859
func (c *Client) UploadArtifact(ctx context.Context, artifact *models.ArtifactDefinition, artifactPath, version string) (string, int64, error) {
5960
tempDir, err := os.MkdirTemp("", "oras-upload-*")
6061
if err != nil {
61-
return "", 0, fmt.Errorf("failed to create temp directory: %w", err)
62+
return "", 0, retry.NewNonRetryableError(fmt.Errorf("failed to create temp directory: %w", err))
6263
}
6364
defer os.RemoveAll(tempDir)
6465

6566
fs, err := file.New(tempDir)
6667
if err != nil {
67-
return "", 0, fmt.Errorf("failed to create file store: %w", err)
68+
return "", 0, retry.NewNonRetryableError(fmt.Errorf("failed to create file store: %w", err))
6869
}
6970
defer fs.Close()
7071

7172
layerAnnotations := CreateLayerAnnotations(artifact, version)
7273

7374
layerDesc, err := fs.Add(ctx, artifact.Name, artifact.GetMediaType(), artifactPath)
7475
if err != nil {
75-
return "", 0, fmt.Errorf("failed to add file to store: %w", err)
76+
return "", 0, retry.NewNonRetryableError(fmt.Errorf("failed to add file to store: %w", err))
7677
}
7778

7879
layerDesc.Annotations = layerAnnotations
@@ -86,7 +87,7 @@ func (c *Client) UploadArtifact(ctx context.Context, artifact *models.ArtifactDe
8687
}
8788
configBytes, err := json.Marshal(config)
8889
if err != nil {
89-
return "", 0, fmt.Errorf("failed to marshal config: %w", err)
90+
return "", 0, retry.NewNonRetryableError(fmt.Errorf("failed to marshal config: %w", err))
9091
}
9192

9293
configDesc := ocispec.Descriptor{
@@ -96,7 +97,7 @@ func (c *Client) UploadArtifact(ctx context.Context, artifact *models.ArtifactDe
9697
}
9798

9899
if err = fs.Push(ctx, configDesc, bytes.NewReader(configBytes)); err != nil {
99-
return "", 0, fmt.Errorf("failed to push config: %w", err)
100+
return "", 0, retry.NewNonRetryableError(fmt.Errorf("failed to push config: %w", err))
100101
}
101102

102103
artifactType := artifact.GetArtifactType()
@@ -108,25 +109,40 @@ func (c *Client) UploadArtifact(ctx context.Context, artifact *models.ArtifactDe
108109

109110
manifestDesc, err := oras.PackManifest(ctx, fs, oras.PackManifestVersion1_1, artifactType, packOpts)
110111
if err != nil {
111-
return "", 0, fmt.Errorf("failed to pack manifest: %w", err)
112+
return "", 0, retry.NewNonRetryableError(fmt.Errorf("failed to pack manifest: %w", err))
112113
}
113114

114115
// Tag manifest in file store with a temporary tag so it can be referenced during copy
115116
tempTag := "temp-manifest"
116117
if err = fs.Tag(ctx, manifestDesc, tempTag); err != nil {
117-
return "", 0, fmt.Errorf("failed to tag manifest in file store: %w", err)
118+
return "", 0, retry.NewNonRetryableError(fmt.Errorf("failed to tag manifest in file store: %w", err))
118119
}
119120

120-
pushCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
121-
defer cancel()
122-
123121
logging.Debugf(ctx, "Pushing artifact %s to registry by digest (digest: %s)", artifact.Name, manifestDesc.Digest.String())
124122

123+
// Copy manifest and blobs to remote registry by digest with retry logic
124+
retryConfig := retry.Config{
125+
MaxAttempts: 3,
126+
BaseDelay: 2 * time.Second,
127+
Operation: "OCI artifact upload",
128+
}
129+
125130
// Copy manifest and blobs to remote registry by digest
126131
copyOpts := oras.CopyOptions{}
127132
digestRef := manifestDesc.Digest.String()
128-
if _, err = oras.Copy(pushCtx, fs, tempTag, c.repo, digestRef, copyOpts); err != nil {
129-
return "", 0, fmt.Errorf("failed to push artifact to registry: %w", err)
133+
134+
err = retry.Do(ctx, retryConfig, func() error {
135+
pushCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
136+
defer cancel()
137+
138+
if _, err := oras.Copy(pushCtx, fs, tempTag, c.repo, digestRef, copyOpts); err != nil {
139+
return fmt.Errorf("failed to push artifact to registry: %w", err)
140+
}
141+
return nil
142+
})
143+
144+
if err != nil {
145+
return "", 0, err
130146
}
131147

132148
logging.Debugf(ctx, "Successfully uploaded artifact by digest: %s", manifestDesc.Digest.String())

internal/parser/mdx.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ const (
2626
//EBPF Subject = "" @todo update once eBPF is publishing release notes
2727
)
2828

29+
// SubjectToAgentTypeMapping list based on these: https://source.datanerd.us/fleet-management/fleet-deployment-api/blob/master/src/main/java/com/newrelic/fleetdeploymentapi/constant/AgentConstants.java
2930
var SubjectToAgentTypeMapping = map[Subject]string{
3031
DotNet: "NRDotNetAgent",
3132
Infra: "NRInfra", // maps to the same agent type for k8s & host

0 commit comments

Comments
 (0)