Skip to content

Commit 1283e23

Browse files
WVerlaek and ona-agent committed
Add root trace span for build command and fix S3 cache timeout
- Add leeway.command span that wraps entire build execution including workspace loading and cache operations
- Replace ListObjects with parallel HeadObject calls in ExistingPackages to fix timeout on buckets with millions of objects
- Add Bytes field to DownloadResult to track artifact sizes
- Add debug logging for cache operations

Co-authored-by: Ona <no-reply@ona.com>
1 parent 74b1482 commit 1283e23

15 files changed

Lines changed: 339 additions & 389 deletions

cmd/build.go

Lines changed: 103 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,9 @@ import (
2121
log "github.com/sirupsen/logrus"
2222
"github.com/spf13/cobra"
2323
"go.opentelemetry.io/otel"
24-
sdktrace "go.opentelemetry.io/otel/sdk/trace"
24+
"go.opentelemetry.io/otel/attribute"
25+
"go.opentelemetry.io/otel/codes"
26+
"go.opentelemetry.io/otel/trace"
2527
)
2628

2729
// CleanupFunc is a function that performs cleanup operations and must be deferred
@@ -58,12 +60,35 @@ Examples:
5860
},
5961
}
6062

61-
func build(cmd *cobra.Command, args []string) error {
63+
func build(cmd *cobra.Command, args []string) (buildErr error) {
64+
// Initialize tracer early to capture full command execution time
65+
commandCtx, commandSpan, tracerShutdown := initCommandTracer(cmd)
66+
defer func() {
67+
// Set span status based on build result before shutdown
68+
if commandSpan != nil {
69+
if buildErr != nil {
70+
commandSpan.RecordError(buildErr)
71+
commandSpan.SetStatus(codes.Error, buildErr.Error())
72+
} else {
73+
commandSpan.SetStatus(codes.Ok, "command completed successfully")
74+
}
75+
}
76+
tracerShutdown()
77+
}()
78+
6279
_, pkg, _, _ := getTarget(args, false)
6380
if pkg == nil {
6481
return errors.New("build needs a package")
6582
}
66-
opts, localCache, shutdown := getBuildOpts(cmd)
83+
84+
// Add target package info to command span
85+
if commandSpan != nil {
86+
commandSpan.SetAttributes(
87+
attribute.String("leeway.target.package", pkg.FullName()),
88+
)
89+
}
90+
91+
opts, localCache, shutdown := getBuildOpts(cmd, commandCtx)
6792
defer shutdown()
6893

6994
var (
@@ -197,6 +222,71 @@ func saveBuildResult(ctx context.Context, loc string, localCache cache.LocalCach
197222
return nil
198223
}
199224

225+
// initCommandTracer initializes the OpenTelemetry tracer and creates a root span
226+
// for the entire command execution. Returns the context with the span, the span itself,
227+
// and a shutdown function that must be deferred.
228+
func initCommandTracer(cmd *cobra.Command) (context.Context, trace.Span, CleanupFunc) {
229+
otelEndpoint, err := cmd.Flags().GetString("otel-endpoint")
230+
if err != nil || otelEndpoint == "" {
231+
return context.Background(), nil, func() {}
232+
}
233+
234+
// Set leeway version for telemetry
235+
telemetry.SetLeewayVersion(leeway.Version)
236+
237+
// Get insecure flag
238+
otelInsecure, _ := cmd.Flags().GetBool("otel-insecure")
239+
240+
// Initialize tracer with the provided endpoint and TLS configuration
241+
tp, err := telemetry.InitTracer(context.Background(), otelEndpoint, otelInsecure)
242+
if err != nil {
243+
log.WithError(err).Warn("failed to initialize OpenTelemetry tracer")
244+
return context.Background(), nil, func() {}
245+
}
246+
247+
// Parse trace context if provided
248+
traceParent, _ := cmd.Flags().GetString("trace-parent")
249+
traceState, _ := cmd.Flags().GetString("trace-state")
250+
251+
parentCtx := context.Background()
252+
if traceParent != "" {
253+
if err := telemetry.ValidateTraceParent(traceParent); err != nil {
254+
log.WithError(err).Warn("invalid trace-parent format")
255+
} else {
256+
ctx, err := telemetry.ParseTraceContext(traceParent, traceState)
257+
if err != nil {
258+
log.WithError(err).Warn("failed to parse trace context")
259+
} else {
260+
parentCtx = ctx
261+
}
262+
}
263+
}
264+
265+
// Create root span for the entire command execution
266+
tracer := otel.Tracer("leeway")
267+
ctx, span := tracer.Start(parentCtx, "leeway.command",
268+
trace.WithSpanKind(trace.SpanKindInternal),
269+
)
270+
271+
// Add command attributes
272+
span.SetAttributes(
273+
attribute.String("leeway.version", leeway.Version),
274+
attribute.String("leeway.command", "build"),
275+
)
276+
277+
// Create shutdown function
278+
shutdown := func() {
279+
span.End()
280+
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
281+
defer cancel()
282+
if err := telemetry.Shutdown(shutdownCtx, tp); err != nil {
283+
log.WithError(err).Warn("failed to shutdown tracer provider")
284+
}
285+
}
286+
287+
return ctx, span, shutdown
288+
}
289+
200290
func init() {
201291
rootCmd.AddCommand(buildCmd)
202292

@@ -246,7 +336,7 @@ func addBuildFlags(cmd *cobra.Command) {
246336
cmd.Flags().String("trace-state", os.Getenv("TRACESTATE"), "W3C Trace Context tracestate header for distributed tracing (defaults to $TRACESTATE)")
247337
}
248338

249-
func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, CleanupFunc) {
339+
func getBuildOpts(cmd *cobra.Command, commandCtx context.Context) ([]leeway.BuildOption, cache.LocalCache, CleanupFunc) {
250340
// Track if user explicitly set LEEWAY_DOCKER_EXPORT_TO_CACHE before workspace loading.
251341
// This allows us to distinguish:
252342
// - User set explicitly: High priority (overrides package config)
@@ -347,59 +437,12 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
347437
reporter = append(reporter, leeway.NewGitHubReporter())
348438
}
349439

350-
// Initialize OpenTelemetry reporter if endpoint is configured
351-
var tracerProvider *sdktrace.TracerProvider
352-
var otelShutdown func()
353-
if otelEndpoint, err := cmd.Flags().GetString("otel-endpoint"); err != nil {
354-
log.Fatal(err)
355-
} else if otelEndpoint != "" {
356-
// Set leeway version for telemetry
357-
telemetry.SetLeewayVersion(leeway.Version)
358-
359-
// Get insecure flag
360-
otelInsecure, err := cmd.Flags().GetBool("otel-insecure")
361-
if err != nil {
362-
log.Fatal(err)
363-
}
364-
365-
// Initialize tracer with the provided endpoint and TLS configuration
366-
tp, err := telemetry.InitTracer(context.Background(), otelEndpoint, otelInsecure)
367-
if err != nil {
368-
log.WithError(err).Warn("failed to initialize OpenTelemetry tracer")
369-
} else {
370-
tracerProvider = tp
371-
372-
// Parse trace context if provided
373-
traceParent, _ := cmd.Flags().GetString("trace-parent")
374-
traceState, _ := cmd.Flags().GetString("trace-state")
375-
376-
parentCtx := context.Background()
377-
if traceParent != "" {
378-
if err := telemetry.ValidateTraceParent(traceParent); err != nil {
379-
log.WithError(err).Warn("invalid trace-parent format")
380-
} else {
381-
ctx, err := telemetry.ParseTraceContext(traceParent, traceState)
382-
if err != nil {
383-
log.WithError(err).Warn("failed to parse trace context")
384-
} else {
385-
parentCtx = ctx
386-
}
387-
}
388-
}
389-
390-
// Create OTel reporter
391-
tracer := otel.Tracer("leeway")
392-
reporter = append(reporter, leeway.NewOTelReporter(tracer, parentCtx))
393-
394-
// Create shutdown function
395-
otelShutdown = func() {
396-
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
397-
defer cancel()
398-
if err := telemetry.Shutdown(shutdownCtx, tracerProvider); err != nil {
399-
log.WithError(err).Warn("failed to shutdown tracer provider")
400-
}
401-
}
402-
}
440+
// Add OpenTelemetry reporter if tracer was initialized (commandCtx has a span)
441+
// The tracer is initialized in initCommandTracer() which creates the root leeway.command span.
442+
// The OTelReporter will create leeway.build as a child of leeway.command.
443+
if otelEndpoint, _ := cmd.Flags().GetString("otel-endpoint"); otelEndpoint != "" {
444+
tracer := otel.Tracer("leeway")
445+
reporter = append(reporter, leeway.NewOTelReporter(tracer, commandCtx))
403446
}
404447

405448
dontTest, err := cmd.Flags().GetBool("dont-test")
@@ -465,10 +508,8 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
465508
dockerExportSet = true
466509
}
467510

468-
// Create a no-op shutdown function if otelShutdown is nil
469-
if otelShutdown == nil {
470-
otelShutdown = func() {}
471-
}
511+
// Return a no-op cleanup function since tracer shutdown is handled by initCommandTracer
512+
noopCleanup := func() {}
472513

473514
return []leeway.BuildOption{
474515
leeway.WithLocalCache(localCache),
@@ -488,7 +529,7 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
488529
leeway.WithInFlightChecksums(inFlightChecksums),
489530
leeway.WithDockerExportToCache(dockerExportToCache, dockerExportSet),
490531
leeway.WithDockerExportEnv(dockerExportEnvValue, dockerExportEnvSet),
491-
}, localCache, otelShutdown
532+
}, localCache, noopCleanup
492533
}
493534

494535
type pushOnlyRemoteCache struct {

cmd/build_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cmd
22

33
import (
4+
"context"
45
"os"
56
"testing"
67

@@ -242,7 +243,7 @@ func TestGetBuildOptsWithInFlightChecksums(t *testing.T) {
242243
}
243244

244245
// Test getBuildOpts function
245-
opts, localCache, _ := getBuildOpts(cmd)
246+
opts, localCache, _ := getBuildOpts(cmd, context.Background())
246247

247248
// We can't directly test the WithInFlightChecksums option since it's internal,
248249
// but we can verify the function doesn't error and returns options

cmd/provenance-assert.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cmd
22

33
import (
4+
"context"
45
"encoding/base64"
56
"encoding/json"
67
"io"
@@ -125,7 +126,7 @@ func getProvenanceTarget(cmd *cobra.Command, args []string) (bundleFN, pkgFN str
125126
log.Fatal("provenance export requires a package")
126127
}
127128

128-
_, cache, _ := getBuildOpts(cmd)
129+
_, cache, _ := getBuildOpts(cmd, context.Background())
129130

130131
var ok bool
131132
pkgFN, ok = cache.Location(pkg)

cmd/run.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cmd
22

33
import (
4+
"context"
45
"errors"
56

67
log "github.com/sirupsen/logrus"
@@ -27,7 +28,7 @@ Should any of the scripts fail Leeway will exit with an exit code of 1 once all
2728
if script == nil {
2829
return errors.New("run needs a script")
2930
}
30-
opts, _, _ := getBuildOpts(cmd)
31+
opts, _, _ := getBuildOpts(cmd, context.Background())
3132
return script.Run(opts...)
3233
})
3334
}

cmd/sbom-export.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ If no package is specified, the workspace's default target is used.`,
3232
}
3333

3434
// Get build options and cache
35-
_, localCache, _ := getBuildOpts(cmd)
35+
_, localCache, _ := getBuildOpts(cmd, context.Background())
3636

3737
// Get output format and file
3838
format, _ := cmd.Flags().GetString("format")

cmd/sbom-scan.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cmd
22

33
import (
4+
"context"
45
"os"
56

67
"github.com/gitpod-io/leeway/pkg/leeway"
@@ -30,7 +31,7 @@ If no package is specified, the workspace's default target is used.`,
3031
}
3132

3233
// Get cache
33-
_, localCache, _ := getBuildOpts(cmd)
34+
_, localCache, _ := getBuildOpts(cmd, context.Background())
3435

3536
// Get output directory
3637
outputDir, _ := cmd.Flags().GetString("output-dir")

0 commit comments

Comments
 (0)