Skip to content

Commit 7fc1dc5

Browse files
WVerlaek and ona-agent committed
Add root trace span for build command and fix S3 cache timeout
- Add leeway.command span that wraps entire build execution including workspace loading and cache operations
- Refactor telemetry package with singleton pattern and span utilities (Initialize, Shutdown, Enabled, Tracer, StartSpan, FinishSpan)
- Move OTel tracing initialization to root command for reuse across commands
- Replace ListObjects with parallel HeadObject calls in ExistingPackages to fix timeout on buckets with millions of objects
- Add tracing spans for cache download operations with size attributes
- Add String() method to DownloadStatus for cleaner span attributes

Co-authored-by: Ona <no-reply@ona.com>
1 parent 74b1482 commit 7fc1dc5

17 files changed

Lines changed: 263 additions & 423 deletions

cmd/build.go

Lines changed: 37 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,11 @@ import (
2020
"github.com/gookit/color"
2121
log "github.com/sirupsen/logrus"
2222
"github.com/spf13/cobra"
23-
"go.opentelemetry.io/otel"
24-
sdktrace "go.opentelemetry.io/otel/sdk/trace"
23+
"go.opentelemetry.io/otel/attribute"
24+
"go.opentelemetry.io/otel/trace"
2525
)
2626

27-
// CleanupFunc is a function that performs cleanup operations and must be deferred
28-
type CleanupFunc func()
27+
2928

3029
// buildCmd represents the build command
3130
var buildCmd = &cobra.Command{
@@ -58,13 +57,35 @@ Examples:
5857
},
5958
}
6059

61-
func build(cmd *cobra.Command, args []string) error {
60+
func build(cmd *cobra.Command, args []string) (buildErr error) {
61+
// Create command span if tracing is enabled
62+
commandCtx := cmd.Context()
63+
if telemetry.Enabled() {
64+
parentCtx := rootSpanCtx
65+
if parentCtx == nil {
66+
parentCtx = cmd.Context()
67+
}
68+
var span trace.Span
69+
commandCtx, span = telemetry.StartSpan(parentCtx, "leeway.command",
70+
attribute.String("leeway.version", leeway.Version),
71+
attribute.String("leeway.command", "build"),
72+
)
73+
defer telemetry.FinishSpan(span, &buildErr)
74+
}
75+
6276
_, pkg, _, _ := getTarget(args, false)
6377
if pkg == nil {
6478
return errors.New("build needs a package")
6579
}
66-
opts, localCache, shutdown := getBuildOpts(cmd)
67-
defer shutdown()
80+
81+
// Add target package info to command span
82+
if telemetry.Enabled() {
83+
if span := trace.SpanFromContext(commandCtx); span.IsRecording() {
84+
span.SetAttributes(attribute.String("leeway.target.package", pkg.FullName()))
85+
}
86+
}
87+
88+
opts, localCache := getBuildOpts(cmd, commandCtx)
6889

6990
var (
7091
watch, _ = cmd.Flags().GetBool("watch")
@@ -240,13 +261,9 @@ func addBuildFlags(cmd *cobra.Command) {
240261
cmd.Flags().Bool("report-github", os.Getenv("GITHUB_OUTPUT") != "", "Report package build success/failure to GitHub Actions using the GITHUB_OUTPUT environment variable")
241262
cmd.Flags().Bool("fixed-build-dir", true, "Use a fixed build directory for each package, instead of based on the package version, to better utilize caches based on absolute paths (defaults to true)")
242263
cmd.Flags().Bool("docker-export-to-cache", false, "Export Docker images to cache instead of pushing directly (enables SLSA L3 compliance)")
243-
cmd.Flags().String("otel-endpoint", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"), "OpenTelemetry OTLP endpoint URL for tracing (defaults to $OTEL_EXPORTER_OTLP_ENDPOINT)")
244-
cmd.Flags().Bool("otel-insecure", os.Getenv("OTEL_EXPORTER_OTLP_INSECURE") == "true", "Disable TLS for OTLP endpoint (for local development only, defaults to $OTEL_EXPORTER_OTLP_INSECURE)")
245-
cmd.Flags().String("trace-parent", os.Getenv("TRACEPARENT"), "W3C Trace Context traceparent header for distributed tracing (defaults to $TRACEPARENT)")
246-
cmd.Flags().String("trace-state", os.Getenv("TRACESTATE"), "W3C Trace Context tracestate header for distributed tracing (defaults to $TRACESTATE)")
247264
}
248265

249-
func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, CleanupFunc) {
266+
func getBuildOpts(cmd *cobra.Command, commandCtx context.Context) ([]leeway.BuildOption, cache.LocalCache) {
250267
// Track if user explicitly set LEEWAY_DOCKER_EXPORT_TO_CACHE before workspace loading.
251268
// This allows us to distinguish:
252269
// - User set explicitly: High priority (overrides package config)
@@ -347,59 +364,9 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
347364
reporter = append(reporter, leeway.NewGitHubReporter())
348365
}
349366

350-
// Initialize OpenTelemetry reporter if endpoint is configured
351-
var tracerProvider *sdktrace.TracerProvider
352-
var otelShutdown func()
353-
if otelEndpoint, err := cmd.Flags().GetString("otel-endpoint"); err != nil {
354-
log.Fatal(err)
355-
} else if otelEndpoint != "" {
356-
// Set leeway version for telemetry
357-
telemetry.SetLeewayVersion(leeway.Version)
358-
359-
// Get insecure flag
360-
otelInsecure, err := cmd.Flags().GetBool("otel-insecure")
361-
if err != nil {
362-
log.Fatal(err)
363-
}
364-
365-
// Initialize tracer with the provided endpoint and TLS configuration
366-
tp, err := telemetry.InitTracer(context.Background(), otelEndpoint, otelInsecure)
367-
if err != nil {
368-
log.WithError(err).Warn("failed to initialize OpenTelemetry tracer")
369-
} else {
370-
tracerProvider = tp
371-
372-
// Parse trace context if provided
373-
traceParent, _ := cmd.Flags().GetString("trace-parent")
374-
traceState, _ := cmd.Flags().GetString("trace-state")
375-
376-
parentCtx := context.Background()
377-
if traceParent != "" {
378-
if err := telemetry.ValidateTraceParent(traceParent); err != nil {
379-
log.WithError(err).Warn("invalid trace-parent format")
380-
} else {
381-
ctx, err := telemetry.ParseTraceContext(traceParent, traceState)
382-
if err != nil {
383-
log.WithError(err).Warn("failed to parse trace context")
384-
} else {
385-
parentCtx = ctx
386-
}
387-
}
388-
}
389-
390-
// Create OTel reporter
391-
tracer := otel.Tracer("leeway")
392-
reporter = append(reporter, leeway.NewOTelReporter(tracer, parentCtx))
393-
394-
// Create shutdown function
395-
otelShutdown = func() {
396-
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
397-
defer cancel()
398-
if err := telemetry.Shutdown(shutdownCtx, tracerProvider); err != nil {
399-
log.WithError(err).Warn("failed to shutdown tracer provider")
400-
}
401-
}
402-
}
367+
// Add OpenTelemetry reporter if tracing is enabled
368+
if telemetry.Enabled() {
369+
reporter = append(reporter, leeway.NewOTelReporter(telemetry.Tracer(), commandCtx))
403370
}
404371

405372
dontTest, err := cmd.Flags().GetBool("dont-test")
@@ -465,11 +432,6 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
465432
dockerExportSet = true
466433
}
467434

468-
// Create a no-op shutdown function if otelShutdown is nil
469-
if otelShutdown == nil {
470-
otelShutdown = func() {}
471-
}
472-
473435
return []leeway.BuildOption{
474436
leeway.WithLocalCache(localCache),
475437
leeway.WithRemoteCache(remoteCache),
@@ -488,7 +450,7 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
488450
leeway.WithInFlightChecksums(inFlightChecksums),
489451
leeway.WithDockerExportToCache(dockerExportToCache, dockerExportSet),
490452
leeway.WithDockerExportEnv(dockerExportEnvValue, dockerExportEnvSet),
491-
}, localCache, otelShutdown
453+
}, localCache
492454
}
493455

494456
type pushOnlyRemoteCache struct {
@@ -621,6 +583,10 @@ func getRemoteCache(cmd *cobra.Command) cache.RemoteCache {
621583
if err != nil {
622584
log.Fatalf("cannot access remote S3 cache: %v", err)
623585
}
586+
// Set tracer if tracing is enabled
587+
if telemetry.Enabled() {
588+
rc.SetTracer(telemetry.Tracer())
589+
}
624590
return rc
625591
default:
626592
if slsaConfig != nil && slsaConfig.Verification {

cmd/build_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ func TestGetBuildOptsWithInFlightChecksums(t *testing.T) {
242242
}
243243

244244
// Test getBuildOpts function
245-
opts, localCache, _ := getBuildOpts(cmd)
245+
opts, localCache := getBuildOpts(cmd, cmd.Context())
246246

247247
// We can't directly test the WithInFlightChecksums option since it's internal,
248248
// but we can verify the function doesn't error and returns options

cmd/provenance-assert.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ func getProvenanceTarget(cmd *cobra.Command, args []string) (bundleFN, pkgFN str
125125
log.Fatal("provenance export requires a package")
126126
}
127127

128-
_, cache, _ := getBuildOpts(cmd)
128+
_, cache := getBuildOpts(cmd, cmd.Context())
129129

130130
var ok bool
131131
pkgFN, ok = cache.Location(pkg)

cmd/root.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"golang.org/x/xerrors"
1414

1515
"github.com/gitpod-io/leeway/pkg/leeway"
16+
"github.com/gitpod-io/leeway/pkg/leeway/telemetry"
1617
)
1718

1819
const (
@@ -95,6 +96,9 @@ var (
9596
buildArgs []string
9697
verbose bool
9798
variant string
99+
100+
// rootSpanCtx holds the context with parent trace information (if provided)
101+
rootSpanCtx context.Context
98102
)
99103

100104
// rootCmd represents the base command when called without any subcommands
@@ -134,6 +138,37 @@ variables have an effect on leeway:
134138
if verbose {
135139
log.SetLevel(log.DebugLevel)
136140
}
141+
142+
// Initialize OpenTelemetry tracing if endpoint is configured
143+
otelEndpoint, _ := cmd.Flags().GetString("otel-endpoint")
144+
if otelEndpoint != "" {
145+
telemetry.SetLeewayVersion(leeway.Version)
146+
147+
otelInsecure, _ := cmd.Flags().GetBool("otel-insecure")
148+
if err := telemetry.Initialize(cmd.Context(), otelEndpoint, otelInsecure); err != nil {
149+
log.WithError(err).Warn("failed to initialize OpenTelemetry tracer")
150+
} else {
151+
// Parse trace context if provided
152+
traceParent, _ := cmd.Flags().GetString("trace-parent")
153+
traceState, _ := cmd.Flags().GetString("trace-state")
154+
155+
rootSpanCtx = cmd.Context()
156+
if traceParent != "" {
157+
if err := telemetry.ValidateTraceParent(traceParent); err != nil {
158+
log.WithError(err).Warn("invalid trace-parent format")
159+
} else if ctx, err := telemetry.ParseTraceContext(traceParent, traceState); err != nil {
160+
log.WithError(err).Warn("failed to parse trace context")
161+
} else {
162+
rootSpanCtx = ctx
163+
}
164+
}
165+
}
166+
}
167+
},
168+
PersistentPostRun: func(cmd *cobra.Command, args []string) {
169+
if err := telemetry.Shutdown(context.Background()); err != nil {
170+
log.WithError(err).Warn("failed to shutdown tracer provider")
171+
}
137172
},
138173
BashCompletionFunction: bashCompletionFunc,
139174
}
@@ -183,6 +218,12 @@ func init() {
183218
rootCmd.PersistentFlags().StringVar(&variant, "variant", "", "selects a package variant")
184219
rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "enables verbose logging")
185220
rootCmd.PersistentFlags().Bool("dut", false, "used for testing only - doesn't actually do anything")
221+
222+
// OpenTelemetry tracing flags
223+
rootCmd.PersistentFlags().String("otel-endpoint", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"), "OpenTelemetry OTLP endpoint URL for tracing (defaults to $OTEL_EXPORTER_OTLP_ENDPOINT)")
224+
rootCmd.PersistentFlags().Bool("otel-insecure", os.Getenv("OTEL_EXPORTER_OTLP_INSECURE") == "true", "Disable TLS for OTLP endpoint (for local development only, defaults to $OTEL_EXPORTER_OTLP_INSECURE)")
225+
rootCmd.PersistentFlags().String("trace-parent", os.Getenv("TRACEPARENT"), "W3C Trace Context traceparent header for distributed tracing (defaults to $TRACEPARENT)")
226+
rootCmd.PersistentFlags().String("trace-state", os.Getenv("TRACESTATE"), "W3C Trace Context tracestate header for distributed tracing (defaults to $TRACESTATE)")
186227
}
187228

188229
func getWorkspace() (leeway.Workspace, error) {

cmd/run.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ Should any of the scripts fail Leeway will exit with an exit code of 1 once all
2727
if script == nil {
2828
return errors.New("run needs a script")
2929
}
30-
opts, _, _ := getBuildOpts(cmd)
30+
opts, _ := getBuildOpts(cmd, cmd.Context())
3131
return script.Run(opts...)
3232
})
3333
}

cmd/sbom-export.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ If no package is specified, the workspace's default target is used.`,
3232
}
3333

3434
// Get build options and cache
35-
_, localCache, _ := getBuildOpts(cmd)
35+
_, localCache := getBuildOpts(cmd, cmd.Context())
3636

3737
// Get output format and file
3838
format, _ := cmd.Flags().GetString("format")

cmd/sbom-scan.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ If no package is specified, the workspace's default target is used.`,
3030
}
3131

3232
// Get cache
33-
_, localCache, _ := getBuildOpts(cmd)
33+
_, localCache := getBuildOpts(cmd, cmd.Context())
3434

3535
// Get output directory
3636
outputDir, _ := cmd.Flags().GetString("output-dir")

0 commit comments

Comments
 (0)