Skip to content

Commit f976d1f

Browse files
WVerlaek and ona-agent committed
Add root trace span for build command and fix S3 cache timeout
- Add leeway.command span in root command that wraps all command execution
- Refactor telemetry package with singleton pattern and span utilities (Initialize, Shutdown, Enabled, Tracer, StartSpan, FinishSpan)
- Replace ListObjects with parallel HeadObject calls in ExistingPackages to fix timeout on buckets with millions of objects
- Add tracing spans for cache download operations with size attributes
- Add String() method to DownloadStatus for cleaner span attributes

Co-authored-by: Ona <no-reply@ona.com>
1 parent 74b1482 commit f976d1f

17 files changed

Lines changed: 271 additions & 423 deletions

cmd/build.go

Lines changed: 34 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,10 @@ import (
2020
"github.com/gookit/color"
2121
log "github.com/sirupsen/logrus"
2222
"github.com/spf13/cobra"
23-
"go.opentelemetry.io/otel"
24-
sdktrace "go.opentelemetry.io/otel/sdk/trace"
23+
"go.opentelemetry.io/otel/attribute"
2524
)
2625

27-
// CleanupFunc is a function that performs cleanup operations and must be deferred
28-
type CleanupFunc func()
26+
2927

3028
// buildCmd represents the build command
3129
var buildCmd = &cobra.Command{
@@ -58,13 +56,33 @@ Examples:
5856
},
5957
}
6058

61-
func build(cmd *cobra.Command, args []string) error {
59+
func build(cmd *cobra.Command, args []string) (buildErr error) {
60+
// Use command context from root (has the command span)
61+
ctx := cmd.Context()
62+
if commandCtx != nil {
63+
ctx = commandCtx
64+
}
65+
66+
// Capture build error in command span
67+
if commandSpan != nil {
68+
defer func() {
69+
if buildErr != nil {
70+
commandSpan.RecordError(buildErr)
71+
}
72+
}()
73+
}
74+
6275
_, pkg, _, _ := getTarget(args, false)
6376
if pkg == nil {
6477
return errors.New("build needs a package")
6578
}
66-
opts, localCache, shutdown := getBuildOpts(cmd)
67-
defer shutdown()
79+
80+
// Add target package info to command span
81+
if commandSpan != nil {
82+
commandSpan.SetAttributes(attribute.String("leeway.target.package", pkg.FullName()))
83+
}
84+
85+
opts, localCache := getBuildOpts(cmd, ctx)
6886

6987
var (
7088
watch, _ = cmd.Flags().GetBool("watch")
@@ -240,13 +258,9 @@ func addBuildFlags(cmd *cobra.Command) {
240258
cmd.Flags().Bool("report-github", os.Getenv("GITHUB_OUTPUT") != "", "Report package build success/failure to GitHub Actions using the GITHUB_OUTPUT environment variable")
241259
cmd.Flags().Bool("fixed-build-dir", true, "Use a fixed build directory for each package, instead of based on the package version, to better utilize caches based on absolute paths (defaults to true)")
242260
cmd.Flags().Bool("docker-export-to-cache", false, "Export Docker images to cache instead of pushing directly (enables SLSA L3 compliance)")
243-
cmd.Flags().String("otel-endpoint", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"), "OpenTelemetry OTLP endpoint URL for tracing (defaults to $OTEL_EXPORTER_OTLP_ENDPOINT)")
244-
cmd.Flags().Bool("otel-insecure", os.Getenv("OTEL_EXPORTER_OTLP_INSECURE") == "true", "Disable TLS for OTLP endpoint (for local development only, defaults to $OTEL_EXPORTER_OTLP_INSECURE)")
245-
cmd.Flags().String("trace-parent", os.Getenv("TRACEPARENT"), "W3C Trace Context traceparent header for distributed tracing (defaults to $TRACEPARENT)")
246-
cmd.Flags().String("trace-state", os.Getenv("TRACESTATE"), "W3C Trace Context tracestate header for distributed tracing (defaults to $TRACESTATE)")
247261
}
248262

249-
func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, CleanupFunc) {
263+
func getBuildOpts(cmd *cobra.Command, commandCtx context.Context) ([]leeway.BuildOption, cache.LocalCache) {
250264
// Track if user explicitly set LEEWAY_DOCKER_EXPORT_TO_CACHE before workspace loading.
251265
// This allows us to distinguish:
252266
// - User set explicitly: High priority (overrides package config)
@@ -347,59 +361,9 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
347361
reporter = append(reporter, leeway.NewGitHubReporter())
348362
}
349363

350-
// Initialize OpenTelemetry reporter if endpoint is configured
351-
var tracerProvider *sdktrace.TracerProvider
352-
var otelShutdown func()
353-
if otelEndpoint, err := cmd.Flags().GetString("otel-endpoint"); err != nil {
354-
log.Fatal(err)
355-
} else if otelEndpoint != "" {
356-
// Set leeway version for telemetry
357-
telemetry.SetLeewayVersion(leeway.Version)
358-
359-
// Get insecure flag
360-
otelInsecure, err := cmd.Flags().GetBool("otel-insecure")
361-
if err != nil {
362-
log.Fatal(err)
363-
}
364-
365-
// Initialize tracer with the provided endpoint and TLS configuration
366-
tp, err := telemetry.InitTracer(context.Background(), otelEndpoint, otelInsecure)
367-
if err != nil {
368-
log.WithError(err).Warn("failed to initialize OpenTelemetry tracer")
369-
} else {
370-
tracerProvider = tp
371-
372-
// Parse trace context if provided
373-
traceParent, _ := cmd.Flags().GetString("trace-parent")
374-
traceState, _ := cmd.Flags().GetString("trace-state")
375-
376-
parentCtx := context.Background()
377-
if traceParent != "" {
378-
if err := telemetry.ValidateTraceParent(traceParent); err != nil {
379-
log.WithError(err).Warn("invalid trace-parent format")
380-
} else {
381-
ctx, err := telemetry.ParseTraceContext(traceParent, traceState)
382-
if err != nil {
383-
log.WithError(err).Warn("failed to parse trace context")
384-
} else {
385-
parentCtx = ctx
386-
}
387-
}
388-
}
389-
390-
// Create OTel reporter
391-
tracer := otel.Tracer("leeway")
392-
reporter = append(reporter, leeway.NewOTelReporter(tracer, parentCtx))
393-
394-
// Create shutdown function
395-
otelShutdown = func() {
396-
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
397-
defer cancel()
398-
if err := telemetry.Shutdown(shutdownCtx, tracerProvider); err != nil {
399-
log.WithError(err).Warn("failed to shutdown tracer provider")
400-
}
401-
}
402-
}
364+
// Add OpenTelemetry reporter if tracing is enabled
365+
if telemetry.Enabled() {
366+
reporter = append(reporter, leeway.NewOTelReporter(telemetry.Tracer(), commandCtx))
403367
}
404368

405369
dontTest, err := cmd.Flags().GetBool("dont-test")
@@ -465,11 +429,6 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
465429
dockerExportSet = true
466430
}
467431

468-
// Create a no-op shutdown function if otelShutdown is nil
469-
if otelShutdown == nil {
470-
otelShutdown = func() {}
471-
}
472-
473432
return []leeway.BuildOption{
474433
leeway.WithLocalCache(localCache),
475434
leeway.WithRemoteCache(remoteCache),
@@ -488,7 +447,7 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
488447
leeway.WithInFlightChecksums(inFlightChecksums),
489448
leeway.WithDockerExportToCache(dockerExportToCache, dockerExportSet),
490449
leeway.WithDockerExportEnv(dockerExportEnvValue, dockerExportEnvSet),
491-
}, localCache, otelShutdown
450+
}, localCache
492451
}
493452

494453
type pushOnlyRemoteCache struct {
@@ -621,6 +580,10 @@ func getRemoteCache(cmd *cobra.Command) cache.RemoteCache {
621580
if err != nil {
622581
log.Fatalf("cannot access remote S3 cache: %v", err)
623582
}
583+
// Set tracer if tracing is enabled
584+
if telemetry.Enabled() {
585+
rc.SetTracer(telemetry.Tracer())
586+
}
624587
return rc
625588
default:
626589
if slsaConfig != nil && slsaConfig.Verification {

cmd/build_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ func TestGetBuildOptsWithInFlightChecksums(t *testing.T) {
242242
}
243243

244244
// Test getBuildOpts function
245-
opts, localCache, _ := getBuildOpts(cmd)
245+
opts, localCache := getBuildOpts(cmd, cmd.Context())
246246

247247
// We can't directly test the WithInFlightChecksums option since it's internal,
248248
// but we can verify the function doesn't error and returns options

cmd/provenance-assert.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ func getProvenanceTarget(cmd *cobra.Command, args []string) (bundleFN, pkgFN str
125125
log.Fatal("provenance export requires a package")
126126
}
127127

128-
_, cache, _ := getBuildOpts(cmd)
128+
_, cache := getBuildOpts(cmd, cmd.Context())
129129

130130
var ok bool
131131
pkgFN, ok = cache.Location(pkg)

cmd/root.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,12 @@ import (
1010
"github.com/gookit/color"
1111
log "github.com/sirupsen/logrus"
1212
"github.com/spf13/cobra"
13+
"go.opentelemetry.io/otel/attribute"
14+
otelTrace "go.opentelemetry.io/otel/trace"
1315
"golang.org/x/xerrors"
1416

1517
"github.com/gitpod-io/leeway/pkg/leeway"
18+
"github.com/gitpod-io/leeway/pkg/leeway/telemetry"
1619
)
1720

1821
const (
@@ -95,6 +98,11 @@ var (
9598
buildArgs []string
9699
verbose bool
97100
variant string
101+
102+
// commandSpan is the root span for the current command execution
103+
commandSpan otelTrace.Span
104+
// commandCtx is the context with the command span
105+
commandCtx context.Context
98106
)
99107

100108
// rootCmd represents the base command when called without any subcommands
@@ -134,6 +142,44 @@ variables have an effect on leeway:
134142
if verbose {
135143
log.SetLevel(log.DebugLevel)
136144
}
145+
146+
// Initialize OpenTelemetry tracing if endpoint is configured
147+
otelEndpoint, _ := cmd.Flags().GetString("otel-endpoint")
148+
if otelEndpoint != "" {
149+
telemetry.SetLeewayVersion(leeway.Version)
150+
151+
otelInsecure, _ := cmd.Flags().GetBool("otel-insecure")
152+
if err := telemetry.Initialize(cmd.Context(), otelEndpoint, otelInsecure); err != nil {
153+
log.WithError(err).Warn("failed to initialize OpenTelemetry tracer")
154+
} else {
155+
// Parse trace context if provided
156+
traceParent, _ := cmd.Flags().GetString("trace-parent")
157+
traceState, _ := cmd.Flags().GetString("trace-state")
158+
159+
parentCtx := cmd.Context()
160+
if traceParent != "" {
161+
if err := telemetry.ValidateTraceParent(traceParent); err != nil {
162+
log.WithError(err).Warn("invalid trace-parent format")
163+
} else if ctx, err := telemetry.ParseTraceContext(traceParent, traceState); err != nil {
164+
log.WithError(err).Warn("failed to parse trace context")
165+
} else {
166+
parentCtx = ctx
167+
}
168+
}
169+
170+
// Create command span
171+
commandCtx, commandSpan = telemetry.StartSpan(parentCtx, "leeway.command",
172+
attribute.String("leeway.version", leeway.Version),
173+
attribute.String("leeway.command", cmd.Name()),
174+
)
175+
}
176+
}
177+
},
178+
PersistentPostRun: func(cmd *cobra.Command, args []string) {
179+
telemetry.FinishSpan(commandSpan, nil)
180+
if err := telemetry.Shutdown(context.Background()); err != nil {
181+
log.WithError(err).Warn("failed to shutdown tracer provider")
182+
}
137183
},
138184
BashCompletionFunction: bashCompletionFunc,
139185
}
@@ -183,6 +229,12 @@ func init() {
183229
rootCmd.PersistentFlags().StringVar(&variant, "variant", "", "selects a package variant")
184230
rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "enables verbose logging")
185231
rootCmd.PersistentFlags().Bool("dut", false, "used for testing only - doesn't actually do anything")
232+
233+
// OpenTelemetry tracing flags
234+
rootCmd.PersistentFlags().String("otel-endpoint", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"), "OpenTelemetry OTLP endpoint URL for tracing (defaults to $OTEL_EXPORTER_OTLP_ENDPOINT)")
235+
rootCmd.PersistentFlags().Bool("otel-insecure", os.Getenv("OTEL_EXPORTER_OTLP_INSECURE") == "true", "Disable TLS for OTLP endpoint (for local development only, defaults to $OTEL_EXPORTER_OTLP_INSECURE)")
236+
rootCmd.PersistentFlags().String("trace-parent", os.Getenv("TRACEPARENT"), "W3C Trace Context traceparent header for distributed tracing (defaults to $TRACEPARENT)")
237+
rootCmd.PersistentFlags().String("trace-state", os.Getenv("TRACESTATE"), "W3C Trace Context tracestate header for distributed tracing (defaults to $TRACESTATE)")
186238
}
187239

188240
func getWorkspace() (leeway.Workspace, error) {

cmd/run.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ Should any of the scripts fail Leeway will exit with an exit code of 1 once all
2727
if script == nil {
2828
return errors.New("run needs a script")
2929
}
30-
opts, _, _ := getBuildOpts(cmd)
30+
opts, _ := getBuildOpts(cmd, cmd.Context())
3131
return script.Run(opts...)
3232
})
3333
}

cmd/sbom-export.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ If no package is specified, the workspace's default target is used.`,
3232
}
3333

3434
// Get build options and cache
35-
_, localCache, _ := getBuildOpts(cmd)
35+
_, localCache := getBuildOpts(cmd, cmd.Context())
3636

3737
// Get output format and file
3838
format, _ := cmd.Flags().GetString("format")

cmd/sbom-scan.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ If no package is specified, the workspace's default target is used.`,
3030
}
3131

3232
// Get cache
33-
_, localCache, _ := getBuildOpts(cmd)
33+
_, localCache := getBuildOpts(cmd, cmd.Context())
3434

3535
// Get output directory
3636
outputDir, _ := cmd.Flags().GetString("output-dir")

0 commit comments

Comments
 (0)