Skip to content

Commit 74b1482

Browse files
authored
Merge pull request #323 from gitpod-io/wv/add-phase-spans-otel
feat: add nested phase spans for OpenTelemetry tracing
2 parents 05d3912 + c3153b5 commit 74b1482

9 files changed

Lines changed: 548 additions & 55 deletions

README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,25 @@ export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=YOUR_API_KEY"
611611
leeway build :my-package
612612
```
613613

614+
The OpenTelemetry SDK automatically reads standard `OTEL_EXPORTER_OTLP_*` environment variables.
615+
616+
## Span Hierarchy
617+
618+
Leeway creates a nested span hierarchy for detailed build timeline visualization:
619+
620+
```
621+
leeway.build (root)
622+
├── leeway.package (component:package-1)
623+
│ ├── leeway.phase (prep)
624+
│ ├── leeway.phase (build)
625+
│ └── leeway.phase (test)
626+
└── leeway.package (component:package-2)
627+
├── leeway.phase (prep)
628+
└── leeway.phase (build)
629+
```
630+
631+
Each phase span captures timing, status, and errors for individual build phases (prep, pull, lint, test, build, package).
632+
614633
See [docs/observability.md](docs/observability.md) for configuration, examples, and span attributes.
615634

616635
# Provenance (SLSA) - EXPERIMENTAL

docs/observability.md

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,24 @@ OpenTelemetry tracing in leeway captures:
1919
```
2020
Root Span (leeway.build)
2121
├── Package Span 1 (leeway.package)
22+
│ ├── Phase Span (leeway.phase: prep)
23+
│ ├── Phase Span (leeway.phase: pull)
24+
│ ├── Phase Span (leeway.phase: lint)
25+
│ ├── Phase Span (leeway.phase: test)
26+
│ ├── Phase Span (leeway.phase: build)
27+
│ └── Phase Span (leeway.phase: package)
2228
├── Package Span 2 (leeway.package)
29+
│ ├── Phase Span (leeway.phase: prep)
30+
│ └── Phase Span (leeway.phase: build)
2331
└── Package Span N (leeway.package)
32+
└── ...
2433
```
2534

2635
- **Root Span**: Created when `BuildStarted` is called, represents the entire build operation
2736
- **Package Spans**: Created for each package being built, as children of the root span
37+
- **Phase Spans**: Created for each build phase (prep, pull, lint, test, build, package) as children of package spans
2838

29-
Build phase durations (prep, pull, lint, test, build, package) are captured as attributes on package spans, not as separate spans. This design provides lower overhead and simpler hierarchy while maintaining visibility into phase-level performance.
39+
Phase spans provide detailed timeline visualization and capture individual phase errors. Only phases with commands are executed and create spans.
3040

3141
### Context Propagation
3242

@@ -35,6 +45,7 @@ Leeway supports W3C Trace Context propagation, allowing builds to be part of lar
3545
1. **Parent Context**: Accepts `traceparent` and `tracestate` headers from upstream systems
3646
2. **Root Context**: Creates a root span linked to the parent context
3747
3. **Package Context**: Each package span is a child of the root span
48+
4. **Phase Context**: Each phase span is a child of its package span
3849

3950
## Configuration
4051

@@ -109,11 +120,24 @@ leeway build :my-package
109120
| `leeway.package.builddir` | string | Build directory | `"/tmp/leeway/build/..."` |
110121
| `leeway.package.last_phase` | string | Last completed phase | `"build"` |
111122
| `leeway.package.duration_ms` | int64 | Total build duration (ms) | `15234` |
112-
| `leeway.package.phase.{phase}.duration_ms` | int64 | Phase duration (ms) | `5432` |
113123
| `leeway.package.test.coverage_percentage` | int | Test coverage % | `85` |
114124
| `leeway.package.test.functions_with_test` | int | Functions with tests | `42` |
115125
| `leeway.package.test.functions_without_test` | int | Functions without tests | `8` |
116126

127+
### Phase Span Attributes
128+
129+
Phase spans are created for each build phase (prep, pull, lint, test, build, package) that has commands to execute.
130+
131+
| Attribute | Type | Description | Example |
132+
|-----------|------|-------------|---------|
133+
| `leeway.phase.name` | string | Phase name | `"prep"`, `"build"`, `"test"`, etc. |
134+
135+
**Span Status:**
136+
- `OK`: Phase completed successfully
137+
- `ERROR`: Phase failed (error details in span events)
138+
139+
**Span Duration:** The span's start and end times capture the phase execution duration automatically.
140+
117141
### GitHub Actions Attributes
118142

119143
When running in GitHub Actions (`GITHUB_ACTIONS=true`), the following attributes are added to the root span:

pkg/leeway/build.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,12 +1262,19 @@ func (p *Package) build(buildctx *buildContext) (err error) {
12621262
// Generate SBOM if enabled (after packaging - written alongside artifact)
12631263
// SBOM files are stored outside the tar.gz to maintain artifact determinism.
12641264
if p.C.W.SBOM.Enabled {
1265+
if par, ok := buildctx.Reporter.(PhaseAwareReporter); ok {
1266+
par.PackageBuildPhaseStarted(p, PackageBuildPhaseSBOM)
1267+
}
12651268
pkgRep.phaseEnter[PackageBuildPhaseSBOM] = time.Now()
12661269
pkgRep.Phases = append(pkgRep.Phases, PackageBuildPhaseSBOM)
1267-
if err := writeSBOMToCache(buildctx, p, builddir); err != nil {
1268-
return err
1269-
}
1270+
sbomErr := writeSBOMToCache(buildctx, p, builddir)
12701271
pkgRep.phaseDone[PackageBuildPhaseSBOM] = time.Now()
1272+
if par, ok := buildctx.Reporter.(PhaseAwareReporter); ok {
1273+
par.PackageBuildPhaseFinished(p, PackageBuildPhaseSBOM, sbomErr)
1274+
}
1275+
if sbomErr != nil {
1276+
return sbomErr
1277+
}
12711278
}
12721279

12731280
// Register newly built package
@@ -1365,6 +1372,11 @@ func executeBuildPhase(buildctx *buildContext, p *Package, builddir string, bld
13651372
return nil
13661373
}
13671374

1375+
// Notify phase-aware reporters
1376+
if par, ok := buildctx.Reporter.(PhaseAwareReporter); ok {
1377+
par.PackageBuildPhaseStarted(p, phase)
1378+
}
1379+
13681380
if phase != PackageBuildPhasePrep {
13691381
pkgRep.phaseEnter[phase] = time.Now()
13701382
pkgRep.Phases = append(pkgRep.Phases, phase)
@@ -1375,6 +1387,11 @@ func executeBuildPhase(buildctx *buildContext, p *Package, builddir string, bld
13751387
err := executeCommandsForPackage(buildctx, p, builddir, cmds)
13761388
pkgRep.phaseDone[phase] = time.Now()
13771389

1390+
// Notify phase-aware reporters
1391+
if par, ok := buildctx.Reporter.(PhaseAwareReporter); ok {
1392+
par.PackageBuildPhaseFinished(p, phase, err)
1393+
}
1394+
13781395
return err
13791396
}
13801397

@@ -3153,7 +3170,7 @@ func runGoTestWithTracing(buildctx *buildContext, p *Package, env []string, cwd,
31533170
}
31543171

31553172
// Create tracer and parse output
3156-
goTracer := NewGoTestTracer(tracer, parentCtx)
3173+
goTracer := NewGoTestTracer(tracer, parentCtx, p.FullName())
31573174
outputWriter := &reporterStream{R: buildctx.Reporter, P: p, IsErr: false}
31583175

31593176
if err := goTracer.parseJSONOutput(stdout, outputWriter); err != nil {

pkg/leeway/gotest_trace.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,21 @@ type testSpanData struct {
3535

3636
// GoTestTracer handles parsing Go test JSON output and creating OpenTelemetry spans
3737
type GoTestTracer struct {
38-
tracer trace.Tracer
39-
parentCtx context.Context
38+
tracer trace.Tracer
39+
parentCtx context.Context
40+
leewayPkgName string
4041

4142
mu sync.Mutex
4243
spans map[string]*testSpanData // key: "package/testname" or just "package" for package-level
4344
}
4445

4546
// NewGoTestTracer creates a new GoTestTracer that will create spans as children of the given context
46-
func NewGoTestTracer(tracer trace.Tracer, parentCtx context.Context) *GoTestTracer {
47+
func NewGoTestTracer(tracer trace.Tracer, parentCtx context.Context, leewayPkgName string) *GoTestTracer {
4748
return &GoTestTracer{
48-
tracer: tracer,
49-
parentCtx: parentCtx,
50-
spans: make(map[string]*testSpanData),
49+
tracer: tracer,
50+
parentCtx: parentCtx,
51+
leewayPkgName: leewayPkgName,
52+
spans: make(map[string]*testSpanData),
5153
}
5254
}
5355

@@ -163,6 +165,7 @@ func (t *GoTestTracer) handleRun(event *goTestEvent) {
163165
)
164166

165167
span.SetAttributes(
168+
attribute.String("leeway.package.name", t.leewayPkgName),
166169
attribute.String("test.name", event.Test),
167170
attribute.String("test.package", event.Package),
168171
attribute.String("test.framework", "go"),
@@ -193,6 +196,7 @@ func (t *GoTestTracer) handlePackageStart(event *goTestEvent) {
193196
)
194197

195198
span.SetAttributes(
199+
attribute.String("leeway.package.name", t.leewayPkgName),
196200
attribute.String("test.package", event.Package),
197201
attribute.String("test.framework", "go"),
198202
attribute.String("test.scope", "package"),

pkg/leeway/gotest_trace_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ func TestGoTestTracer_ParseJSONOutput(t *testing.T) {
2424
ctx, parentSpan := tracer.Start(context.Background(), "parent")
2525
defer parentSpan.End()
2626

27-
goTracer := NewGoTestTracer(tracer, ctx)
27+
goTracer := NewGoTestTracer(tracer, ctx, "test:pkg")
2828

2929
// Simulate go test -json output
3030
jsonOutput := `{"Time":"2024-01-01T10:00:00Z","Action":"start","Package":"example.com/pkg"}
@@ -123,7 +123,7 @@ func TestGoTestTracer_ParallelTests(t *testing.T) {
123123
ctx, parentSpan := tracer.Start(context.Background(), "parent")
124124
defer parentSpan.End()
125125

126-
goTracer := NewGoTestTracer(tracer, ctx)
126+
goTracer := NewGoTestTracer(tracer, ctx, "test:pkg")
127127

128128
// Simulate parallel test execution with pause/cont events
129129
jsonOutput := `{"Time":"2024-01-01T10:00:00Z","Action":"run","Package":"example.com/pkg","Test":"TestParallel"}
@@ -167,7 +167,7 @@ func TestGoTestTracer_ParallelTests(t *testing.T) {
167167

168168
func TestGoTestTracer_NoTracer(t *testing.T) {
169169
// Test that nil tracer doesn't panic
170-
goTracer := NewGoTestTracer(nil, context.Background())
170+
goTracer := NewGoTestTracer(nil, context.Background(), "test:pkg")
171171

172172
jsonOutput := `{"Time":"2024-01-01T10:00:00Z","Action":"run","Package":"example.com/pkg","Test":"TestOne"}
173173
{"Time":"2024-01-01T10:00:00.100Z","Action":"pass","Package":"example.com/pkg","Test":"TestOne","Elapsed":0.1}

pkg/leeway/reporter.go

Lines changed: 110 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,17 @@ type Reporter interface {
5252
PackageBuildFinished(pkg *Package, rep *PackageBuildReport)
5353
}
5454

55+
// PhaseAwareReporter is an optional interface that reporters can implement
56+
// to receive phase-level notifications for creating nested spans or tracking.
57+
// This follows the Go pattern of optional interfaces (like io.Closer, io.Seeker).
58+
type PhaseAwareReporter interface {
59+
Reporter
60+
// PackageBuildPhaseStarted is called when a build phase starts
61+
PackageBuildPhaseStarted(pkg *Package, phase PackageBuildPhase)
62+
// PackageBuildPhaseFinished is called when a build phase completes
63+
PackageBuildPhaseFinished(pkg *Package, phase PackageBuildPhase, err error)
64+
}
65+
5566
type PackageBuildReport struct {
5667
phaseEnter map[PackageBuildPhase]time.Time
5768
phaseDone map[PackageBuildPhase]time.Time
@@ -544,7 +555,26 @@ func (cr CompositeReporter) PackageBuildStarted(pkg *Package, builddir string) {
544555
}
545556
}
546557

558+
// PackageBuildPhaseStarted implements PhaseAwareReporter
559+
func (cr CompositeReporter) PackageBuildPhaseStarted(pkg *Package, phase PackageBuildPhase) {
560+
for _, r := range cr {
561+
if par, ok := r.(PhaseAwareReporter); ok {
562+
par.PackageBuildPhaseStarted(pkg, phase)
563+
}
564+
}
565+
}
566+
567+
// PackageBuildPhaseFinished implements PhaseAwareReporter
568+
func (cr CompositeReporter) PackageBuildPhaseFinished(pkg *Package, phase PackageBuildPhase, err error) {
569+
for _, r := range cr {
570+
if par, ok := r.(PhaseAwareReporter); ok {
571+
par.PackageBuildPhaseFinished(pkg, phase, err)
572+
}
573+
}
574+
}
575+
547576
var _ Reporter = CompositeReporter{}
577+
var _ PhaseAwareReporter = CompositeReporter{}
548578

549579
type NoopReporter struct{}
550580

@@ -701,6 +731,8 @@ type OTelReporter struct {
701731
rootSpan trace.Span
702732
packageCtxs map[string]context.Context
703733
packageSpans map[string]trace.Span
734+
phaseSpans map[string]trace.Span // key: "packageName:phaseName"
735+
phaseCtxs map[string]context.Context // key: "packageName:phaseName"
704736
mu sync.RWMutex
705737
}
706738

@@ -714,6 +746,8 @@ func NewOTelReporter(tracer trace.Tracer, parentCtx context.Context) *OTelReport
714746
parentCtx: parentCtx,
715747
packageCtxs: make(map[string]context.Context),
716748
packageSpans: make(map[string]trace.Span),
749+
phaseSpans: make(map[string]trace.Span),
750+
phaseCtxs: make(map[string]context.Context),
717751
}
718752
}
719753

@@ -866,16 +900,6 @@ func (r *OTelReporter) PackageBuildFinished(pkg *Package, rep *PackageBuildRepor
866900
attribute.Int64("leeway.package.duration_ms", rep.TotalTime().Milliseconds()),
867901
)
868902

869-
// Add phase durations
870-
for _, phase := range rep.Phases {
871-
duration := rep.PhaseDuration(phase)
872-
if duration >= 0 {
873-
span.SetAttributes(
874-
attribute.Int64(fmt.Sprintf("leeway.package.phase.%s.duration_ms", phase), duration.Milliseconds()),
875-
)
876-
}
877-
}
878-
879903
// Add test coverage if available
880904
if rep.TestCoverageAvailable {
881905
span.SetAttributes(
@@ -901,6 +925,71 @@ func (r *OTelReporter) PackageBuildFinished(pkg *Package, rep *PackageBuildRepor
901925
delete(r.packageCtxs, pkgName)
902926
}
903927

928+
// PackageBuildPhaseStarted implements PhaseAwareReporter
929+
func (r *OTelReporter) PackageBuildPhaseStarted(pkg *Package, phase PackageBuildPhase) {
930+
if r.tracer == nil {
931+
return
932+
}
933+
934+
r.mu.Lock()
935+
defer r.mu.Unlock()
936+
937+
pkgName := pkg.FullName()
938+
packageCtx, ok := r.packageCtxs[pkgName]
939+
if !ok {
940+
log.WithField("package", pkgName).Warn("PackageBuildPhaseStarted called without package context")
941+
return
942+
}
943+
944+
// Create phase span as child of package span
945+
phaseKey := fmt.Sprintf("%s:%s", pkgName, phase)
946+
phaseCtx, span := r.tracer.Start(packageCtx, "leeway.phase",
947+
trace.WithSpanKind(trace.SpanKindInternal),
948+
)
949+
950+
// Add phase attributes
951+
span.SetAttributes(
952+
attribute.String("leeway.package.name", pkgName),
953+
attribute.String("leeway.phase.name", string(phase)),
954+
)
955+
956+
r.phaseSpans[phaseKey] = span
957+
r.phaseCtxs[phaseKey] = phaseCtx
958+
}
959+
960+
// PackageBuildPhaseFinished implements PhaseAwareReporter
961+
func (r *OTelReporter) PackageBuildPhaseFinished(pkg *Package, phase PackageBuildPhase, err error) {
962+
if r.tracer == nil {
963+
return
964+
}
965+
966+
r.mu.Lock()
967+
defer r.mu.Unlock()
968+
969+
pkgName := pkg.FullName()
970+
phaseKey := fmt.Sprintf("%s:%s", pkgName, phase)
971+
span, ok := r.phaseSpans[phaseKey]
972+
if !ok {
973+
log.WithField("package", pkgName).WithField("phase", phase).Warn("PackageBuildPhaseFinished called without corresponding PackageBuildPhaseStarted")
974+
return
975+
}
976+
977+
// Set error status if phase failed
978+
if err != nil {
979+
span.RecordError(err)
980+
span.SetStatus(codes.Error, err.Error())
981+
} else {
982+
span.SetStatus(codes.Ok, "phase completed successfully")
983+
}
984+
985+
// End span
986+
span.End()
987+
988+
// Clean up
989+
delete(r.phaseSpans, phaseKey)
990+
delete(r.phaseCtxs, phaseKey)
991+
}
992+
904993
// addGitHubAttributes adds GitHub Actions context attributes to the span
905994
func (r *OTelReporter) addGitHubAttributes(span trace.Span) {
906995
// Check if running in GitHub Actions
@@ -945,7 +1034,8 @@ func (r *OTelReporter) addGitHubAttributes(span trace.Span) {
9451034
}
9461035

9471036
// GetPackageContext returns the tracing context for a package build.
948-
// This can be used to create child spans for operations within the package build.
1037+
// If a phase is currently active, returns the phase context so child spans
1038+
// are nested under the phase. Otherwise returns the package context.
9491039
// Returns nil if no context is available for the package.
9501040
func (r *OTelReporter) GetPackageContext(pkg *Package) context.Context {
9511041
if r.tracer == nil {
@@ -956,6 +1046,15 @@ func (r *OTelReporter) GetPackageContext(pkg *Package) context.Context {
9561046
defer r.mu.RUnlock()
9571047

9581048
pkgName := pkg.FullName()
1049+
1050+
// Check for active phase context first
1051+
for key, ctx := range r.phaseCtxs {
1052+
if len(key) > len(pkgName) && key[:len(pkgName)] == pkgName && key[len(pkgName)] == ':' {
1053+
return ctx
1054+
}
1055+
}
1056+
1057+
// Fall back to package context
9591058
ctx, ok := r.packageCtxs[pkgName]
9601059
if !ok {
9611060
return nil

0 commit comments

Comments
 (0)