Skip to content

Commit a9cad24

Browse files
smoserclaude
andauthored
fix(observability): probe only when observability is installed (#2482)
We should not train people or machines to ignore red ERROR messages. With this change and #2479, we have zero ERROR log entries in a successful build. Previously RetrieveObservabilityEvents always sent three `test -f` SSH commands to probe for the observability events file, even when the hook was never installed. Each probe exits non-zero (file not found), causing sendSSHCommand to log ERROR three times for every build to the console. During CPIO generation, scan the base initramfs for the hook's sentinel file (etc/tetragon/tetragon.tp.d/network-monitor.yaml) and record the result in cfg.ObservabilityHook. This is accurate regardless of how the package got into the image — QEMU_ADDITIONAL_PACKAGES, QEMU_BASE_INITRAMFS, or any other mechanism. RetrieveObservabilityEvents returns immediately when ObservabilityHook is false, and treats a missing events file as an error when it is true. We can now also correctly ERROR when there _was_ a observability hook installed rather than just assuming it was not there. Store the result of that scan in a sidecar (<cpio>.observability) so we do not have to scan on cached initramfs. The sidecar is invalidated automatically when the CPIO is newer (fresh build, QEMU_ADDITIONAL_PACKAGES change, etc.). Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 5962652 commit a9cad24

File tree

4 files changed

+86
-9
lines changed

4 files changed

+86
-9
lines changed

pkg/build/build.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,7 @@ func (b *Build) BuildPackage(ctx context.Context) error {
774774
// is installed. Only applicable to QEMU builds which run in a full VM.
775775
if b.Runner.Name() == container.QemuName {
776776
if obsEvents, err := container.RetrieveObservabilityEvents(ctx, cfg); err != nil {
777-
log.Warnf("failed to retrieve observability events: %v", err)
777+
log.Errorf("failed to retrieve observability events: %v", err)
778778
} else if obsEvents != nil {
779779
container.LogObservabilityEvents(ctx, obsEvents)
780780
}

pkg/container/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,4 +81,9 @@ type Config struct {
8181
VirtiofsEnabled bool // Whether virtiofs is enabled for cache
8282
VirtiofsdPID int // PID of virtiofsd daemon for cleanup
8383
VirtiofsdSocketPath string // Path to Unix socket for virtiofsd
84+
85+
// ObservabilityHook is true when the observability hook's sentinel file
86+
// was found in the initramfs CPIO during VM setup. When false,
87+
// RetrieveObservabilityEvents returns immediately without probing the VM.
88+
ObservabilityHook bool
8489
}

pkg/container/observability.go

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ var observabilityEventPaths = []string{
3131
"/tmp/observability/events.log",
3232
}
3333

34+
// observabilityHookSentinel is a file installed exclusively by the
35+
// observability hook package. Its presence in the initramfs CPIO confirms
36+
// the hook is installed, regardless of how it was included.
37+
// CPIO record names are stored without a leading slash.
38+
const observabilityHookSentinel = "etc/tetragon/tetragon.tp.d/network-monitor.yaml"
39+
3440
// ObservabilityEvents holds parsed event data retrieved from the build VM.
3541
type ObservabilityEvents struct {
3642
// RawData is the raw NDJSON event data.
@@ -60,18 +66,19 @@ type NetworkConnection struct {
6066
// via the SSHControlClient (port 2223, unchrooted root access). This should
6167
// be called after the build completes but before TerminatePod.
6268
//
63-
// Returns nil with no error if the observability hook is not installed or no
64-
// events were generated. This makes the feature fully optional — default
65-
// builds without the hook are completely unaffected.
69+
// If cfg.ObservabilityHook is false the function returns immediately without
70+
// probing the VM. If true, a missing events file is treated as an error.
6671
func RetrieveObservabilityEvents(ctx context.Context, cfg *Config) (*ObservabilityEvents, error) {
72+
if !cfg.ObservabilityHook {
73+
return nil, nil
74+
}
6775
if cfg.SSHControlClient == nil {
6876
return nil, nil
6977
}
7078

7179
log := clog.FromContext(ctx)
7280

73-
// Probe known event file locations. If none exist, the observability
74-
// hook is not installed and we silently return nil.
81+
// Probe known event file locations.
7582
eventsPath := ""
7683
for _, path := range observabilityEventPaths {
7784
err := sendSSHCommand(ctx, cfg.SSHControlClient, cfg, nil, nil, nil, false,
@@ -83,9 +90,7 @@ func RetrieveObservabilityEvents(ctx context.Context, cfg *Config) (*Observabili
8390
}
8491

8592
if eventsPath == "" {
86-
// No events file found — observability hook is not installed.
87-
// This is the normal case for default builds; return silently.
88-
return nil, nil
93+
return nil, fmt.Errorf("observability hook is installed but no events file found at any known path: %v", observabilityEventPaths)
8994
}
9095

9196
log.Infof("qemu: found observability events at %s", eventsPath)

pkg/container/qemu_runner.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2231,6 +2231,11 @@ func generateCpio(ctx context.Context, cfg *Config) (string, error) {
22312231
}
22322232
}
22332233

2234+
// Detect whether the observability hook is present. We cache the result in
2235+
// a sidecar file (<cpio>.observability) so we only scan the archive once
2236+
// per cached initramfs rather than on every melange invocation.
2237+
cfg.ObservabilityHook = observabilityHookPresent(ctx, baseInitramfs)
2238+
22342239
// Inject SSH host keys and modules
22352240
return injectRuntimeData(ctx, cfg, os.Getenv("QEMU_KERNEL_MODULES"), baseInitramfs)
22362241
}
@@ -2317,6 +2322,68 @@ func GenerateBaseInitramfs(ctx context.Context, arch apko_types.Architecture, cf
23172322
return nil
23182323
}
23192324

2325+
// cpioContainsPath reports whether target exists as a record name in the CPIO
2326+
// archive at cpioFile. CPIO record names are stored without a leading slash.
2327+
func cpioContainsPath(cpioFile, target string) (bool, error) {
2328+
f, err := os.Open(cpioFile)
2329+
if err != nil {
2330+
return false, err
2331+
}
2332+
defer f.Close()
2333+
2334+
rr, err := cpio.Newc.NewFileReader(f)
2335+
if err != nil {
2336+
return false, err
2337+
}
2338+
2339+
errFound := errors.New("found")
2340+
err = cpio.ForEachRecord(rr, func(r cpio.Record) error {
2341+
if r.Name == target {
2342+
return errFound
2343+
}
2344+
return nil
2345+
})
2346+
if errors.Is(err, errFound) {
2347+
return true, nil
2348+
}
2349+
return false, err
2350+
}
2351+
2352+
// observabilityHookPresent reports whether the observability hook is present in
2353+
// the CPIO archive at cpioFile. The result is cached in a sidecar file
2354+
// (<cpioFile>.observability) so the archive is only scanned once per cached
2355+
// initramfs. Subsequent calls just read the tiny sidecar.
2356+
func observabilityHookPresent(ctx context.Context, cpioFile string) bool {
2357+
sidecar := cpioFile + ".observability"
2358+
2359+
// Use the cached result when the sidecar is at least as new as the CPIO.
2360+
cpioInfo, cpioErr := os.Stat(cpioFile)
2361+
sidecarInfo, sidecarErr := os.Stat(sidecar)
2362+
if cpioErr == nil && sidecarErr == nil && !sidecarInfo.ModTime().Before(cpioInfo.ModTime()) {
2363+
data, err := os.ReadFile(sidecar)
2364+
if err == nil {
2365+
return strings.TrimSpace(string(data)) == "true"
2366+
}
2367+
}
2368+
2369+
// Sidecar missing or stale — scan the archive.
2370+
present, err := cpioContainsPath(cpioFile, observabilityHookSentinel)
2371+
if err != nil {
2372+
clog.FromContext(ctx).Debugf("qemu: could not inspect initramfs for observability hook: %v", err)
2373+
return false
2374+
}
2375+
2376+
// Write the sidecar so future invocations skip the scan.
2377+
val := "false"
2378+
if present {
2379+
val = "true"
2380+
}
2381+
if err := os.WriteFile(sidecar, []byte(val+"\n"), 0o600); err != nil {
2382+
clog.FromContext(ctx).Debugf("qemu: could not write observability sidecar: %v", err)
2383+
}
2384+
return present
2385+
}
2386+
23202387
// getAdditionalPackages parses and validates the QEMU_ADDITIONAL_PACKAGES environment variable.
23212388
// Returns a list of package names to add to the initramfs, or empty slice if none/invalid.
23222389
func getAdditionalPackages(ctx context.Context) []string {

0 commit comments

Comments
 (0)