Skip to content

Commit 613d628

Browse files
committed
OCPBUGS-62269 - Fix race condition causing missing audit log entries for rapid commands
OCPBUGS-62269 - Fix GitHub Actions linting issues OCPBUGS-62269 - Fix GitHub Actions linting issues
1 parent 5838168 commit 613d628

File tree

1 file changed

+83
-2
lines changed

1 file changed

+83
-2
lines changed

internal/pkg/daemon/enricher/jsonenricher.go

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ func (e *JsonEnricher) Run(ctx context.Context, runErr chan<- error) {
204204

205205
e.logger.V(config.VerboseLevel).Info("Emit audit log for process",
206206
"pid", logItem.Key())
207-
e.dispatchSeccompLine(auditLogBucket, nodeName)
207+
e.dispatchSeccompLine(ctx, auditLogBucket, logItem.Key(), nodeName)
208208
})
209209

210210
e.logger.Info(fmt.Sprintf("Setting up caches with expiry of %v", defaultCacheTimeout))
@@ -411,8 +411,82 @@ func (e *JsonEnricher) fetchProcessInfo(processId int, executable string, uid, g
411411
return processInfo
412412
}
413413

414+
// retryBpfCmdLine retries fetching cmdLine from BPF cache with progressive delays.
415+
// This handles the race condition where audit events arrive before BPF events are processed.
416+
func (e *JsonEnricher) retryBpfCmdLine(logBucket *types.LogBucket) {
417+
if e.bpfProcessCache == nil || logBucket.ProcessInfo.CmdLine != "" || logBucket.ProcessInfo.Pid == 0 {
418+
return
419+
}
420+
421+
maxRetries := 3
422+
retryDelays := []time.Duration{0, 10 * time.Millisecond, 50 * time.Millisecond}
423+
424+
for attempt := 0; attempt < maxRetries && logBucket.ProcessInfo.CmdLine == ""; attempt++ {
425+
if attempt > 0 {
426+
time.Sleep(retryDelays[attempt])
427+
}
428+
429+
cmdLine, errCmdLine := e.bpfProcessCache.GetCmdLine(logBucket.ProcessInfo.Pid)
430+
if errCmdLine == nil {
431+
logBucket.ProcessInfo.CmdLine = cmdLine
432+
e.logger.V(1).Info("cmdline found in eBPF on retry",
433+
"processId", logBucket.ProcessInfo.Pid, "cmdLine", cmdLine, "attempt", attempt+1)
434+
435+
break
436+
}
437+
}
438+
}
439+
440+
// retryBpfRequestUID retries fetching requestUID from BPF environment with progressive delays.
441+
func (e *JsonEnricher) retryBpfRequestUID(logBucket *types.LogBucket) {
442+
if e.bpfProcessCache == nil || logBucket.ProcessInfo.ExecRequestId != nil {
443+
return
444+
}
445+
446+
maxRetries := 3
447+
retryDelays := []time.Duration{0, 10 * time.Millisecond, 50 * time.Millisecond}
448+
449+
for attempt := 0; attempt < maxRetries && logBucket.ProcessInfo.ExecRequestId == nil; attempt++ {
450+
if attempt > 0 {
451+
time.Sleep(retryDelays[attempt])
452+
}
453+
454+
procEnv, errEnv := e.bpfProcessCache.GetEnv(logBucket.ProcessInfo.Pid)
455+
if errEnv == nil {
456+
reqId, ok := procEnv[requestIdEnv]
457+
if ok {
458+
logBucket.ProcessInfo.ExecRequestId = &reqId
459+
e.logger.V(1).Info("exec request id found in eBPF on retry", "reqId", reqId,
460+
"processId", logBucket.ProcessInfo.Pid, "attempt", attempt+1)
461+
462+
break
463+
}
464+
}
465+
}
466+
}
467+
468+
// retryContainerInfo retries fetching container info if it's missing.
469+
func (e *JsonEnricher) retryContainerInfo(
470+
ctx context.Context, logBucket *types.LogBucket, processID int, nodeName string,
471+
) {
472+
if logBucket.ContainerInfo != nil {
473+
return
474+
}
475+
476+
e.logger.V(config.VerboseLevel).Info("Container info not found, retrying",
477+
"processID", processID)
478+
479+
logBucket.ContainerInfo = e.fetchContainerInfo(ctx, processID, nodeName)
480+
if logBucket.ContainerInfo != nil {
481+
e.logger.V(1).Info("Container info found on retry",
482+
"processID", processID,
483+
"pod", logBucket.ContainerInfo.PodName,
484+
"namespace", logBucket.ContainerInfo.Namespace)
485+
}
486+
}
487+
414488
func (e *JsonEnricher) dispatchSeccompLine(
415-
logBucket *types.LogBucket, nodeName string,
489+
ctx context.Context, logBucket *types.LogBucket, processID int, nodeName string,
416490
) {
417491
var syscallNames []string
418492

@@ -443,6 +517,13 @@ func (e *JsonEnricher) dispatchSeccompLine(
443517
return
444518
}
445519

520+
// Retry BPF cache lookups with progressive delays to allow ring buffer polling to catch up
521+
e.retryBpfCmdLine(logBucket)
522+
e.retryBpfRequestUID(logBucket)
523+
524+
// Retry container info lookup if it's still missing
525+
e.retryContainerInfo(ctx, logBucket, processID, nodeName)
526+
446527
if logBucket.ContainerInfo == nil {
447528
e.logger.V(config.VerboseLevel).Info("Container info not found in cache")
448529
}

0 commit comments

Comments
 (0)