Skip to content

Commit 5709686

Browse files
authored
Fix stack-sending logic in Go custom labels path (#180)
unwind_stop is supposed to be the centralized place to send stacks. It also does some logic like recording error frames. When the Go custom labels path was moved to a separate tail call, the stack-sending call was moved there, instead of going through the main path in unwind_stop. So for Go programs specifically, we are not adding error frames (and in some cases just sending empty stacks).
1 parent fe0e8fa commit 5709686

File tree

6 files changed

+34
-37
lines changed

6 files changed

+34
-37
lines changed

support/ebpf/go_labels.ebpf.c

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,20 @@ static EBPF_INLINE int go_labels(struct pt_regs *ctx)
173173
u32 pid = record->trace.pid;
174174
GoLabelsOffsets *offsets = bpf_map_lookup_elem(&go_labels_procs, &pid);
175175
if (!offsets) {
176-
DEBUG_PRINT("cl: no offsets, %d not recognized as a go binary", pid);
177-
return -1;
178-
}
179-
DEBUG_PRINT(
180-
"cl: go offsets found, %d recognized as a go binary: m_ptr: %lx",
181-
pid,
182-
(unsigned long)record->customLabelsState.go_m_ptr);
183-
bool success = get_go_custom_labels(record, offsets);
184-
if (!success) {
185-
increment_metric(metricID_UnwindGoLabelsFailures);
176+
DEBUG_PRINT("cl: no offsets, %d not recognized as a go binary. This should never happen!", pid);
177+
// We shouldn't tail call
178+
// into this program if there is no map element.
179+
// Nevertheless, if that happens due to some bug,
180+
// fall through rather than swallowing the trace.
181+
} else {
182+
DEBUG_PRINT(
183+
"cl: go offsets found, %d recognized as a go binary: m_ptr: %lx",
184+
pid,
185+
(unsigned long)record->customLabelsState.go_m_ptr);
186+
bool success = get_go_custom_labels(record, offsets);
187+
if (!success) {
188+
increment_metric(metricID_UnwindGoLabelsFailures);
189+
}
186190
}
187191

188192
send_trace(ctx, &record->trace);

support/ebpf/interpreter_dispatcher.ebpf.c

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -215,28 +215,24 @@ static EBPF_INLINE void *get_m_ptr(struct GoLabelsOffsets *offs, UNUSED UnwindSt
215215

216216
static EBPF_INLINE void maybe_add_go_custom_labels_legacy(struct pt_regs *ctx, PerCPURecord *record)
217217
{
218-
u32 pid = record->trace.pid;
218+
u32 pid = record->trace.pid;
219219
// The Go label extraction code is too big to fit in this program, so we need to
220-
// tail call it, in order to keep the hashing and clearing code in this program it
221-
// will tail call back to us with this bool set.
222-
if (!record->state.processed_go_labels) {
223-
GoCustomLabelsOffsets *offsets = bpf_map_lookup_elem(&go_labels_procs, &pid);
224-
if (!offsets) {
225-
DEBUG_PRINT("cl: no offsets, %d not recognized as a go binary", pid);
226-
return;
227-
}
228-
229-
void *m_ptr_addr = get_m_ptr_legacy(offsets, &record->state);
230-
if (!m_ptr_addr) {
231-
return;
232-
}
233-
record->customLabelsState.go_m_ptr = m_ptr_addr;
220+
// tail call it
221+
GoCustomLabelsOffsets *offsets = bpf_map_lookup_elem(&go_labels_procs, &pid);
222+
if (!offsets) {
223+
DEBUG_PRINT("cl: no offsets, %d not recognized as a go binary", pid);
224+
return;
225+
}
234226

235-
DEBUG_PRINT("cl: trace is within a process with Go custom labels enabled");
236-
increment_metric(metricID_UnwindGoCustomLabelsAttempts);
237-
record->state.processed_go_labels = true;
238-
tail_call(ctx, PROG_GO_LABELS);
227+
void *m_ptr_addr = get_m_ptr_legacy(offsets, &record->state);
228+
if (!m_ptr_addr) {
229+
return;
239230
}
231+
record->customLabelsState.go_m_ptr = m_ptr_addr;
232+
233+
DEBUG_PRINT("cl: trace is within a process with Go custom labels enabled");
234+
increment_metric(metricID_UnwindGoCustomLabelsAttempts);
235+
tail_call(ctx, PROG_GO_LABELS);
240236
}
241237

242238
static EBPF_INLINE void maybe_add_go_custom_labels(struct pt_regs *ctx, PerCPURecord *record)
@@ -770,12 +766,8 @@ static EBPF_INLINE int unwind_stop(struct pt_regs *ctx)
770766
Trace *trace = &record->trace;
771767
UnwindState *state = &record->state;
772768

773-
// Do Go first since we might tail call out and back again.
774-
// Try legacy Go custom labels first, then new Go labels implementation
775769
// TODO: maybe instead of adding a per-language call here, we
776770
// should have "path to CLs" be a standard part of some per-pid map?
777-
maybe_add_go_custom_labels_legacy(ctx, record);
778-
maybe_add_go_custom_labels(ctx, record);
779771
maybe_add_native_custom_labels(record);
780772
if (!maybe_add_node_custom_labels(record))
781773
increment_metric(metricID_UnwindNodeCustomLabelsFailures);
@@ -833,6 +825,11 @@ static EBPF_INLINE int unwind_stop(struct pt_regs *ctx)
833825
}
834826
// TEMPORARY HACK END
835827

828+
// Try legacy Go custom labels first, then new Go labels implementation
829+
// Must be last since it may not return (it will call send_trace).
830+
maybe_add_go_custom_labels_legacy(ctx, record);
831+
maybe_add_go_custom_labels(ctx, record);
832+
836833
send_trace(ctx, trace);
837834

838835
return 0;

support/ebpf/tracemgmt.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ static inline EBPF_INLINE PerCPURecord *get_pristine_per_cpu_record()
230230
record->state.return_address = false;
231231
record->state.error_metric = -1;
232232
record->state.unwind_error = ERR_OK;
233-
record->state.processed_go_labels = false;
234233
record->perlUnwindState.stackinfo = 0;
235234
record->perlUnwindState.cop = 0;
236235
record->pythonUnwindState.py_frame = 0;

support/ebpf/tracer.ebpf.amd64

-128 Bytes
Binary file not shown.

support/ebpf/tracer.ebpf.arm64

1.9 KB
Binary file not shown.

support/ebpf/types.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -703,9 +703,6 @@ typedef struct UnwindState {
703703
// Consider calling unwinder_mark_nonleaf_frame rather than setting this directly.
704704
bool return_address;
705705

706-
// Make sure we only do this once.
707-
bool processed_go_labels;
708-
709706
#if defined(__aarch64__)
710707
// On aarch64, whether to forbid LR-based unwinding.
711708
// LR unwinding is only allowed for leaf user-mode frames. Frames making a syscall

0 commit comments

Comments
 (0)