Skip to content

Commit bc34ad4

Browse files
committed
review feedback
1 parent 5691d84 commit bc34ad4

File tree

5 files changed

+70
-63
lines changed

5 files changed

+70
-63
lines changed

interpreter/gpu/cuda.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package gpu // import "go.opentelemetry.io/ebpf-profiler/interpreter/gpu"
22

33
import (
4+
"fmt"
45
"runtime"
56

67
log "github.com/sirupsen/logrus"
@@ -46,7 +47,9 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr
4647
}
4748

4849
// Validate probe arguments match what cuda.ebpf.c expects
49-
validateProbeArguments(parcagpuProbes, info.FileName())
50+
if err := validateProbeArguments(parcagpuProbes, info.FileName()); err != nil {
51+
return nil, err
52+
}
5053

5154
return &data{path: info.FileName(),
5255
probes: parcagpuProbes}, nil
@@ -55,8 +58,8 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr
5558
}
5659

5760
// validateProbeArguments checks that the USDT probe arguments match the expectations
58-
// in cuda.ebpf.c and logs errors if they don't match.
59-
func validateProbeArguments(probes []pfelf.USDTProbe, path string) {
61+
// in cuda.ebpf.c and returns an error if they don't match.
62+
func validateProbeArguments(probes []pfelf.USDTProbe, path string) error {
6063
var expectedProbes map[string]string
6164

6265
switch runtime.GOARCH {
@@ -73,9 +76,8 @@ func validateProbeArguments(probes []pfelf.USDTProbe, path string) {
7376
"graph_executed": "8@x1 8@x2 8@[sp, 88] 4@x3 4@x0",
7477
}
7578
default:
76-
log.Warnf("[cuda] Unknown architecture %s, cannot validate USDT probe arguments for %s",
79+
return fmt.Errorf("unknown architecture %s, cannot validate USDT probe arguments for %s",
7780
runtime.GOARCH, path)
78-
return
7981
}
8082

8183
probeMap := make(map[string]string)
@@ -86,17 +88,16 @@ func validateProbeArguments(probes []pfelf.USDTProbe, path string) {
8688
for name, expectedArgs := range expectedProbes {
8789
actualArgs, ok := probeMap[name]
8890
if !ok {
89-
log.Errorf("[cuda] Missing expected USDT probe '%s' in %s", name, path)
90-
continue
91+
return fmt.Errorf("missing expected USDT probe '%s' in %s", name, path)
9192
}
9293
if actualArgs != expectedArgs {
93-
log.Errorf("[cuda] USDT probe '%s' in %s has incorrect arguments:\n"+
94-
" Expected: %s\n"+
95-
" Actual: %s\n"+
96-
" This will cause incorrect data collection. cuda.ebpf.c needs to be updated.",
94+
return fmt.Errorf("USDT probe '%s' in %s has incorrect arguments: "+
95+
"expected: %s"+
96+
"actual: %s",
9797
name, path, expectedArgs, actualArgs)
9898
}
9999
}
100+
return nil
100101
}
101102

102103
func (d *data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, _ libpf.Address,

parcagpu/parcagpu.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ func (p *gpuTraceFixer) addTime(key mapKey, ev *kernelTimingEvent) *host.Trace {
5656
return nil
5757
}
5858

59-
// uprobes aren't perfect and we may miss matching timing to trace at attach boundary
60-
// so clear them if they get too big.
61-
func (p *gpuTraceFixer) clear() {
59+
// maybeClear clears the maps if they get too big. uprobes aren't perfect and
60+
// we may miss matching timing to trace at attach boundary.
61+
func (p *gpuTraceFixer) maybeClear() {
6262
p.mu.Lock()
6363
defer p.mu.Unlock()
6464
if len(p.timesAwaitingTraces) > 100 || len(p.tracesAwaitingTimes) > 100 {
@@ -92,6 +92,7 @@ func prepTrace(tr *host.Trace, ev *kernelTimingEvent) {
9292
tr.CustomLabels["cuda_graph"] = strconv.FormatUint(uint64(ev.graph), 10)
9393
}
9494
if len(ev.kernelName) > 0 {
95+
// TODO: is there a better way to pass this through?
9596
tr.CustomLabels["_temp_cuda_kernel"] = string(ev.kernelName[:])
9697
// ConvertTrace will add a pseudo-frame for the kernel.
9798
tr.Frames = append([]host.Frame{{
@@ -118,7 +119,7 @@ func Start(ctx context.Context, traceInCh <-chan *host.Trace,
118119
select {
119120
case <-timer.C:
120121
// We don't want to leak memory, so we purge the readers map every 60 seconds.
121-
fixer.clear()
122+
fixer.maybeClear()
122123
case <-ctx.Done():
123124
return
124125
case t := <-traceInCh:

support/ebpf/cuda.ebpf.c

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -12,36 +12,36 @@ static EBPF_INLINE int cuda_correlation(struct pt_regs *ctx)
1212
return 0;
1313
}
1414

15-
u32 cudaId = 0;
15+
u32 cuda_id = 0;
1616
int err;
1717

1818
#if defined(__aarch64__)
1919
// ARM64: Arguments: 4@[sp, 36]
2020
u64 sp = ctx->sp;
21-
err = bpf_probe_read_user(&cudaId, sizeof(cudaId), (void *)(sp + 36));
21+
err = bpf_probe_read_user(&cuda_id, sizeof(cuda_id), (void *)(sp + 36));
2222
#else
2323
// AMD64: Arguments: 4@-36(%rbp)
2424
u64 rbp = ctx->bp;
25-
err = bpf_probe_read_user(&cudaId, sizeof(cudaId), (void *)rbp - 36);
25+
err = bpf_probe_read_user(&cuda_id, sizeof(cuda_id), (void *)rbp - 36);
2626
#endif
2727

2828
if (err)
2929
return err;
30-
DEBUG_PRINT("cuda_correlation_probe: correlationId=%u", cudaId);
30+
DEBUG_PRINT("cuda_correlation_probe: correlation_id=%u", cuda_id);
3131

3232
u64 ts = bpf_ktime_get_ns();
33-
return collect_trace(ctx, TRACE_CUDA_LAUNCH, pid, tid, ts, 0, cudaId);
33+
return collect_trace(ctx, TRACE_CUDA_LAUNCH, pid, tid, ts, 0, cuda_id);
3434
}
3535

3636
struct kernel_timing {
3737
u32 pid;
3838
u32 correlation_id;
3939
u64 start;
4040
u64 end;
41-
u32 deviceId;
42-
u32 streamId;
43-
u32 graphId;
44-
char kernelName[128];
41+
u32 device_id;
42+
u32 stream_id;
43+
u32 graph_id;
44+
char kernel_name[128];
4545
};
4646

4747
bpf_map_def SEC("maps") cuda_timing_events = {
@@ -51,7 +51,7 @@ bpf_map_def SEC("maps") cuda_timing_events = {
5151
.max_entries = 0,
5252
};
5353

54-
// uint64_t start, uint64_t end, uint32_t correlationId, uint32_t deviceId, const char *kernelName
54+
// uint64_t start, uint64_t end, uint32_t correlation_id, uint32_t device_id, const char *kernelName
5555
// AMD64 Arguments: 8@%rax 8@%rdx 8@-40(%rbp) 4@%ecx 8@%rsi
5656
// ARM64 Arguments: 8@x1 8@x2 8@[sp, 112] 4@x3 8@x0
5757
static EBPF_INLINE int cuda_kernel_exec(struct pt_regs *ctx)
@@ -60,8 +60,8 @@ static EBPF_INLINE int cuda_kernel_exec(struct pt_regs *ctx)
6060
u32 pid = pid_tgid >> 32;
6161

6262
u64 start, end;
63-
u64 correlationId = 0;
64-
u32 deviceId;
63+
u64 correlation_id = 0;
64+
u32 device_id;
6565
const char *name;
6666
int err;
6767

@@ -70,28 +70,28 @@ static EBPF_INLINE int cuda_kernel_exec(struct pt_regs *ctx)
7070
start = PT_REGS_PARM2(ctx); // x1
7171
end = PT_REGS_PARM3(ctx); // x2
7272
u64 sp = ctx->sp;
73-
err = bpf_probe_read_user(&correlationId, sizeof(correlationId), (void *)(sp + 112));
73+
err = bpf_probe_read_user(&correlation_id, sizeof(correlation_id), (void *)(sp + 112));
7474
if (err) {
75-
correlationId = 0;
75+
correlation_id = 0;
7676
}
77-
deviceId = PT_REGS_PARM4(ctx); // x3
78-
name = (const char *)PT_REGS_PARM1(ctx); // x0
77+
device_id = PT_REGS_PARM4(ctx); // x3
78+
name = (const char *)PT_REGS_PARM1(ctx); // x0
7979
#else
8080
// AMD64: 8@%rax 8@%rdx 8@-40(%rbp) 4@%ecx 8@%rsi
8181
start = ctx->ax;
8282
end = ctx->dx;
8383
u64 rbp = ctx->bp;
84-
err = bpf_probe_read_user(&correlationId, sizeof(correlationId), (void *)rbp - 40);
84+
err = bpf_probe_read_user(&correlation_id, sizeof(correlation_id), (void *)rbp - 40);
8585
if (err) {
86-
correlationId = 0;
86+
correlation_id = 0;
8787
}
88-
deviceId = ctx->cx;
89-
name = (const char *)ctx->si;
88+
device_id = ctx->cx;
89+
name = (const char *)ctx->si;
9090
#endif
9191

92-
u32 cuda_id = correlationId & 0xFFFFFFFF;
93-
u32 devId = deviceId;
94-
u32 streamId = (correlationId >> 32) & 0xFFFFFFFF;
92+
u32 cuda_id = correlation_id & 0xFFFFFFFF;
93+
u32 dev_id = device_id;
94+
u32 stream_id = (correlation_id >> 32) & 0xFFFFFFFF;
9595
u64 duration_ns = end - start;
9696

9797
DEBUG_PRINT(
@@ -103,27 +103,29 @@ static EBPF_INLINE int cuda_kernel_exec(struct pt_regs *ctx)
103103
.correlation_id = cuda_id,
104104
.start = start,
105105
.end = end,
106-
.deviceId = devId,
107-
.streamId = streamId,
108-
.graphId = 0,
106+
.device_id = dev_id,
107+
.stream_id = stream_id,
108+
.graph_id = 0,
109109
};
110110

111111
// copy name into timing.name
112-
int chars = bpf_probe_read_user_str((char *)&timing.kernelName, sizeof(timing.kernelName), name);
112+
int chars =
113+
bpf_probe_read_user_str((char *)&timing.kernel_name, sizeof(timing.kernel_name), name);
113114
// empty string is a graph launch so put in a sentinel value
114115
if (chars <= 0) {
115116
// error reading string
116-
timing.kernelName[0] = '\1';
117-
timing.kernelName[1] = '\2';
118-
timing.kernelName[2] = '\3';
117+
timing.kernel_name[0] = 'e';
118+
timing.kernel_name[1] = 'r';
119+
timing.kernel_name[2] = 'r';
120+
timing.kernel_name[3] = '\0';
119121
}
120122

121123
bpf_perf_event_output(ctx, &cuda_timing_events, BPF_F_CURRENT_CPU, &timing, sizeof(timing));
122124

123125
return 0;
124126
}
125127

126-
// uint64_t start, uint64_t end, uint32_t correlationId, uint32_t deviceId, uint32_t graphId
128+
// uint64_t start, uint64_t end, uint32_t correlation_id, uint32_t device_id, uint32_t graph_id
127129
// AMD64 Arguments: 8@%rax 8@%rdx 8@-64(%rbp) 4@%ecx 4@%esi
128130
// ARM64 Arguments: 8@x1 8@x2 8@[sp, 88] 4@x3 4@x0
129131
static EBPF_INLINE int cuda_graph_exec(struct pt_regs *ctx)
@@ -132,51 +134,54 @@ static EBPF_INLINE int cuda_graph_exec(struct pt_regs *ctx)
132134
u32 pid = pid_tgid >> 32;
133135

134136
u64 start, end;
135-
u64 correlationId = 0;
136-
u32 deviceId, graphId;
137+
u64 correlation_id = 0;
138+
u32 device_id, graph_id;
137139
int err;
138140

139141
#if defined(__aarch64__)
140142
// ARM64: 8@x1 8@x2 8@[sp, 88] 4@x3 4@x0
141143
start = PT_REGS_PARM2(ctx); // x1
142144
end = PT_REGS_PARM3(ctx); // x2
143145
u64 sp = ctx->sp;
144-
err = bpf_probe_read_user(&correlationId, sizeof(correlationId), (void *)(sp + 88));
146+
err = bpf_probe_read_user(&correlation_id, sizeof(correlation_id), (void *)(sp + 88));
145147
if (err) {
146-
correlationId = 0;
148+
correlation_id = 0;
147149
}
148-
deviceId = PT_REGS_PARM4(ctx); // x3
149-
graphId = PT_REGS_PARM1(ctx); // x0
150+
device_id = PT_REGS_PARM4(ctx); // x3
151+
graph_id = PT_REGS_PARM1(ctx); // x0
150152
#else
151153
// AMD64: 8@%rax 8@%rdx 8@-64(%rbp) 4@%ecx 4@%esi
152154
start = ctx->ax;
153155
end = ctx->dx;
154156
u64 rbp = ctx->bp;
155-
err = bpf_probe_read_user(&correlationId, sizeof(correlationId), (void *)rbp - 64);
157+
err = bpf_probe_read_user(&correlation_id, sizeof(correlation_id), (void *)rbp - 64);
156158
if (err) {
157-
correlationId = 0;
159+
correlation_id = 0;
158160
}
159-
deviceId = ctx->cx;
160-
graphId = ctx->si;
161+
device_id = ctx->cx;
162+
graph_id = ctx->si;
161163
#endif
162164

163-
u32 cuda_id = correlationId & 0xFFFFFFFF;
164-
u32 devId = deviceId;
165-
u32 streamId = (correlationId >> 32) & 0xFFFFFFFF;
165+
u32 cuda_id = correlation_id & 0xFFFFFFFF;
166+
u32 dev_id = device_id;
167+
u32 stream_id = (correlation_id >> 32) & 0xFFFFFFFF;
166168
u64 duration_ns = end - start;
167169

168170
DEBUG_PRINT(
169-
"cuda_graph_exec: kernel_id=%u, duration_ns=%llu graph_id=%u\n", cuda_id, duration_ns, graphId);
171+
"cuda_graph_exec: kernel_id=%u, duration_ns=%llu graph_id=%u\n",
172+
cuda_id,
173+
duration_ns,
174+
graph_id);
170175

171176
// Send the actual timing data from the function parameters
172177
struct kernel_timing timing = {
173178
.pid = pid,
174179
.correlation_id = cuda_id,
175180
.start = start,
176181
.end = end,
177-
.deviceId = devId,
178-
.streamId = streamId,
179-
.graphId = graphId,
182+
.device_id = dev_id,
183+
.stream_id = stream_id,
184+
.graph_id = graph_id,
180185
};
181186

182187
bpf_perf_event_output(ctx, &cuda_timing_events, BPF_F_CURRENT_CPU, &timing, sizeof(timing));

support/ebpf/tracer.ebpf.amd64

-88 Bytes
Binary file not shown.

support/ebpf/tracer.ebpf.arm64

-88 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)