Skip to content

Commit db5b63f

Browse files
authored
libbpf-tools/klockstat: Track netlink message types when taking locks (#5230)
The klockstat tool is useful to debug stalls on kernel locks, but when tracking the RTNL lock, a lot of the stack traces will just show rtnetlink_rcv_msg() or netlink_dump() as the callers that take the lock. It is not possible to tell from the stack trace which netlink command is being executed that caused the lock to be taken, which makes it harder to debug. To help with this, add probes to those two netlink functions that capture the nlmsg_type and store it in a hashmap keyed on the current tid. When accounting a lock operation, store the nlmsg type along with the max_comm. On the userspace side, print the message type if it is set. Parse the vmlinux BTF to turn the values into the constants names for better readability. This gives output like the following: Caller Avg Wait Count Max Wait Total Wait rtnl_dumpit+0x74 1.7 us 1 1.7 us 1.7 us netlink_dump+0x138 __netlink_dump_start+0x1eb rtnetlink_rcv_msg+0x2ae netlink_rcv_skb+0x50 Max PID 1124, COMM bird, Lock rtnl_mutex (0xffffffff9e422b60) nltype RTM_GETLINK Since the type is only printed along with a stack dump, the new probes are only enabled when -s is set to a value larger than 1. Signed-off-by: Toke Høiland-Jørgensen <[email protected]>
1 parent e706277 commit db5b63f

File tree

3 files changed

+237
-7
lines changed

3 files changed

+237
-7
lines changed

libbpf-tools/klockstat.bpf.c

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ struct task_lock {
3434

3535
struct lockholder_info {
3636
s32 stack_id;
37+
u16 nlmsg_type;
3738
u64 task_id;
3839
u64 try_at;
3940
u64 acq_at;
@@ -69,6 +70,13 @@ struct {
6970
__type(value, void *);
7071
} locks SEC(".maps");
7172

73+
struct {
74+
__uint(type, BPF_MAP_TYPE_HASH);
75+
__uint(max_entries, MAX_ENTRIES);
76+
__type(key, u32);
77+
__type(value, u16);
78+
} nltypes SEC(".maps");
79+
7280
static bool tracing_task(u64 task_id)
7381
{
7482
u32 tgid = task_id >> 32;
@@ -125,6 +133,8 @@ static void lock_aborted(void *lock)
125133
static void lock_acquired(void *lock)
126134
{
127135
u64 task_id;
136+
u32 tid;
137+
u16 *nltype;
128138
struct lockholder_info *li;
129139
struct task_lock tl = {};
130140

@@ -141,6 +151,11 @@ static void lock_acquired(void *lock)
141151
return;
142152

143153
li->acq_at = bpf_ktime_get_ns();
154+
155+
tid = (u32)task_id;
156+
nltype = bpf_map_lookup_elem(&nltypes, &tid);
157+
if (nltype)
158+
li->nlmsg_type = *nltype;
144159
}
145160

146161
static void account(struct lockholder_info *li)
@@ -181,6 +196,7 @@ static void account(struct lockholder_info *li)
181196
WRITE_ONCE(ls->acq_max_time, delta);
182197
WRITE_ONCE(ls->acq_max_id, li->task_id);
183198
WRITE_ONCE(ls->acq_max_lock_ptr, li->lock_ptr);
199+
WRITE_ONCE(ls->acq_max_nltype, li->nlmsg_type);
184200
/*
185201
* Potentially racy, if multiple threads think they are the max,
186202
* so you may get a clobbered write.
@@ -196,6 +212,7 @@ static void account(struct lockholder_info *li)
196212
WRITE_ONCE(ls->hld_max_time, delta);
197213
WRITE_ONCE(ls->hld_max_id, li->task_id);
198214
WRITE_ONCE(ls->hld_max_lock_ptr, li->lock_ptr);
215+
WRITE_ONCE(ls->hld_max_nltype, li->nlmsg_type);
199216
if (!per_thread)
200217
bpf_get_current_comm(ls->hld_max_comm, TASK_COMM_LEN);
201218
}
@@ -224,6 +241,58 @@ static void lock_released(void *lock)
224241
bpf_map_delete_elem(&lockholder_map, &tl);
225242
}
226243

244+
static void record_nltype(const struct nlmsghdr *hdr)
245+
{
246+
u32 tid = (u32)bpf_get_current_pid_tgid();
247+
u16 nltype = 0;
248+
249+
nltype = BPF_CORE_READ(hdr, nlmsg_type);
250+
251+
bpf_map_update_elem(&nltypes, &tid, &nltype, BPF_ANY);
252+
}
253+
254+
static void release_nltype(void)
255+
{
256+
u32 tid = (u32)bpf_get_current_pid_tgid();
257+
258+
bpf_map_delete_elem(&nltypes, &tid);
259+
}
260+
261+
SEC("fentry/rtnetlink_rcv_msg")
262+
int BPF_PROG(rtnetlink_rcv_msg, struct sk_buff *skb, struct nlmsghdr *nlh,
263+
struct netlink_ext_ack *extack)
264+
{
265+
record_nltype(nlh);
266+
return 0;
267+
}
268+
269+
SEC("fexit/rtnetlink_rcv_msg")
270+
int BPF_PROG(rtnetlink_rcv_msg_exit, struct sk_buff *skb, struct nlmsghdr *nlh,
271+
struct netlink_ext_ack *extack, long ret)
272+
{
273+
release_nltype();
274+
return 0;
275+
}
276+
277+
SEC("fentry/netlink_dump")
278+
int BPF_PROG(netlink_dump, struct sock *sk, bool lock_taken)
279+
{
280+
struct netlink_sock *nlk = container_of(sk, struct netlink_sock, sk);
281+
const struct nlmsghdr *nlh;
282+
283+
nlh = BPF_CORE_READ(nlk, cb.nlh);
284+
record_nltype(nlh);
285+
return 0;
286+
}
287+
288+
SEC("fexit/netlink_dump")
289+
int BPF_PROG(netlink_dump_exit, struct sk_buff *skb, struct nlmsghdr *nlh,
290+
struct netlink_ext_ack *extack)
291+
{
292+
release_nltype();
293+
return 0;
294+
}
295+
227296
SEC("fentry/mutex_lock")
228297
int BPF_PROG(mutex_lock, struct mutex *lock)
229298
{
@@ -729,4 +798,38 @@ int BPF_KPROBE(kprobe_up_write, struct rw_semaphore *lock)
729798
return 0;
730799
}
731800

801+
SEC("kprobe/rtnetlink_rcv_msg")
802+
int BPF_KPROBE(kprobe_rtnetlink_rcv_msg, struct sk_buff *skb, struct nlmsghdr *nlh,
803+
struct netlink_ext_ack *ext)
804+
{
805+
record_nltype(nlh);
806+
return 0;
807+
}
808+
809+
SEC("kretprobe/rtnetlink_rcv_msg")
810+
int BPF_KRETPROBE(kprobe_rtnetlink_rcv_msg_exit, long ret)
811+
{
812+
release_nltype();
813+
return 0;
814+
}
815+
816+
SEC("kprobe/netlink_dump")
817+
int BPF_KPROBE(kprobe_netlink_dump, struct sock *sk, bool lock_taken)
818+
{
819+
struct netlink_sock *nlk = container_of(sk, struct netlink_sock, sk);
820+
const struct nlmsghdr *nlh;
821+
822+
nlh = BPF_CORE_READ(nlk, cb.nlh);
823+
record_nltype(nlh);
824+
return 0;
825+
}
826+
827+
SEC("kretprobe/netlink_dump")
828+
int BPF_KRETPROBE(kprobe_netlink_dump_exit, long ret)
829+
{
830+
release_nltype();
831+
return 0;
832+
}
833+
834+
732835
char LICENSE[] SEC("license") = "GPL";

libbpf-tools/klockstat.c

Lines changed: 132 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#include <bpf/libbpf.h>
2727
#include <bpf/bpf.h>
28+
#include <bpf/btf.h>
2829
#include "klockstat.h"
2930
#include "klockstat.skel.h"
3031
#include "compat.h"
@@ -117,6 +118,10 @@ static const char *lock_ksym_names[] = {
117118

118119
static unsigned long lock_ksym_addr[ARRAY_SIZE(lock_ksym_names)];
119120

121+
static struct btf *vmlinux_btf;
122+
static const char **nltype_map;
123+
static size_t nltype_max = 0;
124+
120125
static const struct argp_option opts[] = {
121126
{ "pid", 'p', "PID", 0, "Filter by process ID", 0 },
122127
{ "tid", 't', "TID", 0, "Filter by thread ID", 0 },
@@ -139,6 +144,93 @@ static const struct argp_option opts[] = {
139144
{},
140145
};
141146

147+
static char *get_nltype(unsigned long nltype)
148+
{
149+
static char buf[100];
150+
151+
if (!nltype)
152+
return "";
153+
154+
if (nltype >= nltype_max || !nltype_map[nltype])
155+
snprintf(buf, sizeof(buf), " nltype %lu", nltype);
156+
else
157+
snprintf(buf, sizeof(buf), " nltype %s", nltype_map[nltype]);
158+
159+
buf[sizeof(buf)-1] = '\0';
160+
return buf;
161+
}
162+
163+
static int build_nlmsg_name_table(const struct btf_type *t)
164+
{
165+
const struct btf_enum *e;
166+
unsigned short vlen;
167+
size_t max_val = 0;
168+
const char *name;
169+
int i;
170+
171+
vlen = btf_vlen(t);
172+
e = btf_enum(t);
173+
174+
/* first pass - find the value of __RTM_MAX to size the allocation */
175+
for (i = 0; i < vlen; i++) {
176+
name = btf__name_by_offset(vmlinux_btf, e[i].name_off);
177+
if (!strcmp(name, "__RTM_MAX")) {
178+
max_val = e[i].val;
179+
break;
180+
}
181+
}
182+
183+
if (!max_val)
184+
return -ENOENT;
185+
186+
nltype_map = calloc(max_val, sizeof(void *));
187+
if (!nltype_map)
188+
return -ENOMEM;
189+
190+
nltype_max = max_val;
191+
192+
for (i = 0; i < vlen; i++) {
193+
if (e[i].val >= max_val)
194+
break;
195+
196+
nltype_map[e[i].val] = btf__name_by_offset(vmlinux_btf, e[i].name_off);
197+
}
198+
199+
return 0;
200+
}
201+
202+
static int resolve_nlmsg_names(void)
203+
{
204+
const struct btf_type *t;
205+
const struct btf_enum *e;
206+
int nr_types, i, j, err;
207+
unsigned short vlen;
208+
const char *name;
209+
210+
if (!vmlinux_btf)
211+
vmlinux_btf = btf__load_vmlinux_btf();
212+
213+
if ((err = libbpf_get_error(vmlinux_btf)))
214+
return err;
215+
216+
nr_types = btf__type_cnt(vmlinux_btf);
217+
for (i = 1; i < nr_types; i++) {
218+
t = btf__type_by_id(vmlinux_btf, i);
219+
if (!btf_is_enum(t))
220+
continue;
221+
222+
vlen = btf_vlen(t);
223+
e = btf_enum(t);
224+
for (j = 0; j < vlen; j++) {
225+
name = btf__name_by_offset(vmlinux_btf, e[j].name_off);
226+
if (!strcmp(name, "RTM_BASE"))
227+
return build_nlmsg_name_table(t);
228+
}
229+
}
230+
231+
return -ENOENT;
232+
}
233+
142234
static void *parse_lock_addr(const char *lock_name)
143235
{
144236
unsigned long lock_addr;
@@ -479,11 +571,12 @@ static void print_acq_stat(struct ksyms *ksyms, struct stack_stat *ss,
479571
printf("%37s\n", symname(ksyms, ss->bt[i], buf, sizeof(buf)));
480572
}
481573
if (nr_stack_entries > 1 && !env.per_thread)
482-
printf(" Max PID %llu, COMM %s, Lock %s (0x%llx)\n",
574+
printf(" Max PID %llu, COMM %s, Lock %s (0x%llx)%s\n",
483575
ss->ls.acq_max_id >> 32,
484576
ss->ls.acq_max_comm,
485-
get_lock_name(ksyms, ss->ls.acq_max_lock_ptr),
486-
ss->ls.acq_max_lock_ptr);
577+
get_lock_name(ksyms, ss->ls.acq_max_lock_ptr),
578+
ss->ls.acq_max_lock_ptr,
579+
get_nltype(ss->ls.acq_max_nltype));
487580
}
488581

489582
static void print_acq_task(struct stack_stat *ss)
@@ -533,11 +626,12 @@ static void print_hld_stat(struct ksyms *ksyms, struct stack_stat *ss,
533626
printf("%37s\n", symname(ksyms, ss->bt[i], buf, sizeof(buf)));
534627
}
535628
if (nr_stack_entries > 1 && !env.per_thread)
536-
printf(" Max PID %llu, COMM %s, Lock %s (0x%llx)\n",
629+
printf(" Max PID %llu, COMM %s, Lock %s (0x%llx)%s\n",
537630
ss->ls.hld_max_id >> 32,
538631
ss->ls.hld_max_comm,
539-
get_lock_name(ksyms, ss->ls.hld_max_lock_ptr),
540-
ss->ls.hld_max_lock_ptr);
632+
get_lock_name(ksyms, ss->ls.hld_max_lock_ptr),
633+
ss->ls.hld_max_lock_ptr,
634+
get_nltype(ss->ls.hld_max_nltype));
541635
}
542636

543637
static void print_hld_task(struct stack_stat *ss)
@@ -633,7 +727,7 @@ static int print_stats(struct ksyms *ksyms, int stack_map, int stat_map)
633727
if (env.reset) {
634728
ss = stats[i];
635729
bpf_map_delete_elem(stat_map, &ss->stack_id);
636-
}
730+
}
637731
free(stats[i]);
638732
}
639733
free(stats);
@@ -688,6 +782,11 @@ static void enable_fentry(struct klockstat_bpf *obj)
688782
bpf_program__set_autoload(obj->progs.kprobe_down_write_killable_exit, false);
689783
bpf_program__set_autoload(obj->progs.kprobe_up_write, false);
690784

785+
bpf_program__set_autoload(obj->progs.kprobe_rtnetlink_rcv_msg, false);
786+
bpf_program__set_autoload(obj->progs.kprobe_rtnetlink_rcv_msg_exit, false);
787+
bpf_program__set_autoload(obj->progs.kprobe_netlink_dump, false);
788+
bpf_program__set_autoload(obj->progs.kprobe_netlink_dump_exit, false);
789+
691790
/* CONFIG_DEBUG_LOCK_ALLOC is on */
692791
debug_lock = fentry_can_attach("mutex_lock_nested", NULL);
693792
if (!debug_lock)
@@ -749,6 +848,24 @@ static void enable_kprobes(struct klockstat_bpf *obj)
749848
bpf_program__set_autoload(obj->progs.down_write_killable, false);
750849
bpf_program__set_autoload(obj->progs.down_write_killable_exit, false);
751850
bpf_program__set_autoload(obj->progs.up_write, false);
851+
852+
bpf_program__set_autoload(obj->progs.rtnetlink_rcv_msg, false);
853+
bpf_program__set_autoload(obj->progs.rtnetlink_rcv_msg_exit, false);
854+
bpf_program__set_autoload(obj->progs.netlink_dump, false);
855+
bpf_program__set_autoload(obj->progs.netlink_dump_exit, false);
856+
}
857+
858+
static void disable_nldump_probes(struct klockstat_bpf *obj)
859+
{
860+
bpf_program__set_autoload(obj->progs.rtnetlink_rcv_msg, false);
861+
bpf_program__set_autoload(obj->progs.rtnetlink_rcv_msg_exit, false);
862+
bpf_program__set_autoload(obj->progs.netlink_dump, false);
863+
bpf_program__set_autoload(obj->progs.netlink_dump_exit, false);
864+
865+
bpf_program__set_autoload(obj->progs.kprobe_rtnetlink_rcv_msg, false);
866+
bpf_program__set_autoload(obj->progs.kprobe_rtnetlink_rcv_msg_exit, false);
867+
bpf_program__set_autoload(obj->progs.kprobe_netlink_dump, false);
868+
bpf_program__set_autoload(obj->progs.kprobe_netlink_dump_exit, false);
752869
}
753870

754871
int main(int argc, char **argv)
@@ -815,6 +932,14 @@ int main(int argc, char **argv)
815932
else
816933
enable_kprobes(obj);
817934

935+
if (env.nr_stack_entries != 1) {
936+
err = resolve_nlmsg_names();
937+
if (err)
938+
warn("failed to resolve nlmsg names\n");
939+
} else {
940+
disable_nldump_probes(obj);
941+
}
942+
818943
err = klockstat_bpf__load(obj);
819944
if (err) {
820945
warn("failed to load BPF object\n");

libbpf-tools/klockstat.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@ struct lock_stat {
1212
__u64 acq_max_time;
1313
__u64 acq_max_id;
1414
__u64 acq_max_lock_ptr;
15+
__u64 acq_max_nltype;
1516
char acq_max_comm[TASK_COMM_LEN];
1617
__u64 hld_count;
1718
__u64 hld_total_time;
1819
__u64 hld_max_time;
1920
__u64 hld_max_id;
2021
__u64 hld_max_lock_ptr;
22+
__u64 hld_max_nltype;
2123
char hld_max_comm[TASK_COMM_LEN];
2224
};
2325

0 commit comments

Comments
 (0)