Skip to content

Commit 07634ba

Browse files
jotak and jpinsonneau authored
NETOBSERV-2523: Split maps to avoid stack size error (#855)
* Split maps to avoid stack size error - All feature-related data is now segregated into its own per-feature map (except for rtt and ipsec as that's just a couple of bytes) - As a result, userland code is refactored; map accesses can now be conditioned by feature - Since we have gauges that track map sizes, we can remove the "enriched" stats metric as it becomes redundant * Padding, fix tests * Apply suggestions from code review Co-authored-by: Julien Pinsonneau <[email protected]> * Remove TODO comment --------- Co-authored-by: Julien Pinsonneau <[email protected]>
1 parent 00c2a95 commit 07634ba

27 files changed

+1226
-880
lines changed

.mk/bc.mk

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ define MAPS
3131
{
3232
"direct_flows":"ringbuf",
3333
"aggregated_flows":"hash",
34+
"aggregated_flows_dns":"per_cpu_hash",
35+
"aggregated_flows_pkt_drop":"per_cpu_hash",
36+
"aggregated_flows_network_events":"per_cpu_hash",
37+
"aggregated_flows_xlat":"per_cpu_hash",
3438
"additional_flow_metrics":"per_cpu_hash",
3539
"packet_record":"ringbuf",
3640
"dns_flows":"hash",

bpf/flows.c

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -112,16 +112,16 @@ static __always_inline void update_existing_flow(flow_metrics *aggregate_flow, p
112112
}
113113
}
114114

115-
static inline void update_dns(additional_metrics *extra_metrics, pkt_info *pkt, int dns_errno) {
115+
static inline void update_dns(dns_metrics *dns_metrics, pkt_info *pkt, int dns_errno) {
116116
if (pkt->dns_id != 0) {
117-
extra_metrics->end_mono_time_ts = pkt->current_ts;
118-
extra_metrics->dns_record.id = pkt->dns_id;
119-
extra_metrics->dns_record.flags = pkt->dns_flags;
120-
extra_metrics->dns_record.latency = pkt->dns_latency;
121-
__builtin_memcpy(extra_metrics->dns_record.name, pkt->dns_name, DNS_NAME_MAX_LEN);
117+
dns_metrics->end_mono_time_ts = pkt->current_ts;
118+
dns_metrics->id = pkt->dns_id;
119+
dns_metrics->flags = pkt->dns_flags;
120+
dns_metrics->latency = pkt->dns_latency;
121+
__builtin_memcpy(dns_metrics->name, pkt->dns_name, DNS_NAME_MAX_LEN);
122122
}
123123
if (dns_errno != 0) {
124-
extra_metrics->dns_record.errno = dns_errno;
124+
dns_metrics->errno = dns_errno;
125125
}
126126
}
127127

@@ -241,31 +241,29 @@ static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
241241

242242
// Update additional metrics (per-CPU map)
243243
if (pkt.dns_id != 0 || dns_errno != 0) {
244-
additional_metrics *extra_metrics =
245-
(additional_metrics *)bpf_map_lookup_elem(&additional_flow_metrics, &id);
244+
dns_metrics *extra_metrics = (dns_metrics *)bpf_map_lookup_elem(&aggregated_flows_dns, &id);
246245
if (extra_metrics != NULL) {
247246
update_dns(extra_metrics, &pkt, dns_errno);
248247
} else {
249-
additional_metrics new_metrics;
248+
dns_metrics new_metrics;
250249
__builtin_memset(&new_metrics, 0, sizeof(new_metrics));
251250
new_metrics.start_mono_time_ts = pkt.current_ts;
252251
new_metrics.end_mono_time_ts = pkt.current_ts;
253252
new_metrics.eth_protocol = eth_protocol;
254-
new_metrics.dns_record.id = pkt.dns_id;
255-
new_metrics.dns_record.flags = pkt.dns_flags;
256-
new_metrics.dns_record.latency = pkt.dns_latency;
257-
__builtin_memcpy(new_metrics.dns_record.name, pkt.dns_name, DNS_NAME_MAX_LEN);
258-
new_metrics.dns_record.errno = dns_errno;
259-
long ret =
260-
bpf_map_update_elem(&additional_flow_metrics, &id, &new_metrics, BPF_NOEXIST);
253+
new_metrics.id = pkt.dns_id;
254+
new_metrics.flags = pkt.dns_flags;
255+
new_metrics.latency = pkt.dns_latency;
256+
__builtin_memcpy(new_metrics.name, pkt.dns_name, DNS_NAME_MAX_LEN);
257+
new_metrics.errno = dns_errno;
258+
long ret = bpf_map_update_elem(&aggregated_flows_dns, &id, &new_metrics, BPF_NOEXIST);
261259
if (ret != 0) {
262260
if (trace_messages && ret != -EEXIST) {
263261
bpf_printk("error adding DNS %d\n", ret);
264262
}
265263
if (ret == -EEXIST) {
266264
// Concurrent write from another CPU; retry
267-
additional_metrics *extra_metrics =
268-
(additional_metrics *)bpf_map_lookup_elem(&additional_flow_metrics, &id);
265+
dns_metrics *extra_metrics =
266+
(dns_metrics *)bpf_map_lookup_elem(&aggregated_flows_dns, &id);
269267
if (extra_metrics != NULL) {
270268
update_dns(extra_metrics, &pkt, dns_errno);
271269
} else {

bpf/maps_definition.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,46 @@ struct {
2020
__uint(pinning, LIBBPF_PIN_BY_NAME);
2121
} aggregated_flows SEC(".maps");
2222

23+
// Key: the flow identifier. Value: dns metrics for that identifier.
24+
struct {
25+
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
26+
__type(key, flow_id);
27+
__type(value, dns_metrics);
28+
__uint(max_entries, 1 << 24);
29+
__uint(map_flags, BPF_F_NO_PREALLOC);
30+
__uint(pinning, LIBBPF_PIN_BY_NAME);
31+
} aggregated_flows_dns SEC(".maps");
32+
33+
// Key: the flow identifier. Value: drops metrics for that identifier.
34+
struct {
35+
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
36+
__type(key, flow_id);
37+
__type(value, pkt_drop_metrics);
38+
__uint(max_entries, 1 << 24);
39+
__uint(map_flags, BPF_F_NO_PREALLOC);
40+
__uint(pinning, LIBBPF_PIN_BY_NAME);
41+
} aggregated_flows_pkt_drop SEC(".maps");
42+
43+
// Key: the flow identifier. Value: network events metrics for that identifier.
44+
struct {
45+
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
46+
__type(key, flow_id);
47+
__type(value, network_events_metrics);
48+
__uint(max_entries, 1 << 24);
49+
__uint(map_flags, BPF_F_NO_PREALLOC);
50+
__uint(pinning, LIBBPF_PIN_BY_NAME);
51+
} aggregated_flows_network_events SEC(".maps");
52+
53+
// Key: the flow identifier. Value: xlat metrics for that identifier.
54+
struct {
55+
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
56+
__type(key, flow_id);
57+
__type(value, xlat_metrics);
58+
__uint(max_entries, 1 << 24);
59+
__uint(map_flags, BPF_F_NO_PREALLOC);
60+
__uint(pinning, LIBBPF_PIN_BY_NAME);
61+
} aggregated_flows_xlat SEC(".maps");
62+
2363
// Key: the flow identifier. Value: extra metrics for that identifier.
2464
struct {
2565
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);

bpf/network_events_monitoring.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ static inline int lookup_and_update_existing_flow_network_events(flow_id *id, u8
2222

2323
bpf_probe_read_kernel(cookie, md_len, user_cookie);
2424

25-
additional_metrics *extra_metrics = bpf_map_lookup_elem(&additional_flow_metrics, id);
25+
network_events_metrics *extra_metrics =
26+
bpf_map_lookup_elem(&aggregated_flows_network_events, id);
2627
if (extra_metrics != NULL) {
2728
u8 idx = extra_metrics->network_events_idx;
2829
extra_metrics->end_mono_time_ts = bpf_ktime_get_ns();
@@ -93,15 +94,15 @@ static inline int trace_network_events(struct sk_buff *skb, struct psample_metad
9394

9495
// there is no matching flows so lets create new one and add the network event metadata
9596
u64 current_time = bpf_ktime_get_ns();
96-
additional_metrics new_flow;
97+
network_events_metrics new_flow;
9798
__builtin_memset(&new_flow, 0, sizeof(new_flow));
9899
new_flow.start_mono_time_ts = current_time;
99100
new_flow.end_mono_time_ts = current_time;
100101
new_flow.eth_protocol = eth_protocol;
101102
new_flow.network_events_idx = 0;
102103
bpf_probe_read_kernel(new_flow.network_events[0], md_len, user_cookie);
103104
new_flow.network_events_idx++;
104-
ret = bpf_map_update_elem(&additional_flow_metrics, &id, &new_flow, BPF_NOEXIST);
105+
ret = bpf_map_update_elem(&aggregated_flows_network_events, &id, &new_flow, BPF_NOEXIST);
105106
if (ret != 0) {
106107
if (trace_messages && ret != -EEXIST) {
107108
bpf_printk("error network events creating new flow %d\n", ret);

bpf/pkt_drops.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@
99

1010
static inline long pkt_drop_lookup_and_update_flow(flow_id *id, u8 state, u16 flags,
1111
enum skb_drop_reason reason, u64 len) {
12-
additional_metrics *extra_metrics = bpf_map_lookup_elem(&additional_flow_metrics, id);
12+
pkt_drop_metrics *extra_metrics = bpf_map_lookup_elem(&aggregated_flows_pkt_drop, id);
1313
if (extra_metrics != NULL) {
1414
extra_metrics->end_mono_time_ts = bpf_ktime_get_ns();
15-
extra_metrics->pkt_drops.packets += 1;
16-
extra_metrics->pkt_drops.bytes += len;
17-
extra_metrics->pkt_drops.latest_state = state;
18-
extra_metrics->pkt_drops.latest_flags = flags;
19-
extra_metrics->pkt_drops.latest_drop_cause = reason;
15+
extra_metrics->packets += 1;
16+
extra_metrics->bytes += len;
17+
extra_metrics->latest_state = state;
18+
extra_metrics->latest_flags = flags;
19+
extra_metrics->latest_drop_cause = reason;
2020
return 0;
2121
}
2222
return -1;
@@ -75,17 +75,17 @@ static inline int trace_pkt_drop(void *ctx, u8 state, struct sk_buff *skb,
7575
}
7676
// there is no matching flows so lets create new one and add the drops
7777
u64 current_time = bpf_ktime_get_ns();
78-
additional_metrics new_flow;
78+
pkt_drop_metrics new_flow;
7979
__builtin_memset(&new_flow, 0, sizeof(new_flow));
8080
new_flow.start_mono_time_ts = current_time;
8181
new_flow.end_mono_time_ts = current_time;
8282
new_flow.eth_protocol = eth_protocol;
83-
new_flow.pkt_drops.packets = 1;
84-
new_flow.pkt_drops.bytes = len;
85-
new_flow.pkt_drops.latest_state = state;
86-
new_flow.pkt_drops.latest_flags = flags;
87-
new_flow.pkt_drops.latest_drop_cause = reason;
88-
ret = bpf_map_update_elem(&additional_flow_metrics, &id, &new_flow, BPF_NOEXIST);
83+
new_flow.packets = 1;
84+
new_flow.bytes = len;
85+
new_flow.latest_state = state;
86+
new_flow.latest_flags = flags;
87+
new_flow.latest_drop_cause = reason;
88+
ret = bpf_map_update_elem(&aggregated_flows_pkt_drop, &id, &new_flow, BPF_NOEXIST);
8989
if (ret != 0) {
9090
if (trace_messages && ret != -EEXIST) {
9191
bpf_printk("error packet drop creating new flow %d\n", ret);

bpf/pkt_translation.h

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
#define s6_addr in6_u.u6_addr8
1111

12-
static inline void dump_xlated_flow(struct translated_flow_t *flow) {
12+
static inline void dump_xlated_flow(struct xlat_metrics_t *flow) {
1313
BPF_PRINTK("zone_id %d sport %d dport %d\n", flow->zone_id, flow->sport, flow->dport);
1414
int i;
1515
for (i = 0; i < IP_MAX_LEN; i += 4) {
@@ -22,9 +22,8 @@ static inline void dump_xlated_flow(struct translated_flow_t *flow) {
2222
}
2323
}
2424

25-
static void __always_inline parse_tuple(struct nf_conntrack_tuple *t,
26-
struct translated_flow_t *flow, u16 zone_id, u16 family,
27-
u8 protocol, bool invert) {
25+
static void __always_inline parse_tuple(struct nf_conntrack_tuple *t, struct xlat_metrics_t *flow,
26+
u16 zone_id, u16 family, u8 protocol, bool invert) {
2827
__builtin_memset(flow, 0, sizeof(*flow));
2928
if (invert) {
3029
if (is_transport_protocol(protocol)) {
@@ -74,7 +73,7 @@ static inline long translate_lookup_and_update_flow(flow_id *id, u16 flags,
7473
struct nf_conntrack_tuple *reply_t, u16 zone_id,
7574
u16 family, u16 eth_protocol) {
7675
long ret = 0;
77-
struct translated_flow_t orig;
76+
struct xlat_metrics_t orig;
7877

7978
parse_tuple(orig_t, &orig, zone_id, family, id->transport_protocol, false);
8079

@@ -85,33 +84,30 @@ static inline long translate_lookup_and_update_flow(flow_id *id, u16 flags,
8584
id->dst_port = orig.dport;
8685
u64 current_time = bpf_ktime_get_ns();
8786

88-
additional_metrics *extra_metrics = bpf_map_lookup_elem(&additional_flow_metrics, id);
87+
xlat_metrics *extra_metrics = bpf_map_lookup_elem(&aggregated_flows_xlat, id);
8988
if (extra_metrics != NULL) {
9089
extra_metrics->end_mono_time_ts = current_time;
91-
parse_tuple(reply_t, &extra_metrics->translated_flow, zone_id, family,
92-
id->transport_protocol, true);
90+
parse_tuple(reply_t, extra_metrics, zone_id, family, id->transport_protocol, true);
9391
return ret;
9492
}
9593

9694
// there is no matching flows so lets create new one and add the xlation
97-
additional_metrics new_extra_metrics;
95+
xlat_metrics new_extra_metrics;
9896
__builtin_memset(&new_extra_metrics, 0, sizeof(new_extra_metrics));
9997
new_extra_metrics.start_mono_time_ts = current_time;
10098
new_extra_metrics.end_mono_time_ts = current_time;
10199
new_extra_metrics.eth_protocol = eth_protocol;
102-
parse_tuple(reply_t, &new_extra_metrics.translated_flow, zone_id, family,
103-
id->transport_protocol, true);
104-
ret = bpf_map_update_elem(&additional_flow_metrics, id, &new_extra_metrics, BPF_NOEXIST);
100+
parse_tuple(reply_t, &new_extra_metrics, zone_id, family, id->transport_protocol, true);
101+
ret = bpf_map_update_elem(&aggregated_flows_xlat, id, &new_extra_metrics, BPF_NOEXIST);
105102
if (ret != 0) {
106103
if (trace_messages && ret != -EEXIST) {
107104
bpf_printk("error packet translation creating new flow %d\n", ret);
108105
}
109106
if (ret == -EEXIST) {
110-
additional_metrics *extra_metrics = bpf_map_lookup_elem(&additional_flow_metrics, id);
107+
xlat_metrics *extra_metrics = bpf_map_lookup_elem(&aggregated_flows_xlat, id);
111108
if (extra_metrics != NULL) {
112109
extra_metrics->end_mono_time_ts = current_time;
113-
parse_tuple(reply_t, &extra_metrics->translated_flow, zone_id, family,
114-
id->transport_protocol, true);
110+
parse_tuple(reply_t, extra_metrics, zone_id, family, id->transport_protocol, true);
115111
return 0;
116112
}
117113
}

bpf/types.h

Lines changed: 46 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -116,43 +116,62 @@ typedef struct flow_metrics_t {
116116
// Force emitting enums/structs into the ELF
117117
const static struct flow_metrics_t *unused2 __attribute__((unused));
118118

119-
typedef struct additional_metrics_t {
119+
typedef struct dns_metrics_t {
120+
u64 start_mono_time_ts;
121+
u64 end_mono_time_ts;
122+
u64 latency;
123+
u16 id;
124+
u16 flags;
125+
u16 eth_protocol;
126+
u8 errno;
127+
char name[DNS_NAME_MAX_LEN];
128+
} dns_metrics;
129+
130+
typedef struct pkt_drop_metrics_t {
131+
u64 start_mono_time_ts;
132+
u64 end_mono_time_ts;
133+
u64 bytes;
134+
u32 packets;
135+
u32 latest_drop_cause;
136+
u16 latest_flags;
137+
u16 eth_protocol;
138+
u8 latest_state;
139+
} pkt_drop_metrics;
140+
141+
typedef struct network_events_metrics_t {
120142
u64 start_mono_time_ts;
121143
u64 end_mono_time_ts;
122-
struct dns_record_t {
123-
u64 latency;
124-
u16 id;
125-
u16 flags;
126-
u8 errno;
127-
char name[DNS_NAME_MAX_LEN];
128-
} dns_record;
129-
struct pkt_drops_t {
130-
u64 bytes;
131-
u32 packets;
132-
u32 latest_drop_cause;
133-
u16 latest_flags;
134-
u8 latest_state;
135-
} pkt_drops;
136-
u64 flow_rtt;
137144
u8 network_events[MAX_NETWORK_EVENTS][MAX_EVENT_MD];
138-
struct translated_flow_t {
139-
u8 saddr[IP_MAX_LEN];
140-
u8 daddr[IP_MAX_LEN];
141-
u16 sport;
142-
u16 dport;
143-
u16 zone_id;
144-
} translated_flow;
145145
u16 eth_protocol;
146146
u8 network_events_idx;
147-
bool ipsec_encrypted;
147+
} network_events_metrics;
148+
149+
typedef struct xlat_metrics_t {
150+
u64 start_mono_time_ts;
151+
u64 end_mono_time_ts;
152+
u8 saddr[IP_MAX_LEN];
153+
u8 daddr[IP_MAX_LEN];
154+
u16 sport;
155+
u16 dport;
156+
u16 zone_id;
157+
u16 eth_protocol;
158+
} xlat_metrics;
159+
160+
typedef struct additional_metrics_t {
161+
u64 start_mono_time_ts;
162+
u64 end_mono_time_ts;
163+
u64 flow_rtt;
148164
int ipsec_encrypted_ret;
165+
u16 eth_protocol;
166+
bool ipsec_encrypted;
149167
} additional_metrics;
150168

151169
// Force emitting enums/structs into the ELF
170+
const static struct dns_metrics_t *unused4 __attribute__((unused));
171+
const static struct pkt_drop_metrics_t *unused5 __attribute__((unused));
172+
const static struct network_events_metrics_t *unused6 __attribute__((unused));
173+
const static struct xlat_metrics_t *unused13 __attribute__((unused));
152174
const static struct additional_metrics_t *unused3 __attribute__((unused));
153-
const static struct dns_record_t *unused4 __attribute__((unused));
154-
const static struct pkt_drops_t *unused5 __attribute__((unused));
155-
const static struct translated_flow_t *unused6 __attribute__((unused));
156175

157176
// Attributes that uniquely identify a flow
158177
typedef struct flow_id_t {

0 commit comments

Comments
 (0)