1313
1414use aya_ebpf:: {
1515 bindings:: {
16- bpf_fib_lookup, bpf_fib_lookup__bindgen_ty_1, bpf_fib_lookup__bindgen_ty_2,
17- bpf_fib_lookup__bindgen_ty_3, bpf_fib_lookup__bindgen_ty_4,
18- bpf_fib_lookup__bindgen_ty_5, xdp_action, BPF_FIB_LKUP_RET_BLACKHOLE ,
19- BPF_FIB_LKUP_RET_FRAG_NEEDED , BPF_FIB_LKUP_RET_NO_NEIGH , BPF_FIB_LKUP_RET_PROHIBIT ,
20- BPF_FIB_LKUP_RET_SUCCESS , BPF_FIB_LKUP_RET_UNREACHABLE ,
16+ bpf_fib_lookup, xdp_action, BPF_FIB_LKUP_RET_BLACKHOLE , BPF_FIB_LKUP_RET_FRAG_NEEDED ,
17+ BPF_FIB_LKUP_RET_NO_NEIGH , BPF_FIB_LKUP_RET_PROHIBIT , BPF_FIB_LKUP_RET_SUCCESS ,
18+ BPF_FIB_LKUP_RET_UNREACHABLE ,
2119 } ,
2220 helpers:: gen:: {
2321 bpf_fib_lookup as fib_lookup_helper, bpf_xdp_adjust_head, bpf_xdp_adjust_tail,
@@ -252,54 +250,67 @@ fn handle_ipv4(
252250 return dispatch_custom_fib ( custom, ctx, eth, ip as * mut u8 , true , ingress_vid) ;
253251 }
254252
255- // Build the FIB-lookup struct via a struct literal, NOT
256- // `mem::zeroed()`. LLVM lowers a 60-byte zero-init into a memset
257- // libcall that the BPF backend emits as a separate bpf-to-bpf
258- // function. Combined with v0.2.5's `bpf_tail_call`, that violates
259- // the kernel verifier's "tail_calls are not allowed in non-JITed
260- // programs with bpf-to-bpf calls" guard on UniFi 5.15, which runs
261- // BPF in interpreter mode. Each union is initialized via its
262- // largest variant so every byte is written explicitly — no memset
263- // libcall, no subprogram. See SPEC §11.x and the v0.2.5 UniFi
264- // post-mortem in tail-call-architecture.md.
265- let mut fib = bpf_fib_lookup {
266- family : AF_INET ,
267- l4_protocol : proto,
268- sport,
269- dport,
270- __bindgen_anon_1 : bpf_fib_lookup__bindgen_ty_1 {
271- tot_len : u16:: from_be_bytes ( unsafe { ( * ip) . tot_len } ) ,
272- } ,
273- ifindex : unsafe { ( * ctx. ctx ) . ingress_ifindex } ,
274- // 4-byte union: write the 4-byte `flowinfo` arm so all bytes
275- // are covered. For IPv4 the kernel only reads `tos` (the LSB),
276- // so put tos in the low byte and zero the rest.
277- __bindgen_anon_2 : bpf_fib_lookup__bindgen_ty_2 {
278- flowinfo : unsafe { ( * ip) . tos } as u32 ,
279- } ,
280- // 16-byte unions: write the 16-byte `ipv6_*` arms. For IPv4
281- // the kernel reads the first 4 bytes as `ipv4_*`; that's the
282- // first u32 of the array, with the rest zero.
283- __bindgen_anon_3 : bpf_fib_lookup__bindgen_ty_3 {
284- ipv6_src : [ u32:: from_ne_bytes ( src_bytes) , 0 , 0 , 0 ] ,
285- } ,
286- __bindgen_anon_4 : bpf_fib_lookup__bindgen_ty_4 {
287- ipv6_dst : [ u32:: from_ne_bytes ( dst_bytes) , 0 , 0 , 0 ] ,
288- } ,
289- __bindgen_anon_5 : bpf_fib_lookup__bindgen_ty_5 { tbid : 0 } ,
290- smac : [ 0 ; 6 ] ,
291- dmac : [ 0 ; 6 ] ,
292- } ;
253+ // Build `bpf_fib_lookup` via `MaybeUninit` + selective field
254+ // writes — NEVER as a struct literal with zero-padded arrays
255+ // ([0; 6], [val, 0, 0, 0], etc.) and NEVER with `mem::zeroed()`.
256+ //
257+ // Why: LLVM's "merge stores" pass recognizes any run of adjacent
258+ // zero stores and lowers them into a `memset` libcall. The BPF
259+ // backend has no libc to link against, so memset becomes a
260+ // separate BPF function — a bpf-to-bpf subprogram call. That
261+ // combined with `bpf_tail_call` (which fast_path uses to enter
262+ // finalize) trips the kernel verifier's
263+ // "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls"
264+ // guard on kernels whose JIT returns false from
265+ // `bpf_jit_supports_subprog_tailcalls()` (UniFi 5.15-ui-cn9670
266+ // aarch64 is the canonical hit).
267+ //
268+ // We can't fix this with `#[no_mangle] #[inline(always)]` shim
269+ // functions either: rustc warns "`#[inline]` is ignored on
270+ // externally exported functions" and refuses to inline at the
271+ // libcall site.
272+ //
273+ // The only reliable mitigation is to never produce the merge-able
274+ // pattern in the first place. So: write each input field
275+ // individually via raw pointer; leave smac/dmac uninit (kernel
276+ // writes them as outputs); leave the unread union bytes uninit
277+ // (kernel doesn't read offsets 13-15 of __bindgen_anon_2 or
278+ // 20-31/36-47 of __bindgen_anon_3/4 in the IPv4 path).
279+ let mut fib_uninit: core:: mem:: MaybeUninit < bpf_fib_lookup > =
280+ core:: mem:: MaybeUninit :: uninit ( ) ;
281+ let p = fib_uninit. as_mut_ptr ( ) ;
282+ unsafe {
283+ ( * p) . family = AF_INET ;
284+ ( * p) . l4_protocol = proto;
285+ ( * p) . sport = sport;
286+ ( * p) . dport = dport;
287+ ( * p) . __bindgen_anon_1 . tot_len = u16:: from_be_bytes ( ( * ip) . tot_len ) ;
288+ ( * p) . ifindex = ( * ctx. ctx ) . ingress_ifindex ;
289+ ( * p) . __bindgen_anon_2 . tos = ( * ip) . tos ;
290+ ( * p) . __bindgen_anon_3 . ipv4_src = u32:: from_ne_bytes ( src_bytes) ;
291+ ( * p) . __bindgen_anon_4 . ipv4_dst = u32:: from_ne_bytes ( dst_bytes) ;
292+ // tbid: routing table ID. 0 = main table. Single u32 store —
293+ // no merge with other zero stores because adjacent fields are
294+ // either non-zero (the writes above) or kernel-output (smac
295+ // immediately follows tbid at offset 52).
296+ ( * p) . __bindgen_anon_5 . tbid = 0 ;
297+ }
293298
294299 let ret = unsafe {
295300 fib_lookup_helper (
296301 ctx. ctx as * mut _ ,
297- & mut fib as * mut _ ,
302+ p as * mut _ ,
298303 mem:: size_of :: < bpf_fib_lookup > ( ) as i32 ,
299304 0 ,
300305 )
301306 } ;
302307
308+ // Borrow from the raw pointer for the rest of the function.
309+ // After fib_lookup_helper, the kernel has written smac/dmac on
310+ // success; on failure, dispatch_fib doesn't read them. Other
311+ // unread union bytes stay uninit but aren't observed.
312+ let fib_ref = unsafe { & * p } ;
313+
303314 if compare {
304315 // Compare mode: we also ran above iff `use_custom`; but since
305316 // `compare` implies `use_custom` (userspace rejects otherwise),
@@ -309,10 +320,10 @@ fn handle_ipv4(
309320 // or manual map poke), the branch above is unreachable and
310321 // we still do only the kernel lookup here.
311322 let custom = fib:: lookup_v4 ( src_bytes, dst_bytes, proto, sport_h, dport_h) ;
312- compare_and_bump ( ret as u32 , & fib , & custom) ;
323+ compare_and_bump ( ret as u32 , fib_ref , & custom) ;
313324 }
314325
315- dispatch_fib ( ret as u32 , ctx, eth, ip as * mut u8 , true , & fib , ingress_vid)
326+ dispatch_fib ( ret as u32 , ctx, eth, ip as * mut u8 , true , fib_ref , ingress_vid)
316327}
317328
318329#[ inline( always) ]
@@ -379,47 +390,44 @@ fn handle_ipv6(
379390 return dispatch_custom_fib ( custom, ctx, eth, ip as * mut u8 , false , ingress_vid) ;
380391 }
381392
382- // See handle_ipv4 for the rationale behind the struct-literal
393+ // See handle_ipv4 for the rationale behind the MaybeUninit
383394 // pattern (avoids the LLVM-emitted memset bpf-to-bpf subprogram
384395 // that conflicts with bpf_tail_call on non-JITed kernels).
385- let mut fib = bpf_fib_lookup {
386- family : AF_INET6 ,
387- l4_protocol : next,
388- sport,
389- dport,
390- __bindgen_anon_1 : bpf_fib_lookup__bindgen_ty_1 {
391- tot_len : u16:: from_be_bytes ( unsafe { ( * ip) . payload_len } ) + Ipv6Hdr :: LEN as u16 ,
392- } ,
393- ifindex : unsafe { ( * ctx. ctx ) . ingress_ifindex } ,
394- __bindgen_anon_2 : bpf_fib_lookup__bindgen_ty_2 {
395- flowinfo : u32:: from_be_bytes ( unsafe { ( * ip) . vcf } ) ,
396- } ,
397- __bindgen_anon_3 : bpf_fib_lookup__bindgen_ty_3 {
398- ipv6_src : bytes_to_u32x4 ( & src_bytes) ,
399- } ,
400- __bindgen_anon_4 : bpf_fib_lookup__bindgen_ty_4 {
401- ipv6_dst : bytes_to_u32x4 ( & dst_bytes) ,
402- } ,
403- __bindgen_anon_5 : bpf_fib_lookup__bindgen_ty_5 { tbid : 0 } ,
404- smac : [ 0 ; 6 ] ,
405- dmac : [ 0 ; 6 ] ,
406- } ;
396+ let mut fib_uninit: core:: mem:: MaybeUninit < bpf_fib_lookup > =
397+ core:: mem:: MaybeUninit :: uninit ( ) ;
398+ let p = fib_uninit. as_mut_ptr ( ) ;
399+ unsafe {
400+ ( * p) . family = AF_INET6 ;
401+ ( * p) . l4_protocol = next;
402+ ( * p) . sport = sport;
403+ ( * p) . dport = dport;
404+ ( * p) . __bindgen_anon_1 . tot_len =
405+ u16:: from_be_bytes ( ( * ip) . payload_len ) + Ipv6Hdr :: LEN as u16 ;
406+ ( * p) . ifindex = ( * ctx. ctx ) . ingress_ifindex ;
407+ ( * p) . __bindgen_anon_2 . flowinfo = u32:: from_be_bytes ( ( * ip) . vcf ) ;
408+ ( * p) . __bindgen_anon_3 . ipv6_src = bytes_to_u32x4 ( & src_bytes) ;
409+ ( * p) . __bindgen_anon_4 . ipv6_dst = bytes_to_u32x4 ( & dst_bytes) ;
410+ ( * p) . __bindgen_anon_5 . tbid = 0 ;
411+ // smac, dmac: kernel-output, leave uninit.
412+ }
407413
408414 let ret = unsafe {
409415 fib_lookup_helper (
410416 ctx. ctx as * mut _ ,
411- & mut fib as * mut _ ,
417+ p as * mut _ ,
412418 mem:: size_of :: < bpf_fib_lookup > ( ) as i32 ,
413419 0 ,
414420 )
415421 } ;
416422
423+ let fib_ref = unsafe { & * p } ;
424+
417425 if compare {
418426 let custom = fib:: lookup_v6 ( src_bytes, dst_bytes, next, sport_h, dport_h) ;
419- compare_and_bump ( ret as u32 , & fib , & custom) ;
427+ compare_and_bump ( ret as u32 , fib_ref , & custom) ;
420428 }
421429
422- dispatch_fib ( ret as u32 , ctx, eth, ip as * mut u8 , false , & fib , ingress_vid)
430+ dispatch_fib ( ret as u32 , ctx, eth, ip as * mut u8 , false , fib_ref , ingress_vid)
423431}
424432
425433#[ inline( always) ]
@@ -529,7 +537,11 @@ fn forward_success(
529537 ( * mctx_ptr) . ingress_vid = ingress_vid;
530538 ( * mctx_ptr) . ip_offset = ip_offset as u32 ;
531539 ( * mctx_ptr) . is_v4 = u8:: from ( is_v4) ;
532- ( * mctx_ptr) . _pad = [ 0 ; 3 ] ;
540+ // _pad: per-CPU array elements are zero-initialized at map
541+ // creation and we never write the padding region — leaving
542+ // it as the residual zeros saves a memset libcall (now
543+ // inlined via libcalls.rs anyway, but the moot store is
544+ // still worth eliding).
533545 }
534546 } else {
535547 // Per-CPU array index 0 is always present; this branch should
0 commit comments