Skip to content

Commit 5d8ef79

Browse files
committed
bond: add lacp support
Implement Link Aggregation Control Protocol (802.3ad/802.1AX) for bond interfaces. LACP provides dynamic link aggregation with automatic member discovery, synchronization, and failover based on protocol negotiation rather than simple link state monitoring.

Add a new GR_BOND_MODE_LACP operational mode alongside the existing active-backup mode. LACP mode includes three load balancing algorithms for distributing egress traffic across active members:

- rss: Reuse hardware RSS hash when available for zero-cost balancing
- l2: Hash based on destination MAC address and VLAN tag
- l3+l4: Hash based on IP addresses and TCP/UDP port numbers

The control plane implementation handles LACP protocol state machines including receiving and validating LACP PDUs, tracking partner state, managing synchronization and collecting/distributing flags, and handling fast/slow periodic timers and timeout detection. A periodic timer runs every second to send LACP PDUs and detect partner timeouts based on the negotiated timeout intervals.

NB: port numbers in LACP PDUs are 1-based rather than 0-based to ensure interoperability with switches that reject port ID zero as invalid.

Members transition to active state when LACP negotiation succeeds and both sides report synchronized state. Active members are added to a 256-entry redirection table that maps hash values to member indices. The table is populated by distributing active member IDs in round-robin fashion across all entries, ensuring even load distribution while maintaining flow consistency. For example, with 3 active members, entry 0 maps to member 0, entry 1 to member 1, entry 2 to member 2, entry 3 back to member 0, and so on.

Traffic is directed by computing a hash from packet headers, taking modulo 256 to get a table index, and using the stored member ID at that position. This provides deterministic per-flow member selection with minimal disruption when members are added or removed.

The bond interface transitions to running state when at least one member becomes active.

Signed-off-by: Robin Jarry <[email protected]>
1 parent e99b0c0 commit 5d8ef79

File tree

14 files changed

+837
-18
lines changed

14 files changed

+837
-18
lines changed

modules/infra/api/gr_infra.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,34 @@ struct gr_iface_info_vlan {
122122
// Bond operational modes
123123
typedef enum : uint8_t {
124124
// A single member carries traffic at any time (the bond's active member).
GR_BOND_MODE_ACTIVE_BACKUP = 1,
125+
// 802.3ad/802.1AX: members become active through LACP negotiation.
GR_BOND_MODE_LACP,
125126
} gr_bond_mode_t;
126127

127128
// Return the user-facing name of a bond mode, or "?" for an unknown value.
static inline char *gr_bond_mode_name(gr_bond_mode_t mode) {
	char *name = "?";

	// No default label on purpose: the compiler can then warn when a new
	// mode is added to the enum without a name here.
	switch (mode) {
	case GR_BOND_MODE_ACTIVE_BACKUP:
		name = "active-backup";
		break;
	case GR_BOND_MODE_LACP:
		name = "lacp";
		break;
	}

	return name;
}
137+
138+
// Bond balancing algorithms
139+
typedef enum : uint8_t {
140+
// Reuse the hardware RSS hash when available.
GR_BOND_ALGO_RSS = 1,
141+
// Hash based on destination MAC address and VLAN tag.
GR_BOND_ALGO_L2,
142+
// Hash based on IP addresses and TCP/UDP port numbers.
GR_BOND_ALGO_L3_L4,
143+
} gr_bond_algo_t;
144+
145+
// Return the user-facing name of a balancing algorithm, or "?" for an
// unknown value.
static inline char *gr_bond_algo_name(gr_bond_algo_t algo) {
	char *name = "?";

	// No default label on purpose: the compiler can then warn when a new
	// algorithm is added to the enum without a name here.
	switch (algo) {
	case GR_BOND_ALGO_RSS:
		name = "rss";
		break;
	case GR_BOND_ALGO_L2:
		name = "l2";
		break;
	case GR_BOND_ALGO_L3_L4:
		name = "l3+l4";
		break;
	}

	return name;
}
@@ -137,6 +159,7 @@ static inline char *gr_bond_mode_name(gr_bond_mode_t mode) {
137159
#define GR_BOND_SET_MEMBERS GR_BIT64(33)
138160
#define GR_BOND_SET_PRIMARY GR_BIT64(34)
139161
#define GR_BOND_SET_MAC GR_BIT64(35)
162+
#define GR_BOND_SET_ALGO GR_BIT64(36)
140163

141164
struct gr_bond_member {
142165
uint16_t iface_id;
@@ -146,6 +169,7 @@ struct gr_bond_member {
146169
// Info for GR_IFACE_TYPE_BOND interfaces
147170
struct gr_iface_info_bond {
148171
gr_bond_mode_t mode;
172+
gr_bond_algo_t algo; // Only for LACP
149173
struct rte_ether_addr mac;
150174

151175
uint8_t primary_member;

modules/infra/cli/bond.c

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ static void bond_show(struct gr_api_client *c, const struct gr_iface *iface) {
1818
const struct gr_iface_info_bond *bond = PAYLOAD(iface);
1919

2020
printf("mode: %s\n", gr_bond_mode_name(bond->mode));
21+
if (bond->mode == GR_BOND_MODE_LACP)
22+
printf("algo: %s\n", gr_bond_algo_name(bond->algo));
2123
printf("mac: " ETH_F "\n", &bond->mac);
2224
printf("members:\n");
2325
for (uint8_t i = 0; i < bond->n_members; i++) {
@@ -71,6 +73,9 @@ bond_list_info(struct gr_api_client *c, const struct gr_iface *iface, char *buf,
7173
else
7274
SAFE_BUF(snprintf, len, " primary=%s", i->name);
7375
break;
76+
case GR_BOND_MODE_LACP:
77+
SAFE_BUF(snprintf, len, " algo=%s", gr_bond_algo_name(bond->algo));
78+
break;
7479
}
7580

7681
err:
@@ -88,9 +93,29 @@ static int bond_mode_from_str(const char *str, gr_bond_mode_t *mode) {
8893
*mode = GR_BOND_MODE_ACTIVE_BACKUP;
8994
return 0;
9095
}
96+
if (strcmp(str, "lacp") == 0) {
97+
*mode = GR_BOND_MODE_LACP;
98+
return 0;
99+
}
91100
return errno_set(EPROTONOSUPPORT);
92101
}
93102

103+
// Parse a balancing algorithm name ("rss", "l2" or "l3+l4") into *algo.
// Returns 0 on success. On an unknown name, *algo is left untouched and the
// result of errno_set(ESOCKTNOSUPPORT) is returned.
static int bond_algo_from_str(const char *str, gr_bond_algo_t *algo) {
	static const struct {
		const char *name;
		gr_bond_algo_t value;
	} known[] = {
		{"rss", GR_BOND_ALGO_RSS},
		{"l2", GR_BOND_ALGO_L2},
		{"l3+l4", GR_BOND_ALGO_L3_L4},
	};

	for (unsigned i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (strcmp(str, known[i].name) == 0) {
			*algo = known[i].value;
			return 0;
		}
	}

	return errno_set(ESOCKTNOSUPPORT);
}
118+
94119
static uint64_t parse_bond_args(
95120
struct gr_api_client *c,
96121
const struct ec_pnode *p,
@@ -109,6 +134,16 @@ static uint64_t parse_bond_args(
109134
set_attrs |= GR_BOND_SET_MODE;
110135
}
111136

137+
if ((str = arg_str(p, "ALGO")) != NULL) {
	// The balancing algorithm is only meaningful for LACP bonds.
	// Reject it for any other mode, mirroring the PRIMARY check which
	// is restricted to active-backup.
	if (bond->mode != GR_BOND_MODE_LACP) {
		errno = EPROTOTYPE;
		goto err;
	}
	if (bond_algo_from_str(str, &bond->algo) < 0)
		goto err;
	set_attrs |= GR_BOND_SET_ALGO;
}
146+
112147
if (arg_str(p, "MEMBER") != NULL) {
113148
const struct ec_pnode *m = NULL;
114149
bond->n_members = 0;
@@ -131,6 +166,10 @@ static uint64_t parse_bond_args(
131166
}
132167

133168
if ((str = arg_str(p, "PRIMARY")) != NULL) {
169+
if (bond->mode != GR_BOND_MODE_ACTIVE_BACKUP) {
170+
errno = EPROTOTYPE;
171+
goto err;
172+
}
134173
struct gr_iface *primary = iface_from_name(c, str);
135174
if (primary == NULL)
136175
goto err;
@@ -214,20 +253,37 @@ static cmd_status_t bond_set(struct gr_api_client *c, const struct ec_pnode *p)
214253
return ret;
215254
}
216255

217-
#define BOND_ATTRS_CMD IFACE_ATTRS_CMD ",(primary PRIMARY),(mac MAC)"
256+
#define BOND_ATTRS_CMD IFACE_ATTRS_CMD ",((primary PRIMARY)|(balance ALGO)),(mac MAC)"
218257
#define BOND_ATTRS_ARGS \
219258
IFACE_ATTRS_ARGS, \
220259
with_help( \
221260
"Bond mode.", \
222261
EC_NODE_OR( \
223262
"MODE", \
224-
with_help("Active backup mode.", ec_node_str("", "active-backup")) \
263+
with_help( \
264+
"Active backup mode.", ec_node_str("", "active-backup") \
265+
), \
266+
with_help("LACP mode.", ec_node_str("", "lacp")) \
225267
) \
226268
), \
227269
with_help( \
228270
"Primary member.", \
229271
ec_node_dyn("PRIMARY", complete_iface_names, INT2PTR(GR_IFACE_TYPE_PORT)) \
230272
), \
273+
with_help( \
274+
"Balancing algorithm.", \
275+
EC_NODE_OR( \
276+
"ALGO", \
277+
with_help("Reuse hardware RSS hash.", ec_node_str("", "rss")), \
278+
with_help( \
279+
"Hash based on Ethernet and VLAN.", ec_node_str("", "l2") \
280+
), \
281+
with_help( \
282+
"Hash based on IP/IPv6 and TCP/UDP.", \
283+
ec_node_str("", "l3+l4") \
284+
) \
285+
) \
286+
), \
231287
with_help("Set the bond MAC address.", ec_node_re("MAC", ETH_ADDR_RE))
232288

233289
static int ctx_init(struct ec_node *root) {

modules/infra/control/bond.c

Lines changed: 79 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Copyright (c) 2025 Robin Jarry
33

44
#include <gr_bond.h>
5+
#include <gr_eth.h>
56
#include <gr_event.h>
67
#include <gr_infra.h>
78
#include <gr_log.h>
@@ -190,8 +191,11 @@ static int bond_init_new_members(const struct iface *iface, const struct gr_ifac
190191
}
191192

192193
LOG(DEBUG, "adding %s to bond %s", member->name, iface->name);
194+
if (iface_add_eth_addr(member->id, &LACP_DST_MAC) < 0)
195+
return errno_log(errno, "iface_add_eth_addr(lacp)");
193196
port = iface_info_port(member);
194197
port->bond_iface_id = iface->id;
198+
memset(&bond->members[i], 0, sizeof(bond->members[i]));
195199
skip:;
196200
}
197201

@@ -225,17 +229,23 @@ static void bond_fini_old_members(const struct iface *iface, const struct gr_ifa
225229
member->name,
226230
strerror(errno));
227231
}
232+
if (iface_del_eth_addr(member->id, &LACP_DST_MAC) < 0) {
233+
LOG(WARNING,
234+
"failed to unconfigure mac address on member %s: %s",
235+
member->name,
236+
strerror(errno));
237+
}
228238

229239
port = iface_info_port(member);
230240
port->bond_iface_id = GR_IFACE_ID_UNDEF;
231241
skip:;
232242
}
233243
}
234244

235-
static void bond_update_active_members(struct iface *iface) {
245+
void bond_update_active_members(struct iface *iface) {
236246
struct iface_info_bond *bond = iface_info_bond(iface);
237-
uint8_t n_active_members = 0;
238247
const struct iface *member;
248+
uint8_t *active_ids = NULL;
239249

240250
switch (bond->mode) {
241251
case GR_BOND_MODE_ACTIVE_BACKUP:
@@ -250,7 +260,7 @@ static void bond_update_active_members(struct iface *iface) {
250260
for (uint8_t i = 0; i < bond->n_members; i++) {
251261
member = bond->members[i].iface;
252262
if (i == active_member) {
253-
n_active_members = 1;
263+
gr_vec_add(active_ids, i);
254264
LOG(INFO,
255265
"bond %s active member is now %s",
256266
iface->name,
@@ -260,11 +270,66 @@ static void bond_update_active_members(struct iface *iface) {
260270
}
261271
bond->active_member = active_member;
262272
break;
273+
case GR_BOND_MODE_LACP:
274+
for (uint8_t i = 0; i < bond->n_members; i++) {
275+
struct bond_member *member = &bond->members[i];
276+
const struct iface_info_port *port = iface_info_port(member->iface);
277+
278+
// The port_number must *never* be zero,
279+
// otherwise some switches reject the LACP packets.
280+
// Use a 1-based port_number.
281+
member->local.port_number = rte_cpu_to_be_16(i + 1);
282+
member->local.port_priority = RTE_BE16(0x8000);
283+
member->local.system_priority = RTE_BE16(0x8000);
284+
member->local.system_mac = bond->mac;
285+
// Key based on port speed (in Mb/s): simplified encoding for aggregation
286+
// Ports with same speed can aggregate together
287+
member->local.key = rte_cpu_to_be_16(port->link_speed);
288+
if (member->last_rx == 0) {
289+
member->local.state = LACP_STATE_ACTIVE | LACP_STATE_AGGREGATABLE
290+
| LACP_STATE_FAST | LACP_STATE_DEFAULTED
291+
| LACP_STATE_EXPIRED;
292+
member->active = false;
293+
member->need_to_transmit = true;
294+
member->next_tx = 0;
295+
LOG(DEBUG,
296+
"bond %s member %s reset local state",
297+
iface->name,
298+
member->iface->name);
299+
}
300+
301+
// Add to active members if link is up and LACP member is valid
302+
if ((member->iface->flags & GR_IFACE_F_UP)
303+
&& (member->iface->state & GR_IFACE_S_RUNNING) && member->active) {
304+
LOG(DEBUG,
305+
"bond %s member %s active",
306+
iface->name,
307+
member->iface->name);
308+
gr_vec_add(active_ids, i);
309+
}
310+
}
311+
break;
263312
}
264-
if (n_active_members > 0)
265-
iface->state |= GR_IFACE_S_RUNNING;
266-
else
267-
iface->state &= ~GR_IFACE_S_RUNNING;
313+
314+
if (gr_vec_len(active_ids) > 0) {
315+
for (unsigned i = 0; i < ARRAY_DIM(bond->redirection_table); i++) {
316+
bond->redirection_table[i] = active_ids[i % gr_vec_len(active_ids)];
317+
}
318+
if (!(iface->state & GR_IFACE_S_RUNNING)) {
319+
iface->state |= GR_IFACE_S_RUNNING;
320+
if (iface->flags & GR_IFACE_F_UP) {
321+
gr_event_push(GR_EVENT_IFACE_STATUS_UP, iface);
322+
}
323+
}
324+
} else {
325+
memset(bond->redirection_table, UINT8_MAX, sizeof(bond->redirection_table));
326+
if (iface->state & GR_IFACE_S_RUNNING) {
327+
iface->state &= ~GR_IFACE_S_RUNNING;
328+
gr_event_push(GR_EVENT_IFACE_STATUS_DOWN, iface);
329+
}
330+
}
331+
332+
gr_vec_free(active_ids);
268333
}
269334

270335
static int bond_reconfig(
@@ -279,6 +344,9 @@ static int bond_reconfig(
279344
if (set_attrs & GR_BOND_SET_MODE)
280345
bond->mode = api->mode;
281346

347+
if (set_attrs & GR_BOND_SET_ALGO)
348+
bond->algo = api->algo ?: GR_BOND_ALGO_RSS;
349+
282350
if (set_attrs & GR_BOND_SET_PRIMARY) {
283351
uint8_t n_members = (set_attrs & GR_BOND_SET_MEMBERS) ?
284352
api->n_members :
@@ -342,6 +410,7 @@ static void bond_to_api(void *info, const struct iface *iface) {
342410
struct gr_iface_info_bond *api = info;
343411

344412
api->mode = bond->mode;
413+
api->algo = bond->algo;
345414
api->mac = bond->mac;
346415
api->n_members = bond->n_members;
347416
api->primary_member = bond->primary_member;
@@ -351,6 +420,9 @@ static void bond_to_api(void *info, const struct iface *iface) {
351420
case GR_BOND_MODE_ACTIVE_BACKUP:
352421
api->members[i].active = i == bond->active_member;
353422
break;
423+
case GR_BOND_MODE_LACP:
424+
api->members[i].active = bond->members[i].active;
425+
break;
354426
}
355427
}
356428
}
@@ -393,11 +465,6 @@ static void bond_event(uint32_t, const void *obj) {
393465
assert(b->type == GR_IFACE_TYPE_BOND);
394466

395467
bond_update_active_members(b);
396-
397-
if (b->state & GR_IFACE_S_RUNNING && b->flags & GR_IFACE_F_UP)
398-
gr_event_push(GR_EVENT_IFACE_STATUS_UP, b);
399-
else
400-
gr_event_push(GR_EVENT_IFACE_STATUS_DOWN, b);
401468
}
402469

403470
static struct gr_event_subscription bond_event_handler = {

modules/infra/control/gr_bond.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include <gr_iface.h>
77
#include <gr_infra.h>
8+
#include <gr_lacp.h>
89
#include <gr_vec.h>
910

1011
#include <stdint.h>
@@ -13,10 +14,18 @@
1314

1415
// Per-member state kept by a bond interface, including LACP bookkeeping.
struct bond_member {
1516
// Underlying member interface.
const struct iface *iface;
17+
// In LACP mode, a member is only added to the redirection table when this
// is set and its interface is up and running. Also reported through the API.
bool active;
18+
bool need_to_transmit; // Need to send immediately
19+
clock_t next_tx; // Next time we need to send a LACP packet
20+
// A value of 0 makes bond_update_active_members() reset the local LACP state.
clock_t last_rx; // Last time we received a LACP packet
21+
// For direct inclusion in LACP packets
22+
// Our own parameters (port number, priorities, system MAC, key, state).
struct lacp_participant local;
23+
// NOTE(review): presumably the partner's parameters learned from received
// LACP PDUs -- confirm against the LACP input code.
struct lacp_participant remote;
1624
};
1725

1826
GR_IFACE_INFO(GR_IFACE_TYPE_BOND, iface_info_bond, {
1927
gr_bond_mode_t mode;
28+
gr_bond_algo_t algo;
2029
struct rte_ether_addr mac;
2130

2231
uint8_t primary_member;
@@ -25,4 +34,8 @@ GR_IFACE_INFO(GR_IFACE_TYPE_BOND, iface_info_bond, {
2534
struct bond_member members[MEMBERS_MAX_LEN];
2635

2736
gr_vec struct rte_ether_addr *extra_macs;
37+
38+
uint8_t redirection_table[256];
2839
});
40+
41+
void bond_update_active_members(struct iface *);

0 commit comments

Comments
 (0)