Commit 7046281
xia/lpm: remove routing/forwarding deadlock
Due to the way that locking was written into this toy LPM
implementation, the read-only code in lpm_deliver() obtains a *write*
lock on the tree FIB. The effect of this is the unnatural combination
of first obtaining an RCU read lock and then a FIB write lock in
lpm_deliver(). This conflicts with functions such as local_newroute(),
which first obtain a FIB write lock and then wait for RCU readers to
finish before flushing anchors. The problem is that a deadlock can
occur with the following interleaving:

    thread1                                  thread2
    =======                                  =======
    local_newroute(): get write lock
                                             lpm_deliver(): get RCU read lock
    local_newroute(): wait for RCU readers
                                             lpm_deliver(): wait for write lock

Notice that this deadlock cannot occur if lpm_deliver() gets the write
lock first.

To fix this, we can duplicate the FIB entry whose anchor needs to be
flushed, replace the old entry with the duplicate, and then release the
lock. This allows writers of the tree, including the code in
lpm_deliver(), to proceed. We then wait for an RCU synchronization
before flushing the old entry's anchor, since routing-mechanism code
(not involved with the LPM tree) may still be reading that entry. Once
all these readers are done, the old entry can be reclaimed.

This removes the deadlock, but future iterations of the LPM principal
should use RCU instead of rwlocks to avoid this unnatural locking.
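The inverted ordering above is easy to reproduce outside the kernel. The
following is a hypothetical userspace sketch, not part of this commit: the
FIB write lock becomes a pthread rwlock, "waiting for RCU readers" is
simulated with a reader count and a condition variable, and a sleep forces
the interleaving from the table. Built with cc -pthread, it deadlocks by
design.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t fib_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t reader_mu = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t reader_cv = PTHREAD_COND_INITIALIZER;
static int rcu_readers;        /* stand-in for "RCU readers in flight" */

static void *newroute(void *unused)    /* plays local_newroute() */
{
        pthread_rwlock_wrlock(&fib_lock);       /* (1) FIB write lock */
        sleep(1);               /* let deliver() become a "reader" first */
        pthread_mutex_lock(&reader_mu);
        while (rcu_readers > 0)                 /* (3) wait for RCU readers */
                pthread_cond_wait(&reader_cv, &reader_mu); /* never signaled */
        pthread_mutex_unlock(&reader_mu);
        pthread_rwlock_unlock(&fib_lock);
        return NULL;
}

static void *deliver(void *unused)     /* plays lpm_deliver() */
{
        pthread_mutex_lock(&reader_mu);
        rcu_readers++;                          /* (2) "RCU read lock" */
        pthread_mutex_unlock(&reader_mu);

        pthread_rwlock_wrlock(&fib_lock);       /* (4) blocks forever */

        /* Unreachable: drop the "read lock" and wake the writer. */
        pthread_rwlock_unlock(&fib_lock);
        pthread_mutex_lock(&reader_mu);
        rcu_readers--;
        pthread_cond_signal(&reader_cv);
        pthread_mutex_unlock(&reader_mu);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, newroute, NULL);
        pthread_create(&t2, NULL, deliver, NULL);
        pthread_join(t1, NULL); /* never returns: classic AB-BA deadlock */
        pthread_join(t2, NULL);
        puts("not reached");
        return 0;
}

Note that the fix in this commit does not change lpm_deliver()'s lock
order; instead it removes the grace-period wait from under the write lock,
as the diff below shows.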
1 parent 5f72283 · commit 7046281

File tree

1 file changed: +109 −34

net/xia/ppal_lpm/main.c

@@ -46,36 +46,107 @@ static inline struct fib_xid_lpm_local *fxid_llpm(struct fib_xid *fxid)
  */
 const struct xia_ppal_rt_iops *lpm_rt_iops = &xia_ppal_tree_rt_iops;
 
+/* Only call this function after an RCU synchronization,
+ * such as by calling free_fxid.
+ */
+static void local_free_lpm(struct fib_xid_table *xtbl, struct fib_xid *fxid)
+{
+        struct fib_xid_lpm_local *llpm = fxid_llpm(fxid);
+
+        xdst_free_anchor(&llpm->anchor);
+        kfree(llpm);
+}
+
 /* Assuming the FIB is locked, find the appropriate anchor,
- * flush it, and unlock the FIB.
+ * flush it, and unlock the FIB. To do this, we create a copy of
+ * the predecessor so that some readers can continue using the tree while
+ * we wait for other readers to finish to flush the anchor.
  */
-static void newroute_flush_anchor_locked(struct fib_xid_table *xtbl,
-                                         struct fib_xid *new_fxid,
-                                         struct xip_deferred_negdep_flush *dnf)
+static int newroute_flush_anchor_locked(struct fib_xid_table *xtbl,
+                                        struct fib_xid *new_fxid,
+                                        struct xip_deferred_negdep_flush *dnf)
 {
-        struct fib_xid *pred_fxid = tree_fib_get_pred_locked(new_fxid);
+        /* At most one of @dup_llpm and @dup_mrd should be used. */
+        struct fib_xid_lpm_local *dup_llpm = NULL;
+        struct fib_xid_redirect_main *dup_mrd = NULL;
 
+        /* Find the predecessor. If it doesn't exist, we're done. */
+        struct fib_xid *pred_fxid = tree_fib_get_pred_locked(new_fxid);
         if (!pred_fxid) {
                 lpm_rt_iops->fib_unlock(xtbl, NULL);
                 fib_defer_dnf(dnf, xtbl_net(xtbl), xtbl_ppalty(xtbl));
-                return;
+                return 0;
         }
 
-        fib_free_dnf(dnf);
-        synchronize_rcu();
+        /* Flush the predecessor's anchor by first making a copy,
+         * replacing the old entry, waiting an RCU synchronization,
+         * and then freeing the old entry when done.
+         */
         switch (pred_fxid->fx_table_id) {
         case XRTABLE_LOCAL_INDEX:
-                xdst_free_anchor(&fxid_llpm(pred_fxid)->anchor);
+                /* Allocate a duplicate of the predecessor entry. */
+                dup_llpm = lpm_rt_iops->fxid_ppal_alloc(sizeof(*dup_llpm),
+                                                        GFP_ATOMIC);
+                if (!dup_llpm) {
+                        /* Can't add this entry now due to lack of memory. */
+                        lpm_rt_iops->fxid_rm_locked(NULL, xtbl, new_fxid);
+                        lpm_rt_iops->fib_unlock(xtbl, NULL);
+                        return -ENOMEM;
+                }
+
+                /* Replace the old predecessor with the new predecessor by
+                 * copying the generic struct fib_xid and replacing the old
+                 * node with the new one in the tree.
+                 */
+                xdst_init_anchor(&dup_llpm->anchor);
+                dup_llpm->common = *pred_fxid;
+                lpm_rt_iops->fxid_replace_locked(xtbl, pred_fxid,
+                                                 &dup_llpm->common);
+
+                /* Release write lock to let tree readers that get a write
+                 * lock (such as in lpm_deliver()) continue, avoiding deadlock.
+                 */
+                lpm_rt_iops->fib_unlock(xtbl, NULL);
+
+                /* Wait for existing RCU readers in routing mechanism to
+                 * finish, and then flush the anchor.
+                 *
+                 * The old predecessor is no longer accessible by the tree and
+                 * existing readers on its anchor have finished, so we can
+                 * release the old predecessor. Since we just called
+                 * synchronize_rcu(), we can directly call local_free_lpm().
+                 */
+                synchronize_rcu();
+                local_free_lpm(xtbl, pred_fxid);
+                BUG_ON(dup_mrd);
                 break;
         case XRTABLE_MAIN_INDEX:
-                xdst_invalidate_redirect(xtbl_net(xtbl), xtbl_ppalty(xtbl),
-                                         pred_fxid->fx_xid,
-                                         &fxid_mrd(pred_fxid)->gw);
+                /* Same algorithm as above for main predecessor entries. */
+                dup_mrd = lpm_rt_iops->fxid_ppal_alloc(sizeof(*dup_mrd),
+                                                       GFP_ATOMIC);
+                if (!dup_mrd) {
+                        lpm_rt_iops->fxid_rm_locked(NULL, xtbl, new_fxid);
+                        lpm_rt_iops->fib_unlock(xtbl, NULL);
+                        return -ENOMEM;
+                }
+
+                dup_mrd->gw = fxid_mrd(pred_fxid)->gw;
+                dup_mrd->common = *pred_fxid;
+                lpm_rt_iops->fxid_replace_locked(xtbl, pred_fxid,
+                                                 &dup_mrd->common);
+
+                lpm_rt_iops->fib_unlock(xtbl, NULL);
+
+                synchronize_rcu();
+                fib_mrd_free(xtbl, pred_fxid);
+                BUG_ON(dup_llpm);
                 break;
         default:
                 BUG();
         }
-        lpm_rt_iops->fib_unlock(xtbl, NULL);
+
+        fib_free_dnf(dnf);
+        return 0;
 }
 
 static int local_newroute(struct xip_ppal_ctx *ctx,
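For readers unfamiliar with this duplicate-replace-reclaim pattern, here is
a hypothetical userspace analogue built on liburcu (link with -lurcu). The
struct entry, tree_slot, read_prefix(), and flush_anchor() names are
invented for illustration and are not XIA APIs; the point is the order of
operations, which mirrors newroute_flush_anchor_locked() above: publish
the duplicate, drop the write lock, and only then wait out the grace
period.

#include <pthread.h>
#include <stdlib.h>
#include <urcu.h>       /* userspace RCU: rcu_read_lock(), synchronize_rcu(), ... */

struct entry {
        int anchor;     /* stand-in for the entry's xdst anchor */
        int prefix;
};

static struct entry *tree_slot; /* a one-node "tree" for brevity */
static pthread_rwlock_t fib_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Reader side: like routing-mechanism code holding an RCU reference. */
static int read_prefix(void)
{
        struct entry *e;
        int prefix;

        rcu_read_lock();
        e = rcu_dereference(tree_slot);
        prefix = e ? e->prefix : -1;
        rcu_read_unlock();
        return prefix;
}

/* Flush tree_slot's "anchor" without holding the write lock across
 * the RCU grace period.
 */
static int flush_anchor(void)
{
        struct entry *old, *dup;

        pthread_rwlock_wrlock(&fib_lock);
        old = tree_slot;

        /* 1. Duplicate the entry; the duplicate gets a fresh anchor. */
        dup = malloc(sizeof(*dup));
        if (!dup) {
                pthread_rwlock_unlock(&fib_lock);
                return -1;      /* like returning -ENOMEM */
        }
        *dup = *old;
        dup->anchor = 0;

        /* 2. Publish the duplicate in place of the old entry. */
        rcu_assign_pointer(tree_slot, dup);

        /* 3. Drop the write lock so other tree users may proceed. */
        pthread_rwlock_unlock(&fib_lock);

        /* 4. Wait out readers that may still see @old, then reclaim it. */
        synchronize_rcu();
        free(old);
        return 0;
}

int main(void)
{
        rcu_register_thread();
        tree_slot = calloc(1, sizeof(*tree_slot));
        read_prefix();
        flush_anchor();
        rcu_unregister_thread();
        return 0;
}

Dropping the lock before synchronize_rcu() is the heart of the fix: the
grace-period wait can no longer nest inside the write lock, so the
interleaving from the commit message cannot arise.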
@@ -111,15 +182,22 @@ static int local_newroute(struct xip_ppal_ctx *ctx,
          * atomically to flush the appropriate anchor.
          */
         rc = tree_fib_newroute_lock(&new_llpm->common, xtbl, cfg, NULL);
-        if (rc) {
-                fib_free_dnf(dnf);
-                fxid_free_norcu(xtbl, &new_llpm->common);
-                return rc;
-        }
+        if (rc)
+                goto unlock_and_free;
 
         /* Flush appropriate anchor and release lock. */
-        newroute_flush_anchor_locked(xtbl, &new_llpm->common, dnf);
+        rc = newroute_flush_anchor_locked(xtbl, &new_llpm->common, dnf);
+        if (rc)
+                goto free;
+
         return 0;
+
+unlock_and_free:
+        lpm_rt_iops->fib_unlock(xtbl, NULL);
+free:
+        fib_free_dnf(dnf);
+        fxid_free_norcu(xtbl, &new_llpm->common);
+        return rc;
 }
 
 static int local_dump_lpm(struct fib_xid *fxid, struct fib_xid_table *xtbl,
@@ -169,15 +247,6 @@ static int local_dump_lpm(struct fib_xid *fxid, struct fib_xid_table *xtbl,
         return -EMSGSIZE;
 }
 
-/* Don't call this function! Use free_fxid instead. */
-static void local_free_lpm(struct fib_xid_table *xtbl, struct fib_xid *fxid)
-{
-        struct fib_xid_lpm_local *llpm = fxid_llpm(fxid);
-
-        xdst_free_anchor(&llpm->anchor);
-        kfree(llpm);
-}
-
 static int main_newroute(struct xip_ppal_ctx *ctx, struct fib_xid_table *xtbl,
                          struct xia_fib_config *cfg)
 {
@@ -208,15 +277,21 @@ static int main_newroute(struct xip_ppal_ctx *ctx, struct fib_xid_table *xtbl,
          * atomically to flush the appropriate anchor.
          */
         rc = tree_fib_newroute_lock(&new_mrd->common, xtbl, cfg, NULL);
-        if (rc) {
-                fib_free_dnf(dnf);
-                fxid_free_norcu(xtbl, &new_mrd->common);
-                return rc;
-        }
+        if (rc)
+                goto dnf;
 
         /* Flush appropriate anchor and release lock. */
-        newroute_flush_anchor_locked(xtbl, &new_mrd->common, dnf);
+        rc = newroute_flush_anchor_locked(xtbl, &new_mrd->common, dnf);
+        if (rc)
+                goto fxid;
+
         return 0;
+
+dnf:
+        fib_free_dnf(dnf);
+fxid:
+        fxid_free_norcu(xtbl, &new_mrd->common);
+        return rc;
 }
 
 static const xia_ppal_all_rt_eops_t lpm_all_rt_eops = {
