Skip to content

Commit 1fd645b

Browse files
lrw514 authored and bluca committed
net/mlx5: fix flow aging race condition
[ upstream commit 820ca73 ] When aging is configured, there is a background thread which queries all the counters in the pool. Meantime, per queue flow insertion/deletion/update changes the counter pool too. It introduces a race condition between resetting counters's in_used and age_idx fields during flow deletion and reading them in the background thread. To resolve it, all key members of counter's struct are placed in a single uint32_t and they are accessed atomically. To avoid the occasional timestamp equalization with age_idx, query_gen_when_free is moved out of the union. The total memory size is kept the same. Fixes: 04a4de7 ("net/mlx5: support flow age action with HWS") Signed-off-by: Rongwei Liu <[email protected]> Acked-by: Dariusz Sosnowski <[email protected]>
1 parent d7ea752 commit 1fd645b

File tree

3 files changed

+82
-55
lines changed

3 files changed

+82
-55
lines changed

drivers/net/mlx5/mlx5_flow_hw.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,7 +1933,7 @@ flow_hw_shared_action_construct(struct rte_eth_dev *dev, uint32_t queue,
19331933
return -1;
19341934
if (action_flags & MLX5_FLOW_ACTION_COUNT) {
19351935
cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
1936-
if (mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &age_cnt, idx) < 0)
1936+
if (mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &age_cnt, idx, 1) < 0)
19371937
return -1;
19381938
flow->cnt_id = age_cnt;
19391939
param->nb_cnts++;
@@ -2321,7 +2321,8 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
23212321
/* Fall-through. */
23222322
case RTE_FLOW_ACTION_TYPE_COUNT:
23232323
cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
2324-
ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, age_idx);
2324+
ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id,
2325+
age_idx, 0);
23252326
if (ret != 0) {
23262327
rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ACTION,
23272328
action, "Failed to allocate flow counter");

drivers/net/mlx5/mlx5_hws_cnt.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ __mlx5_hws_cnt_svc(struct mlx5_dev_ctx_shared *sh,
7272
uint32_t ret __rte_unused;
7373

7474
reset_cnt_num = rte_ring_count(reset_list);
75-
cpool->query_gen++;
7675
mlx5_aso_cnt_query(sh, cpool);
76+
__atomic_store_n(&cpool->query_gen, cpool->query_gen + 1, __ATOMIC_RELEASE);
7777
zcdr.n1 = 0;
7878
zcdu.n1 = 0;
7979
ret = rte_ring_enqueue_zc_burst_elem_start(reuse_list,
@@ -143,14 +143,14 @@ mlx5_hws_aging_check(struct mlx5_priv *priv, struct mlx5_hws_cnt_pool *cpool)
143143
uint32_t nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(cpool);
144144
uint16_t expected1 = HWS_AGE_CANDIDATE;
145145
uint16_t expected2 = HWS_AGE_CANDIDATE_INSIDE_RING;
146-
uint32_t i;
146+
uint32_t i, age_idx, in_use;
147147

148148
cpool->time_of_last_age_check = curr_time;
149149
for (i = 0; i < nb_alloc_cnts; ++i) {
150-
uint32_t age_idx = cpool->pool[i].age_idx;
151150
uint64_t hits;
152151

153-
if (!cpool->pool[i].in_used || age_idx == 0)
152+
mlx5_hws_cnt_get_all(&cpool->pool[i], &in_use, NULL, &age_idx);
153+
if (!in_use || age_idx == 0)
154154
continue;
155155
param = mlx5_ipool_get(age_info->ages_ipool, age_idx);
156156
if (unlikely(param == NULL)) {

drivers/net/mlx5/mlx5_hws_cnt.h

Lines changed: 75 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -42,33 +42,36 @@ struct mlx5_hws_cnt_dcs_mng {
4242
struct mlx5_hws_cnt_dcs dcs[MLX5_HWS_CNT_DCS_NUM];
4343
};
4444

45-
struct mlx5_hws_cnt {
46-
struct flow_counter_stats reset;
47-
bool in_used; /* Indicator whether this counter in used or in pool. */
48-
union {
49-
struct {
50-
uint32_t share:1;
51-
/*
52-
* share will be set to 1 when this counter is used as
53-
* indirect action.
54-
*/
55-
uint32_t age_idx:24;
56-
/*
57-
* When this counter uses for aging, it save the index
58-
* of AGE parameter. For pure counter (without aging)
59-
* this index is zero.
60-
*/
61-
};
62-
/* This struct is only meaningful when user own this counter. */
63-
uint32_t query_gen_when_free;
45+
union mlx5_hws_cnt_state {
46+
uint32_t data;
47+
struct {
48+
uint32_t in_used:1;
49+
/* Indicator whether this counter in used or in pool. */
50+
uint32_t share:1;
51+
/*
52+
* share will be set to 1 when this counter is used as
53+
* indirect action.
54+
*/
55+
uint32_t age_idx:24;
6456
/*
65-
* When PMD own this counter (user put back counter to PMD
66-
* counter pool, i.e), this field recorded value of counter
67-
* pools query generation at time user release the counter.
57+
* When this counter uses for aging, it stores the index
58+
* of AGE parameter. Otherwise, this index is zero.
6859
*/
6960
};
7061
};
7162

63+
struct mlx5_hws_cnt {
64+
struct flow_counter_stats reset;
65+
union mlx5_hws_cnt_state cnt_state;
66+
/* This struct is only meaningful when user own this counter. */
67+
uint32_t query_gen_when_free;
68+
/*
69+
* When PMD own this counter (user put back counter to PMD
70+
* counter pool, i.e), this field recorded value of counter
71+
* pools query generation at time user release the counter.
72+
*/
73+
};
74+
7275
struct mlx5_hws_cnt_raw_data_mng {
7376
struct flow_counter_stats *raw;
7477
struct mlx5_pmd_mr mr;
@@ -179,6 +182,42 @@ mlx5_hws_cnt_id_valid(cnt_id_t cnt_id)
179182
MLX5_INDIRECT_ACTION_TYPE_COUNT ? true : false;
180183
}
181184

185+
static __rte_always_inline void
186+
mlx5_hws_cnt_set_age_idx(struct mlx5_hws_cnt *cnt, uint32_t value)
187+
{
188+
union mlx5_hws_cnt_state cnt_state;
189+
190+
cnt_state.data = __atomic_load_n(&cnt->cnt_state.data, __ATOMIC_ACQUIRE);
191+
cnt_state.age_idx = value;
192+
__atomic_store_n(&cnt->cnt_state.data, cnt_state.data, __ATOMIC_RELEASE);
193+
}
194+
195+
static __rte_always_inline void
196+
mlx5_hws_cnt_set_all(struct mlx5_hws_cnt *cnt, uint32_t in_used, uint32_t share, uint32_t age_idx)
197+
{
198+
union mlx5_hws_cnt_state cnt_state;
199+
200+
cnt_state.in_used = !!in_used;
201+
cnt_state.share = !!share;
202+
cnt_state.age_idx = age_idx;
203+
__atomic_store_n(&cnt->cnt_state.data, cnt_state.data, __ATOMIC_RELAXED);
204+
}
205+
206+
static __rte_always_inline void
207+
mlx5_hws_cnt_get_all(struct mlx5_hws_cnt *cnt, uint32_t *in_used, uint32_t *share,
208+
uint32_t *age_idx)
209+
{
210+
union mlx5_hws_cnt_state cnt_state;
211+
212+
cnt_state.data = __atomic_load_n(&cnt->cnt_state.data, __ATOMIC_ACQUIRE);
213+
if (in_used != NULL)
214+
*in_used = cnt_state.in_used;
215+
if (share != NULL)
216+
*share = cnt_state.share;
217+
if (age_idx != NULL)
218+
*age_idx = cnt_state.age_idx;
219+
}
220+
182221
/**
183222
* Generate Counter id from internal index.
184223
*
@@ -402,8 +441,7 @@ mlx5_hws_cnt_pool_put(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
402441
uint32_t iidx;
403442

404443
iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id);
405-
MLX5_ASSERT(cpool->pool[iidx].in_used);
406-
cpool->pool[iidx].in_used = false;
444+
mlx5_hws_cnt_set_all(&cpool->pool[iidx], 0, 0, 0);
407445
cpool->pool[iidx].query_gen_when_free =
408446
__atomic_load_n(&cpool->query_gen, __ATOMIC_RELAXED);
409447
if (likely(queue != NULL))
@@ -459,7 +497,7 @@ mlx5_hws_cnt_pool_put(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
459497
*/
460498
static __rte_always_inline int
461499
mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
462-
cnt_id_t *cnt_id, uint32_t age_idx)
500+
cnt_id_t *cnt_id, uint32_t age_idx, uint32_t shared)
463501
{
464502
unsigned int ret;
465503
struct rte_ring_zc_data zcdc = {0};
@@ -486,9 +524,7 @@ mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
486524
__hws_cnt_query_raw(cpool, *cnt_id,
487525
&cpool->pool[iidx].reset.hits,
488526
&cpool->pool[iidx].reset.bytes);
489-
MLX5_ASSERT(!cpool->pool[iidx].in_used);
490-
cpool->pool[iidx].in_used = true;
491-
cpool->pool[iidx].age_idx = age_idx;
527+
mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx);
492528
return 0;
493529
}
494530
ret = rte_ring_dequeue_zc_burst_elem_start(qcache, sizeof(cnt_id_t), 1,
@@ -526,10 +562,7 @@ mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
526562
__hws_cnt_query_raw(cpool, *cnt_id, &cpool->pool[iidx].reset.hits,
527563
&cpool->pool[iidx].reset.bytes);
528564
rte_ring_dequeue_zc_elem_finish(qcache, 1);
529-
cpool->pool[iidx].share = 0;
530-
MLX5_ASSERT(!cpool->pool[iidx].in_used);
531-
cpool->pool[iidx].in_used = true;
532-
cpool->pool[iidx].age_idx = age_idx;
565+
mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx);
533566
return 0;
534567
}
535568

@@ -582,32 +615,23 @@ static __rte_always_inline int
582615
mlx5_hws_cnt_shared_get(struct mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id,
583616
uint32_t age_idx)
584617
{
585-
int ret;
586-
uint32_t iidx;
587-
588-
ret = mlx5_hws_cnt_pool_get(cpool, NULL, cnt_id, age_idx);
589-
if (ret != 0)
590-
return ret;
591-
iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id);
592-
cpool->pool[iidx].share = 1;
593-
return 0;
618+
return mlx5_hws_cnt_pool_get(cpool, NULL, cnt_id, age_idx, 1);
594619
}
595620

596621
static __rte_always_inline void
597622
mlx5_hws_cnt_shared_put(struct mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id)
598623
{
599-
uint32_t iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id);
600-
601-
cpool->pool[iidx].share = 0;
602624
mlx5_hws_cnt_pool_put(cpool, NULL, cnt_id);
603625
}
604626

605627
static __rte_always_inline bool
606628
mlx5_hws_cnt_is_shared(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id)
607629
{
608630
uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id);
631+
uint32_t share;
609632

610-
return cpool->pool[iidx].share ? true : false;
633+
mlx5_hws_cnt_get_all(&cpool->pool[iidx], NULL, &share, NULL);
634+
return !!share;
611635
}
612636

613637
static __rte_always_inline void
@@ -616,17 +640,19 @@ mlx5_hws_cnt_age_set(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id,
616640
{
617641
uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id);
618642

619-
MLX5_ASSERT(cpool->pool[iidx].share);
620-
cpool->pool[iidx].age_idx = age_idx;
643+
MLX5_ASSERT(cpool->pool[iidx].cnt_state.share);
644+
mlx5_hws_cnt_set_age_idx(&cpool->pool[iidx], age_idx);
621645
}
622646

623647
static __rte_always_inline uint32_t
624648
mlx5_hws_cnt_age_get(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id)
625649
{
626650
uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id);
651+
uint32_t age_idx, share;
627652

628-
MLX5_ASSERT(cpool->pool[iidx].share);
629-
return cpool->pool[iidx].age_idx;
653+
mlx5_hws_cnt_get_all(&cpool->pool[iidx], NULL, &share, &age_idx);
654+
MLX5_ASSERT(share);
655+
return age_idx;
630656
}
631657

632658
static __rte_always_inline cnt_id_t

0 commit comments

Comments
 (0)