
Commit 823972c

Pavan Nikhilesh authored and Jerin Jacob committed
app/test-eventdev: support measuring DMA adapter latency
Move DMA ops to use mempool to prevent using the same ops before
completion. This also allows us to measure forward latency.

Signed-off-by: Pavan Nikhilesh <[email protected]>
Acked-by: Amit Prakash Shukla <[email protected]>
1 parent 49a22c5 commit 823972c
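
In outline, the patch replaces the per-flow, pre-built DMA ops with ops pulled from a mempool for every burst, so an op is never re-enqueued before its previous copy has completed. Below is a minimal sketch of the producer-side pattern, condensed from dma_adapter_enq_op_fwd() in the diff; BURST_SIZE, the pool, src/dst buffers and the event/op templates are stand-ins for the test's own state, not the exact test code.

#include <rte_event_dma_adapter.h>
#include <rte_mempool.h>
#include <rte_pause.h>

#define BURST_SIZE 16 /* placeholder; the test defines its own burst size */

/* Sketch: build one burst of DMA ops from the mempool and hand it to the adapter. */
static void
enq_dma_burst(uint8_t dev_id, uint8_t port, const struct rte_event *ev_tmpl,
	      const struct rte_event_dma_adapter_op *op_tmpl,
	      struct rte_mempool *pool, uint8_t *src, uint8_t *dst)
{
	struct rte_event_dma_adapter_op *ops[BURST_SIZE];
	struct rte_event evts[BURST_SIZE];
	uint16_t enq;
	int i;

	/* Fresh ops each burst: nothing is reused before completion. */
	if (rte_mempool_get_bulk(pool, (void **)ops, BURST_SIZE) < 0)
		return;

	for (i = 0; i < BURST_SIZE; i++) {
		*ops[i] = *op_tmpl; /* dma_dev_id, vchan, op_mp, flags, nb_src/nb_dst */
		ops[i]->src_dst_seg[0].addr = (rte_iova_t)src;
		ops[i]->src_dst_seg[1].addr = (rte_iova_t)dst;
		ops[i]->src_dst_seg[0].length = RTE_CACHE_LINE_SIZE;
		ops[i]->src_dst_seg[1].length = RTE_CACHE_LINE_SIZE;

		evts[i] = *ev_tmpl;
		evts[i].event_ptr = ops[i];
	}

	/* Retry until the whole burst is accepted by the DMA adapter. */
	enq = rte_event_dma_adapter_enqueue(dev_id, port, evts, BURST_SIZE);
	while (enq < BURST_SIZE) {
		enq += rte_event_dma_adapter_enqueue(dev_id, port,
						     evts + enq, BURST_SIZE - enq);
		rte_pause();
	}
}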


4 files changed, 106 insertions(+), 91 deletions(-)


app/test-eventdev/test_perf_atq.c

Lines changed: 8 additions & 6 deletions
@@ -41,6 +41,7 @@ perf_atq_worker(void *arg, const int enable_fwd_latency)
 	struct rte_event ev;
 	PERF_WORKER_INIT;
 
+	RTE_SET_USED(pe);
 	while (t->done == false) {
 		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
 
@@ -51,14 +52,14 @@ perf_atq_worker(void *arg, const int enable_fwd_latency)
 
 		if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
 		    (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
-			if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
+			if (perf_handle_crypto_ev(&ev))
 				continue;
 		}
 
 		stage = ev.sub_event_type % nb_stages;
 		if (enable_fwd_latency && !prod_timer_type && stage == 0)
-			/* first stage in pipeline, mark ts to compute fwd latency */
-			perf_mark_fwd_latency(ev.event_ptr);
+			/* first stage in pipeline, mark ts to compute fwd latency */
+			perf_mark_fwd_latency(prod_type, &ev);
 
 		/* last stage in pipeline */
 		if (unlikely(stage == laststage)) {
@@ -91,6 +92,7 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
 	PERF_WORKER_INIT;
 	uint16_t i;
 
+	RTE_SET_USED(pe);
 	while (t->done == false) {
 		nb_rx = rte_event_dequeue_burst(dev, port, ev, BURST_SIZE, 0);
 
@@ -102,7 +104,7 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
 		for (i = 0; i < nb_rx; i++) {
 			if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
 			    (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
-				if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
+				if (perf_handle_crypto_ev(&ev[i]))
					continue;
			}
 
@@ -112,7 +114,7 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
 				/* first stage in pipeline.
 				 * mark time stamp to compute fwd latency
 				 */
-				perf_mark_fwd_latency(ev[i].event_ptr);
+				perf_mark_fwd_latency(prod_type, &ev[i]);
 			}
 			/* last stage in pipeline */
 			if (unlikely(stage == laststage)) {
@@ -167,7 +169,7 @@ perf_atq_worker_vector(void *arg, const int enable_fwd_latency)
 		stage = ev.sub_event_type % nb_stages;
 		/* First q in pipeline, mark timestamp to compute fwd latency */
 		if (enable_fwd_latency && !prod_timer_type && stage == 0)
-			perf_mark_fwd_latency(pe);
+			pe->timestamp = rte_get_timer_cycles();
 
 		/* Last stage in pipeline */
 		if (unlikely(stage == laststage)) {

app/test-eventdev/test_perf_common.c

Lines changed: 54 additions & 52 deletions
@@ -562,37 +562,76 @@ crypto_adapter_enq_op_fwd(struct prod_data *p)
 static inline void
 dma_adapter_enq_op_fwd(struct prod_data *p)
 {
+	struct rte_event_dma_adapter_op *ops[BURST_SIZE] = {NULL};
 	struct test_perf *t = p->t;
 	const uint32_t nb_flows = t->nb_flows;
 	const uint64_t nb_pkts = t->nb_pkts;
-	struct rte_event_dma_adapter_op *op;
+	struct rte_event_dma_adapter_op op;
+	struct rte_event evts[BURST_SIZE];
 	const uint8_t dev_id = p->dev_id;
 	struct evt_options *opt = t->opt;
 	const uint8_t port = p->port_id;
 	uint32_t flow_counter = 0;
+	struct rte_mempool *pool;
 	struct rte_event ev;
+	uint8_t *src, *dst;
 	uint64_t count = 0;
+	uint32_t flow;
+	int i;
 
+	pool = t->pool;
 	if (opt->verbose_level > 1)
 		printf("%s(): lcore %d port %d queue %d dma_dev_id %u dma_dev_vchan_id %u\n",
		       __func__, rte_lcore_id(), port, p->queue_id,
		       p->da.dma_dev_id, p->da.vchan_id);
 
+	src = rte_zmalloc(NULL, nb_flows * RTE_CACHE_LINE_SIZE, RTE_CACHE_LINE_SIZE);
+	dst = rte_zmalloc(NULL, nb_flows * RTE_CACHE_LINE_SIZE, RTE_CACHE_LINE_SIZE);
+	if (!src || !dst) {
+		rte_free(src);
+		rte_free(dst);
+		evt_err("Failed to alloc memory for src/dst");
+		return;
+	}
+
 	ev.event = 0;
 	ev.op = RTE_EVENT_OP_NEW;
 	ev.queue_id = p->queue_id;
 	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
 	ev.event_type = RTE_EVENT_TYPE_CPU;
 
+	op.dma_dev_id = p->da.dma_dev_id;
+	op.vchan = p->da.vchan_id;
+	op.op_mp = pool;
+	op.flags = RTE_DMA_OP_FLAG_SUBMIT;
+	op.nb_src = 1;
+	op.nb_dst = 1;
+
 	while (count < nb_pkts && t->done == false) {
-		op = p->da.dma_op[flow_counter++ % nb_flows];
-		ev.event_ptr = op;
+		if (rte_mempool_get_bulk(pool, (void **)ops, BURST_SIZE) < 0)
+			continue;
+		for (i = 0; i < BURST_SIZE; i++) {
+			flow = flow_counter++ % nb_flows;
+			*ops[i] = op;
+			ops[i]->src_dst_seg[0].addr = (rte_iova_t)&src[flow * RTE_CACHE_LINE_SIZE];
+			ops[i]->src_dst_seg[1].addr = (rte_iova_t)&dst[flow * RTE_CACHE_LINE_SIZE];
+			ops[i]->src_dst_seg[0].length = RTE_CACHE_LINE_SIZE;
+			ops[i]->src_dst_seg[1].length = RTE_CACHE_LINE_SIZE;
+
+			evts[i].event = ev.event;
+			evts[i].flow_id = flow;
+			evts[i].event_ptr = ops[i];
+		}
 
-		while (rte_event_dma_adapter_enqueue(dev_id, port, &ev, 1) != 1 &&
-				t->done == false)
+		i = rte_event_dma_adapter_enqueue(dev_id, port, evts, BURST_SIZE);
+		while (i < BURST_SIZE) {
+			i += rte_event_dma_adapter_enqueue(dev_id, port, evts + i, BURST_SIZE - i);
+			if (t->done)
+				break;
			rte_pause();
+		}
 
-		count++;
+		count += BURST_SIZE;
 	}
 }
 

@@ -1489,8 +1528,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 		}
 	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
 		struct rte_event_port_conf conf = *port_conf;
-		struct rte_event_dma_adapter_op *op;
-		struct rte_mempool *pool = t->pool;
 		uint8_t dma_dev_id = 0;
 		uint16_t vchan_id = 0;
 
@@ -1503,39 +1540,18 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 		prod = 0;
 		for (; port < perf_nb_event_ports(opt); port++) {
 			struct prod_data *p = &t->prod[port];
-			uint32_t flow_id;
 
 			p->dev_id = opt->dev_id;
 			p->port_id = port;
 			p->queue_id = prod * stride;
 			p->da.dma_dev_id = dma_dev_id;
 			p->da.vchan_id = vchan_id;
-			p->da.dma_op = rte_zmalloc_socket(NULL, sizeof(void *) * t->nb_flows,
-							  RTE_CACHE_LINE_SIZE, opt->socket_id);
-
 			p->t = t;
 
 			ret = perf_event_dma_adapter_setup(t, p);
 			if (ret)
 				return ret;
 
-			for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
-				rte_mempool_get(t->da_op_pool, (void **)&op);
-
-				op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->src_dst_seg[0].length = 1024;
-				op->src_dst_seg[1].length = 1024;
-				op->nb_src = 1;
-				op->nb_dst = 1;
-				op->flags = RTE_DMA_OP_FLAG_SUBMIT;
-				op->op_mp = t->da_op_pool;
-				op->dma_dev_id = dma_dev_id;
-				op->vchan = vchan_id;
-
-				p->da.dma_op[flow_id] = op;
-			}
-
 			conf.event_port_cfg |=
 				RTE_EVENT_PORT_CFG_HINT_PRODUCER |
 				RTE_EVENT_PORT_CFG_HINT_CONSUMER;
@@ -2011,12 +2027,11 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
 		.direction = RTE_DMA_DIR_MEM_TO_MEM,
 		.nb_desc = 1024,
 	};
-	struct test_perf *t = evt_test_priv(test);
 	uint8_t dma_dev_count, dma_dev_id = 0;
-	unsigned int elt_size;
 	int vchan_id;
 	int ret;
 
+	RTE_SET_USED(test);
 	if (opt->prod_type != EVT_PROD_TYPE_EVENT_DMA_ADPTR)
 		return 0;
 
@@ -2026,14 +2041,6 @@
 		return -ENODEV;
 	}
 
-	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
-	t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256,
-					   0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
-	if (t->da_op_pool == NULL) {
-		evt_err("Failed to create dma op pool");
-		return -ENOMEM;
-	}
-
 	ret = rte_dma_configure(dma_dev_id, &conf);
 	if (ret) {
 		evt_err("Failed to configure dma dev (%u)", dma_dev_id);
@@ -2052,7 +2059,6 @@
 	return 0;
 err:
 	rte_dma_close(dma_dev_id);
-	rte_mempool_free(t->da_op_pool);
 
 	return ret;
 }
@@ -2069,16 +2075,6 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt)
 
 	for (port = t->nb_workers; port < perf_nb_event_ports(opt); port++) {
 		struct prod_data *p = &t->prod[port];
-		struct rte_event_dma_adapter_op *op;
-		uint32_t flow_id;
-
-		for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
-			op = p->da.dma_op[flow_id];
-
-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr);
-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr);
-			rte_mempool_put(op->op_mp, op);
-		}
 
 		rte_event_dma_adapter_vchan_del(TEST_PERF_DA_ID, p->da.dma_dev_id, p->da.vchan_id);
 	}
@@ -2087,8 +2083,6 @@
 
 	rte_dma_stop(dma_dev_id);
 	rte_dma_close(dma_dev_id);
-
-	rte_mempool_free(t->da_op_pool);
 }
 
 int
@@ -2117,6 +2111,14 @@ perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
 					     0, NULL, NULL,
 					     NULL, /* obj constructor */
 					     NULL, opt->socket_id, 0); /* flags */
+	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
+		t->pool = rte_mempool_create(test->name, /* mempool name */
+					     opt->pool_sz, /* number of elements*/
+					     sizeof(struct rte_event_dma_adapter_op) +
+						     (sizeof(struct rte_dma_sge) * 2),
+					     cache_sz, /* cache size*/
+					     0, NULL, NULL, NULL, /* obj constructor */
+					     NULL, opt->socket_id, 0); /* flags */
 	} else {
 		t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
 						  opt->pool_sz, /* number of elements*/
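
The dedicated da_op_pool is gone; as the hunk above shows, perf_mempool_setup() now sizes t->pool for DMA adapter ops directly. The sizing rule is that src_dst_seg[] is a flexible array at the end of struct rte_event_dma_adapter_op, so each element must carry room for one source and one destination rte_dma_sge. A hedged sketch of that calculation, with the pool name and counts as placeholders:

#include <rte_event_dma_adapter.h>
#include <rte_mempool.h>

static struct rte_mempool *
dma_op_pool_sketch(unsigned int nb_ops, unsigned int cache_sz, int socket_id)
{
	/* One op plus its two scatter-gather entries (1 src + 1 dst). */
	size_t elt_size = sizeof(struct rte_event_dma_adapter_op) +
			  sizeof(struct rte_dma_sge) * 2;

	return rte_mempool_create("dma_op_pool_sketch", nb_ops, elt_size,
				  cache_sz, 0, NULL, NULL, NULL, NULL,
				  socket_id, 0);
}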

app/test-eventdev/test_perf_common.h

Lines changed: 36 additions & 27 deletions
@@ -48,7 +48,6 @@ struct crypto_adptr_data {
 struct dma_adptr_data {
 	uint8_t dma_dev_id;
 	uint16_t vchan_id;
-	void **dma_op;
 };
 
 struct __rte_cache_aligned prod_data {
@@ -81,7 +80,6 @@ struct __rte_cache_aligned test_perf {
 	struct rte_mempool *ca_sess_pool;
 	struct rte_mempool *ca_asym_sess_pool;
 	struct rte_mempool *ca_vector_pool;
-	struct rte_mempool *da_op_pool;
 };
 
 struct __rte_cache_aligned perf_elt {
@@ -120,38 +118,46 @@ struct __rte_cache_aligned perf_elt {
 			rte_lcore_id(), dev, port)
 
 static __rte_always_inline void
-perf_mark_fwd_latency(struct perf_elt *const pe)
+perf_mark_fwd_latency(enum evt_prod_type prod_type, struct rte_event *const ev)
 {
-	pe->timestamp = rte_get_timer_cycles();
+	struct perf_elt *pe;
+
+	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
+		struct rte_crypto_op *op = ev->event_ptr;
+		struct rte_mbuf *m;
+
+		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+			if (op->sym->m_dst == NULL)
+				m = op->sym->m_src;
+			else
+				m = op->sym->m_dst;
+
+			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
+		} else {
+			pe = RTE_PTR_ADD(op->asym->modex.result.data,
+					 op->asym->modex.result.length);
+		}
+		pe->timestamp = rte_get_timer_cycles();
+	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
+		struct rte_event_dma_adapter_op *op = ev->event_ptr;
+
+		op->user_meta = rte_get_timer_cycles();
+	} else {
+		pe = ev->event_ptr;
+		pe->timestamp = rte_get_timer_cycles();
+	}
 }
 
 static __rte_always_inline int
-perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency)
+perf_handle_crypto_ev(struct rte_event *ev)
 {
 	struct rte_crypto_op *op = ev->event_ptr;
-	struct rte_mbuf *m;
-
 
 	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
 		rte_crypto_op_free(op);
 		return op->status;
 	}
 
-	/* Forward latency not enabled - perf data will not be accessed */
-	if (!enable_fwd_latency)
-		return 0;
-
-	/* Get pointer to perf data */
-	if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
-		if (op->sym->m_dst == NULL)
-			m = op->sym->m_src;
-		else
-			m = op->sym->m_dst;
-		*pe = rte_pktmbuf_mtod(m, struct perf_elt *);
-	} else {
-		*pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length);
-	}
-
 	return 0;
 }
 
@@ -243,8 +249,6 @@ perf_process_last_stage(struct rte_mempool *const pool, enum evt_prod_type prod_
 			to_free_in_bulk = op->asym->modex.result.data;
 		}
 		rte_crypto_op_free(op);
-	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
-		return count;
 	} else {
 		to_free_in_bulk = ev->event_ptr;
 	}
@@ -263,7 +267,7 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_ty
 			       struct rte_event *const ev, struct worker_data *const w,
 			       void *bufs[], int const buf_sz, uint8_t count)
 {
-	uint64_t latency;
+	uint64_t latency, tstamp;
 	struct perf_elt *pe;
 	void *to_free_in_bulk;
 
@@ -290,15 +294,20 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_ty
 					 op->asym->modex.result.length);
 			to_free_in_bulk = op->asym->modex.result.data;
 		}
+		tstamp = pe->timestamp;
 		rte_crypto_op_free(op);
 	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
-		return count;
+		struct rte_event_dma_adapter_op *op = ev->event_ptr;
+
+		to_free_in_bulk = op;
+		tstamp = op->user_meta;
 	} else {
 		pe = ev->event_ptr;
+		tstamp = pe->timestamp;
 		to_free_in_bulk = pe;
 	}
 
-	latency = rte_get_timer_cycles() - pe->timestamp;
+	latency = rte_get_timer_cycles() - tstamp;
 	w->latency += latency;
 
 	bufs[count++] = to_free_in_bulk;
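
For DMA events there is no perf_elt embedded in a buffer, so the patch carries the first-stage timestamp in the op's user_meta field and reads it back at the last stage. A condensed sketch of that path, assuming the hypothetical helper names below and a per-worker latency accumulator like the one in the hunks above:

#include <rte_cycles.h>
#include <rte_event_dma_adapter.h>
#include <rte_eventdev.h>
#include <rte_mempool.h>

/* First pipeline stage: stamp the op itself (what perf_mark_fwd_latency() does for DMA). */
static inline void
dma_mark_fwd_latency(struct rte_event *ev)
{
	struct rte_event_dma_adapter_op *op = ev->event_ptr;

	op->user_meta = rte_get_timer_cycles();
}

/*
 * Last pipeline stage: accumulate the forward latency and return the op to
 * its pool (the real code batches the frees through bufs[]).
 */
static inline void
dma_last_stage_latency(struct rte_event *ev, uint64_t *worker_latency)
{
	struct rte_event_dma_adapter_op *op = ev->event_ptr;

	*worker_latency += rte_get_timer_cycles() - op->user_meta;
	rte_mempool_put(op->op_mp, op);
}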
