Commit e634a59

nithind1988 authored and Jerin Jacob committed
net/cnxk: support Tx multi-segment in CN20K
Add Tx multi-seg support in scalar for cn20k.

Signed-off-by: Nithin Dabilpuram <[email protected]>
Signed-off-by: Jerin Jacob <[email protected]>
Signed-off-by: Rahul Bhansali <[email protected]>
Signed-off-by: Pavan Nikhilesh <[email protected]>
1 parent 006c1da commit e634a59

File tree

1 file changed: +347 −5 lines changed


drivers/net/cnxk/cn20k_tx.h

Lines changed: 347 additions & 5 deletions
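For context on the SG layout the diff relies on: the scalar path packs up to three per-segment lengths into one SG subdescriptor word at 16-bit offsets (the l_sg.u |= dlen << (segs << 4) updates below), with the matching invert-DF bits at bit 55 plus the segment index (prefree << (segs + 55)). Below is a minimal standalone sketch, not part of the commit, of that length packing; the field offsets are inferred from those shifts rather than from hardware documentation.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sg_u = 0;   /* stand-in for l_sg.u in the diff below */
	uint64_t segs = 0;   /* number of lengths already packed into this SG word */
	const uint16_t lens[3] = {1500, 256, 64};

	for (int i = 0; i < 3; i++) {
		/* Same packing as the diff: length i lands in bits [16*i + 15 : 16*i] */
		sg_u |= ((uint64_t)lens[i] << (segs << 4));
		segs++;
	}
	/* Prints sg_u = 0x00000040010005dc (segs = 3) */
	printf("sg_u = 0x%016llx (segs = %llu)\n",
	       (unsigned long long)sg_u, (unsigned long long)segs);
	return 0;
}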
@@ -862,6 +862,183 @@ cn20k_nix_xmit_prepare_tstamp(struct cn20k_eth_txq *txq, uintptr_t lmt_addr,
 	}
 }
 
+static __rte_always_inline uint16_t
+cn20k_nix_prepare_mseg(struct cn20k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
+		       uint64_t *cmd, const uint16_t flags)
+{
+	uint64_t prefree = 0, aura0, aura, nb_segs, segdw;
+	struct nix_send_hdr_s *send_hdr;
+	union nix_send_sg_s *sg, l_sg;
+	union nix_send_sg2_s l_sg2;
+	struct rte_mbuf *cookie;
+	struct rte_mbuf *m_next;
+	uint8_t off, is_sg2;
+	uint64_t len, dlen;
+	uint64_t ol_flags;
+	uint64_t *slist;
+
+	send_hdr = (struct nix_send_hdr_s *)cmd;
+
+	if (flags & NIX_TX_NEED_EXT_HDR)
+		off = 2;
+	else
+		off = 0;
+
+	sg = (union nix_send_sg_s *)&cmd[2 + off];
+	len = send_hdr->w0.total;
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
+		ol_flags = m->ol_flags;
+
+	/* Start from second segment, first segment is already there */
+	dlen = m->data_len;
+	is_sg2 = 0;
+	l_sg.u = sg->u;
+	/* Clear l_sg.u first seg length that might be stale from vector path */
+	l_sg.u &= ~0xFFFFUL;
+	l_sg.u |= dlen;
+	len -= dlen;
+	nb_segs = m->nb_segs - 1;
+	m_next = m->next;
+	m->next = NULL;
+	m->nb_segs = 1;
+	slist = &cmd[3 + off + 1];
+
+	cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
+	/* Set invert df if buffer is not to be freed by H/W */
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+		aura = send_hdr->w0.aura;
+		prefree = cn20k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
+		send_hdr->w0.aura = aura;
+		l_sg.i1 = prefree;
+	}
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	/* Mark mempool object as "put" since it is freed by NIX */
+	if (!prefree)
+		RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
+	rte_io_wmb();
+#else
+	RTE_SET_USED(cookie);
+#endif
+
+	/* Quickly handle single segmented packets. With this if-condition
+	 * compiler will completely optimize out the below do-while loop
+	 * from the Tx handler when NIX_TX_MULTI_SEG_F offload is not set.
+	 */
+	if (!(flags & NIX_TX_MULTI_SEG_F))
+		goto done;
+
+	aura0 = send_hdr->w0.aura;
+	m = m_next;
+	if (!m)
+		goto done;
+
+	/* Fill mbuf segments */
+	do {
+		uint64_t iova;
+
+		/* Save the current mbuf properties. These can get cleared in
+		 * cnxk_nix_prefree_seg()
+		 */
+		m_next = m->next;
+		iova = rte_mbuf_data_iova(m);
+		dlen = m->data_len;
+		len -= dlen;
+
+		nb_segs--;
+		aura = aura0;
+		prefree = 0;
+
+		m->next = NULL;
+
+		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
+		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+			aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
+			prefree = cn20k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
+			is_sg2 = aura != aura0 && !prefree;
+		}
+
+		if (unlikely(is_sg2)) {
+			/* This mbuf belongs to a different pool and
+			 * DF bit is not to be set, so use SG2 subdesc
+			 * so that it is freed to the appropriate pool.
+			 */
+
+			/* Write the previous descriptor out */
+			sg->u = l_sg.u;
+
+			/* If the current SG subdc does not have any
+			 * iovas in it, then the SG2 subdc can overwrite
+			 * that SG subdc.
+			 *
+			 * If the current SG subdc has 2 iovas in it, then
+			 * the current iova word should be left empty.
+			 */
+			slist += (-1 + (int)l_sg.segs);
+			sg = (union nix_send_sg_s *)slist;
+
+			l_sg2.u = l_sg.u & 0xC00000000000000; /* LD_TYPE */
+			l_sg2.subdc = NIX_SUBDC_SG2;
+			l_sg2.aura = aura;
+			l_sg2.seg1_size = dlen;
+			l_sg.u = l_sg2.u;
+
+			slist++;
+			*slist = iova;
+			slist++;
+		} else {
+			*slist = iova;
+			/* Set invert df if buffer is not to be freed by H/W */
+			l_sg.u |= (prefree << (l_sg.segs + 55));
+			/* Set the segment length */
+			l_sg.u |= ((uint64_t)dlen << (l_sg.segs << 4));
+			l_sg.segs += 1;
+			slist++;
+		}
+
+		if ((is_sg2 || l_sg.segs > 2) && nb_segs) {
+			sg->u = l_sg.u;
+			/* Next SG subdesc */
+			sg = (union nix_send_sg_s *)slist;
+			l_sg.u &= 0xC00000000000000; /* LD_TYPE */
+			l_sg.subdc = NIX_SUBDC_SG;
+			slist++;
+		}
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+		/* Mark mempool object as "put" since it is freed by NIX */
+		if (!prefree)
+			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
+#else
+		RTE_SET_USED(cookie);
+#endif
+		m = m_next;
+	} while (nb_segs);
+
+done:
+	/* Add remaining bytes of security data to last seg */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD && len) {
+		uint8_t shft = (l_sg.subdc == NIX_SUBDC_SG) ? ((l_sg.segs - 1) << 4) : 0;
+
+		dlen = ((l_sg.u >> shft) & 0xFFFFULL) + len;
+		l_sg.u = l_sg.u & ~(0xFFFFULL << shft);
+		l_sg.u |= dlen << shft;
+	}
+
+	/* Write the last subdc out */
+	sg->u = l_sg.u;
+
+	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
+	/* Roundup extra dwords to multiple of 2 */
+	segdw = (segdw >> 1) + (segdw & 0x1);
+	/* Default dwords */
+	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+	send_hdr->w0.sizem1 = segdw - 1;
+
+	return segdw;
+}
+
 static __rte_always_inline uint16_t
 cn20k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts, uint16_t pkts,
 		    uint64_t *cmd, const uint16_t flags)
@@ -1009,6 +1186,170 @@ cn20k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts, uin
 	return pkts;
 }
 
+static __rte_always_inline uint16_t
+cn20k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts, uint16_t pkts,
+			 uint64_t *cmd, const uint16_t flags)
+{
+	struct cn20k_eth_txq *txq = tx_queue;
+	uintptr_t pa0, pa1, lbase = txq->lmt_base;
+	const rte_iova_t io_addr = txq->io_addr;
+	uint16_t segdw, lmt_id, burst, left, i;
+	struct rte_mbuf *extm = NULL;
+	uint8_t lnum, c_lnum, c_loff;
+	uintptr_t c_lbase = lbase;
+	uint64_t lso_tun_fmt = 0;
+	uint64_t mark_fmt = 0;
+	uint8_t mark_flag = 0;
+	uint64_t data0, data1;
+	rte_iova_t c_io_addr;
+	uint8_t shft, c_shft;
+	__uint128_t data128;
+	uint16_t c_lmt_id;
+	uint64_t sa_base;
+	uintptr_t laddr;
+	bool sec;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
+
+	if (!(flags & NIX_TX_VWQE_F))
+		NIX_XMIT_FC_CHECK_RETURN(txq, pkts);
+
+	/* Get cmd skeleton */
+	cn20k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
+
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		lso_tun_fmt = txq->lso_tun_fmt;
+
+	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+		mark_fmt = txq->mark_fmt;
+		mark_flag = txq->mark_flag;
+	}
+
+	/* Get LMT base address and LMT ID as lcore id */
+	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
+		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
+		c_io_addr = txq->cpt_io_addr;
+		sa_base = txq->sa_base;
+	}
+
+	left = pkts;
+again:
+	burst = left > 32 ? 32 : left;
+	shft = 16;
+	data128 = 0;
+
+	lnum = 0;
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
+		c_lnum = 0;
+		c_loff = 0;
+		c_shft = 16;
+	}
+
+	for (i = 0; i < burst; i++) {
+		cn20k_nix_tx_mbuf_validate(tx_pkts[i], flags);
+
+		/* Perform header writes for TSO, barrier at
+		 * lmt steorl will suffice.
+		 */
+		if (flags & NIX_TX_OFFLOAD_TSO_F)
+			cn20k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+
+		cn20k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt, &sec,
+				       mark_flag, mark_fmt);
+
+		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
+
+		/* Prepare CPT instruction and get nixtx addr */
+		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+			cn20k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase, &c_lnum, &c_loff,
+					   &c_shft, sa_base, flags);
+
+		/* Move NIX desc to LMT/NIXTX area */
+		cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+		/* Store sg list directly on lmt line */
+		segdw = cn20k_nix_prepare_mseg(txq, tx_pkts[i], &extm, (uint64_t *)laddr, flags);
+		cn20k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags, segdw, flags);
+		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
+			lnum++;
+			data128 |= (((__uint128_t)(segdw - 1)) << shft);
+			shft += 3;
+		}
+	}
+
+	if ((flags & NIX_TX_VWQE_F) && !(ws[3] & BIT_ULL(35)))
+		ws[3] = roc_sso_hws_head_wait(ws[0]);
+
+	left -= burst;
+	tx_pkts += burst;
+
+	/* Submit CPT instructions if any */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
+		uint16_t sec_pkts = ((c_lnum << 1) + c_loff);
+
+		/* Reduce pkts to be sent to CPT */
+		burst -= sec_pkts;
+		if (flags & NIX_TX_VWQE_F)
+			cn20k_nix_vwqe_wait_fc(txq, sec_pkts);
+		cn20k_nix_sec_fc_wait(txq, sec_pkts);
+		cn20k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff, c_shft);
+	}
+
+	data0 = (uint64_t)data128;
+	data1 = (uint64_t)(data128 >> 64);
+	/* Make data0 similar to data1 */
+	data0 >>= 16;
+	/* Trigger LMTST */
+	if (burst > 16) {
+		pa0 = io_addr | (data0 & 0x7) << 4;
+		data0 &= ~0x7ULL;
+		/* Move lmtst1..15 sz to bits 63:19 */
+		data0 <<= 16;
+		data0 |= (15ULL << 12);
+		data0 |= (uint64_t)lmt_id;
+
+		if (flags & NIX_TX_VWQE_F)
+			cn20k_nix_vwqe_wait_fc(txq, 16);
+		/* STEOR0 */
+		roc_lmt_submit_steorl(data0, pa0);
+
+		pa1 = io_addr | (data1 & 0x7) << 4;
+		data1 &= ~0x7ULL;
+		data1 <<= 16;
+		data1 |= ((uint64_t)(burst - 17)) << 12;
+		data1 |= (uint64_t)(lmt_id + 16);
+
+		if (flags & NIX_TX_VWQE_F)
+			cn20k_nix_vwqe_wait_fc(txq, burst - 16);
+		/* STEOR1 */
+		roc_lmt_submit_steorl(data1, pa1);
+	} else if (burst) {
+		pa0 = io_addr | (data0 & 0x7) << 4;
+		data0 &= ~0x7ULL;
+		/* Move lmtst1..15 sz to bits 63:19 */
+		data0 <<= 16;
+		data0 |= ((burst - 1ULL) << 12);
+		data0 |= (uint64_t)lmt_id;
+
+		if (flags & NIX_TX_VWQE_F)
+			cn20k_nix_vwqe_wait_fc(txq, burst);
+		/* STEOR0 */
+		roc_lmt_submit_steorl(data0, pa0);
+	}
+
+	rte_io_wmb();
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena) {
+		cn20k_nix_free_extmbuf(extm);
+		extm = NULL;
+	}
+
+	if (left)
+		goto again;
+
+	return pkts;
+}
+
 #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
 #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
@@ -1213,10 +1554,12 @@ NIX_TX_FASTPATH_MODES
 uint16_t __rte_noinline __rte_hot fn(void *tx_queue, struct rte_mbuf **tx_pkts, \
 				     uint16_t pkts) \
 { \
-	RTE_SET_USED(tx_queue); \
-	RTE_SET_USED(tx_pkts); \
-	RTE_SET_USED(pkts); \
-	return 0; \
+	uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+	/* For TSO inner checksum is a must */ \
+	if (((flags) & NIX_TX_OFFLOAD_TSO_F) && !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
+		return 0; \
+	return cn20k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, cmd, \
+					flags | NIX_TX_MULTI_SEG_F); \
 }
 
 #define NIX_TX_XMIT_VEC(fn, sz, flags) \
@@ -1246,5 +1589,4 @@ uint16_t __rte_noinline __rte_hot cn20k_nix_xmit_pkts_all_offload(void *tx_queue
 uint16_t __rte_noinline __rte_hot cn20k_nix_xmit_pkts_vec_all_offload(void *tx_queue,
 								       struct rte_mbuf **tx_pkts,
 								       uint16_t pkts);
-
 #endif /* __CN20K_TX_H__ */
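For reference, a small standalone sketch, not part of the commit, that reproduces the descriptor-size math at the end of cn20k_nix_prepare_mseg() for the simple case of no extension header, no SG2 subdescriptor and no timestamp; the cmd[] layout used here (SG word at cmd[2], first iova at cmd[3], one dword per extra iova) is an assumption read off the diff above.

#include <stdint.h>
#include <stdio.h>

/* Assumed layout for the simple case: cmd[2] = SG subdesc, cmd[3] = first
 * segment iova, then one dword per additional segment iova. Valid for up to
 * three segments, after which the diff starts a fresh SG subdescriptor.
 */
static unsigned int nix_mseg_segdw(unsigned int nb_segs)
{
	const unsigned int off = 0;                    /* no NIX_TX_NEED_EXT_HDR */
	unsigned int sg_dwords = 2 + (nb_segs - 1);    /* SG word + per-segment iovas */
	unsigned int segdw;

	/* Round the SG area up to 16B units, as the diff does */
	segdw = (sg_dwords >> 1) + (sg_dwords & 0x1);
	/* Header unit (plus ext header / timestamp units when those flags are set) */
	segdw += (off >> 1) + 1;
	return segdw;                                  /* send_hdr->w0.sizem1 = segdw - 1 */
}

int main(void)
{
	for (unsigned int segs = 1; segs <= 3; segs++)
		printf("%u segment(s): segdw = %u, sizem1 = %u\n",
		       segs, nix_mseg_segdw(segs), nix_mseg_segdw(segs) - 1);
	return 0;
}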
