@@ -862,6 +862,183 @@ cn20k_nix_xmit_prepare_tstamp(struct cn20k_eth_txq *txq, uintptr_t lmt_addr,
 	}
 }
 
+static __rte_always_inline uint16_t
+cn20k_nix_prepare_mseg(struct cn20k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
+		       uint64_t *cmd, const uint16_t flags)
+{
+	uint64_t prefree = 0, aura0, aura, nb_segs, segdw;
+	struct nix_send_hdr_s *send_hdr;
+	union nix_send_sg_s *sg, l_sg;
+	union nix_send_sg2_s l_sg2;
+	struct rte_mbuf *cookie;
+	struct rte_mbuf *m_next;
+	uint8_t off, is_sg2;
+	uint64_t len, dlen;
+	uint64_t ol_flags;
+	uint64_t *slist;
+
+	send_hdr = (struct nix_send_hdr_s *)cmd;
+
+	if (flags & NIX_TX_NEED_EXT_HDR)
+		off = 2;
+	else
+		off = 0;
+
+	sg = (union nix_send_sg_s *)&cmd[2 + off];
+	len = send_hdr->w0.total;
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
+		ol_flags = m->ol_flags;
+
+	/* Start from second segment, first segment is already there */
+	dlen = m->data_len;
+	is_sg2 = 0;
+	l_sg.u = sg->u;
+	/* Clear l_sg.u first seg length that might be stale from vector path */
+	l_sg.u &= ~0xFFFFUL;
+	l_sg.u |= dlen;
+	len -= dlen;
+	nb_segs = m->nb_segs - 1;
+	m_next = m->next;
+	m->next = NULL;
+	m->nb_segs = 1;
+	slist = &cmd[3 + off + 1];
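+	/* slist now points at the second iova slot; the first segment's iova
+	 * is already in place from the single-seg prepare step.
+	 */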
+
+	cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
+	/* Set invert df if buffer is not to be freed by H/W */
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+		aura = send_hdr->w0.aura;
+		prefree = cn20k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
+		send_hdr->w0.aura = aura;
+		l_sg.i1 = prefree;
+	}
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	/* Mark mempool object as "put" since it is freed by NIX */
+	if (!prefree)
+		RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
+	rte_io_wmb();
+#else
+	RTE_SET_USED(cookie);
+#endif
+
+	/* Quickly handle single segmented packets. With this if-condition
+	 * compiler will completely optimize out the below do-while loop
+	 * from the Tx handler when NIX_TX_MULTI_SEG_F offload is not set.
+	 */
+	if (!(flags & NIX_TX_MULTI_SEG_F))
+		goto done;
+
+	aura0 = send_hdr->w0.aura;
+	m = m_next;
+	if (!m)
+		goto done;
+
+	/* Fill mbuf segments */
+	do {
+		uint64_t iova;
+
+		/* Save the current mbuf properties. These can get cleared in
+		 * cnxk_nix_prefree_seg()
+		 */
+		m_next = m->next;
+		iova = rte_mbuf_data_iova(m);
+		dlen = m->data_len;
+		len -= dlen;
+
+		nb_segs--;
+		aura = aura0;
+		prefree = 0;
+
+		m->next = NULL;
+
+		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
+		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+			aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
+			prefree = cn20k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
+			is_sg2 = aura != aura0 && !prefree;
+		}
+
+		if (unlikely(is_sg2)) {
+			/* This mbuf belongs to a different pool and
+			 * DF bit is not to be set, so use SG2 subdesc
+			 * so that it is freed to the appropriate pool.
+			 */
+
+			/* Write the previous descriptor out */
+			sg->u = l_sg.u;
+
+			/* If the current SG subdc does not have any
+			 * iovas in it, then the SG2 subdc can overwrite
+			 * that SG subdc.
+			 *
+			 * If the current SG subdc has 2 iovas in it, then
+			 * the current iova word should be left empty.
+			 */
+			slist += (-1 + (int)l_sg.segs);
+			sg = (union nix_send_sg_s *)slist;
+
+			l_sg2.u = l_sg.u & 0xC00000000000000; /* LD_TYPE */
+			l_sg2.subdc = NIX_SUBDC_SG2;
+			l_sg2.aura = aura;
+			l_sg2.seg1_size = dlen;
+			l_sg.u = l_sg2.u;
+
+			slist++;
+			*slist = iova;
+			slist++;
+		} else {
+			*slist = iova;
+			/* Set invert df if buffer is not to be freed by H/W */
+			l_sg.u |= (prefree << (l_sg.segs + 55));
+			/* Set the segment length */
+			l_sg.u |= ((uint64_t)dlen << (l_sg.segs << 4));
+			l_sg.segs += 1;
+			slist++;
+		}
+
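+		/* Flush the SG subdesc once it is full (three segments) or an
+		 * SG2 was just emitted, provided more segments remain.
+		 */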
+		if ((is_sg2 || l_sg.segs > 2) && nb_segs) {
+			sg->u = l_sg.u;
+			/* Next SG subdesc */
+			sg = (union nix_send_sg_s *)slist;
+			l_sg.u &= 0xC00000000000000; /* LD_TYPE */
+			l_sg.subdc = NIX_SUBDC_SG;
+			slist++;
+		}
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+		/* Mark mempool object as "put" since it is freed by NIX */
+		if (!prefree)
+			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
+#else
+		RTE_SET_USED(cookie);
+#endif
+		m = m_next;
+	} while (nb_segs);
+
+done:
+	/* Add remaining bytes of security data to last seg */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD && len) {
+		uint8_t shft = (l_sg.subdc == NIX_SUBDC_SG) ? ((l_sg.segs - 1) << 4) : 0;
+
+		dlen = ((l_sg.u >> shft) & 0xFFFFULL) + len;
+		l_sg.u = l_sg.u & ~(0xFFFFULL << shft);
+		l_sg.u |= dlen << shft;
+	}
+
+	/* Write the last subdc out */
+	sg->u = l_sg.u;
+
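+	/* segdw below counts the descriptor size in 2-dword (16B) units:
+	 * SG/iova words rounded up to a pair, plus the SEND_HDR, optional
+	 * SEND_EXT and optional timestamp SEND_MEM units.
+	 */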
+	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
+	/* Roundup extra dwords to multiple of 2 */
+	segdw = (segdw >> 1) + (segdw & 0x1);
+	/* Default dwords */
+	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+	send_hdr->w0.sizem1 = segdw - 1;
+
+	return segdw;
+}
+
 static __rte_always_inline uint16_t
 cn20k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts, uint16_t pkts,
 		    uint64_t *cmd, const uint16_t flags)
@@ -1009,6 +1186,170 @@ cn20k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts, uin
 	return pkts;
 }
 
+static __rte_always_inline uint16_t
+cn20k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts, uint16_t pkts,
+			 uint64_t *cmd, const uint16_t flags)
+{
+	struct cn20k_eth_txq *txq = tx_queue;
+	uintptr_t pa0, pa1, lbase = txq->lmt_base;
+	const rte_iova_t io_addr = txq->io_addr;
+	uint16_t segdw, lmt_id, burst, left, i;
+	struct rte_mbuf *extm = NULL;
+	uint8_t lnum, c_lnum, c_loff;
+	uintptr_t c_lbase = lbase;
+	uint64_t lso_tun_fmt = 0;
+	uint64_t mark_fmt = 0;
+	uint8_t mark_flag = 0;
+	uint64_t data0, data1;
+	rte_iova_t c_io_addr;
+	uint8_t shft, c_shft;
+	__uint128_t data128;
+	uint16_t c_lmt_id;
+	uint64_t sa_base;
+	uintptr_t laddr;
+	bool sec;
+
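+	/* Reap any pending Tx completions first so their mbufs can be released */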
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
+
+	if (!(flags & NIX_TX_VWQE_F))
+		NIX_XMIT_FC_CHECK_RETURN(txq, pkts);
+
+	/* Get cmd skeleton */
+	cn20k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
+
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		lso_tun_fmt = txq->lso_tun_fmt;
+
+	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+		mark_fmt = txq->mark_fmt;
+		mark_flag = txq->mark_flag;
+	}
+
+	/* Get LMT base address and LMT ID as lcore id */
+	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
+		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
+		c_io_addr = txq->cpt_io_addr;
+		sa_base = txq->sa_base;
+	}
+
+	left = pkts;
+again:
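+	/* Each pass handles at most 32 packets, one LMT line per packet */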
+	burst = left > 32 ? 32 : left;
+	shft = 16;
+	data128 = 0;
+
+	lnum = 0;
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
+		c_lnum = 0;
+		c_loff = 0;
+		c_shft = 16;
+	}
+
+	for (i = 0; i < burst; i++) {
+		cn20k_nix_tx_mbuf_validate(tx_pkts[i], flags);
+
+		/* Perform header writes for TSO, barrier at
+		 * lmt steorl will suffice.
+		 */
+		if (flags & NIX_TX_OFFLOAD_TSO_F)
+			cn20k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+
+		cn20k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt, &sec,
+				       mark_flag, mark_fmt);
+
+		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
+
+		/* Prepare CPT instruction and get nixtx addr */
+		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+			cn20k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase, &c_lnum, &c_loff,
+					   &c_shft, sa_base, flags);
+
+		/* Move NIX desc to LMT/NIXTX area */
+		cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+		/* Store sg list directly on lmt line */
+		segdw = cn20k_nix_prepare_mseg(txq, tx_pkts[i], &extm, (uint64_t *)laddr, flags);
+		cn20k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags, segdw, flags);
+		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
+			lnum++;
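+			/* Record this line's size (segdw - 1) in the next
+			 * 3-bit size field of the LMTST data word.
+			 */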
+			data128 |= (((__uint128_t)(segdw - 1)) << shft);
+			shft += 3;
+		}
+	}
+
+	if ((flags & NIX_TX_VWQE_F) && !(ws[3] & BIT_ULL(35)))
+		ws[3] = roc_sso_hws_head_wait(ws[0]);
+
+	left -= burst;
+	tx_pkts += burst;
+
+	/* Submit CPT instructions if any */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
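+		/* Two CPT instructions share an LMT line; c_loff marks a half-filled line */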
+		uint16_t sec_pkts = ((c_lnum << 1) + c_loff);
+
+		/* Reduce pkts to be sent to CPT */
+		burst -= sec_pkts;
+		if (flags & NIX_TX_VWQE_F)
+			cn20k_nix_vwqe_wait_fc(txq, sec_pkts);
+		cn20k_nix_sec_fc_wait(txq, sec_pkts);
+		cn20k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff, c_shft);
+	}
+
+	data0 = (uint64_t)data128;
+	data1 = (uint64_t)(data128 >> 64);
+	/* Make data0 similar to data1 */
+	data0 >>= 16;
+	/* Trigger LMTST */
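+	/* A single STEORL releases at most 16 LMT lines, so larger bursts are split in two */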
+	if (burst > 16) {
+		pa0 = io_addr | (data0 & 0x7) << 4;
+		data0 &= ~0x7ULL;
+		/* Move lmtst1..15 sz to bits 63:19 */
+		data0 <<= 16;
+		data0 |= (15ULL << 12);
+		data0 |= (uint64_t)lmt_id;
+
+		if (flags & NIX_TX_VWQE_F)
+			cn20k_nix_vwqe_wait_fc(txq, 16);
+		/* STEOR0 */
+		roc_lmt_submit_steorl(data0, pa0);
+
+		pa1 = io_addr | (data1 & 0x7) << 4;
+		data1 &= ~0x7ULL;
+		data1 <<= 16;
+		data1 |= ((uint64_t)(burst - 17)) << 12;
+		data1 |= (uint64_t)(lmt_id + 16);
+
+		if (flags & NIX_TX_VWQE_F)
+			cn20k_nix_vwqe_wait_fc(txq, burst - 16);
+		/* STEOR1 */
+		roc_lmt_submit_steorl(data1, pa1);
+	} else if (burst) {
+		pa0 = io_addr | (data0 & 0x7) << 4;
+		data0 &= ~0x7ULL;
+		/* Move lmtst1..15 sz to bits 63:19 */
+		data0 <<= 16;
+		data0 |= ((burst - 1ULL) << 12);
+		data0 |= (uint64_t)lmt_id;
+
+		if (flags & NIX_TX_VWQE_F)
+			cn20k_nix_vwqe_wait_fc(txq, burst);
+		/* STEOR0 */
+		roc_lmt_submit_steorl(data0, pa0);
+	}
+
+	rte_io_wmb();
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena) {
+		cn20k_nix_free_extmbuf(extm);
+		extm = NULL;
+	}
+
+	if (left)
+		goto again;
+
+	return pkts;
+}
+
 #define L3L4CSUM_F	NIX_TX_OFFLOAD_L3_L4_CSUM_F
 #define OL3OL4CSUM_F	NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
 #define VLAN_F		NIX_TX_OFFLOAD_VLAN_QINQ_F
@@ -1213,10 +1554,12 @@ NIX_TX_FASTPATH_MODES
 	uint16_t __rte_noinline __rte_hot fn(void *tx_queue, struct rte_mbuf **tx_pkts,          \
 					     uint16_t pkts)                                        \
 	{                                                                                          \
-		RTE_SET_USED(tx_queue);                                                            \
-		RTE_SET_USED(tx_pkts);                                                             \
-		RTE_SET_USED(pkts);                                                                \
-		return 0;                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		/* For TSO inner checksum is a must */                                             \
+		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))  \
+			return 0;                                                                  \
+		return cn20k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, cmd,               \
+						flags | NIX_TX_MULTI_SEG_F);                       \
 	}
 
 #define NIX_TX_XMIT_VEC(fn, sz, flags)                                                             \
@@ -1246,5 +1589,4 @@ uint16_t __rte_noinline __rte_hot cn20k_nix_xmit_pkts_all_offload(void *tx_queue
 uint16_t __rte_noinline __rte_hot cn20k_nix_xmit_pkts_vec_all_offload(void *tx_queue,
 								       struct rte_mbuf **tx_pkts,
 								       uint16_t pkts);
-
 #endif /* __CN20K_TX_H__ */