Skip to content

Commit d5f044e

Browse files
MortenBroerup authored and tmonjalo committed
mbuf: optimize segment prefree
Refactored rte_pktmbuf_prefree_seg() for both performance and readability.

With the optimized RTE_MBUF_DIRECT() macro, the common likely code path now fits within one instruction cache line on x86-64 when built with GCC.

Signed-off-by: Morten Brørup <[email protected]>
Acked-by: Konstantin Ananyev <[email protected]>
Acked-by: Chengwen Feng <[email protected]>
Reviewed-by: Bruce Richardson <[email protected]>
1 parent d3c9c60 commit d5f044e

File tree

2 files changed

+61
-34
lines changed

2 files changed

+61
-34
lines changed

lib/mbuf/rte_mbuf.h

Lines changed: 19 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
* http://www.kohala.com/start/tcpipiv2.html
3232
*/
3333

34+
#include <stdbool.h>
3435
#include <stdint.h>
3536

3637
#include <rte_common.h>
@@ -1458,44 +1459,30 @@ static inline int __rte_pktmbuf_pinned_extbuf_decref(struct rte_mbuf *m)
14581459
static __rte_always_inline struct rte_mbuf *
14591460
rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
14601461
{
1461-
__rte_mbuf_sanity_check(m, 0);
1462-
1463-
if (likely(rte_mbuf_refcnt_read(m) == 1)) {
1464-
1465-
if (!RTE_MBUF_DIRECT(m)) {
1466-
rte_pktmbuf_detach(m);
1467-
if (RTE_MBUF_HAS_EXTBUF(m) &&
1468-
RTE_MBUF_HAS_PINNED_EXTBUF(m) &&
1469-
__rte_pktmbuf_pinned_extbuf_decref(m))
1470-
return NULL;
1471-
}
1472-
1473-
if (m->next != NULL)
1474-
m->next = NULL;
1475-
if (m->nb_segs != 1)
1476-
m->nb_segs = 1;
1462+
bool refcnt_not_one;
14771463

1478-
return m;
1464+
__rte_mbuf_sanity_check(m, 0);
14791465

1480-
} else if (__rte_mbuf_refcnt_update(m, -1) == 0) {
1466+
refcnt_not_one = unlikely(rte_mbuf_refcnt_read(m) != 1);
1467+
if (refcnt_not_one && __rte_mbuf_refcnt_update(m, -1) != 0)
1468+
return NULL;
14811469

1482-
if (!RTE_MBUF_DIRECT(m)) {
1483-
rte_pktmbuf_detach(m);
1484-
if (RTE_MBUF_HAS_EXTBUF(m) &&
1485-
RTE_MBUF_HAS_PINNED_EXTBUF(m) &&
1486-
__rte_pktmbuf_pinned_extbuf_decref(m))
1487-
return NULL;
1488-
}
1470+
if (unlikely(!RTE_MBUF_DIRECT(m))) {
1471+
rte_pktmbuf_detach(m);
1472+
if (RTE_MBUF_HAS_EXTBUF(m) &&
1473+
RTE_MBUF_HAS_PINNED_EXTBUF(m) &&
1474+
__rte_pktmbuf_pinned_extbuf_decref(m))
1475+
return NULL;
1476+
}
14891477

1490-
if (m->next != NULL)
1491-
m->next = NULL;
1492-
if (m->nb_segs != 1)
1493-
m->nb_segs = 1;
1478+
if (refcnt_not_one)
14941479
rte_mbuf_refcnt_set(m, 1);
1480+
if (m->nb_segs != 1)
1481+
m->nb_segs = 1;
1482+
if (m->next != NULL)
1483+
m->next = NULL;
14951484

1496-
return m;
1497-
}
1498-
return NULL;
1485+
return m;
14991486
}
15001487

15011488
/**

lib/mbuf/rte_mbuf_core.h

Lines changed: 42 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -711,9 +711,49 @@ struct rte_mbuf_ext_shared_info {
711711
*
712712
* If a mbuf embeds its own data after the rte_mbuf structure, this mbuf
713713
* can be defined as a direct mbuf.
714-
*/
714+
*
715+
* Note: Macro optimized for code size.
716+
*
717+
* The plain macro would be:
718+
* \code{.c}
719+
* #define RTE_MBUF_DIRECT(mb) \
720+
* (!((mb)->ol_flags & (RTE_MBUF_F_INDIRECT | RTE_MBUF_F_EXTERNAL)))
721+
* \endcode
722+
*
723+
* The flags RTE_MBUF_F_INDIRECT and RTE_MBUF_F_EXTERNAL are both in the MSB
724+
* (most significant byte) of the 64-bit ol_flags field,
725+
* so we only compare this one byte instead of all 64 bits.
726+
*
727+
* E.g., GCC version 16.0.0 20251019 (experimental) generates the following code for x86-64.
728+
*
729+
* With the plain macro, 17 bytes of instructions:
730+
* \code
731+
* movabs rax,0x6000000000000000 // 10 bytes
732+
* and rax,QWORD PTR [rdi+0x18] // 4 bytes
733+
* sete al // 3 bytes
734+
* \endcode
735+
* With this optimized macro, only 7 bytes of instructions:
736+
* \code
737+
* test BYTE PTR [rdi+0x1f],0x60 // 4 bytes
738+
* sete al // 3 bytes
739+
* \endcode
740+
*/
741+
#ifdef __DOXYGEN__
715742
#define RTE_MBUF_DIRECT(mb) \
716-
(!((mb)->ol_flags & (RTE_MBUF_F_INDIRECT | RTE_MBUF_F_EXTERNAL)))
743+
!(((const char *)(&(mb)->ol_flags))[MSB_OFFSET /* 7 or 0, depending on endianness */] & \
744+
(char)((RTE_MBUF_F_INDIRECT | RTE_MBUF_F_EXTERNAL) >> (7 * CHAR_BIT)) /* 0x60 */)
745+
#else /* !__DOXYGEN__ */
746+
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
747+
/* On little endian architecture, the MSB of a 64-bit integer is at byte offset 7. */
748+
#define RTE_MBUF_DIRECT(mb) !(((const char *)(&(mb)->ol_flags))[7] & 0x60)
749+
#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
750+
/* On big endian architecture, the MSB of a 64-bit integer is at byte offset 0. */
751+
#define RTE_MBUF_DIRECT(mb) !(((const char *)(&(mb)->ol_flags))[0] & 0x60)
752+
#endif /* RTE_BYTE_ORDER */
753+
#endif /* !__DOXYGEN__ */
754+
/* Verify the optimization above. */
755+
static_assert((RTE_MBUF_F_INDIRECT | RTE_MBUF_F_EXTERNAL) == UINT64_C(0x60) << (7 * CHAR_BIT),
756+
"(RTE_MBUF_F_INDIRECT | RTE_MBUF_F_EXTERNAL) is not 0x60 at MSB");
717757

718758
/** Uninitialized or unspecified port. */
719759
#define RTE_MBUF_PORT_INVALID UINT16_MAX

0 commit comments

Comments
 (0)