Skip to content

Commit f5ba46d

Browse files
committed
prov/efa: Add passive debug instrumentation for duplicate completion diagnosis
Add circular buffer to track packet lifecycle events for diagnosing 'Packet already processed' errors. Key features: - 9 event types: SEND/RECV/READ/WRITE × POST/COMPLETION + RECV_RDMA_WITH_IMM - 48 entries per packet (8 bytes each) - Debug info allocated separately from buffer pool - Packet struct remains 128 bytes (no size increase) - Zero overhead in production builds Implementation: - New struct efa_rdm_pke_debug_info_buffer (392 bytes) - 1 byte counter + 7 bytes padding + 384 bytes entries - Packet entry has 8-byte pointer to debug buffer - Buffer allocated on first use, reused across packet lifetime - History preserved across packet reuse (when poisoning enabled) - ASAN builds: always allocate fresh (no history preservation) - NULL checks prevent crashes if allocation fails Memory layout: - Debug buffer: counter (1) + padding (7) + entries (48 × 8) - Bit packing: qpn (10 bits) + gen (6 bits) in 16-bit field - Event type in separate byte (9 types: 0-8) Signed-off-by: Yin Li <yinliq@amazon.com>
1 parent 12f8d71 commit f5ba46d

File tree

8 files changed

+298
-3
lines changed

8 files changed

+298
-3
lines changed

prov/efa/src/rdm/efa_rdm_cq.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,14 @@ static void efa_rdm_cq_proc_ibv_recv_rdma_with_imm_completion(
230230
*/
231231
assert(pkt_entry);
232232
ep->efa_rx_pkts_posted--;
233+
#if ENABLE_DEBUG
234+
/* Record RECV_RDMA_WITH_IMM event */
235+
efa_rdm_pke_record_debug_info(pkt_entry,
236+
ep->base_ep.qp->qp_num,
237+
ep->base_ep.qp->qkey,
238+
pkt_entry->gen,
239+
EFA_RDM_PKE_DEBUG_EVENT_RECV_RDMA_WITH_IMM);
240+
#endif
233241
efa_rdm_cq_increment_pkt_entry_gen(pkt_entry);
234242
efa_rdm_pke_release_rx(pkt_entry);
235243
}
@@ -524,6 +532,15 @@ static void efa_rdm_cq_handle_recv_completion(struct efa_ibv_cq *ibv_cq, struct
524532
uint32_t imm_data = 0;
525533
bool has_imm_data = false;
526534

535+
#if ENABLE_DEBUG
536+
/* Record RECV_COMPLETION event */
537+
efa_rdm_pke_record_debug_info(pkt_entry,
538+
ep->base_ep.qp->qp_num,
539+
ep->base_ep.qp->qkey,
540+
pkt_entry->gen,
541+
EFA_RDM_PKE_DEBUG_EVENT_RECV_COMPLETION);
542+
#endif
543+
527544
EFA_DBG(FI_LOG_CQ, "Processing receive completion for packet %p\n", pkt_entry);
528545

529546
if (pkt_entry->alloc_type == EFA_RDM_PKE_FROM_USER_RX_POOL) {

prov/efa/src/rdm/efa_rdm_ep.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ struct efa_rdm_ep {
131131
struct ofi_bufpool *peer_robuf_pool;
132132

133133
#if ENABLE_DEBUG
134+
/* buffer pool for packet debug info */
135+
struct ofi_bufpool *pke_debug_info_pool;
134136
/* tx/rx_entries waiting to receive data in
135137
* long CTS msg/read/write protocols */
136138
struct dlist_entry ope_recv_list;

prov/efa/src/rdm/efa_rdm_ep_fiops.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,18 @@ int efa_rdm_ep_create_buffer_pools(struct efa_rdm_ep *ep)
292292
if (ret)
293293
goto err_free;
294294

295+
#if ENABLE_DEBUG
296+
/* Create debug info pool - one buffer per packet entry */
297+
ret = ofi_bufpool_create(&ep->pke_debug_info_pool,
298+
sizeof(struct efa_rdm_pke_debug_info_buffer),
299+
EFA_RDM_BUFPOOL_ALIGNMENT,
300+
0, /* no limit to max_cnt */
301+
efa_rdm_ep_get_tx_pool_size(ep) + efa_rdm_ep_get_rx_pool_size(ep),
302+
OFI_BUFPOOL_NO_TRACK);
303+
if (ret)
304+
goto err_free;
305+
#endif
306+
295307
return 0;
296308

297309
err_free:
@@ -331,6 +343,11 @@ int efa_rdm_ep_create_buffer_pools(struct efa_rdm_ep *ep)
331343
if (ep->peer_robuf_pool)
332344
ofi_bufpool_destroy(ep->peer_robuf_pool);
333345

346+
#if ENABLE_DEBUG
347+
if (ep->pke_debug_info_pool)
348+
ofi_bufpool_destroy(ep->pke_debug_info_pool);
349+
#endif
350+
334351
return ret;
335352
}
336353

@@ -889,6 +906,11 @@ static void efa_rdm_ep_destroy_buffer_pools(struct efa_rdm_ep *efa_rdm_ep)
889906

890907
if (efa_rdm_ep->peer_robuf_pool)
891908
ofi_bufpool_destroy(efa_rdm_ep->peer_robuf_pool);
909+
910+
#if ENABLE_DEBUG
911+
if (efa_rdm_ep->pke_debug_info_pool)
912+
ofi_bufpool_destroy(efa_rdm_ep->pke_debug_info_pool);
913+
#endif
892914
}
893915

894916
/**

prov/efa/src/rdm/efa_rdm_ep_utils.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,24 @@ void efa_rdm_ep_record_tx_op_submitted(struct efa_rdm_ep *ep, struct efa_rdm_pke
381381
}
382382
#if ENABLE_DEBUG
383383
ep->efa_total_posted_tx_ops++;
384+
/* Record post event based on operation type */
385+
int event_type;
386+
switch (ope->op) {
387+
case ofi_op_read_req:
388+
event_type = EFA_RDM_PKE_DEBUG_EVENT_READ_POST;
389+
break;
390+
case ofi_op_write:
391+
event_type = EFA_RDM_PKE_DEBUG_EVENT_WRITE_POST;
392+
break;
393+
default:
394+
event_type = EFA_RDM_PKE_DEBUG_EVENT_SEND_POST;
395+
break;
396+
}
397+
efa_rdm_pke_record_debug_info(pkt_entry,
398+
ep->base_ep.qp->qp_num,
399+
ep->base_ep.qp->qkey,
400+
pkt_entry->gen,
401+
event_type);
384402
#endif
385403
}
386404

@@ -421,6 +439,31 @@ void efa_rdm_ep_record_tx_op_completed(struct efa_rdm_ep *ep, struct efa_rdm_pke
421439
{
422440
struct efa_rdm_ope *ope = NULL;
423441

442+
#if ENABLE_DEBUG
443+
/*
444+
* Record completion event based on operation type.
445+
* Note: qp_num is truncated to 10 bits, gen to 6 bits.
446+
*/
447+
int event_type = EFA_RDM_PKE_DEBUG_EVENT_SEND_COMPLETION;
448+
if (pkt_entry->ope) {
449+
switch (pkt_entry->ope->op) {
450+
case ofi_op_read_req:
451+
event_type = EFA_RDM_PKE_DEBUG_EVENT_READ_COMPLETION;
452+
break;
453+
case ofi_op_write:
454+
event_type = EFA_RDM_PKE_DEBUG_EVENT_WRITE_COMPLETION;
455+
break;
456+
default:
457+
break;
458+
}
459+
}
460+
efa_rdm_pke_record_debug_info(pkt_entry,
461+
ep->base_ep.qp->qp_num,
462+
ep->base_ep.qp->qkey,
463+
pkt_entry->gen,
464+
event_type);
465+
#endif
466+
424467
ope = pkt_entry->ope;
425468
/*
426469
* peer can be NULL when:

prov/efa/src/rdm/efa_rdm_peer.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,15 @@ int efa_rdm_peer_reorder_msg(struct efa_rdm_peer *peer, struct efa_rdm_ep *ep,
163163
EFA_WARN(FI_LOG_EP_CTRL,
164164
"Error: message id has already been processed. received: %" PRIu32 " expected: %"
165165
PRIu32 "\n", msg_id, ofi_recvwin_next_exp_id(robuf));
166+
167+
#if ENABLE_DEBUG
168+
/* Print debug info on duplicate message */
169+
EFA_WARN(FI_LOG_EP_CTRL,
170+
" pkt_entry=%p gen=%u\n"
171+
" Debug info history:\n",
172+
pkt_entry, pkt_entry->gen);
173+
efa_rdm_pke_print_debug_info(pkt_entry);
174+
#endif
166175
return -FI_EALREADY;
167176
} else {
168177
/* Current receive window size is too small to hold incoming messages.

prov/efa/src/rdm/efa_rdm_pke.c

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,49 @@ struct efa_rdm_pke *efa_rdm_pke_alloc(struct efa_rdm_ep *ep,
4545
if (!pkt_entry)
4646
return NULL;
4747

48+
#ifdef ENABLE_EFA_POISONING
49+
/* Preserve gen across poisoning */
50+
uint8_t gen = pkt_entry->gen;
51+
#if ENABLE_DEBUG
52+
/* Preserve debug_info pointer across poisoning to maintain packet history.
53+
* On first allocation from a freshly poisoned region, this will be 0xdeadbeef.
54+
* On reuse, this preserves the existing buffer. */
55+
struct efa_rdm_pke_debug_info_buffer *debug_info = pkt_entry->debug_info;
56+
/* If debug_info contains poison pattern, treat as NULL (uninitialized).
57+
* This happens when packets are allocated from freshly poisoned bufpool regions. */
58+
if (((uintptr_t)debug_info & 0xffffffffUL) == 0xdeadbeefUL) {
59+
debug_info = NULL;
60+
}
61+
#endif
62+
efa_rdm_poison_mem_region(pkt_entry, pkt_pool->attr.size);
63+
pkt_entry->gen = gen;
64+
#if ENABLE_DEBUG
65+
pkt_entry->debug_info = debug_info;
66+
#endif
67+
#endif
68+
/* Without poisoning, debug_info pointer is naturally preserved in memory. */
69+
4870
pkt_entry->gen &= EFA_RDM_PACKET_GEN_MASK;
4971
dlist_init(&pkt_entry->entry);
5072

5173
#if ENABLE_DEBUG
5274
dlist_init(&pkt_entry->dbg_entry);
75+
/* Allocate debug info if not already allocated */
76+
if (!pkt_entry->debug_info) {
77+
pkt_entry->debug_info = ofi_buf_alloc(ep->pke_debug_info_pool);
78+
if (!pkt_entry->debug_info) {
79+
/* Debug info allocation failed from unlimited pool - indicates heap exhaustion.
80+
* Write EQ error since retrying won't help (debug_info is never released). */
81+
EFA_WARN(FI_LOG_EP_CTRL,
82+
"Failed to allocate debug_info buffer from unlimited pool - heap exhaustion likely\n");
83+
efa_base_ep_write_eq_error(&ep->base_ep, FI_ENOMEM, FI_EFA_ERR_OOM);
84+
efa_rdm_pke_release(pkt_entry);
85+
return NULL;
86+
}
87+
pkt_entry->debug_info->counter = 0;
88+
memset(pkt_entry->debug_info->entries, 0,
89+
sizeof(pkt_entry->debug_info->entries));
90+
}
5391
#endif
5492
/* Initialize necessary fields in pkt_entry.
5593
* The memory region allocated by ofi_buf_alloc_ex is not initalized.
@@ -93,12 +131,18 @@ struct efa_rdm_pke *efa_rdm_pke_alloc(struct efa_rdm_ep *ep,
93131
void efa_rdm_pke_release(struct efa_rdm_pke *pkt_entry)
94132
{
95133
#ifdef ENABLE_EFA_POISONING
96-
/* Restore pkt_entry->gen after poisoning. Otherwise, all pkts will have
97-
* the same gen when poisoning is enabled. */
134+
/* Preserve gen and debug_info pointer across poisoning to maintain packet history */
98135
uint8_t gen = pkt_entry->gen;
136+
#if ENABLE_DEBUG
137+
struct efa_rdm_pke_debug_info_buffer *debug_info = pkt_entry->debug_info;
138+
#endif
99139
efa_rdm_poison_mem_region(pkt_entry, ofi_buf_pool(pkt_entry)->attr.size);
100140
pkt_entry->gen = gen;
141+
#if ENABLE_DEBUG
142+
pkt_entry->debug_info = debug_info;
143+
#endif
101144
#endif
145+
/* Without poisoning, debug_info pointer is naturally preserved in memory. */
102146
pkt_entry->flags = 0;
103147
ofi_buf_free(pkt_entry);
104148
}
@@ -708,6 +752,15 @@ ssize_t efa_rdm_pke_recvv(struct efa_rdm_pke **pke_vec,
708752
recv_wr = &ep->base_ep.efa_recv_wr_vec[i];
709753
recv_wr->wr.wr_id = efa_rdm_pke_get_wr_id(pke_vec[i]);
710754

755+
#if ENABLE_DEBUG
756+
/* Record RECV_POST event */
757+
efa_rdm_pke_record_debug_info(pke_vec[i],
758+
ep->base_ep.qp->qp_num,
759+
ep->base_ep.qp->qkey,
760+
pke_vec[i]->gen,
761+
EFA_RDM_PKE_DEBUG_EVENT_RECV_POST);
762+
#endif
763+
711764
recv_wr->wr.num_sge = 1;
712765
recv_wr->wr.sg_list = recv_wr->sge;
713766
recv_wr->wr.sg_list[0].length = pke_vec[i]->pkt_size;
@@ -787,3 +840,50 @@ ssize_t efa_rdm_pke_user_recvv(struct efa_rdm_pke **pke_vec,
787840

788841
return err;
789842
}
843+
844+
845+
#if ENABLE_DEBUG
846+
/* Compile-time assertion that debug_info gen field can hold all possible gen values */
847+
_Static_assert(EFA_RDM_PKE_DEBUG_GEN_MASK >= EFA_RDM_PACKET_GEN_MASK,
848+
"DEBUG_GEN_BITS insufficient to hold EFA_RDM_PACKET_GEN_MASK");
849+
850+
/**
851+
* @brief Print debug info history for packet entry
852+
*
853+
* @param pkt_entry Packet entry
854+
*/
855+
void efa_rdm_pke_print_debug_info(struct efa_rdm_pke *pkt_entry)
856+
{
857+
static const char *event_str[] = {
858+
"SEND_POST",
859+
"SEND_COMPLETION",
860+
"RECV_POST",
861+
"RECV_COMPLETION",
862+
"READ_POST",
863+
"READ_COMPLETION",
864+
"WRITE_POST",
865+
"WRITE_COMPLETION",
866+
"RECV_RDMA_WITH_IMM"
867+
};
868+
int i, count;
869+
int start_idx;
870+
871+
if (!pkt_entry->debug_info)
872+
return;
873+
874+
count = MIN(EFA_RDM_PKE_DEBUG_INFO_SIZE, pkt_entry->debug_info->counter);
875+
start_idx = (pkt_entry->debug_info->counter >= EFA_RDM_PKE_DEBUG_INFO_SIZE) ?
876+
(pkt_entry->debug_info->counter % EFA_RDM_PKE_DEBUG_INFO_SIZE) : 0;
877+
878+
for (i = 0; i < count; i++) {
879+
int idx = (start_idx + i) % EFA_RDM_PKE_DEBUG_INFO_SIZE;
880+
struct efa_rdm_pke_debug_info *info = &pkt_entry->debug_info->entries[idx];
881+
uint8_t event = EFA_RDM_PKE_DEBUG_INFO_GET_EVENT(info);
882+
EFA_WARN(FI_LOG_EP_DATA,
883+
" [%d] counter=%u gen=%u qpn=%u qkey=%u (%s)\n",
884+
i, info->counter, EFA_RDM_PKE_DEBUG_INFO_GET_GEN(info),
885+
EFA_RDM_PKE_DEBUG_INFO_GET_QPN(info), info->qkey,
886+
event < EFA_RDM_PKE_DEBUG_EVENT_TYPE_COUNT ? event_str[event] : "UNKNOWN");
887+
}
888+
}
889+
#endif

0 commit comments

Comments
 (0)