Skip to content

Commit 12f8d71

Browse files
chenbraschhshijin-aws
authored and committed
prov/efa: Add rdma-core errno for feature mismatch
Add proper error handling for when the peer doesn't support or didn't enable a locally requested feature. This provides clearer diagnostics instead of vague failures.

Reviewed-by: Michael Margolin <mrgolin@amazon.com>
Signed-off-by: Chen Brasch <cbrasch@amazon.com>
1 parent bbcb4cf commit 12f8d71

File tree

4 files changed

+11
-2
lines changed

4 files changed

+11
-2
lines changed

fabtests/prov/efa/src/efagda/efa_io_defs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
22
/*
3-
* Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
3+
* Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
44
*/
55

66
#ifndef _EFA_IO_H_
@@ -65,6 +65,8 @@ enum efa_io_comp_status {
6565
EFA_IO_COMP_STATUS_REMOTE_ERROR_UNKNOWN_PEER = 14,
6666
/* Unreachable remote - never received a response */
6767
EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE = 15,
68+
/* Remote feature mismatch */
69+
EFA_IO_COMP_STATUS_REMOTE_ERROR_FEATURE_MISMATCH = 18,
6870
};
6971

7072
enum efa_io_frwr_pbl_mode {

prov/efa/src/efa_data_path_direct_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ EFA_ALWAYS_INLINE enum ibv_wc_status to_ibv_status(enum efa_errno status)
141141
return IBV_WC_REM_INV_RD_REQ_ERR;
142142
case EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS:
143143
return IBV_WC_BAD_RESP_ERR;
144+
case EFA_IO_COMP_STATUS_REMOTE_ERROR_FEATURE_MISMATCH:
144145
case EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH:
145146
return IBV_WC_REM_INV_REQ_ERR;
146147
case EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE:

prov/efa/src/efa_errno.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@
7171
_(12, REMOTE_ERROR_BAD_STATUS, Unexpected status returned by responder) \
7272
_(13, LOCAL_ERROR_UNRESP_REMOTE, Unresponsive remote (was previously responsive)) \
7373
_(14, REMOTE_ERROR_UNKNOWN_PEER, No valid address handle at remote side (required for RDMA operations)) \
74-
_(15, LOCAL_ERROR_UNREACH_REMOTE, Unreachable remote (never received a response))
74+
_(15, LOCAL_ERROR_UNREACH_REMOTE, Unreachable remote - never received a response) \
75+
_(18, REMOTE_ERROR_FEATURE_MISMATCH, Remote feature mismatch) \
7576

7677
/**
7778
* @brief EFA provider proprietary error codes
@@ -157,6 +158,7 @@ static inline int to_fi_errno(enum efa_errno err) {
157158
case EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY:
158159
case EFA_IO_COMP_STATUS_LOCAL_ERROR_UNSUPPORTED_OP:
159160
case EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS:
161+
case EFA_IO_COMP_STATUS_REMOTE_ERROR_FEATURE_MISMATCH:
160162
return FI_EINVAL;
161163
case EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE:
162164
return FI_EHOSTUNREACH;

prov/efa/src/efa_strerror.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ void efa_show_help(enum efa_errno err) {
9494
"Please consider matching the local and remote libfabric versions, or turning off "
9595
"the zero-copy recv feature by setting FI_EFA_USE_ZCPY_RX=0 in the environment";
9696
break;
97+
case EFA_IO_COMP_STATUS_REMOTE_ERROR_FEATURE_MISMATCH:
98+
help = "This error is detected remotely. "
99+
"The peer doesn't support or didn't enable a locally requested feature.";
100+
break;
97101
default:
98102
return;
99103
}

0 commit comments

Comments
 (0)