Skip to content

Commit 84ca6b6

Browse files
committed
prov/verbs: Reload the list of interfaces on each call to fi_getinfo()
The verbs provider loads the list of verbs devices available on the node only once, on the first call to fi_getinfo(). If a network device is added (hot-plugged) after this initial call to fi_getinfo(), it won't be visible in libfabric. A subsidiary problem is that fi_getinfo() only returns network adapters with active links. If the link is initially inactive and becomes active after the first call to fi_getinfo(), this interface will not be visible in libfabric. This is particularly a problem for long-running services where restarting a process to discover newly added network devices is not an option. With this patch, the list of verbs interfaces is reloaded on each call to fi_getinfo(). Fixes #10881 Signed-off-by: Sylvain Didelot <[email protected]>
1 parent 7be4eb6 commit 84ca6b6

File tree

3 files changed

+83
-67
lines changed

3 files changed

+83
-67
lines changed

prov/verbs/src/verbs_info.c

+61-50
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,6 @@ const struct verbs_ep_domain verbs_dgram_domain = {
154154
.protocol = FI_PROTO_UNSPEC,
155155
};
156156

157-
/* The list (not thread safe) is populated once when the provider is initialized */
158-
DEFINE_LIST(verbs_devs);
159-
160157
int vrb_check_ep_attr(const struct fi_info *hints,
161158
const struct fi_info *info)
162159
{
@@ -877,7 +874,7 @@ static int vrb_alloc_info(struct ibv_context *ctx, struct fi_info **info,
877874
return ret;
878875
}
879876

880-
static void verbs_devs_print(void)
877+
static void verbs_devs_print(struct dlist_entry *verbs_devs)
881878
{
882879
struct verbs_dev_info *dev;
883880
struct verbs_addr *addr;
@@ -886,7 +883,7 @@ static void verbs_devs_print(void)
886883

887884
FI_INFO(&vrb_prov, FI_LOG_FABRIC,
888885
"list of verbs devices found for FI_EP_MSG:\n");
889-
dlist_foreach_container(&verbs_devs, struct verbs_dev_info,
886+
dlist_foreach_container(verbs_devs, struct verbs_dev_info,
890887
dev, entry) {
891888
FI_INFO(&vrb_prov, FI_LOG_FABRIC,
892889
"#%d %s - IPoIB addresses:\n", ++i, dev->name);
@@ -1186,7 +1183,7 @@ static int vrb_getifaddrs(struct dlist_entry *verbs_devs)
11861183
num_verbs_ifs++;
11871184
}
11881185

1189-
verbs_devs_print();
1186+
verbs_devs_print(verbs_devs);
11901187

11911188
freeifaddrs(ifaddr);
11921189
return num_verbs_ifs ? 0 : -FI_ENODATA;
@@ -1237,7 +1234,8 @@ static inline int vrb_cmp_domain_and_dev_name(const char *domain_name,
12371234
return strncmp(domain_name, dev_name, cmp_len);
12381235
}
12391236

1240-
static int vrb_get_srcaddr_devs(struct fi_info **info)
1237+
static int vrb_get_srcaddr_devs(struct dlist_entry *verbs_devs,
1238+
struct fi_info **info)
12411239
{
12421240
struct verbs_dev_info *dev;
12431241
struct fi_info *fi;
@@ -1246,7 +1244,7 @@ static int vrb_get_srcaddr_devs(struct fi_info **info)
12461244
for (fi = *info; fi; fi = fi->next) {
12471245
if (fi->ep_attr->type == FI_EP_DGRAM)
12481246
continue;
1249-
dlist_foreach_container(&verbs_devs, struct verbs_dev_info,
1247+
dlist_foreach_container(verbs_devs, struct verbs_dev_info,
12501248
dev, entry) {
12511249
if (!vrb_cmp_domain_and_dev_name(fi->domain_attr->name,
12521250
dev->name)) {
@@ -1299,7 +1297,8 @@ static int vrb_set_info_addrs(struct fi_info *info,
12991297
return FI_SUCCESS;
13001298
}
13011299

1302-
static int vrb_fill_addr(struct rdma_addrinfo *rai, struct fi_info **info,
1300+
static int vrb_fill_addr(struct dlist_entry *verbs_devs,
1301+
struct rdma_addrinfo *rai, struct fi_info **info,
13031302
struct rdma_cm_id *id)
13041303
{
13051304
struct sockaddr *local_addr;
@@ -1313,7 +1312,7 @@ static int vrb_fill_addr(struct rdma_addrinfo *rai, struct fi_info **info,
13131312
goto rai_to_fi;
13141313

13151314
if (!id->verbs)
1316-
return vrb_get_srcaddr_devs(info);
1315+
return vrb_get_srcaddr_devs(verbs_devs, info);
13171316

13181317
/* Handle the case when rdma_cm doesn't fill src address even
13191318
* though it fills the destination address (presence of id->verbs
@@ -1336,11 +1335,12 @@ static int vrb_fill_addr(struct rdma_addrinfo *rai, struct fi_info **info,
13361335
NULL, NULL);
13371336
}
13381337

1339-
static int vrb_device_has_ipoib_addr(const char *dev_name)
1338+
static int vrb_device_has_ipoib_addr(struct dlist_entry *verbs_devs,
1339+
const char *dev_name)
13401340
{
13411341
struct verbs_dev_info *dev;
13421342

1343-
dlist_foreach_container(&verbs_devs, struct verbs_dev_info, dev, entry) {
1343+
dlist_foreach_container(verbs_devs, struct verbs_dev_info, dev, entry) {
13441344
if (!strcmp(dev_name, dev->name))
13451345
return 1;
13461346
}
@@ -1349,40 +1349,20 @@ static int vrb_device_has_ipoib_addr(const char *dev_name)
13491349

13501350
#define VERBS_NUM_DOMAIN_TYPES 3
13511351

1352-
static int vrb_init_info(struct fi_info **all_infos)
1352+
static int vrb_init_info(struct dlist_entry *verbs_devs,
1353+
struct fi_info **all_infos)
13531354
{
13541355
struct ibv_context **ctx_list;
13551356
struct fi_info *fi = NULL, *tail = NULL;
13561357
const struct verbs_ep_domain *ep_type[VERBS_NUM_DOMAIN_TYPES];
13571358
int ret = 0, i, j, num_devices, dom_count = 0;
1358-
static bool initialized = false;
13591359

13601360
vrb_prof_func_start(__func__);
13611361

1362-
if (initialized)
1363-
goto done;
1364-
1365-
initialized = true;
1362+
vrb_devs_free(verbs_devs);
1363+
fi_freeinfo(*all_infos);
13661364
*all_infos = NULL;
13671365

1368-
if (vrb_os_ini()) {
1369-
FI_WARN(&vrb_prov, FI_LOG_FABRIC,
1370-
"failed in OS specific device initialization\n");
1371-
ret = -FI_ENODATA;
1372-
goto done;
1373-
}
1374-
1375-
vrb_prof_func_start("vrb_os_mem_support");
1376-
vrb_os_mem_support(&vrb_gl_data.peer_mem_support,
1377-
&vrb_gl_data.dmabuf_support);
1378-
vrb_prof_func_end("vrb_os_mem_support");
1379-
1380-
if (vrb_read_params()) {
1381-
VRB_INFO(FI_LOG_FABRIC, "failed to read parameters\n");
1382-
ret = -FI_ENODATA;
1383-
goto done;
1384-
}
1385-
13861366
if (!vrb_have_device()) {
13871367
VRB_INFO(FI_LOG_FABRIC, "no RDMA devices found\n");
13881368
ret = -FI_ENODATA;
@@ -1398,13 +1378,14 @@ static int vrb_init_info(struct fi_info **all_infos)
13981378
"XRC not built into provider, skip allocating "
13991379
"fi_info for XRC FI_EP_MSG endpoints\n");
14001380
}
1381+
14011382
vrb_prof_func_start("vrb_getifaddrs");
1402-
vrb_getifaddrs(&verbs_devs);
1383+
vrb_getifaddrs(verbs_devs);
14031384
vrb_prof_func_end("vrb_getifaddrs");
14041385
if (!vrb_gl_data.iface)
1405-
vrb_get_sib(&verbs_devs);
1386+
vrb_get_sib(verbs_devs);
14061387

1407-
if (dlist_empty(&verbs_devs))
1388+
if (dlist_empty(verbs_devs))
14081389
FI_WARN(&vrb_prov, FI_LOG_FABRIC,
14091390
"no valid IPoIB interfaces found, FI_EP_MSG endpoint "
14101391
"type would not be available\n");
@@ -1435,7 +1416,7 @@ static int vrb_init_info(struct fi_info **all_infos)
14351416

14361417
for (j = 0; j < dom_count; j++) {
14371418
if (ep_type[j]->type == FI_EP_MSG &&
1438-
!vrb_device_has_ipoib_addr(ctx_list[i]->device->name)) {
1419+
!vrb_device_has_ipoib_addr(verbs_devs, ctx_list[i]->device->name)) {
14391420
FI_INFO(&vrb_prov, FI_LOG_FABRIC,
14401421
"skipping device: %s for FI_EP_MSG, "
14411422
"it may have a filtered IPoIB interface"
@@ -1788,7 +1769,8 @@ static int vrb_handle_ib_ud_addr(const char *node, const char *service,
17881769
return ret;
17891770
}
17901771

1791-
static int vrb_handle_sock_addr(const char *node, const char *service,
1772+
static int vrb_handle_sock_addr(struct dlist_entry *verbs_devs,
1773+
const char *node, const char *service,
17921774
uint64_t flags, const struct fi_info *hints,
17931775
struct fi_info **info)
17941776
{
@@ -1807,15 +1789,16 @@ static int vrb_handle_sock_addr(const char *node, const char *service,
18071789
goto out;
18081790
}
18091791

1810-
ret = vrb_fill_addr(rai, info, id);
1792+
ret = vrb_fill_addr(verbs_devs, rai, info, id);
18111793
out:
18121794
rdma_freeaddrinfo(rai);
18131795
if (rdma_destroy_id(id))
18141796
VRB_WARN_ERRNO(FI_LOG_FABRIC, "rdma_destroy_id");
18151797
return ret;
18161798
}
18171799

1818-
static int vrb_get_match_infos(uint32_t version, const char *node,
1800+
static int vrb_get_match_infos(struct dlist_entry *verbs_devs,
1801+
uint32_t version, const char *node,
18191802
const char *service, uint64_t flags,
18201803
const struct fi_info *hints,
18211804
const struct fi_info *raw_info,
@@ -1832,7 +1815,7 @@ static int vrb_get_match_infos(uint32_t version, const char *node,
18321815

18331816
if (!hints || !hints->ep_attr || hints->ep_attr->type == FI_EP_MSG ||
18341817
hints->ep_attr->type == FI_EP_UNSPEC) {
1835-
ret_sock_addr = vrb_handle_sock_addr(node, service, flags, hints, info);
1818+
ret_sock_addr = vrb_handle_sock_addr(verbs_devs, node, service, flags, hints, info);
18361819
if (ret_sock_addr) {
18371820
VRB_INFO(FI_LOG_FABRIC,
18381821
"handling of the socket address fails - %d\n",
@@ -1911,21 +1894,49 @@ static void vrb_filter_info_by_addr_format(struct fi_info **info, int addr_forma
19111894
}
19121895
}
19131896

1897+
void vrb_devs_free(struct dlist_entry *verbs_devs)
1898+
{
1899+
struct verbs_dev_info *dev;
1900+
struct verbs_addr *addr;
1901+
1902+
while (!dlist_empty(verbs_devs)) {
1903+
dlist_pop_front(verbs_devs, struct verbs_dev_info, dev, entry);
1904+
while (!dlist_empty(&dev->addrs)) {
1905+
dlist_pop_front(&dev->addrs, struct verbs_addr, addr, entry);
1906+
rdma_freeaddrinfo(addr->rai);
1907+
free(addr);
1908+
}
1909+
free(dev->name);
1910+
free(dev);
1911+
}
1912+
}
1913+
19141914
int vrb_getinfo(uint32_t version, const char *node, const char *service,
19151915
uint64_t flags, const struct fi_info *hints,
19161916
struct fi_info **info)
19171917
{
1918+
static bool init_done = false;
19181919
int ret;
19191920

19201921
vrb_prof_func_start(__func__);
1921-
ofi_mutex_lock(&vrb_info_mutex);
1922-
ret = vrb_init_info(&vrb_util_prov.info);
1923-
if (ret) {
1924-
ofi_mutex_unlock(&vrb_info_mutex);
1925-
goto out;
1922+
if (!init_done || flags & FI_RESCAN) {
1923+
if (!init_done) {
1924+
ret = vrb_init();
1925+
if (ret) {
1926+
ofi_mutex_unlock(&vrb_info_mutex);
1927+
goto out;
1928+
}
1929+
init_done = true;
1930+
}
1931+
1932+
ret = vrb_init_info(&vrb_devs, &vrb_util_prov.info);
1933+
if (ret) {
1934+
ofi_mutex_unlock(&vrb_info_mutex);
1935+
goto out;
1936+
}
19261937
}
19271938

1928-
ret = vrb_get_match_infos(version, node, service,
1939+
ret = vrb_get_match_infos(&vrb_devs, version, node, service,
19291940
flags, hints,
19301941
vrb_util_prov.info, info);
19311942
ofi_mutex_unlock(&vrb_info_mutex);

prov/verbs/src/verbs_init.c

+19-15
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ struct util_prov vrb_util_prov = {
9797

9898
/* mutex for guarding the initialization of vrb_util_prov.info */
9999
ofi_mutex_t vrb_info_mutex;
100+
DEFINE_LIST(vrb_devs);
100101

101102
int vrb_sockaddr_len(struct sockaddr *addr)
102103
{
@@ -634,7 +635,7 @@ static int vrb_get_param_str(const char *param_name,
634635
return 0;
635636
}
636637

637-
int vrb_read_params(void)
638+
static int vrb_read_params(void)
638639
{
639640
/* Common parameters */
640641
if (vrb_get_param_int("tx_size", "Default maximum tx context size",
@@ -767,21 +768,25 @@ int vrb_read_params(void)
767768
return FI_SUCCESS;
768769
}
769770

770-
static void verbs_devs_free(void)
771+
int vrb_init()
771772
{
772-
struct verbs_dev_info *dev;
773-
struct verbs_addr *addr;
774-
775-
while (!dlist_empty(&verbs_devs)) {
776-
dlist_pop_front(&verbs_devs, struct verbs_dev_info, dev, entry);
777-
while (!dlist_empty(&dev->addrs)) {
778-
dlist_pop_front(&dev->addrs, struct verbs_addr, addr, entry);
779-
rdma_freeaddrinfo(addr->rai);
780-
free(addr);
781-
}
782-
free(dev->name);
783-
free(dev);
773+
if (vrb_os_ini()) {
774+
FI_WARN(&vrb_prov, FI_LOG_FABRIC,
775+
"failed in OS specific device initialization\n");
776+
return -FI_ENODATA;
777+
}
778+
779+
vrb_prof_func_start("vrb_os_mem_support");
780+
vrb_os_mem_support(&vrb_gl_data.peer_mem_support,
781+
&vrb_gl_data.dmabuf_support);
782+
vrb_prof_func_end("vrb_os_mem_support");
783+
784+
if (vrb_read_params()) {
785+
VRB_INFO(FI_LOG_FABRIC, "failed to read parameters\n");
786+
return -FI_ENODATA;
784787
}
788+
789+
return FI_SUCCESS;
785790
}
786791

787792
static void vrb_fini(void)
@@ -793,7 +798,6 @@ static void vrb_fini(void)
793798
#endif
794799
ofi_mutex_destroy(&vrb_info_mutex);
795800
fi_freeinfo(vrb_util_prov.info);
796-
verbs_devs_free();
797801
vrb_os_fini();
798802
vrb_util_prov.info = NULL;
799803
}

prov/verbs/src/verbs_ofi.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ typedef void vrb_profile_t;
173173
extern struct fi_provider vrb_prov;
174174
extern struct util_prov vrb_util_prov;
175175
extern ofi_mutex_t vrb_info_mutex;
176-
extern struct dlist_entry verbs_devs;
176+
extern struct dlist_entry vrb_devs;
177177

178178
extern struct vrb_gl_data {
179179
int def_tx_size;
@@ -858,7 +858,8 @@ int vrb_ep_destroy_xrc_qp(struct vrb_xrc_ep *ep);
858858

859859
int vrb_xrc_close_srq(struct vrb_srx *srx);
860860

861-
int vrb_read_params(void);
861+
int vrb_init(void);
862+
void vrb_devs_free(struct dlist_entry *verbs_devs);
862863
int vrb_getinfo(uint32_t version, const char *node, const char *service,
863864
uint64_t flags, const struct fi_info *hints,
864865
struct fi_info **info);

0 commit comments

Comments
 (0)