Skip to content

Commit 209a93e

Browse files
committed
prov/verbs: Reload the list of interfaces on each call to fi_getinfo()
The verbs provider loads the list of verbs devices available on the node only once, on the first call to fi_getinfo(). If a network device is added (hot-plugged) after this initial call to fi_getinfo(), it won't be visible in libfabric. A subsidiary problem is that fi_getinfo() only returns network adapters with active links. If the link is initially inactive and becomes active after the first call to fi_getinfo(), this interface will not be visible in libfabric. This is particularly a problem for long-running services where restarting a process to discover newly added network devices is not an option. With this patch, the list of verbs interfaces is reloaded on each call to fi_getinfo(). Fixes #10881 Signed-off-by: Sylvain Didelot <[email protected]>
1 parent 2d53b72 commit 209a93e

File tree

3 files changed

+65
-61
lines changed

3 files changed

+65
-61
lines changed

prov/verbs/src/verbs_info.c

+65-42
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,6 @@ const struct verbs_ep_domain verbs_dgram_domain = {
154154
.protocol = FI_PROTO_UNSPEC,
155155
};
156156

157-
/* The list (not thread safe) is populated once when the provider is initialized */
158-
DEFINE_LIST(verbs_devs);
159-
160157
int vrb_check_ep_attr(const struct fi_info *hints,
161158
const struct fi_info *info)
162159
{
@@ -877,7 +874,7 @@ static int vrb_alloc_info(struct ibv_context *ctx, struct fi_info **info,
877874
return ret;
878875
}
879876

880-
static void verbs_devs_print(void)
877+
static void verbs_devs_print(struct dlist_entry *verbs_devs)
881878
{
882879
struct verbs_dev_info *dev;
883880
struct verbs_addr *addr;
@@ -886,7 +883,7 @@ static void verbs_devs_print(void)
886883

887884
FI_INFO(&vrb_prov, FI_LOG_FABRIC,
888885
"list of verbs devices found for FI_EP_MSG:\n");
889-
dlist_foreach_container(&verbs_devs, struct verbs_dev_info,
886+
dlist_foreach_container(verbs_devs, struct verbs_dev_info,
890887
dev, entry) {
891888
FI_INFO(&vrb_prov, FI_LOG_FABRIC,
892889
"#%d %s - IPoIB addresses:\n", ++i, dev->name);
@@ -1186,7 +1183,7 @@ static int vrb_getifaddrs(struct dlist_entry *verbs_devs)
11861183
num_verbs_ifs++;
11871184
}
11881185

1189-
verbs_devs_print();
1186+
verbs_devs_print(verbs_devs);
11901187

11911188
freeifaddrs(ifaddr);
11921189
return num_verbs_ifs ? 0 : -FI_ENODATA;
@@ -1237,7 +1234,8 @@ static inline int vrb_cmp_domain_and_dev_name(const char *domain_name,
12371234
return strncmp(domain_name, dev_name, cmp_len);
12381235
}
12391236

1240-
static int vrb_get_srcaddr_devs(struct fi_info **info)
1237+
static int vrb_get_srcaddr_devs(struct dlist_entry *verbs_devs,
1238+
struct fi_info **info)
12411239
{
12421240
struct verbs_dev_info *dev;
12431241
struct fi_info *fi;
@@ -1246,7 +1244,7 @@ static int vrb_get_srcaddr_devs(struct fi_info **info)
12461244
for (fi = *info; fi; fi = fi->next) {
12471245
if (fi->ep_attr->type == FI_EP_DGRAM)
12481246
continue;
1249-
dlist_foreach_container(&verbs_devs, struct verbs_dev_info,
1247+
dlist_foreach_container(verbs_devs, struct verbs_dev_info,
12501248
dev, entry) {
12511249
if (!vrb_cmp_domain_and_dev_name(fi->domain_attr->name,
12521250
dev->name)) {
@@ -1299,7 +1297,8 @@ static int vrb_set_info_addrs(struct fi_info *info,
12991297
return FI_SUCCESS;
13001298
}
13011299

1302-
static int vrb_fill_addr(struct rdma_addrinfo *rai, struct fi_info **info,
1300+
static int vrb_fill_addr(struct dlist_entry *verbs_devs,
1301+
struct rdma_addrinfo *rai, struct fi_info **info,
13031302
struct rdma_cm_id *id)
13041303
{
13051304
struct sockaddr *local_addr;
@@ -1313,7 +1312,7 @@ static int vrb_fill_addr(struct rdma_addrinfo *rai, struct fi_info **info,
13131312
goto rai_to_fi;
13141313

13151314
if (!id->verbs)
1316-
return vrb_get_srcaddr_devs(info);
1315+
return vrb_get_srcaddr_devs(verbs_devs, info);
13171316

13181317
/* Handle the case when rdma_cm doesn't fill src address even
13191318
* though it fills the destination address (presence of id->verbs
@@ -1336,11 +1335,12 @@ static int vrb_fill_addr(struct rdma_addrinfo *rai, struct fi_info **info,
13361335
NULL, NULL);
13371336
}
13381337

1339-
static int vrb_device_has_ipoib_addr(const char *dev_name)
1338+
static int vrb_device_has_ipoib_addr(struct dlist_entry *verbs_devs,
1339+
const char *dev_name)
13401340
{
13411341
struct verbs_dev_info *dev;
13421342

1343-
dlist_foreach_container(&verbs_devs, struct verbs_dev_info, dev, entry) {
1343+
dlist_foreach_container(verbs_devs, struct verbs_dev_info, dev, entry) {
13441344
if (!strcmp(dev_name, dev->name))
13451345
return 1;
13461346
}
@@ -1349,38 +1349,39 @@ static int vrb_device_has_ipoib_addr(const char *dev_name)
13491349

13501350
#define VERBS_NUM_DOMAIN_TYPES 3
13511351

1352-
static int vrb_init_info(const struct fi_info **all_infos)
1352+
static int vrb_init_info(struct dlist_entry *verbs_devs,
1353+
const struct fi_info **all_infos)
13531354
{
13541355
struct ibv_context **ctx_list;
13551356
struct fi_info *fi = NULL, *tail = NULL;
13561357
const struct verbs_ep_domain *ep_type[VERBS_NUM_DOMAIN_TYPES];
13571358
int ret = 0, i, j, num_devices, dom_count = 0;
1358-
static bool initialized = false;
1359+
static bool first_init_done = false;
13591360

13601361
vrb_prof_func_start(__func__);
13611362

1362-
if (initialized)
1363-
goto done;
1364-
1365-
initialized = true;
1363+
fi_freeinfo((void *)vrb_util_prov.info);
13661364
*all_infos = NULL;
13671365

1368-
if (vrb_os_ini()) {
1369-
FI_WARN(&vrb_prov, FI_LOG_FABRIC,
1370-
"failed in OS specific device initialization\n");
1371-
ret = -FI_ENODATA;
1372-
goto done;
1373-
}
1366+
if (!first_init_done) {
1367+
if (vrb_os_ini()) {
1368+
FI_WARN(&vrb_prov, FI_LOG_FABRIC,
1369+
"failed in OS specific device initialization\n");
1370+
ret = -FI_ENODATA;
1371+
goto done;
1372+
}
13741373

1375-
vrb_prof_func_start("vrb_os_mem_support");
1376-
vrb_os_mem_support(&vrb_gl_data.peer_mem_support,
1377-
&vrb_gl_data.dmabuf_support);
1378-
vrb_prof_func_end("vrb_os_mem_support");
1374+
vrb_prof_func_start("vrb_os_mem_support");
1375+
vrb_os_mem_support(&vrb_gl_data.peer_mem_support,
1376+
&vrb_gl_data.dmabuf_support);
1377+
vrb_prof_func_end("vrb_os_mem_support");
13791378

1380-
if (vrb_read_params()) {
1381-
VRB_INFO(FI_LOG_FABRIC, "failed to read parameters\n");
1382-
ret = -FI_ENODATA;
1383-
goto done;
1379+
if (vrb_read_params()) {
1380+
VRB_INFO(FI_LOG_FABRIC, "failed to read parameters\n");
1381+
ret = -FI_ENODATA;
1382+
goto done;
1383+
}
1384+
first_init_done = true;
13841385
}
13851386

13861387
if (!vrb_have_device()) {
@@ -1398,13 +1399,14 @@ static int vrb_init_info(const struct fi_info **all_infos)
13981399
"XRC not built into provider, skip allocating "
13991400
"fi_info for XRC FI_EP_MSG endpoints\n");
14001401
}
1402+
14011403
vrb_prof_func_start("vrb_getifaddrs");
1402-
vrb_getifaddrs(&verbs_devs);
1404+
vrb_getifaddrs(verbs_devs);
14031405
vrb_prof_func_end("vrb_getifaddrs");
14041406
if (!vrb_gl_data.iface)
1405-
vrb_get_sib(&verbs_devs);
1407+
vrb_get_sib(verbs_devs);
14061408

1407-
if (dlist_empty(&verbs_devs))
1409+
if (dlist_empty(verbs_devs))
14081410
FI_WARN(&vrb_prov, FI_LOG_FABRIC,
14091411
"no valid IPoIB interfaces found, FI_EP_MSG endpoint "
14101412
"type would not be available\n");
@@ -1435,7 +1437,7 @@ static int vrb_init_info(const struct fi_info **all_infos)
14351437

14361438
for (j = 0; j < dom_count; j++) {
14371439
if (ep_type[j]->type == FI_EP_MSG &&
1438-
!vrb_device_has_ipoib_addr(ctx_list[i]->device->name)) {
1440+
!vrb_device_has_ipoib_addr(verbs_devs, ctx_list[i]->device->name)) {
14391441
FI_INFO(&vrb_prov, FI_LOG_FABRIC,
14401442
"skipping device: %s for FI_EP_MSG, "
14411443
"it may have a filtered IPoIB interface"
@@ -1785,7 +1787,8 @@ static int vrb_handle_ib_ud_addr(const char *node, const char *service,
17851787
return ret;
17861788
}
17871789

1788-
static int vrb_handle_sock_addr(const char *node, const char *service,
1790+
static int vrb_handle_sock_addr(struct dlist_entry *verbs_devs,
1791+
const char *node, const char *service,
17891792
uint64_t flags, const struct fi_info *hints,
17901793
struct fi_info **info)
17911794
{
@@ -1804,15 +1807,16 @@ static int vrb_handle_sock_addr(const char *node, const char *service,
18041807
goto out;
18051808
}
18061809

1807-
ret = vrb_fill_addr(rai, info, id);
1810+
ret = vrb_fill_addr(verbs_devs, rai, info, id);
18081811
out:
18091812
rdma_freeaddrinfo(rai);
18101813
if (rdma_destroy_id(id))
18111814
VRB_WARN_ERRNO(FI_LOG_FABRIC, "rdma_destroy_id");
18121815
return ret;
18131816
}
18141817

1815-
static int vrb_get_match_infos(uint32_t version, const char *node,
1818+
static int vrb_get_match_infos(struct dlist_entry *verbs_devs,
1819+
uint32_t version, const char *node,
18161820
const char *service, uint64_t flags,
18171821
const struct fi_info *hints,
18181822
const struct fi_info **raw_info,
@@ -1829,7 +1833,7 @@ static int vrb_get_match_infos(uint32_t version, const char *node,
18291833

18301834
if (!hints || !hints->ep_attr || hints->ep_attr->type == FI_EP_MSG ||
18311835
hints->ep_attr->type == FI_EP_UNSPEC) {
1832-
ret_sock_addr = vrb_handle_sock_addr(node, service, flags, hints, info);
1836+
ret_sock_addr = vrb_handle_sock_addr(verbs_devs, node, service, flags, hints, info);
18331837
if (ret_sock_addr) {
18341838
VRB_INFO(FI_LOG_FABRIC,
18351839
"handling of the socket address fails - %d\n",
@@ -1908,21 +1912,39 @@ static void vrb_filter_info_by_addr_format(struct fi_info **info, int addr_forma
19081912
}
19091913
}
19101914

1915+
static void vrb_devs_free(struct dlist_entry *verbs_devs)
1916+
{
1917+
struct verbs_dev_info *dev;
1918+
struct verbs_addr *addr;
1919+
1920+
while (!dlist_empty(verbs_devs)) {
1921+
dlist_pop_front(verbs_devs, struct verbs_dev_info, dev, entry);
1922+
while (!dlist_empty(&dev->addrs)) {
1923+
dlist_pop_front(&dev->addrs, struct verbs_addr, addr, entry);
1924+
rdma_freeaddrinfo(addr->rai);
1925+
free(addr);
1926+
}
1927+
free(dev->name);
1928+
free(dev);
1929+
}
1930+
}
1931+
19111932
int vrb_getinfo(uint32_t version, const char *node, const char *service,
19121933
uint64_t flags, const struct fi_info *hints,
19131934
struct fi_info **info)
19141935
{
1936+
DEFINE_LIST(verbs_devs);
19151937
int ret;
19161938

19171939
ofi_mutex_lock(&vrb_info_mutex);
19181940
vrb_prof_func_start(__func__);
1919-
ret = vrb_init_info(&vrb_util_prov.info);
1941+
ret = vrb_init_info(&verbs_devs, &vrb_util_prov.info);
19201942
if (ret) {
19211943
ofi_mutex_unlock(&vrb_info_mutex);
19221944
goto out;
19231945
}
19241946

1925-
ret = vrb_get_match_infos(version, node, service,
1947+
ret = vrb_get_match_infos(&verbs_devs, version, node, service,
19261948
flags, hints,
19271949
&vrb_util_prov.info, info);
19281950
ofi_mutex_unlock(&vrb_info_mutex);
@@ -1936,6 +1958,7 @@ int vrb_getinfo(uint32_t version, const char *node, const char *service,
19361958
if (hints)
19371959
vrb_filter_info_by_addr_format(info, hints->addr_format);
19381960
out:
1961+
vrb_devs_free(&verbs_devs);
19391962
vrb_prof_func_end(__func__);
19401963
if (!ret || ret == -FI_ENOMEM || ret == -FI_ENODEV)
19411964
return ret;

prov/verbs/src/verbs_init.c

-18
Original file line numberDiff line numberDiff line change
@@ -767,23 +767,6 @@ int vrb_read_params(void)
767767
return FI_SUCCESS;
768768
}
769769

770-
static void verbs_devs_free(void)
771-
{
772-
struct verbs_dev_info *dev;
773-
struct verbs_addr *addr;
774-
775-
while (!dlist_empty(&verbs_devs)) {
776-
dlist_pop_front(&verbs_devs, struct verbs_dev_info, dev, entry);
777-
while (!dlist_empty(&dev->addrs)) {
778-
dlist_pop_front(&dev->addrs, struct verbs_addr, addr, entry);
779-
rdma_freeaddrinfo(addr->rai);
780-
free(addr);
781-
}
782-
free(dev->name);
783-
free(dev);
784-
}
785-
}
786-
787770
static void vrb_fini(void)
788771
{
789772
#if HAVE_VERBS_DL
@@ -793,7 +776,6 @@ static void vrb_fini(void)
793776
#endif
794777
ofi_mutex_destroy(&vrb_info_mutex);
795778
fi_freeinfo((void *)vrb_util_prov.info);
796-
verbs_devs_free();
797779
vrb_os_fini();
798780
vrb_util_prov.info = NULL;
799781
}

prov/verbs/src/verbs_ofi.h

-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ typedef void vrb_profile_t;
173173
extern struct fi_provider vrb_prov;
174174
extern struct util_prov vrb_util_prov;
175175
extern ofi_mutex_t vrb_info_mutex;
176-
extern struct dlist_entry verbs_devs;
177176

178177
extern struct vrb_gl_data {
179178
int def_tx_size;

0 commit comments

Comments
 (0)