Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ fabtests.spec
fabtests/config
fabtests/ubertest/fabtest
fabtests/ubertest/fi_ubertest
fabtests/common/check_hmem
fabtests/regression/sighandler_test
fabtests/benchmarks/fi_*
fabtests/component/sock_test
fabtests/component/dmabuf-rdma/xe_*
fabtests/component/dmabuf-rdma/fi_*
fabtests/component/dmabuf-rdma/cuda_*
fabtests/functional/fi_*
fabtests/unit/fi_*
fabtests/multinode/fi_*
Expand Down
38 changes: 19 additions & 19 deletions fabtests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ bin_PROGRAMS = \
unit/fi_dom_test \
unit/fi_getinfo_test \
unit/fi_setopt_test \
unit/fi_check_hmem \
ubertest/fi_ubertest \
multinode/fi_multinode \
multinode/fi_multinode_coll \
component/sock_test \
regression/sighandler_test \
common/check_hmem \
common/check_cuda_dmabuf
component/dmabuf-rdma/cuda_check_dmabuf \
regression/sighandler_test

if HAVE_ZE_DEVEL
if HAVE_VERBS_DEVEL
Expand Down Expand Up @@ -487,6 +487,14 @@ unit_fi_setopt_test_SOURCES = \
$(unit_srcs)
unit_fi_setopt_test_LDADD = libfabtests.la

unit_fi_check_hmem_SOURCES = \
unit/check_hmem.c

unit_fi_check_hmem_LDADD = libfabtests.la

unit_fi_check_hmem_CFLAGS = \
$(AM_CFLAGS)

ubertest_fi_ubertest_SOURCES = \
ubertest/fabtest.h \
ubertest/uber.c \
Expand Down Expand Up @@ -535,6 +543,14 @@ component_sock_test_LDADD = libfabtests.la
component_sock_test_CFLAGS = \
$(AM_CFLAGS)

component_dmabuf_rdma_cuda_check_dmabuf_SOURCES = \
component/dmabuf-rdma/check_cuda_dmabuf.c

component_dmabuf_rdma_cuda_check_dmabuf_LDADD = libfabtests.la

component_dmabuf_rdma_cuda_check_dmabuf_CFLAGS = \
$(AM_CFLAGS)

if HAVE_ZE_DEVEL
if HAVE_VERBS_DEVEL
component_dmabuf_rdma_xe_rdmabw_SOURCES = \
Expand Down Expand Up @@ -616,22 +632,6 @@ regression_sighandler_test_LDADD = libfabtests.la
regression_sighandler_test_CFLAGS = \
$(AM_CFLAGS)

common_check_hmem_SOURCES = \
common/check_hmem.c

common_check_hmem_LDADD = libfabtests.la

common_checK_hmem_CFLAGS = \
$(AM_CFLAGS)

common_check_cuda_dmabuf_SOURCES = \
common/check_cuda_dmabuf.c

common_check_cuda_dmabuf_LDADD = libfabtests.la

common_check_cuda_dmabuf_CFLAGS = \
$(AM_CFLAGS)

real_man_pages = \
man/man7/fabtests.7

Expand Down
121 changes: 60 additions & 61 deletions fabtests/common/hmem_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,61 +154,61 @@ static int ft_cuda_pointer_set_attribute(void *buf)
static int ft_cuda_detect_memory_support(void)
{
#if HAVE_CUDA_DMABUF
CUresult cuda_ret;
CUdevice dev;
int cc_major = 0, cc_minor = 0;
int dma_buf_attr = 0;
int gdr_attr = 0;
cuda_memory_support = FT_CUDA_NOT_INITIALIZED;
CUresult cuda_ret;
CUdevice dev;
int cc_major = 0, cc_minor = 0;
int dma_buf_attr = 0;
int gdr_attr = 0;
cuda_memory_support = FT_CUDA_NOT_INITIALIZED;

cuda_ret = cuda_ops.cuDeviceGet(&dev, 0);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGet");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

cuda_ret = cuda_ops.cuDeviceGetAttribute(&cc_major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGetAttribute(CC_MAJOR)");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

cuda_ret = cuda_ops.cuDeviceGet(&dev, 0);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGet");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

cuda_ret = cuda_ops.cuDeviceGetAttribute(&cc_major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGetAttribute(CC_MAJOR)");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

cuda_ret = cuda_ops.cuDeviceGetAttribute(&cc_minor,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGetAttribute(CC_MINOR)");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

cuda_ret = cuda_ops.cuDeviceGetAttribute(&dma_buf_attr,
CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED, dev);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGetAttribute(DMA_BUF_SUPPORTED)");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

cuda_ret = cuda_ops.cuDeviceGetAttribute(&gdr_attr,
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, dev);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGetAttribute(GPU_DIRECT_RDMA_SUPPORTED)");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

dmabuf_supported = (dma_buf_attr == 1);

// Blackwell or newer: nv-p2p deprecated
if (cc_major >= 10)
gdr_supported = false;
else
gdr_supported = (gdr_attr == 1);

// Final truth table
cuda_ret = cuda_ops.cuDeviceGetAttribute(&cc_minor,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGetAttribute(CC_MINOR)");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

cuda_ret = cuda_ops.cuDeviceGetAttribute(&dma_buf_attr,
CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED, dev);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGetAttribute(DMA_BUF_SUPPORTED)");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

cuda_ret = cuda_ops.cuDeviceGetAttribute(&gdr_attr,
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, dev);
if (cuda_ret != CUDA_SUCCESS) {
ft_cuda_driver_api_print_error(cuda_ret, "cuDeviceGetAttribute(GPU_DIRECT_RDMA_SUPPORTED)");
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
return -FI_EIO;
}

dmabuf_supported = (dma_buf_attr == 1);

// Blackwell or newer: nv-p2p deprecated
if (cc_major >= 10)
gdr_supported = false;
else
gdr_supported = (gdr_attr == 1);

// Final truth table
if (!gdr_supported && !dmabuf_supported)
cuda_memory_support = FT_CUDA_NOT_SUPPORTED;
else if (gdr_supported && dmabuf_supported)
Expand All @@ -218,15 +218,14 @@ static int ft_cuda_detect_memory_support(void)
else
cuda_memory_support = FT_CUDA_GDR_ONLY;

return FI_SUCCESS;
return FI_SUCCESS;

#else
cuda_memory_support = FT_CUDA_NOT_INITIALIZED;
return FI_SUCCESS;
cuda_memory_support = FT_CUDA_NOT_INITIALIZED;
return FI_SUCCESS;
#endif
}


int ft_cuda_init(void)
{
cudaError_t cuda_ret;
Expand Down Expand Up @@ -370,8 +369,8 @@ int ft_cuda_init(void)
goto err_dlclose_cuda;
}

ret = ft_cuda_detect_memory_support();
if (ret != FI_SUCCESS) {
ret = ft_cuda_detect_memory_support();
if (ret != FI_SUCCESS) {
goto err_dlclose_cuda;
}

Expand Down Expand Up @@ -595,7 +594,7 @@ int ft_cuda_put_dmabuf_fd(int fd)

enum ft_cuda_memory_support ft_cuda_memory_support(void)
{
return cuda_memory_support;
return cuda_memory_support;
}

#else
Expand Down
2 changes: 1 addition & 1 deletion fabtests/pytest/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def has_rocr(ip):
def has_hmem_support(cmdline_args, ip):
binpath = cmdline_args.binpath or ""
cmd = "timeout " + str(cmdline_args.timeout) \
+ " " + os.path.join(binpath, "check_hmem") \
+ " " + os.path.join(binpath, "fi_check_hmem") \
+ " " + "-p " + cmdline_args.provider
if cmdline_args.environments:
cmd = cmdline_args.environments + " " + cmd
Expand Down
2 changes: 1 addition & 1 deletion fabtests/pytest/efa/efa_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def get_cuda_memory_support(cmdline_args, ip):
"""
binpath = cmdline_args.binpath or ""
cmd = "timeout " + str(cmdline_args.timeout) \
+ " " + os.path.join(binpath, "check_cuda_dmabuf") \
+ " " + os.path.join(binpath, "cuda_check_dmabuf") \
+ " -p " + cmdline_args.provider
if cmdline_args.environments:
cmd = cmdline_args.environments + " " + cmd
Expand Down
File renamed without changes.