Skip to content

Commit 06b01ed

Browse files
committed
hmem/fabtests: Use neuron nrt_get_dmabuf_fd_v2 if available
nrt_get_dmabuf_fd() does not return an offset value through its API. As a result, if a non-aligned VA is passed to it, the call fails: the dmabuf fd has to refer to an aligned address region, and there is no way to tell the caller the offset of their non-aligned VA within that region. nrt_get_dmabuf_fd_v2() fixes this by adding an offset output parameter. The new code first tries to resolve nrt_get_dmabuf_fd_v2() at runtime; if that symbol is not available, it falls back to nrt_get_dmabuf_fd() and sets the offset to 0, since an offset of 0 is the only case in which the older call succeeds. Signed-off-by: Charles Stoll <stollcha@amazon.com>
1 parent 102872c commit 06b01ed

File tree

2 files changed

+63
-32
lines changed

2 files changed

+63
-32
lines changed

fabtests/common/hmem_neuron.c

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,41 @@ struct neuron_ops {
5353
void *(*nrt_tensor_get_va)(const nrt_tensor_t *tensor);
5454
NRT_STATUS (*nrt_tensor_read)(const nrt_tensor_t *tensor, void *buf, size_t offset, size_t size);
5555
NRT_STATUS (*nrt_tensor_write)(nrt_tensor_t *tensor, const void *buf, size_t offset, size_t size);
56-
NRT_STATUS (*nrt_get_dmabuf_fd)(uint64_t va, uint64_t size, int* fd);
56+
NRT_STATUS (*nrt_get_dmabuf_fd)(uint64_t va, uint64_t size, int* fd, uint64_t *offset);
5757
NRT_STATUS (*nrt_init)(nrt_framework_type_t framework, const char *fw_version, const char *fal_version);
5858
};
5959

60-
static void *neuron_handle = NULL;
60+
struct neuron_ops_internal {
61+
NRT_STATUS (*nrt_get_dmabuf_fd)(uint64_t va, uint64_t size, int* fd);
62+
} neuron_ops_internal;
63+
64+
static void *neuron_handle;
6165
static struct neuron_ops neuron_ops;
6266

67+
NRT_STATUS nrt_get_dmabuf_fd_proxy(uint64_t va, uint64_t size, int* fd, uint64_t *offset) {
68+
*offset = 0;
69+
return neuron_ops_internal.nrt_get_dmabuf_fd(va, size, fd);
70+
}
71+
72+
static void ft_setup_nrt_get_dmabuf_fd(void)
73+
{
74+
neuron_ops.nrt_get_dmabuf_fd = dlsym(neuron_handle, "nrt_get_dmabuf_fd_v2");
75+
if (neuron_ops.nrt_get_dmabuf_fd)
76+
return;
77+
78+
FI_INFO(&core_prov, FI_LOG_CORE,
79+
"Failed to find nrt_get_dmabuf_fd_v2, falling back to nrt_get_dmabuf_fd");
80+
81+
neuron_ops_internal.nrt_get_dmabuf_fd = dlsym(neuron_handle, "nrt_get_dmabuf_fd");
82+
if (!neuron_ops_internal.nrt_get_dmabuf_fd) {
83+
FI_INFO(&core_prov, FI_LOG_CORE,
84+
"Failed to find nrt_get_dmabuf_fd, "
85+
"dmabuf feature will not be used for Neuron devices\n");
86+
return;
87+
}
88+
neuron_ops.nrt_get_dmabuf_fd = &nrt_get_dmabuf_fd_proxy;
89+
}
90+
6391
/*
6492
* List to lookup the handle based on the pointer. Not optimal, but probably
6593
* fine for fabtests and this is better than changing the alloc/free functions
@@ -123,11 +151,7 @@ int ft_neuron_init(void)
123151
goto err;
124152
}
125153

126-
neuron_ops.nrt_get_dmabuf_fd = dlsym(neuron_handle, "nrt_get_dmabuf_fd");
127-
if (!neuron_ops.nrt_get_dmabuf_fd) {
128-
FT_INFO("Failed to find nrt_get_dmabuf_fd, "
129-
"dmabuf feature will not be used for Neuron devices\n");
130-
}
154+
ft_setup_nrt_get_dmabuf_fd();
131155

132156
dlist_init(&neuron_alloc_list);
133157

@@ -345,18 +369,9 @@ int ft_neuron_get_dmabuf_fd(void *addr, size_t size, int *fd,
345369
uint64_t *offset)
346370
{
347371
int ret = NRT_SUCCESS;
348-
struct neuron_allocation *region;
349-
350-
/*
351-
* The assumption is that nrt_get_dmabuf_fd() would fail for
352-
* any addr that is not the starting address of the dma-buf
353-
* object. Otherwise we need a low level op to get the base
354-
* address of the dma-buf object.
355-
*/
356-
*offset = ft_neuron_find_region(addr, &region);
357372

358373
if (neuron_ops.nrt_get_dmabuf_fd) {
359-
ret = neuron_ops.nrt_get_dmabuf_fd((uintptr_t)region->ptr, size, fd);
374+
ret = neuron_ops.nrt_get_dmabuf_fd((uint64_t)addr, size, fd, offset);
360375
if (ret != NRT_SUCCESS) {
361376
FT_WARN("failed to get dmabuf fd\n");
362377
return -FI_EIO;

src/hmem_neuron.c

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,41 @@ struct neuron_ops {
5151
void (*nrt_tensor_free)(nrt_tensor_t **tensor);
5252
void *(*nrt_tensor_get_va)(const nrt_tensor_t *tensor);
5353
NRT_STATUS (*nrt_memcpy_to_device)(void *dest, const void *src, size_t size);
54-
NRT_STATUS (*nrt_get_dmabuf_fd)(uint64_t va, uint64_t size, int* fd);
54+
NRT_STATUS (*nrt_get_dmabuf_fd)(uint64_t va, uint64_t size, int* fd, uint64_t *offset);
5555
NRT_STATUS (*nrt_get_total_nc_count)(uint32_t *nc_count);
5656
};
5757

58+
struct neuron_ops_internal {
59+
NRT_STATUS (*nrt_get_dmabuf_fd)(uint64_t va, uint64_t size, int* fd);
60+
} neuron_ops_internal;
61+
5862
static void *neuron_handle;
5963
static struct neuron_ops neuron_ops;
6064

65+
NRT_STATUS nrt_get_dmabuf_fd_proxy(uint64_t va, uint64_t size, int* fd, uint64_t *offset) {
66+
*offset = 0;
67+
return neuron_ops_internal.nrt_get_dmabuf_fd(va, size, fd);
68+
}
69+
70+
static void setup_nrt_get_dmabuf_fd(void)
71+
{
72+
neuron_ops.nrt_get_dmabuf_fd = dlsym(neuron_handle, "nrt_get_dmabuf_fd_v2");
73+
if (neuron_ops.nrt_get_dmabuf_fd)
74+
return;
75+
76+
FI_INFO(&core_prov, FI_LOG_CORE,
77+
"Failed to find nrt_get_dmabuf_fd_v2, falling back to nrt_get_dmabuf_fd");
78+
79+
neuron_ops_internal.nrt_get_dmabuf_fd = dlsym(neuron_handle, "nrt_get_dmabuf_fd");
80+
if (!neuron_ops_internal.nrt_get_dmabuf_fd) {
81+
FI_INFO(&core_prov, FI_LOG_CORE,
82+
"Failed to find nrt_get_dmabuf_fd, "
83+
"dmabuf feature will not be used for Neuron devices\n");
84+
return;
85+
}
86+
neuron_ops.nrt_get_dmabuf_fd = &nrt_get_dmabuf_fd_proxy;
87+
}
88+
6189
static int neuron_dl_init(void)
6290
{
6391
neuron_handle = dlopen("libnrt.so.1", RTLD_NOW);
@@ -91,12 +119,7 @@ static int neuron_dl_init(void)
91119
goto err;
92120
}
93121

94-
neuron_ops.nrt_get_dmabuf_fd = dlsym(neuron_handle, "nrt_get_dmabuf_fd");
95-
if (!neuron_ops.nrt_get_dmabuf_fd) {
96-
FI_INFO(&core_prov, FI_LOG_CORE,
97-
"Failed to find nrt_get_dmabuf_fd, "
98-
"dmabuf feature will not be used for Neuron devices\n");
99-
}
122+
setup_nrt_get_dmabuf_fd();
100123

101124
neuron_ops.nrt_get_total_nc_count = dlsym(neuron_handle, "nrt_get_total_nc_count");
102125
if (!neuron_ops.nrt_get_total_nc_count) {
@@ -228,16 +251,9 @@ int neuron_get_dmabuf_fd(const void *addr, uint64_t size, int *fd,
228251
return -FI_EOPNOTSUPP;
229252
}
230253

231-
ret = neuron_ops.nrt_get_dmabuf_fd((uintptr_t)addr, size, fd);
254+
ret = neuron_ops.nrt_get_dmabuf_fd((uintptr_t)addr, size, fd, offset);
232255

233256
if (ret == NRT_SUCCESS) {
234-
/*
235-
* The assumption is that nrt_get_dmabuf_fd() would fail for
236-
* any addr that is not the starting address of the dma-buf
237-
* object. Otherwise we need a low level op to get the base
238-
* address of the dma-buf object.
239-
*/
240-
*offset = 0;
241257
return FI_SUCCESS;
242258
} else if (ret == NRT_RESOURCE) {
243259
/* real error from Neuron */

0 commit comments

Comments
 (0)