Skip to content

Commit 15e5f59

Browse files
Maximilian and claude committed
Add gpu flag to initRDMA for GPU P2P RDMA support
Adds optional gpu and gpu_dev_id parameters to initRDMA(); when gpu=true, the RDMA buffer is allocated from GPU memory instead of hugepages, enabling direct FPGA-to-GPU data delivery without a CPU-staged copy. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0b49d62 commit 15e5f59

2 files changed

Lines changed: 9 additions & 3 deletions

File tree

sw/include/coyote/cThread.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,8 +344,11 @@ class cThread {
344344
* @param buffer_size Size of the buffer to be allocated for RDMA operations
345345
* @param port Port number to be used for the out-of-band connection
346346
* @param server_address Optional server address to connect to; if not provided, this cThread acts as the server
347+
* @param gpu If true, allocates GPU memory as the RDMA buffer instead of hugepages (enables GPU P2P RDMA)
348+
* @param gpu_dev_id GPU device ID to use when gpu=true
347349
*/
348-
void* initRDMA(uint32_t buffer_size, uint16_t port, const char* server_address = nullptr);
350+
void* initRDMA(uint32_t buffer_size, uint16_t port, const char* server_address = nullptr,
351+
bool gpu = false, uint32_t gpu_dev_id = 0);
349352

350353
/**
351354
* @brief Opposite of initRDMA; releases the out-of-band connection which was used to exchange QP

sw/src/cThread.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,7 +1026,8 @@ void cThread::connSync(bool client) {
10261026
}
10271027
}
10281028

1029-
void* cThread::initRDMA(uint32_t buffer_size, uint16_t port, const char* server_address) {
1029+
void* cThread::initRDMA(uint32_t buffer_size, uint16_t port, const char* server_address,
1030+
bool gpu, uint32_t gpu_dev_id) {
10301031
// Server address provided, so this node is the client
10311032
if (server_address) {
10321033
DBG3("cThread: initRDMA called from client side with server address " << server_address);
@@ -1067,7 +1068,9 @@ void* cThread::initRDMA(uint32_t buffer_size, uint16_t port, const char* server_
10671068
}
10681069

10691070
// Allocate memory for RDMA operations
1070-
void *mem = getMem({CoyoteAllocType::HPF, buffer_size, true});
1071+
void *mem = gpu
1072+
? getMem({CoyoteAllocType::GPU, buffer_size, true, gpu_dev_id})
1073+
: getMem({CoyoteAllocType::HPF, buffer_size, true});
10711074

10721075
// Send the memory address to the server
10731076
if (write(connfd, &(qpair->local), sizeof(ibvQ)) != sizeof(ibvQ)) {

0 commit comments

Comments
 (0)