#include <vector>
#include <map>
+ #include <list>
#include <csignal>
#include <setjmp.h>

#define DEFAULT_PORT 14833
#define MAX_CLIENTS 10

- std::map<conn_t *, std::map<void *, size_t>> managed_ptrs;
+ struct ManagedPtr {
+   void *src;
+   void *dst;
+   size_t size;
+   cudaMemcpyKind kind;
+
+   ManagedPtr() : src(nullptr), dst(nullptr), size(0), kind(cudaMemcpyHostToDevice) {}
+
+   ManagedPtr(void *src, void *dst, size_t s, cudaMemcpyKind k)
+       : src(src), dst(dst), size(s), kind(k) {}
+ };
+
+ std::map<conn_t *, ManagedPtr> managed_ptrs;
std::map<conn_t *, void *> host_funcs;

static jmp_buf catch_segfault;
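
The jmp_buf above points at a recover-and-continue flow: a SIGSEGV handler can repair state and jump back to a checkpoint instead of letting the process die. A self-contained sketch of that general pattern (the names are illustrative, not this file's exact flow):

    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>

    static sigjmp_buf recovery_point; // plays the role of catch_segfault

    static void on_segv(int) {
      siglongjmp(recovery_point, 1); // back to the sigsetjmp site, mask restored
    }

    int main() {
      signal(SIGSEGV, on_segv);
      if (sigsetjmp(recovery_point, 1) == 0) {
        *(volatile int *)0 = 42; // deliberately fault
      } else {
        puts("recovered from SIGSEGV");
      }
      return 0;
    }
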
@@ -55,43 +69,83 @@ static void segfault(int sig, siginfo_t *info, void *unused) {
  std::cout << "segfault!!" << faulting_address << std::endl;

-   for (const auto &conn_entry : managed_ptrs) {
-     for (const auto &mem_entry : conn_entry.second) {
-       size_t allocated_size = mem_entry.second;
+   for (const auto &conn_entry : managed_ptrs) {
+     const ManagedPtr &mem_entry = conn_entry.second;
+
+     // Resolve which end of the transfer lives in host memory; this stays
+     // null for device-to-device entries, which have no host-side pointer.
+     void *allocated_ptr = nullptr;
+     size_t allocated_size = mem_entry.size;
+
+     if (mem_entry.kind == cudaMemcpyDeviceToHost) {
+       allocated_ptr = mem_entry.dst;
+     } else if (mem_entry.kind == cudaMemcpyHostToDevice) {
+       allocated_ptr = mem_entry.src;
+     }
+
+     if (allocated_ptr == nullptr)
+       continue;

-       // Check if faulting address is inside this allocated region
-       if ((uintptr_t)mem_entry.first <= (uintptr_t)faulting_address &&
-           (uintptr_t)faulting_address <
-               ((uintptr_t)mem_entry.first + allocated_size)) {
-         found = 1;
-         size = allocated_size;
+     // Check if the faulting address is within the allocated memory
+     if ((uintptr_t)allocated_ptr <= (uintptr_t)faulting_address &&
+         (uintptr_t)faulting_address < (uintptr_t)allocated_ptr + allocated_size) {
+       found = 1;
+       size = allocated_size;

-         // Align memory allocation to the closest possible address
-         uintptr_t aligned = (uintptr_t)faulting_address & ~(allocated_size - 1);
+       // Round down to the system page size; mmap addresses must be page-aligned
+       size_t page_size = sysconf(_SC_PAGE_SIZE);
+       uintptr_t aligned_addr = (uintptr_t)faulting_address & ~(page_size - 1);

-         // Allocate memory at the faulting address
-         void *allocated =
-             mmap((void *)aligned,
-                  allocated_size + (uintptr_t)faulting_address - aligned,
-                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+       // Map anonymous memory at the faulting address; without MAP_FIXED the
+       // requested address is only a hint to the kernel.
+       void *allocated = mmap((void *)aligned_addr, allocated_size,
+                              PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

-         if (allocated == MAP_FAILED) {
+       if (allocated == MAP_FAILED) {
          perror("Failed to allocate memory at faulting address");
          _exit(1);
-         }
+       }

-         printf("The address of x is: %p\n", (void *)allocated);
+       // write() is async-signal-safe, unlike printf()
+       char msg[128];
+       snprintf(msg, sizeof(msg), "Allocated memory at: %p\n", allocated);
+       write(STDERR_FILENO, msg, strlen(msg));

-         // if (rpc_write(conn_entry.first, (void*)&allocated, sizeof(void*)) <
-         // 0) {
-         //   std::cout << "failed to write memory: " << &faulting_address <<
-         //   std::endl;
-         // }
+       // Validate the connection before issuing the RPC
+       if (!conn_entry.first) {
+         std::cerr << "Error: connection is NULL in segfault handler" << std::endl;
+         return;
+       }

-         // printf("wrote data...\n");
+       printf("sending memory %p\n", allocated_ptr);
+
+       if (rpc_write_start_request(conn_entry.first, 3) < 0 ||
+           rpc_write(conn_entry.first, &mem_entry.kind, sizeof(enum cudaMemcpyKind)) < 0)
+         return;
+
+       // We need to swap transfer directions in this case
+       switch (mem_entry.kind) {
+       case cudaMemcpyDeviceToHost:
+         if (rpc_write(conn_entry.first, &mem_entry.src, sizeof(void *)) < 0 ||
+             rpc_write(conn_entry.first, &size, sizeof(size_t)) < 0 ||
+             rpc_wait_for_response(conn_entry.first) < 0 ||
+             rpc_read(conn_entry.first, mem_entry.dst, size) < 0)
+           return;
+         break; // required: without it, control fell through into the next case
+       case cudaMemcpyHostToDevice:
+         if (rpc_write(conn_entry.first, &mem_entry.dst, sizeof(void *)) < 0 ||
+             rpc_write(conn_entry.first, &size, sizeof(size_t)) < 0 ||
+             rpc_write(conn_entry.first, allocated, size) < 0 ||
+             rpc_wait_for_response(conn_entry.first) < 0)
+           return;
          break;
+       case cudaMemcpyDeviceToDevice:
+         if (rpc_write(conn_entry.first, &mem_entry.dst, sizeof(void *)) < 0 ||
+             rpc_write(conn_entry.first, &mem_entry.src, sizeof(void *)) < 0 ||
+             rpc_write(conn_entry.first, &size, sizeof(size_t)) < 0 ||
+             rpc_wait_for_response(conn_entry.first) < 0)
+           break;
        }
+
+       cudaError_t return_value;
+
+       if (rpc_read(conn_entry.first, &return_value, sizeof(cudaError_t)) < 0 ||
+           rpc_read_end(conn_entry.first) < 0)
+         return;
+
+       return;
      }
    }
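
A note on the alignment fix in this hunk: mmap needs a page-aligned address, and masking with ~(page_size - 1) rounds the faulting address down to the start of its page. The old code masked with the allocation size instead, which is only a valid alignment mask when that size happens to be a power of two. A small standalone illustration (the addresses are made up):

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const uintptr_t page_size = 4096;       // typical sysconf(_SC_PAGE_SIZE) result
      const uintptr_t fault = 0x7f0012345abc; // hypothetical faulting address
      const uintptr_t aligned = fault & ~(page_size - 1);
      // prints: 0x7f0012345abc -> 0x7f0012345000
      std::printf("0x%" PRIxPTR " -> 0x%" PRIxPTR "\n", fault, aligned);
      return 0;
    }
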
@@ -169,11 +223,10 @@ void append_host_func_ptr(const void *conn, void *ptr) {
  host_funcs[(conn_t *)conn] = ptr;
}

- void append_managed_ptr(const void *conn, cudaPitchedPtr ptr) {
+ void append_managed_ptr(const void *conn, void *srcPtr, void *dstPtr, size_t size, cudaMemcpyKind kind) {
  conn_t *connfd = (conn_t *)conn;

-   // Ensure the inner map exists before inserting the cudaPitchedPtr
-   managed_ptrs[connfd][ptr.ptr] = ptr.pitch;
+   managed_ptrs[connfd] = ManagedPtr(srcPtr, dstPtr, size, kind);
}
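
Note that managed_ptrs now stores a single ManagedPtr per connection, so each call replaces the previous pending transfer rather than accumulating entries the way the old inner map did. A hypothetical call site recording a host-to-device copy (conn, host_src, device_dst, and nbytes are placeholder names, not from this diff):

    // Hypothetical: record the pending transfer so the segfault handler can
    // later service a fault inside host_src.
    append_managed_ptr(conn, host_src, device_dst, nbytes, cudaMemcpyHostToDevice);
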
static void set_segfault_handlers() {
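
The visible diff ends at set_segfault_handlers(). For reference, a handler with the segfault(int, siginfo_t *, void *) signature shown in the hunk header is installed via sigaction with SA_SIGINFO, which is what makes the faulting address available as info->si_addr. A minimal sketch of such an installer, assuming only that signature:

    #include <signal.h>
    #include <string.h>

    static void install_segfault_handler() { // sketch of set_segfault_handlers()
      struct sigaction sa;
      memset(&sa, 0, sizeof(sa));
      sa.sa_flags = SA_SIGINFO;    // deliver a siginfo_t so si_addr is populated
      sa.sa_sigaction = segfault;  // the three-argument handler from this file
      sigemptyset(&sa.sa_mask);    // don't block additional signals while handling
      sigaction(SIGSEGV, &sa, NULL);
    }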