@@ -20,19 +20,20 @@ allocated_list *device_allocasync;
2020#define __CHUNK_SIZE__ CHUNK_SIZE
2121
2222extern size_t initial_offset ;
23- extern CUresult cuMemoryAllocate (CUdeviceptr * dptr , size_t bytesize , size_t * bytesallocated ,void * data );
23+ extern CUresult
24+ cuMemoryAllocate (CUdeviceptr * dptr , size_t bytesize , void * data );
2425extern CUresult cuMemoryFree (CUdeviceptr dptr );
2526
2627pthread_once_t allocator_allocate_flag = PTHREAD_ONCE_INIT ;
2728pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER ;
2829
/*
 * Round `size` up to the nearest multiple of `unit`.
 *
 * Works for any non-zero `unit`, not only powers of two: the previous
 * bit-mask test (`size & (unit - 1)`) misclassified values when `unit`
 * was not a power of two (e.g. size=4, unit=3 was returned unchanged).
 * For power-of-two units the result is identical to the old behavior.
 *
 * size: value to round.
 * unit: alignment granularity; when 0 there is nothing to align to and
 *       `size` is returned unchanged (avoids a division by zero).
 *
 * Returns the smallest multiple of `unit` that is >= `size`. As before,
 * the result wraps around if it does not fit in size_t.
 */
size_t round_up(size_t size, size_t unit) {
    if (unit == 0)
        return size;
    size_t remainder = size % unit;
    if (remainder != 0)
        return size + (unit - remainder);
    return size;
}
3435
35- int oom_check (const int dev ,size_t addon ) {
36+ int oom_check (const int dev , size_t addon ) {
3637 int count1 = 0 ;
3738 CUDA_OVERRIDE_CALL (cuda_library_entry ,cuDeviceGetCount ,& count1 );
3839 CUdevice d ;
@@ -59,7 +60,7 @@ int oom_check(const int dev,size_t addon) {
5960 return 0 ;
6061}
6162
62- CUresult view_vgpu_allocator (){
63+ CUresult view_vgpu_allocator () {
6364 allocated_list_entry * al ;
6465 size_t total ;
6566 total = 0 ;
@@ -74,7 +75,7 @@ CUresult view_vgpu_allocator(){
7475 return 0 ;
7576}
7677
77- CUresult get_listsize (allocated_list * al ,size_t * size ){
78+ CUresult get_listsize (allocated_list * al , size_t * size ) {
7879 if (al -> length == 0 ){
7980 * size = 0 ;
8081 return CUDA_SUCCESS ;
@@ -88,7 +89,7 @@ CUresult get_listsize(allocated_list *al,size_t *size){
8889 return CUDA_SUCCESS ;
8990}
9091
91- void allocator_init (){
92+ void allocator_init () {
9293 LOG_DEBUG ("Allocator_init\n" );
9394
9495 device_overallocated = malloc (sizeof (allocated_list ));
@@ -99,7 +100,7 @@ void allocator_init(){
99100 pthread_mutex_init (& mutex ,NULL );
100101}
101102
102- int add_chunk (CUdeviceptr * address ,size_t size ){
103+ int add_chunk (CUdeviceptr * address , size_t size ) {
103104 size_t addr = 0 ;
104105 size_t allocsize ;
105106 CUresult res = CUDA_SUCCESS ;
@@ -113,9 +114,8 @@ int add_chunk(CUdeviceptr *address,size_t size){
113114 if (size <= IPCSIZE )
114115 res = CUDA_OVERRIDE_CALL (cuda_library_entry ,cuMemAlloc_v2 ,& e -> entry -> address ,size );
115116 else {
116- //size = round_up(size,ALIGN);
117117 e -> entry -> length = size ;
118- res = cuMemoryAllocate (& e -> entry -> address ,size ,& e -> entry -> length , e -> entry -> allocHandle );
118+ res = cuMemoryAllocate (& e -> entry -> address , size , e -> entry -> allocHandle );
119119 }
120120 if (res != CUDA_SUCCESS ){
121121 LOG_ERROR ("cuMemoryAllocate failed res=%d" ,res );
@@ -126,11 +126,11 @@ int add_chunk(CUdeviceptr *address,size_t size){
126126 * address = e -> entry -> address ;
127127 allocsize = size ;
128128 cuCtxGetDevice (& dev );
129- add_gpu_device_memory_usage (getpid (),dev ,allocsize ,2 );
129+ add_gpu_device_memory_usage (getpid (), dev , allocsize , 2 );
130130 return 0 ;
131131}
132132
133- int add_chunk_only (CUdeviceptr address ,size_t size ){
133+ int add_chunk_only (CUdeviceptr address , size_t size ) {
134134 pthread_mutex_lock (& mutex );
135135 size_t addr = 0 ;
136136 size_t allocsize ;
@@ -147,7 +147,7 @@ int add_chunk_only(CUdeviceptr address,size_t size){
147147 e -> entry -> address = address ;
148148 allocsize = size ;
149149 cuCtxGetDevice (& dev );
150- add_gpu_device_memory_usage (getpid (),dev ,allocsize ,2 );
150+ add_gpu_device_memory_usage (getpid (), dev , allocsize , 2 );
151151 pthread_mutex_unlock (& mutex );
152152 return 0 ;
153153}
@@ -162,50 +162,70 @@ int check_memory_type(CUdeviceptr address) {
162162 return CU_MEMORYTYPE_HOST ;
163163}
164164
165- int remove_chunk (allocated_list * a_list , CUdeviceptr dptr ){
165+ int remove_chunk (allocated_list * a_list , CUdeviceptr dptr ) {
166166 size_t t_size ;
167167 if (a_list -> length == 0 ) {
168168 return -1 ;
169169 }
170170 allocated_list_entry * val ;
171171 for (val = a_list -> head ;val != NULL ;val = val -> next ){
172- if (val -> entry -> address == dptr ){
172+ if (val -> entry -> address == dptr ) {
173173 t_size = val -> entry -> length ;
174174 cuMemoryFree (dptr );
175175 LIST_REMOVE (a_list ,val );
176-
177176 CUdevice dev ;
178177 cuCtxGetDevice (& dev );
179- rm_gpu_device_memory_usage (getpid (),dev ,t_size ,2 );
178+ rm_gpu_device_memory_usage (getpid (), dev , t_size , 2 );
180179 return 0 ;
181180 }
182181 }
183182 return -1 ;
184183}
185184
186- int allocate_raw (CUdeviceptr * dptr , size_t size ){
185+ int remove_chunk_only (CUdeviceptr dptr ) {
186+ allocated_list * a_list = device_overallocated ;
187+ size_t t_size ;
188+ if (a_list -> length == 0 ) {
189+ return -1 ;
190+ }
191+ allocated_list_entry * val ;
192+ for (val = a_list -> head ; val != NULL ; val = val -> next ) {
193+ if (val -> entry -> address == dptr ) {
194+ t_size = val -> entry -> length ;
195+ LIST_REMOVE (a_list , val );
196+ CUdevice dev ;
197+ cuCtxGetDevice (& dev );
198+ rm_gpu_device_memory_usage (getpid (), dev , t_size , 2 );
199+ return 0 ;
200+ }
201+ }
202+ return -1 ;
203+ }
204+
205+ int allocate_raw (CUdeviceptr * dptr , size_t size ) {
187206 int tmp ;
188207 pthread_mutex_lock (& mutex );
189- tmp = add_chunk (dptr ,size );
208+ tmp = add_chunk (dptr , size );
190209 pthread_mutex_unlock (& mutex );
191210 return tmp ;
192211}
193212
194- int free_raw (CUdeviceptr dptr ){
213+ int free_raw (CUdeviceptr dptr ) {
195214 pthread_mutex_lock (& mutex );
196- unsigned int tmp = remove_chunk (device_overallocated ,dptr );
215+ unsigned int tmp = remove_chunk (device_overallocated , dptr );
197216 pthread_mutex_unlock (& mutex );
198217 return tmp ;
199218}
200219
201- int remove_chunk_async (allocated_list * a_list , CUdeviceptr dptr , CUstream hStream ){
220+ int remove_chunk_async (
221+ allocated_list * a_list , CUdeviceptr dptr , CUstream hStream ) {
202222 size_t t_size ;
203- if (a_list -> length == 0 ) {
223+ if (a_list -> length == 0 ) {
204224 return -1 ;
205225 }
206226 allocated_list_entry * val ;
207- for (val = a_list -> head ;val != NULL ;val = val -> next ){
208- if (val -> entry -> address == dptr ){
227+ for (val = a_list -> head ; val != NULL ; val = val -> next ) {
228+ if (val -> entry -> address == dptr ) {
209229 t_size = val -> entry -> length ;
210230 CUDA_OVERRIDE_CALL (cuda_library_entry ,cuMemFreeAsync ,dptr ,hStream );
211231 LIST_REMOVE (a_list ,val );
@@ -219,14 +239,14 @@ int remove_chunk_async(allocated_list *a_list, CUdeviceptr dptr, CUstream hStrea
219239 return -1 ;
220240}
221241
222- int free_raw_async (CUdeviceptr dptr , CUstream hStream ){
242+ int free_raw_async (CUdeviceptr dptr , CUstream hStream ) {
223243 pthread_mutex_lock (& mutex );
224- unsigned int tmp = remove_chunk_async (device_allocasync ,dptr ,hStream );
244+ unsigned int tmp = remove_chunk_async (device_allocasync , dptr , hStream );
225245 pthread_mutex_unlock (& mutex );
226246 return tmp ;
227247}
228248
229- int add_chunk_async (CUdeviceptr * address ,size_t size , CUstream hStream ){
249+ int add_chunk_async (CUdeviceptr * address , size_t size , CUstream hStream ) {
230250 size_t addr = 0 ;
231251 size_t allocsize ;
232252 CUresult res = CUDA_SUCCESS ;
@@ -238,28 +258,28 @@ int add_chunk_async(CUdeviceptr *address,size_t size, CUstream hStream){
238258 allocated_list_entry * e ;
239259 INIT_ALLOCATED_LIST_ENTRY (e ,addr ,size );
240260 res = CUDA_OVERRIDE_CALL (cuda_library_entry ,cuMemAllocAsync ,& e -> entry -> address ,size ,hStream );
241- if (res != CUDA_SUCCESS ){
261+ if (res != CUDA_SUCCESS ) {
242262 LOG_ERROR ("cuMemoryAllocate failed res=%d" ,res );
243263 return res ;
244264 }
245265 * address = e -> entry -> address ;
246266 CUmemoryPool pool ;
247267 res = CUDA_OVERRIDE_CALL (cuda_library_entry ,cuDeviceGetMemPool ,& pool ,dev );
248- if (res != CUDA_SUCCESS ){
268+ if (res != CUDA_SUCCESS ) {
249269 LOG_ERROR ("cuDeviceGetMemPool failed res=%d" ,res );
250270 return res ;
251271 }
252272 size_t poollimit ;
253273 res = CUDA_OVERRIDE_CALL (cuda_library_entry ,cuMemPoolGetAttribute ,pool ,CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH ,& poollimit );
254- if (res != CUDA_SUCCESS ) {
274+ if (res != CUDA_SUCCESS ) {
255275 LOG_ERROR ("cuMemPoolGetAttribute failed res=%d" ,res );
256276 return res ;
257277 }
258- if (poollimit != 0 ) {
278+ if (poollimit != 0 ) {
259279 if (poollimit > device_allocasync -> limit ) {
260280 allocsize = (poollimit - device_allocasync -> limit < size )? poollimit - device_allocasync -> limit : size ;
261281 cuCtxGetDevice (& dev );
262- add_gpu_device_memory_usage (getpid (),dev ,allocsize ,2 );
282+ add_gpu_device_memory_usage (getpid (), dev , allocsize , 2 );
263283 device_allocasync -> limit = device_allocasync -> limit + allocsize ;
264284 e -> entry -> length = allocsize ;
265285 }else {
@@ -270,7 +290,7 @@ int add_chunk_async(CUdeviceptr *address,size_t size, CUstream hStream){
270290 return 0 ;
271291}
272292
273- int allocate_async_raw (CUdeviceptr * dptr , size_t size , CUstream hStream ){
293+ int allocate_async_raw (CUdeviceptr * dptr , size_t size , CUstream hStream ) {
274294 int tmp ;
275295 pthread_mutex_lock (& mutex );
276296 tmp = add_chunk_async (dptr ,size ,hStream );
0 commit comments