44
55 Copyright (C) 2012 Institute for Computer Graphics and Vision,
66 Graz University of Technology
7+ Copyright (C) 2014 Institute of Radiation Physics,
8+ Helmholtz-Zentrum Dresden - Rossendorf
79
810 Author(s): Markus Steinberger - steinberger ( at ) icg.tugraz.at
11+ Rene Widera - r.widera ( at ) hzdr.de
912
1013 Permission is hereby granted, free of charge, to any person obtaining a copy
1114 of this software and associated documentation files (the "Software"), to deal
2932#ifndef HEAP_CUH
3033#define HEAP_CUH
3134
35+ #include < stdio.h>
3236#include " tools/utils.h"
3337
3438namespace GPUTools
@@ -114,7 +118,7 @@ namespace GPUTools
114118 volatile uint* _regions;
115119 PAGE* _page;
116120 uint _numpages;
117- uint _memsize;
121+ size_t _memsize;
118122 uint _pagebasedMutex;
119123 volatile uint _firstFreePageBased;
120124 volatile uint _firstfreeblock;
@@ -171,7 +175,6 @@ namespace GPUTools
171175 return -1 ;
172176 spot = nextspot (old, spot, spots);
173177 }
174- return -1 ;
175178 }
176179
177180 /* *
@@ -273,7 +276,7 @@ namespace GPUTools
273276 {
274277 for (uint b = startblock; b < accessblocks; ++b)
275278 {
276- while (ptetry < b *pagesperblock)
279+ while (ptetry < (b+ 1 ) *pagesperblock)
277280 {
278281 uint region = ptetry/regionsize;
279282 uint regionfilllevel = _regions[region];
@@ -579,7 +582,7 @@ namespace GPUTools
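579582 // take care of padding: round bytes up to the next multiple of dataAlignment (the mask form assumes dataAlignment is a power of two)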
580583 bytes = (bytes + dataAlignment - 1 ) & ~(dataAlignment-1 );
581584
582- bool use_coalescing = false ;
585+ bool can_use_coalescing = false ;
583586 uint myoffset = 0 ;
584587 uint warpid = GPUTools::warpid ();
585588
@@ -592,15 +595,15 @@ namespace GPUTools
592595 if (coalescible && threadcount > 1 )
593596 {
594597 myoffset = atomicAdd (&warp_sizecounter[warpid], bytes);
595- use_coalescing = true ;
598+ can_use_coalescing = true ;
596599 }
597600
598601 uint req_size = bytes;
599- if (use_coalescing )
602+ if (can_use_coalescing )
600603 req_size = (myoffset == 16 ) ? warp_sizecounter[warpid] : 0 ;
601604
602605 char * myalloc = (char *)alloc_internal_direct (req_size);
603- if (req_size && use_coalescing )
606+ if (req_size && can_use_coalescing )
604607 {
605608 warp_res[warpid] = myalloc;
606609 if (myalloc != 0 )
@@ -609,7 +612,7 @@ namespace GPUTools
609612 __threadfence_block ();
610613
611614 void *myres = myalloc;
612- if (use_coalescing )
615+ if (can_use_coalescing )
613616 {
614617 if (warp_res[warpid] != 0 )
615618 myres = warp_res[warpid] + myoffset;
@@ -661,7 +664,7 @@ namespace GPUTools
661664 * @param memory pointer to the memory used for the heap
662665 * @param memsize size of the memory in bytes
663666 */
664- __device__ void init (void * memory, uint memsize)
667+ __device__ void init (void * memory, size_t memsize)
665668 {
666669 uint linid = threadIdx .x + blockDim .x *(threadIdx .y + threadIdx .z *blockDim .y );
667670 uint threads = blockDim .x *blockDim .y *blockDim .z ;
@@ -711,7 +714,7 @@ namespace GPUTools
711714 _pagebasedMutex = 0 ;
712715 _firstFreePageBased = numpages-1 ;
713716
714- if (_page[ numpages]. data - 1 >= (char *)(memory) + memsize)
717+ if ( ( char *) ( _page+ numpages) > (char *)(memory) + memsize)
715718 printf (" error in heap alloc: numpages too high\n " );
716719 }
717720
@@ -748,7 +751,7 @@ namespace GPUTools
748751 * global init heap method: kernel entry point that forwards heapmem and memsize to heap->init()
749752 */
750753 template <uint pagesize, uint accessblocks, uint regionsize, uint wastefactor, bool use_coalescing, bool resetfreedpages>
751- __global__ void initHeap (DeviceHeap<pagesize, accessblocks, regionsize, wastefactor, use_coalescing, resetfreedpages>* heap, void * heapmem, uint memsize)
754+ __global__ void initHeap (DeviceHeap<pagesize, accessblocks, regionsize, wastefactor, use_coalescing, resetfreedpages>* heap, void * heapmem, size_t memsize)
752755 {
753756 heap->init (heapmem, memsize);
754757 }
0 commit comments