Skip to content

Commit 1314bf2

Browse files
committed
Merge pull request #76 from ComputationalRadiationPhysics/dev
Merge dev into master for release 2.0.1crp
2 parents ddeae86 + 15730e4 commit 1314bf2

File tree

10 files changed

+130
-40
lines changed

10 files changed

+130
-40
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,29 @@
11
Change Log / Release Log for mallocMC
22
================================================================
33

4+
2.0.1crp
5+
-------------
6+
**Date:** 2015-01-13
7+
8+
This release fixes several bugs that occurred after the release of 2.0.0crp.
9+
We closed all issues documented in
10+
[Milestone *Bugfixes*](https://github.com/ComputationalRadiationPhysics/mallocMC/issues?milestone=4&state=closed)
11+
12+
### Changes to mallocMC 2.0.0crp
13+
14+
**Bug fixes**
15+
- page table metadata was not correctly initialized with 0 #70
16+
- freeing pages would not work under certain circumstances #66
17+
- the bitmask in a page table entry could be wrong due to a race condition #62
18+
- not all regions were initialized correctly #60
19+
- getAvailableSlots could sometimes miss blocks #59
20+
- the counter for elements in a page could get too high due to a race condition #61
21+
- Out of Memory (OOM) Policy sometimes did not recognize allocation failures correctly #67
22+
23+
**Misc:**
24+
- See the full changes at https://github.com/ComputationalRadiationPhysics/mallocMC/compare/2.0.0crp...2.0.1crp
25+
26+
427
2.0.0crp
528
-------------
629
**Date:** 2014-06-02

CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,30 @@ if(Boost_VERSION EQUAL 105500)
3939
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ")
4040
endif(Boost_VERSION EQUAL 105500)
4141

42+
43+
################################################################################
44+
# Warnings
45+
################################################################################
46+
# GNU
47+
if(CMAKE_COMPILER_IS_GNUCXX)
48+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
49+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas")
50+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
51+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
52+
# new warning in gcc 4.8 (flag ignored in previous version)
53+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs")
54+
# ICC
55+
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
56+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
57+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_NO_VARIADIC_TEMPLATES")
58+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_NO_CXX11_VARIADIC_TEMPLATES")
59+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_NO_FENV_H")
60+
# PGI
61+
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
62+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Minform=inform")
63+
endif()
64+
65+
4266
###############################################################################
4367
# Installation
4468
###############################################################################

INSTALL.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,47 @@ This is an example how to compile `mallocMC` and test the example code snippets
4545
- `./mallocMC_Example02`
4646
- `./VerifyHeap`
4747
- additional options: see `./VerifyHeap --help`
48+
49+
50+
Linking to your Project
51+
-----------------------
52+
53+
To use mallocMC in your project, you must include the header `mallocMC/mallocMC.hpp` and
54+
add the correct include path.
55+
56+
Because we are linking to Boost and CUDA, the following **external dependencies** must be linked:
57+
- `-lboost`, `-lcudart`
58+
59+
If you are using CMake you can download our `FindmallocMC.cmake` module with
60+
```bash
61+
wget https://raw.githubusercontent.com/ComputationalRadiationPhysics/picongpu/dev/src/cmake/FindmallocMC.cmake
62+
# read the documentation
63+
cmake -DCMAKE_MODULE_PATH=. --help-module FindmallocMC | less
64+
```
65+
66+
and use the following lines in your `CMakeLists.txt`:
67+
```cmake
68+
# this example will require at least CMake 2.8.5
69+
cmake_minimum_required(VERSION 2.8.5)
70+
71+
# add path to FindmallocMC.cmake, e.g. in the directory in cmake/
72+
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/)
73+
74+
# find the packages that are required by mallocMC. This has to be done BEFORE
75+
# loading mallocMC
76+
find_package(Boost REQUIRED)
77+
set(LIBS ${LIBS} ${Boost_LIBRARIES})
78+
79+
find_package(CUDA REQUIRED)
80+
cuda_include_directories(${CUDA_INCLUDE_DIRS})
81+
82+
# find mallocMC installation
83+
find_package(mallocMC 2.0.1 REQUIRED)
84+
85+
# where to find headers (-I includes for compiler)
86+
include_directories(SYSTEM ${mallocMC_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
87+
88+
add_executable(yourBinary ${SOURCES})
89+
90+
target_link_libraries(yourBinary ${LIBS})
91+
```

src/include/mallocMC/alignmentPolicies/Shrink_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ namespace Shrink2NS{
7474
#endif
7575
static const uint32 dataAlignment = MALLOCMC_AP_SHRINK_DATAALIGNMENT;
7676

77-
BOOST_STATIC_ASSERT(dataAlignment > 0);
77+
BOOST_STATIC_ASSERT(static_cast<uint32>(dataAlignment) > 0);
7878
//dataAlignment must also be a power of 2!
7979
BOOST_STATIC_ASSERT(dataAlignment && !(dataAlignment & (dataAlignment-1)) );
8080

src/include/mallocMC/creationPolicies/OldMalloc_impl.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ namespace CreationPolicies{
5252
free(mem);
5353
}
5454

55-
__device__ bool isOOM(void* p){
56-
return 32 == __popc(__ballot(p == NULL));
55+
__device__ bool isOOM(void* p, size_t s){
56+
return s && (p == NULL);
5757
}
5858

5959
template < typename T>

src/include/mallocMC/creationPolicies/Scatter_impl.hpp

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -187,15 +187,11 @@ namespace ScatterKernelDetail{
187187
* bit fields when the page is used for a small chunk size
188188
* @param previous_chunksize the chunksize which was used for the page before
189189
*/
190-
__device__ void init(uint32 previous_chunksize = 0)
190+
__device__ void init()
191191
{
192-
//TODO: we can speed this up for pages being freed, because we know the
193-
//chunksize used before (these bits must be zero again)
194-
195-
//init the entire data which can hold bitfields
196-
uint32 max_bits = min(32*32,pagesize/minChunkSize1);
197-
uint32 max_entries = divup<uint32>(max_bits/8,sizeof(uint32))*sizeof(uint32);
198-
uint32* write = (uint32*)(data+(pagesize-max_entries));
192+
//clear the entire data which can hold bitfields
193+
uint32 first_possible_metadata = 32*HierarchyThreshold;
194+
uint32* write = (uint32*)(data+(pagesize-first_possible_metadata));
199195
while(write < (uint32*)(data + pagesize))
200196
*write++ = 0;
201197
}
@@ -319,6 +315,9 @@ namespace ScatterKernelDetail{
319315
*/
320316
__device__ inline void* tryUsePage(uint32 page, uint32 chunksize)
321317
{
318+
319+
void* chunk_ptr = NULL;
320+
322321
//increase the fill level
323322
uint32 filllevel = atomicAdd((uint32*)&(_ptes[page].count), 1);
324323
//recheck chunk size (it could be that the page got freed in the meanwhile...)
@@ -333,19 +332,21 @@ namespace ScatterKernelDetail{
333332
fullsegments = pagesize / segmentsize;
334333
additional_chunks = max(0,(int)pagesize - (int)fullsegments*segmentsize - (int)sizeof(uint32))/chunksize;
335334
if(filllevel < fullsegments * 32 + additional_chunks)
336-
return addChunkHierarchy(chunksize, fullsegments, additional_chunks, page);
335+
chunk_ptr = addChunkHierarchy(chunksize, fullsegments, additional_chunks, page);
337336
}
338337
else
339338
{
340339
uint32 chunksinpage = min(pagesize / chunksize, 32);
341340
if(filllevel < chunksinpage)
342-
return addChunkNoHierarchy(chunksize, page, chunksinpage);
341+
chunk_ptr = addChunkNoHierarchy(chunksize, page, chunksinpage);
343342
}
344343
}
345344

346345
//this one is full/not useable
347-
atomicSub((uint32*)&(_ptes[page].count), 1);
348-
return 0;
346+
if(chunk_ptr == NULL)
347+
atomicSub((uint32*)&(_ptes[page].count), 1);
348+
349+
return chunk_ptr;
349350
}
350351

351352

@@ -444,9 +445,8 @@ namespace ScatterKernelDetail{
444445
uint32* onpagemasks = (uint32*)(_page[page].data + chunksize*(fullsegments*32 + additional_chunks));
445446
uint32 old = atomicAnd(onpagemasks + segment, ~(1 << withinsegment));
446447

447-
uint32 elementsinsegment = segment < fullsegments ? 32 : additional_chunks;
448-
if(__popc(old) == elementsinsegment)
449-
atomicAnd((uint32*)&_ptes[page].bitmask, ~(1 << segment));
448+
// always do this, since it might fail due to a race-condition with addChunkHierarchy
449+
atomicAnd((uint32*)&_ptes[page].bitmask, ~(1 << segment));
450450
}
451451
else
452452
{
@@ -718,7 +718,7 @@ namespace ScatterKernelDetail{
718718
ptes[i].init();
719719
page[i].init();
720720
}
721-
for(uint32 i = linid; i < numregions; i+= numregions)
721+
for(uint32 i = linid; i < numregions; i+= threads)
722722
regions[i] = 0;
723723

724724
if(linid == 0)
@@ -777,9 +777,9 @@ namespace ScatterKernelDetail{
777777
}
778778
}
779779

780-
__device__ bool isOOM(void* p){
781-
// all threads in a warp return get NULL
782-
return 32 == __popc(__ballot(p == NULL));
780+
__device__ bool isOOM(void* p, size_t s){
781+
// one thread that requested memory returned null
782+
return s && (p == NULL);
783783
}
784784

785785

@@ -869,7 +869,8 @@ namespace ScatterKernelDetail{
869869
if(gid > 0) return 0; //do this serially
870870
uint32 pagestoalloc = divup((uint32)slotSize, pagesize);
871871
uint32 freecount = 0;
872-
for(uint32 currentpage = _numpages; currentpage > 0; --currentpage){ //this already includes all superblocks
872+
for(uint32 currentpage = _numpages; currentpage > 0;){ //this already includes all superblocks
873+
--currentpage;
873874
if(_ptes[currentpage].chunksize == 0){
874875
if(++freecount == pagestoalloc){
875876
freecount = 0;

src/include/mallocMC/distributionPolicies/XMallocSIMD_impl.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ namespace DistributionPolicies{
7777

7878
//all the properties must be unsigned integers > 0
7979
BOOST_STATIC_ASSERT(!std::numeric_limits<typename Properties::pagesize::type>::is_signed);
80-
BOOST_STATIC_ASSERT(pagesize > 0);
80+
BOOST_STATIC_ASSERT(static_cast<uint32>(pagesize) > 0);
8181

8282
public:
8383
static const uint32 _pagesize = pagesize;
@@ -97,7 +97,7 @@ namespace DistributionPolicies{
9797
//second half: make sure that all coalesced allocations can fit within one page
9898
//necessary for offset calculation
9999
bool coalescible = bytes > 0 && bytes < (pagesize / 32);
100-
uint32 threadcount = __popc(__ballot(coalescible));
100+
threadcount = __popc(__ballot(coalescible));
101101

102102
if (coalescible && threadcount > 1)
103103
{

src/include/mallocMC/mallocMC_constraints.hpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,19 @@ namespace mallocMC{
3737
/** The default PolicyCheckers (do always succeed)
3838
*/
3939
template<typename Policy1>
40-
struct PolicyCheck1{};
40+
class PolicyCheck1{};
4141

4242
template<typename Policy1, typename Policy2>
43-
struct PolicyCheck2{};
43+
class PolicyCheck2{};
4444

4545
template<typename Policy1, typename Policy2, typename Policy3>
46-
struct PolicyCheck3{};
46+
class PolicyCheck3{};
4747

4848
template<typename Policy1, typename Policy2, typename Policy3, typename Policy4>
49-
struct PolicyCheck4{};
49+
class PolicyCheck4{};
5050

5151
template<typename Policy1, typename Policy2, typename Policy3, typename Policy4, typename Policy5>
52-
struct PolicyCheck5{};
52+
class PolicyCheck5{};
5353

5454

5555
/** Enforces constraints on policies or combinations of polices
@@ -63,8 +63,9 @@ namespace mallocMC{
6363
typename T_GetHeapPolicy,
6464
typename T_AlignmentPolicy
6565
>
66-
class PolicyConstraints{
67-
PolicyCheck2<T_CreationPolicy, T_DistributionPolicy> c;
66+
67+
class PolicyConstraints:PolicyCheck2<T_CreationPolicy, T_DistributionPolicy>{
68+
6869
};
6970

7071

@@ -75,7 +76,7 @@ namespace mallocMC{
7576
* the same value for their "pagesize"-parameter.
7677
*/
7778
template<typename x, typename y, typename z >
78-
struct PolicyCheck2<
79+
class PolicyCheck2<
7980
typename CreationPolicies::Scatter<x,y>,
8081
typename DistributionPolicies::XMallocSIMD<z>
8182
>{

src/include/mallocMC/mallocMC_hostclass.hpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ namespace mallocMC{
8383
public T_CreationPolicy,
8484
public T_OOMPolicy,
8585
public T_ReservePoolPolicy,
86-
public T_AlignmentPolicy
86+
public T_AlignmentPolicy,
87+
public PolicyConstraints<T_CreationPolicy,T_DistributionPolicy,T_OOMPolicy,T_ReservePoolPolicy,T_AlignmentPolicy>
8788
{
8889
public:
8990
typedef T_CreationPolicy CreationPolicy;
@@ -96,10 +97,6 @@ namespace mallocMC{
9697
typedef boost::uint32_t uint32;
9798
void* pool;
9899

99-
//Instantiating the constraints checker will execute the check
100-
PolicyConstraints<CreationPolicy,DistributionPolicy,
101-
OOMPolicy,ReservePoolPolicy,AlignmentPolicy> c;
102-
103100
public:
104101

105102
typedef Allocator<CreationPolicy,DistributionPolicy,
@@ -112,7 +109,7 @@ namespace mallocMC{
112109
bytes = AlignmentPolicy::applyPadding(bytes);
113110
uint32 req_size = distributionPolicy.collect(bytes);
114111
void* memBlock = CreationPolicy::create(req_size);
115-
const bool oom = CreationPolicy::isOOM(memBlock);
112+
const bool oom = CreationPolicy::isOOM(memBlock, req_size);
116113
if(oom) memBlock = OOMPolicy::handleOOM(memBlock);
117114
void* myPart = distributionPolicy.distribute(memBlock);
118115

src/include/mallocMC/version.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
/** the mallocMC version: major API changes should be reflected here */
4040
#define MALLOCMC_VERSION_MAJOR 2
4141
#define MALLOCMC_VERSION_MINOR 0
42-
#define MALLOCMC_VERSION_PATCH 0
42+
#define MALLOCMC_VERSION_PATCH 1
4343

4444
/** the mallocMC flavor is used to differentiate the releases of the
4545
* Computational Radiation Physics group (crp) from other releases

0 commit comments

Comments
 (0)