Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 53 additions & 16 deletions include/mallocMC/creationPolicies/FlatterScatter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,16 @@

#include <alpaka/core/Common.hpp>
#include <alpaka/core/Positioning.hpp>
#include <alpaka/extent/Traits.hpp>
#include <alpaka/idx/Accessors.hpp>
#include <alpaka/idx/MapIdx.hpp>
#include <alpaka/kernel/Traits.hpp>
#include <alpaka/mem/fence/Traits.hpp>
#include <alpaka/mem/view/Traits.hpp>
#include <alpaka/mem/view/ViewPlainPtr.hpp>
#include <alpaka/vec/Vec.hpp>
#include <alpaka/workdiv/Traits.hpp>
#include <alpaka/workdiv/WorkDivHelpers.hpp>
#include <alpaka/workdiv/WorkDivMembers.hpp>

#include <sys/types.h>
Expand Down Expand Up @@ -86,14 +90,38 @@ namespace mallocMC::CreationPolicies::FlatterScatterAlloc
MyAccessBlock* accessBlocks{};
uint32_t volatile block = 0U;

// NOTE(review): this text is a rendered diff whose +/- markers were lost, so lines of the previous
// implementation appear interleaved with their replacements (two signatures, two loop headers, two loop
// bodies). Presumably the zero-argument signature, the loop over numBlocks() and `accessBlocks[i].init()`
// are the removed lines — confirm against the real header.
/**
 * @brief Initialise the pages of the access blocks, spreading the work over the threads of the grid.
 *
 * Each thread starts at its linearised grid-wide thread index and advances by the total number of threads
 * in the grid. Every visited index i is split into a block index (i / numPages) and a page index
 * (i % numPages), and init(acc, pageIdx) is called on that access block.
 *
 * @param acc Accelerator; used to query the grid-wide work division and thread index, and forwarded to
 * the access block's init.
 * @param accessBlocksPointer Pointer that is static_cast to MyAccessBlock* and indexed by block index.
 * @param heapSize Heap size handed to numBlocks(heapSize) to compute the loop bound.
 */
ALPAKA_FN_INLINE ALPAKA_FN_ACC auto init() -> void
ALPAKA_FN_INLINE ALPAKA_FN_ACC static auto init(auto const& acc, void* accessBlocksPointer, auto heapSize)
-> void
{
for(uint32_t i = 0; i < numBlocks(); ++i)
// Total number of threads in the grid and this thread's one-dimensional index among them.
auto threadsInGrid = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
auto numThreads = threadsInGrid.prod();
auto const [idx] = alpaka::mapIdx<1U>(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc), threadsInGrid);
auto* accessBlocks = static_cast<MyAccessBlock*>(accessBlocksPointer);

// One iteration per (access block, page) pair; the step is the total thread count.
for(uint32_t i = idx; i < numBlocks(heapSize) * MyAccessBlock::numPages(); i += numThreads)
{
accessBlocks[i].init();
// Split the flat index into the access block and the page inside that block.
auto blockIdx = i / MyAccessBlock::numPages();
auto pageIdx = i % MyAccessBlock::numPages();

accessBlocks[blockIdx].init(acc, pageIdx);
}
}

/**
 * @brief Initialise this heap by forwarding to the three-argument init overload.
 *
 * The heap's own accessBlocks pointer and heapSize are passed along unchanged.
 *
 * @param acc Accelerator handle, forwarded as-is.
 */
ALPAKA_FN_INLINE ALPAKA_FN_ACC auto init(auto const& acc) -> void
{
    this->init(acc, this->accessBlocks, this->heapSize);
}

/**
 * @brief Compute how many access blocks a heap of the given size is divided into.
 *
 * @param heapSize Size of the heap; divided by T_HeapConfig::accessblocksize.
 * @return The quotient heapSize / T_HeapConfig::accessblocksize, converted to uint32_t.
 */
ALPAKA_FN_INLINE ALPAKA_FN_ACC static constexpr auto numBlocks(auto heapSize) -> uint32_t
{
    auto const blockCount = heapSize / T_HeapConfig::accessblocksize;
    return static_cast<uint32_t>(blockCount);
}

/**
* @brief Number of access blocks in the heap. This is a runtime quantity because it depends on the given heap
* size.
Expand All @@ -102,7 +130,7 @@ namespace mallocMC::CreationPolicies::FlatterScatterAlloc
*/
// Returns the number of access blocks for this heap's own heapSize member.
// NOTE(review): two return statements are visible because this is a rendered diff without +/- markers;
// presumably the direct division is the removed line and the call to numBlocks(heapSize) replaces it —
// confirm against the real header.
ALPAKA_FN_INLINE ALPAKA_FN_ACC auto numBlocks() const -> uint32_t
{
return heapSize / T_HeapConfig::accessblocksize;
return numBlocks(heapSize);
}

/**
Expand Down Expand Up @@ -307,15 +335,22 @@ namespace mallocMC::CreationPolicies::FlatterScatterAlloc
{
// NOTE(review): rendered diff without +/- markers. Presumably the unnamed accelerator parameter and the
// unconditional member assignments followed by `m_heap->init()` are the removed lines — confirm against
// the real header.
/**
 * @brief Kernel body: record the heap memory in the heap object and initialise the access blocks.
 *
 * The thread whose mapped grid-wide index compares equal to 0 stores the memory pointer and size in
 * *m_heap. Every thread then calls the static Heap::init with the raw kernel arguments instead of the
 * heap's members.
 *
 * @param acc Accelerator; used to obtain the thread index and forwarded to Heap::init.
 * @param m_heap Heap object whose accessBlocks and heapSize members are written.
 * @param m_heapmem Raw memory, static_cast to MyAccessBlock* for the heap.
 * @param m_memsize Size stored as the heap's heapSize and passed to Heap::init.
 */
template<typename T_HeapConfig, typename T_HashConfig, typename T_AlignmentPolicy>
ALPAKA_FN_INLINE ALPAKA_FN_ACC auto operator()(
auto const& /*unused*/,
auto const& acc,
Heap<T_HeapConfig, T_HashConfig, T_AlignmentPolicy>* m_heap,
void* m_heapmem,
size_t const m_memsize) const
{
m_heap->accessBlocks
= static_cast<Heap<T_HeapConfig, T_HashConfig, T_AlignmentPolicy>::MyAccessBlock*>(m_heapmem);
m_heap->heapSize = m_memsize;
m_heap->init();
// NOTE(review): unlike Heap::init, the result of mapIdx<1U> is not destructured here, so `idx` is
// whatever mapIdx returns and `idx == 0` relies on that type's comparison — confirm this is intended.
auto const idx = alpaka::mapIdx<1U>(
alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc),
alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc));
// Only one thread writes the heap's members.
if(idx == 0)
{
m_heap->accessBlocks
= static_cast<Heap<T_HeapConfig, T_HashConfig, T_AlignmentPolicy>::MyAccessBlock*>(m_heapmem);
m_heap->heapSize = m_memsize;
}
// We can't rely on thread 0 to finish the above before we start, so we use the static version:
Heap<T_HeapConfig, T_HashConfig, T_AlignmentPolicy>::init(acc, m_heapmem, m_memsize);
}
};

Expand Down Expand Up @@ -374,13 +409,15 @@ namespace mallocMC::CreationPolicies
// NOTE(review): rendered diff without +/- markers. Presumably the Dim/Idx/VecType aliases and the
// single-thread work division with its exec call are the removed lines — confirm against the real header.
/**
 * @brief Launch FlatterScatterAlloc::InitKernel on the given queue to initialise the heap, then wait.
 *
 * The work division is obtained from alpaka::getValidWorkDiv for a kernel configuration that requests
 * numBlocks * numPagesPerBlock as its first argument.
 *
 * @tparam TAcc Accelerator type used for the kernel configuration and for alpaka::exec.
 * @param dev Device passed to alpaka::getValidWorkDiv (still marked [[maybe_unused]] in the signature).
 * @param queue Queue on which the kernel is enqueued and which is waited on afterwards.
 * @param heap Heap object handed to the kernel.
 * @param pool Raw heap memory handed to the kernel.
 * @param memsize Size of the pool; also used to compute the number of access blocks.
 */
template<typename TAcc>
static void initHeap([[maybe_unused]] auto& dev, auto& queue, auto* heap, void* pool, size_t memsize)
{
using Dim = typename alpaka::trait::DimType<TAcc>::type;
using Idx = typename alpaka::trait::IdxType<TAcc>::type;
using VecType = alpaka::Vec<Dim, Idx>;

auto workDivSingleThread
= alpaka::WorkDivMembers<Dim, Idx>{VecType::ones(), VecType::ones(), VecType::ones()};
alpaka::exec<TAcc>(queue, workDivSingleThread, FlatterScatterAlloc::InitKernel{}, heap, pool, memsize);
using MyHeap = FlatterScatterAlloc::Heap<T_HeapConfig, T_HashConfig, T_AlignmentPolicy>;
// The product below matches the loop bound used by the static Heap::init.
auto numBlocks = MyHeap::numBlocks(memsize);
auto numPagesPerBlock = MyHeap::MyAccessBlock::numPages();

// NOTE(review): the meaning of the `1U` and `false` arguments is defined by alpaka::KernelCfg —
// confirm against the alpaka documentation.
alpaka::KernelCfg<TAcc> const kernelCfg
= {numBlocks * numPagesPerBlock, 1U, false, alpaka::GridBlockExtentSubDivRestrictions::Unrestricted};
auto workDiv
= alpaka::getValidWorkDiv(kernelCfg, dev, FlatterScatterAlloc::InitKernel{}, heap, pool, memsize);
alpaka::exec<TAcc>(queue, workDiv, FlatterScatterAlloc::InitKernel{}, heap, pool, memsize);
// Wait on the queue after enqueuing the kernel.
alpaka::wait(queue);
}

Expand Down
29 changes: 23 additions & 6 deletions include/mallocMC/creationPolicies/FlatterScatter/AccessBlock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,19 +109,36 @@ namespace mallocMC::CreationPolicies::FlatterScatterAlloc

// This class is supposed to be reinterpreted on a piece of raw memory and not instantiated directly. We set
// it protected, so we can still test stuff in the future easily.
// The constructor only runs the initialisation routine.
// NOTE(review): rendered diff without +/- markers; presumably the zero-argument signature and the `init()`
// call are the removed lines and the `acc` variants replace them — confirm against the real header.
AccessBlock()
AccessBlock(auto const& acc)
{
init();
init(acc);
}

public:
// NOTE(review): rendered diff without +/- markers; presumably the zero-argument signature below and the
// direct `pageTable.cleanup()` call are lines of the previous implementation — confirm against the real
// header.
ALPAKA_FN_INLINE ALPAKA_FN_ACC auto init() -> void
/**
 * @brief Single-threaded initialisation loop. Used only for testing.
 *
 * Calls init(acc, i) once for every page index i in [0, numPages()).
 *
 * @param acc Accelerator handle, forwarded to the per-page init.
 */
ALPAKA_FN_INLINE ALPAKA_FN_ACC auto init(auto const& acc) -> void
{
pageTable.cleanup();
for(uint32_t i = 0; i < numPages(); i++)
{
init(acc, i);
}
}

/**
 * @brief Initialise the page given by its index. Page index 0 additionally cleans up the pageTable.
 *
 * Indices that are not smaller than numPages() leave the pages untouched.
 *
 * NOTE(review): this is a rendered diff without +/- markers; presumably the range-for over `pages` and the
 * `MyPageInterpretation(page, dummyChunkSize)` call are lines of the previous implementation — confirm
 * against the real header.
 *
 * @param pageIdx Index of the page to initialise.
 */
ALPAKA_FN_INLINE ALPAKA_FN_ACC auto init(auto const& /*acc*/, auto const pageIdx) -> void
{
// Only the call for page 0 touches the page table.
if(pageIdx == 0U)
{
pageTable.cleanup();
}
// Chunk size passed to the page interpretation used for the cleanup.
constexpr uint32_t dummyChunkSize = 1U;
for(auto& page : pages)
if(pageIdx < numPages())
{
MyPageInterpretation(page, dummyChunkSize).cleanupFull();
interpret(pageIdx, dummyChunkSize).cleanupFull();
}
}

Expand Down
12 changes: 7 additions & 5 deletions test/unit/source/AccessBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ struct TestableAccessBlock
: mallocMC::CreationPolicies::FlatterScatterAlloc::AccessBlock<T_HeapConfig, T_AlignmentPolicy>
{
public:
TestableAccessBlock() = default;
explicit TestableAccessBlock(auto const& acc)
: mallocMC::CreationPolicies::FlatterScatterAlloc::AccessBlock<T_HeapConfig, T_AlignmentPolicy>(acc) {};
using mallocMC::CreationPolicies::FlatterScatterAlloc::AccessBlock<T_HeapConfig, T_AlignmentPolicy>::blockSize;
using mallocMC::CreationPolicies::FlatterScatterAlloc::AccessBlock<T_HeapConfig, T_AlignmentPolicy>::pageSize;
using mallocMC::CreationPolicies::FlatterScatterAlloc::AccessBlock<T_HeapConfig, T_AlignmentPolicy>::wasteFactor;
Expand Down Expand Up @@ -116,7 +117,7 @@ TEMPLATE_LIST_TEST_CASE("AccessBlock", "", AccessBlocks)
constexpr auto const blockSize = AccessBlock::blockSize;
constexpr auto const pageSize = AccessBlock::pageSize;

AccessBlock accessBlock{};
AccessBlock accessBlock{accSerial};

SECTION("knows its number of pages.")
{
Expand Down Expand Up @@ -312,7 +313,8 @@ TEMPLATE_LIST_TEST_CASE("AccessBlock", "", AccessBlocks)
SECTION("with waste factor")
{
constexpr uint32_t const wastefactor = 3U;
TestableAccessBlock<HeapConfig<blockSize, pageSize, wastefactor>, AlignmentPolicy> wastedAccessBlock{};
TestableAccessBlock<HeapConfig<blockSize, pageSize, wastefactor>, AlignmentPolicy> wastedAccessBlock{
accSerial};
auto pointers = fillWith(wastedAccessBlock, chunkSize);

auto smallerChunkSize = chunkSize / (wastefactor - 1U);
Expand Down Expand Up @@ -374,7 +376,7 @@ TEMPLATE_LIST_TEST_CASE("AccessBlock", "", AccessBlocks)
TestableAccessBlock<
SelectivelyWastedHeapConfig<blockSize, pageSize, wastefactor, selectedNumBytes>,
AlignmentPolicy>
wastedAccessBlock{};
wastedAccessBlock{accSerial};
auto pointers = fillWith(wastedAccessBlock, chunkSize);

auto notSelectedNumBytes = chunkSize / (wastefactor - 1U);
Expand Down Expand Up @@ -596,7 +598,7 @@ TEST_CASE("AccessBlock (Regression)")
using AccessBlock
= TestableAccessBlock<HeapConfig<(pageSize + pageTableEntrySize), pageSize, wastefactor>, AlignmentPolicy>;

AccessBlock accessBlock{};
AccessBlock accessBlock{accSerial};

REQUIRE(accessBlock.getAvailableSlots(accSerial, chunkSizeOneMask) == numChunksOneMask);
REQUIRE(accessBlock.getAvailableSlots(accSerial, chunkSizeTwoMasks) == numChunksTwoMasks);
Expand Down