1 change: 1 addition & 0 deletions pennylane_lightning_gpu/src/tests/CMakeLists.txt
@@ -41,6 +41,7 @@ target_sources(runner_gpu PRIVATE Test_StateVectorCudaManaged_NonParam.cpp
Test_ObservablesGPU.cpp
Test_GateCache.cpp
Test_DataBuffer.cpp
Test_DataBufferArena.cpp
TestHelpers.hpp
)

265 changes: 265 additions & 0 deletions pennylane_lightning_gpu/src/tests/Test_DataBufferArena.cpp
@@ -0,0 +1,265 @@

#include <algorithm>
#include <complex>
#include <iostream>
#include <limits>
#include <numeric> // std::iota
#include <type_traits>
#include <utility>
#include <vector>

#include <catch2/catch.hpp>

#include "DataBufferArena.hpp"
#include "DevTag.hpp"
#include "DevicePool.hpp"

#include <cuComplex.h> // cuDoubleComplex
#include <cuda.h>

#include "TestHelpers.hpp"

using namespace Pennylane;
using namespace CUDA;

namespace {
namespace cuUtil = Pennylane::CUDA::Util;
} // namespace

/**
 * @brief Tests the constructibility of the DataBufferArena class.
*
*/
TEMPLATE_TEST_CASE("DataBufferArena::DataBufferArena", "[DataBufferArena]",
char, int, unsigned int, long, float, double, float2,
double2) {
SECTION("DataBufferArena<GPUDataT>{std:vector<std::size_t>, int, "
"cudaStream_t, bool}") {
REQUIRE(std::is_constructible<DataBufferArena<TestType, int>,
std::vector<std::size_t>, int,
cudaStream_t, bool>::value);
}
SECTION("DataBufferArena<GPUDataT>{const std:vector<std::size_t>&, const "
"DevTag<int>&, bool}") {
REQUIRE(std::is_constructible<DataBufferArena<TestType, int>,
const std::vector<std::size_t> &,
const DevTag<int> &, bool>::value);
}

SECTION("DataBufferArena<GPUDataT>{const std:vector<std::size_t>&, "
"DevTag<int>&&, bool}") {
REQUIRE(std::is_constructible<DataBufferArena<TestType, int>,
const std::vector<std::size_t> &,
DevTag<int> &&, bool>::value);
}
SECTION("DataBufferArena<GPUDataT>=default non-constructable") {
REQUIRE_FALSE(std::is_default_constructible<
DataBufferArena<TestType, int>>::value);
}
}

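/**
 * @brief Tests memory allocation for single- and multi-partition arenas,
 * with and without eagerly allocating the backing device memory.
 */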
TEMPLATE_TEST_CASE("DataBufferArena::memory allocation", "[DataBufferArena]",
float, double) {
SECTION("Allocate buffer memory = true, single partition") {
DataBufferArena<TestType, int> data_buffer1{{8}, 0, 0, true};
CHECK(data_buffer1.getData() != nullptr);
CHECK(data_buffer1.getLength() == 8);
CHECK(data_buffer1.getStream() == 0);
CHECK(data_buffer1.getDevice() == 0);
}
SECTION("Allocate buffer memory = false, single partition") {
DataBufferArena<TestType, int> data_buffer1{{7}, 0, 0, false};
CHECK(data_buffer1.getData() == nullptr);
CHECK(data_buffer1.getLength() == 7);
CHECK(data_buffer1.getStream() == 0);
CHECK(data_buffer1.getDevice() == 0);
}
SECTION("Allocate buffer memory = true, multiple partitions") {
DataBufferArena<TestType, int> data_buffer1{{8}, 0, 0, true};
CHECK(data_buffer1.getData() != nullptr);
CHECK(data_buffer1.getLength() == 8);
CHECK(data_buffer1.getStream() == 0);
CHECK(data_buffer1.getDevice() == 0);
}
SECTION("Allocate buffer memory = false, multiple partitions") {
std::vector<std::size_t> buffer_sizes{7, 2, 4};
DataBufferArena<TestType, int> data_buffer1{buffer_sizes, 0, 0, false};
CHECK(data_buffer1.getData() == nullptr);
CHECK(data_buffer1.getLength() == 13);
CHECK(data_buffer1.getNumBuffers() == buffer_sizes.size());
for (std::size_t index = 0; index < data_buffer1.getNumBuffers();
index++) {
CHECK(data_buffer1.getLength(index) == buffer_sizes[index]);
}
CHECK(data_buffer1.getBufferLengths() == buffer_sizes);
CHECK(data_buffer1.getStream() == 0);
CHECK(data_buffer1.getDevice() == 0);
}
}

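/**
 * @brief Tests host-to-GPU, GPU-to-GPU, and GPU-to-host data movement for a
 * single-partition arena, round-tripping the data back to the host.
 */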
TEMPLATE_TEST_CASE(
"DataBufferArena::Data locality and movement, single partition",
"[DataBufferArena]", float, double) {
SECTION("Single gpu movement") {
DataBufferArena<TestType, int> data_buffer1{{6}, 0, 0, true};
std::vector<TestType> host_data_in(6, 1);
std::vector<TestType> host_data_out(6, 0);
data_buffer1.CopyHostDataToGpu(host_data_in.data(), host_data_in.size(),
false);
DataBufferArena<TestType, int> data_buffer2(
data_buffer1.getBufferLengths(), data_buffer1.getDevTag(), true);
data_buffer2.CopyGpuDataToGpu(data_buffer1, false);
data_buffer2.CopyGpuDataToHost(host_data_out.data(), 6, false);
CHECK(host_data_in == host_data_out);
CHECK(data_buffer1.getLength() == data_buffer2.getLength());
CHECK(data_buffer1.getData() !=
data_buffer2.getData()); // Ptrs should not refer to same block
}
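    // The multi-GPU path only runs when more than one device is visible: the
    // data is relayed host -> GPU 0 -> GPU 1 -> ... -> host and compared, with
    // devices acquired and released through the DevicePool.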
if (DevicePool<int>::getTotalDevices() > 1) {
SECTION("Multi-GPU copy") {
DevicePool<int> dev_pool;
std::vector<int> ids;
std::vector<DevTag<int>> tags;
std::vector<std::size_t> buffer_partitions{6};
std::vector<std::unique_ptr<DataBufferArena<TestType, int>>>
buffers;
for (std::size_t i = 0; i < dev_pool.getTotalDevices(); i++) {
ids.push_back(dev_pool.acquireDevice());
tags.push_back({ids.back(), 0U});
buffers.emplace_back(
std::make_unique<DataBufferArena<TestType, int>>(
buffer_partitions, tags.back(), true));
}

std::vector<TestType> host_data_in(6, 1);
std::vector<TestType> host_data_out(6, 0);
buffers[0]->CopyHostDataToGpu(host_data_in.data(),
host_data_in.size(), false);
for (std::size_t i = 1; i < dev_pool.getTotalDevices(); i++) {
buffers[i]->CopyGpuDataToGpu(*buffers[i - 1], false);
}
buffers.back()->CopyGpuDataToHost(host_data_out.data(), 6, false);
CHECK(host_data_in == host_data_out);
for (auto &id : ids) {
dev_pool.releaseDevice(id);
}
}
}
}

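/**
 * @brief Tests data movement for a multi-partition arena, copying either the
 * entire arena at once or each partition individually by its index.
 */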
TEMPLATE_TEST_CASE(
"DataBufferArena::Data locality and movement, multiple partitions",
"[DataBufferArena]", float, double) {
SECTION("Single gpu movement, entire buffer") {
DataBufferArena<TestType, int> data_buffer1{
{6, 3, 7, 4, 1}, 0, 0, true};
std::vector<TestType> host_data_in(21, 0);
std::iota(host_data_in.begin(), host_data_in.end(), 0);

std::vector<TestType> host_data_out(21, 0);
data_buffer1.CopyHostDataToGpu(host_data_in.data(), host_data_in.size(),
false);
DataBufferArena<TestType, int> data_buffer2(
data_buffer1.getBufferLengths(), data_buffer1.getDevTag(), true);
data_buffer2.CopyGpuDataToGpu(data_buffer1, false);
data_buffer2.CopyGpuDataToHost(host_data_out.data(),
host_data_out.size(), false);
CHECK(host_data_in == host_data_out);
CHECK(data_buffer1.getLength() == data_buffer2.getLength());
CHECK(data_buffer1.getData() !=
data_buffer2.getData()); // Ptrs should not refer to same block
}
SECTION("Single gpu movement, separate buffer partitions") {
DataBufferArena<TestType, int> data_buffer1{
{6, 3, 7, 4, 1}, 0, 0, true};
std::vector<TestType> host_data_in(21, 0);
std::iota(host_data_in.begin(), host_data_in.end(), 0);

std::vector<TestType> host_data_out(21, 0);
data_buffer1.CopyHostDataToGpu(host_data_in.data(), host_data_in.size(),
false);

const auto &buffer_s = data_buffer1.getBufferLengths();

DataBufferArena<TestType, int> data_buffer2(
buffer_s, data_buffer1.getDevTag(), true);

for (std::size_t index = 0; index < buffer_s.size(); index++) {
data_buffer2.CopyGpuDataToGpu(data_buffer1, false, index, index);
}
data_buffer2.CopyGpuDataToHost(host_data_out.data(),
host_data_out.size(), false);

CHECK(host_data_in == host_data_out);
CHECK(data_buffer1.getLength() == data_buffer2.getLength());
CHECK(data_buffer1.getData() !=
data_buffer2.getData()); // Ptrs should not refer to same block
}

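    // Multi-GPU variants repeat the same round trip across all visible
    // devices, again copying either the whole arena or partition by partition.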
if (DevicePool<int>::getTotalDevices() > 1) {
SECTION("Multi-GPU movement, entire buffer") {
DevicePool<int> dev_pool;
std::vector<int> ids;
std::vector<DevTag<int>> tags;
std::vector<std::size_t> buffer_partitions{6, 3, 7, 4, 1};
std::vector<std::unique_ptr<DataBufferArena<TestType, int>>>
buffers;
for (std::size_t i = 0; i < dev_pool.getTotalDevices(); i++) {
ids.push_back(dev_pool.acquireDevice());
tags.push_back({ids.back(), 0U});
buffers.emplace_back(
std::make_unique<DataBufferArena<TestType, int>>(
buffer_partitions, tags.back(), true));
}

std::vector<TestType> host_data_in(21, 0);
std::iota(host_data_in.begin(), host_data_in.end(), 0);

std::vector<TestType> host_data_out(21, 0);
buffers[0]->CopyHostDataToGpu(host_data_in.data(),
host_data_in.size(), false);
for (std::size_t i = 1; i < dev_pool.getTotalDevices(); i++) {
buffers[i]->CopyGpuDataToGpu(*buffers[i - 1], false);
}
            buffers.back()->CopyGpuDataToHost(host_data_out.data(),
                                              host_data_out.size(), false);
CHECK(host_data_in == host_data_out);
for (auto &id : ids) {
dev_pool.releaseDevice(id);
}
}
SECTION("Multi-GPU movement, separate buffer partitions") {
DevicePool<int> dev_pool;
std::vector<int> ids;
std::vector<DevTag<int>> tags;
std::vector<std::size_t> buffer_partitions{6, 3, 7, 4, 1};
std::vector<std::unique_ptr<DataBufferArena<TestType, int>>>
buffers;
for (std::size_t i = 0; i < dev_pool.getTotalDevices(); i++) {
ids.push_back(dev_pool.acquireDevice());
tags.push_back({ids.back(), 0U});
buffers.emplace_back(
std::make_unique<DataBufferArena<TestType, int>>(
buffer_partitions, tags.back(), true));
}

std::vector<TestType> host_data_in(21, 0);
std::iota(host_data_in.begin(), host_data_in.end(), 0);

std::vector<TestType> host_data_out(21, 0);
buffers[0]->CopyHostDataToGpu(host_data_in.data(),
host_data_in.size(), false);

for (std::size_t i = 1; i < dev_pool.getTotalDevices(); i++) {
for (std::size_t index = 0; index < buffer_partitions.size();
index++) {
buffers[i]->CopyGpuDataToGpu(*buffers[i - 1], false, index,
index);
}
}
buffers.back()->CopyGpuDataToHost(host_data_out.data(), 21, false);
CHECK(host_data_in == host_data_out);
for (auto &id : ids) {
dev_pool.releaseDevice(id);
}
}
}
}