
Commit 2a513c9

Implement complex number example
- The example is not finished yet and needs some rework.
- The CUDA backend is not working.
- The example has to be built separately because complex numbers are not yet part of alpaka.
1 parent 957c375 commit 2a513c9

2 files changed: 172 additions, 0 deletions

example/complex/CMakeLists.txt

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
# Copyright 2021 Simeon Ehrig
#
# This file is part of vikunja.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

cmake_minimum_required(VERSION 3.18)

project(complexVikunja)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# needs to be a cached variable, otherwise it is overwritten by alpaka
# should be fixed in the future
set(ALPAKA_CXX_STANDARD "17" CACHE STRING "C++ standard version")
set(ALPAKA_CUDA_EXPT_EXTENDED_LAMBDA ON)
set(ALPAKA_CUDA_NVCC_EXPT_EXTENDED_LAMBDA ON)

add_subdirectory(alpaka)
add_subdirectory(vikunja)

alpaka_add_executable(${PROJECT_NAME} main.cpp)
target_link_libraries(${PROJECT_NAME}
    PRIVATE
        vikunja::vikunja
    )

example/complex/main.cpp

Lines changed: 145 additions & 0 deletions
@@ -0,0 +1,145 @@
/* Copyright 2021 Hauke Mewes, Simeon Ehrig
 *
 * This file is part of vikunja.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <vikunja/transform/transform.hpp>
#include <vikunja/reduce/reduce.hpp>

#include <alpaka/alpaka.hpp>

#include <iostream>
#include <vector>
#include <complex>

int main()
{
    // Define the accelerator here. Must be one of the enabled accelerators.
    using Acc = alpaka::AccCpuSerial<alpaka::DimInt<1u>, std::uint64_t>;
    // using Acc = alpaka::AccCpuOmp2Blocks<alpaka::DimInt<1u>, std::uint64_t>;
    // using Acc = alpaka::AccGpuCudaRt<alpaka::DimInt<1u>, std::uint64_t>;

    // Type of the data that will be reduced
    using Data = float;

    // Alpaka index type
    using Idx = alpaka::Idx<Acc>;
    // Alpaka dimension type
    using Dim = alpaka::Dim<Acc>;
    // Type of the extent vector
    using Vec = alpaka::Vec<Dim, Idx>;
    // Number of elements to reduce
    const Idx n = static_cast<Idx>(6400);
    // Create extent
    Vec extent(Vec::all(static_cast<Idx>(n)));

    // Define device, platform, and queue types.
    using DevAcc = alpaka::Dev<Acc>;
    using PltfAcc = alpaka::Pltf<DevAcc>;
    // using QueueAcc = alpaka::test::queue::DefaultQueue<alpaka::Dev<Acc>>;
    using PltfHost = alpaka::PltfCpu;
    using DevHost = alpaka::Dev<PltfHost>;
    using QueueAcc = alpaka::Queue<Acc, alpaka::Blocking>;
    using QueueHost = alpaka::QueueCpuBlocking;

    // Get the host device.
    DevHost devHost(alpaka::getDevByIdx<PltfHost>(0u));
    // Get a queue on the host device.
    QueueHost queueHost(devHost);
    // Select a device to execute on.
    DevAcc devAcc(alpaka::getDevByIdx<PltfAcc>(0u));
    // Get a queue on the accelerator device.
    QueueAcc queueAcc(devAcc);

    // Allocate memory both on host and device.
    auto hInputMem1(alpaka::allocBuf<alpaka::Complex<Data>, Idx>(devHost, extent));
    auto hInputMem2(alpaka::allocBuf<alpaka::Complex<Data>, Idx>(devHost, extent));
    auto hOutputMem(alpaka::allocBuf<alpaka::Complex<Data>, Idx>(devHost, extent));
    auto dInputMem1(alpaka::allocBuf<alpaka::Complex<Data>, Idx>(devAcc, extent));
    auto dInputMem2(alpaka::allocBuf<alpaka::Complex<Data>, Idx>(devAcc, extent));
    auto dOutputMem(alpaka::allocBuf<alpaka::Complex<Data>, Idx>(devAcc, extent));

    // Fill memory on host with numbers from 0...n-1.
    alpaka::Complex<Data>* hInputMem1Ptr = alpaka::getPtrNative(hInputMem1);
    alpaka::Complex<Data>* hInputMem2Ptr = alpaka::getPtrNative(hInputMem2);
    for(Idx i = 0; i < n; ++i)
    {
        Data i_cast = static_cast<Data>(i);
        hInputMem1Ptr[i] = alpaka::Complex<Data>(i_cast, i_cast);
        hInputMem2Ptr[i] = static_cast<Data>(2) * alpaka::Complex<Data>(i_cast, i_cast);
    }

    // Copy to accelerator.
    alpaka::memcpy(queueAcc, dInputMem1, hInputMem1, extent);
    alpaka::memcpy(queueAcc, dInputMem2, hInputMem2, extent);

    // Use a lambda function for the transformation.
    auto sub = [] ALPAKA_FN_HOST_ACC(alpaka::Complex<Data> const& a, alpaka::Complex<Data> const& b) { return a - b; };
    std::cout << "Testing accelerator: " << alpaka::getAccName<Acc>() << " with size: " << n << "\n";

    // TRANSFORM CALL:
    // Takes the arguments: accelerator device, accelerator queue, size of data, the input pointer-like(s),
    // the output pointer-like, and the transform lambda. Can be in-place or out-of-place.
    vikunja::transform::deviceTransform<Acc>(
        devAcc,
        queueAcc,
        n,
        alpaka::getPtrNative(dInputMem2),
        alpaka::getPtrNative(dInputMem1),
        alpaka::getPtrNative(dOutputMem),
        sub);

    alpaka::memcpy(queueAcc, hOutputMem, dOutputMem, extent);

    std::vector<alpaka::Complex<Data>> expected_result_transform(
        alpaka::getPtrNative(hInputMem1),
        alpaka::getPtrNative(hInputMem1) + n);

    std::vector<alpaka::Complex<Data>> result_transform(
        alpaka::getPtrNative(hOutputMem),
        alpaka::getPtrNative(hOutputMem) + n);

    if(expected_result_transform == result_transform)
    {
        std::cout << "Transform was successful!\n";
    }
    else
    {
        std::cout << "Transform was not successful!\n";
    }

    auto transform = [] ALPAKA_FN_HOST_ACC(Acc const& acc, alpaka::Complex<Data> const& a) -> Data {
        return alpaka::math::abs(acc, a);
    };

    auto reduce = [] ALPAKA_FN_HOST_ACC(Data const& sum, Data const& item) { return sum + item; };

    Data result_sum = vikunja::reduce::deviceTransformReduce<Acc>(
        devAcc,
        devHost,
        queueAcc,
        n,
        alpaka::getPtrNative(dOutputMem),
        transform,
        reduce);
    Data expected_result_sum = static_cast<Data>(0);

    for(Idx i = 0; i < n; ++i)
    {
        expected_result_sum += std::abs(static_cast<std::complex<Data>>(hInputMem1Ptr[i]));
    }

    if(expected_result_sum == result_sum)
    {
        std::cout << "Reduce was successful!\n";
    }
    else
    {
        std::cout << "Reduce was not successful!\n";
    }

    return 0;
}
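
For reference, the arithmetic in the example can be reproduced on the host with std::complex alone. The following is a minimal host-only sketch (not part of the commit) that mirrors the device code: with in1[i] = (i, i) and in2[i] = 2 * (i, i), the transform in2 - in1 yields in1 again, and the transform-reduce sums the magnitudes of those values.

// Host-only sketch mirroring the example's math with std::complex,
// to illustrate the expected transform and reduce results.
#include <complex>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    const std::size_t n = 6400;
    std::vector<std::complex<float>> in1(n), in2(n), out(n);

    // Same initialization as in main.cpp: in1[i] = (i, i), in2[i] = 2 * (i, i).
    for(std::size_t i = 0; i < n; ++i)
    {
        const float f = static_cast<float>(i);
        in1[i] = std::complex<float>(f, f);
        in2[i] = 2.0f * std::complex<float>(f, f);
    }

    // Transform: out[i] = in2[i] - in1[i], which is again (i, i) == in1[i].
    for(std::size_t i = 0; i < n; ++i)
        out[i] = in2[i] - in1[i];

    // Transform-reduce: sum of |out[i]| over all elements.
    float sum = 0.0f;
    for(std::size_t i = 0; i < n; ++i)
        sum += std::abs(out[i]);

    std::cout << "out == in1: " << std::boolalpha << (out == in1) << "\n";
    std::cout << "sum of magnitudes: " << sum << "\n";
    return 0;
}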
