Skip to content
This repository was archived by the owner on Aug 11, 2023. It is now read-only.

Commit bca7f9c

Browse files
committed
Add new vector add example
This new vector example compares different kinds of kernel functions to show the differences in assembly between each.
1 parent ade90ab commit bca7f9c

File tree

3 files changed

+147
-0
lines changed

3 files changed

+147
-0
lines changed

samples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,5 +71,6 @@ add_sample(TARGET using-function-objects SOURCES using-function-objects.cpp)
7171
# This property is not necessary here, but demonstrates how to use it.
7272
# Forces the integration header to appear after the main code.
7373
set_property(TARGET using-function-objects PROPERTY COMPUTECPP_INCLUDE_AFTER 1)
74+
add_sample(TARGET vector-addition SOURCES vector-addition-examples.cpp)
7475
add_sample(TARGET vptr SOURCES vptr.cpp)
7576
target_include_directories(vptr PRIVATE ${PROJECT_SOURCE_DIR}/include)

samples/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,4 @@ Sample File | Description
3737
`smart-pointer.cpp` | Custom Allocators in SYCL.
3838
`vptr.cpp` | Using the Virtual Pointer interface in SYCL on matrix addition kernel.
3939
`opencl-c-interop.cpp` | OpenCL/SYCL interoperability example.
40+
`vector-addition-examples.cpp` | Comparison of different vector addition code to show masking, predication and vectorisation.

samples/vector-addition-examples.cpp

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
/***************************************************************************
2+
*
3+
* Copyright (C) Codeplay Software Ltd.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
* Codeplay's ComputeCpp SDK
18+
*
19+
* vector-addition-examples.cpp
20+
*
21+
* Description:
22+
* Shows different code generation for vector addition kernels
23+
*/
24+
25+
#include <cmath>
#include <iostream>

#include <CL/sycl.hpp>
28+
29+
using namespace cl;
30+
31+
/* Base vector add function.
 *
 * Stores a[id] + b[id] into c[id]. Only the element at index id is read
 * from a and b and written to c; the caller must pass an id that is a valid
 * index into all three arrays. */
void vecAdd(const float* a, const float* b, float* c, size_t id) {
  c[id] = a[id] + b[id];
}
35+
36+
/* Masked variant where the store is hidden behind a runtime branch.
 *
 * Computes a[id] + b[id] and writes it to c[id] only when the sum is
 * strictly negative; for any other sum c[id] is left untouched. */
void vecAddMasked(const float* a, const float* b, float* c, size_t id) {
  float v = a[id] + b[id];
  if (v < 0.0f) {
    c[id] = v;
  }
}
44+
/* Variant where the variable value is predicated on a branch.
 *
 * Computes a[id] + b[id], replaces a strictly negative sum with 0.0f, and
 * then always stores the resulting value to c[id]. */
void vecAddPredicated(const float* a, const float* b, float* c, size_t id) {
  float v = a[id] + b[id];
  if (v < 0.0f) {
    v = 0.0f;
  }
  c[id] = v;
}
52+
53+
/* Forward-declared types used only as the kernel-name template arguments of
 * the three parallel_for calls in main; they are never defined. */
class VecAddKernel;
class VecAddKernelMasked;
class VecAddKernelPredicated;
56+
57+
/* Sets every element of the given buffer to 0.0f from the host.
 *
 * The elements are written through a host accessor requested in
 * discard_write mode. */
void zeroBuffer(sycl::buffer<float, 1> b) {
  constexpr auto dwrite = sycl::access::mode::discard_write;
  auto h = b.get_access<dwrite>();
  /* The element count does not change inside the loop, so it is read once.
   * The counter is a size_t so that it has the same width as the bound. */
  const size_t count = b.get_range()[0];
  for (size_t i = 0; i < count; i++) {
    h[i] = 0.f;
  }
}
64+
65+
/* Adds up every element of the given buffer on the host and prints the
 * total to standard output as "computation result: <sum>".
 *
 * The elements are read through a host accessor requested in read mode and
 * accumulated in a single float, in index order. */
void sumBuffer(sycl::buffer<float, 1> b) {
  constexpr auto read = sycl::access::mode::read;
  auto h = b.get_access<read>();
  auto sum = 0.0f;
  /* The element count does not change inside the loop, so it is read once.
   * The counter is a size_t so that it has the same width as the bound. */
  const size_t count = b.get_range()[0];
  for (size_t i = 0; i < count; i++) {
    sum += h[i];
  }
  std::cout << "computation result: " << sum << std::endl;
}
74+
75+
/* This sample shows three different vector addition functions. It
76+
* is possible to inspect the assembly generated by these samples
77+
* using the ComputeSuite tooling to compare the different approaches.
78+
* The general flow is that the output buffer is zeroed, the calculation
79+
* scheduled, then the sum printed for each of the functions. */
80+
int main(int argc, char* argv[]) {
81+
constexpr auto read = sycl::access::mode::read;
82+
constexpr auto write = sycl::access::mode::write;
83+
constexpr auto dwrite = sycl::access::mode::discard_write;
84+
constexpr const size_t N = 100000;
85+
const sycl::range<1> VecSize{N};
86+
87+
sycl::buffer<float> bufA{VecSize};
88+
sycl::buffer<float> bufB{VecSize};
89+
sycl::buffer<float> bufC{VecSize};
90+
91+
{
92+
auto h_a = bufA.get_access<dwrite>();
93+
auto h_b = bufB.get_access<dwrite>();
94+
for (auto i = 0u; i < N; i++) {
95+
h_a[i] = sin(i);
96+
h_b[i] = cos(i);
97+
}
98+
}
99+
100+
sycl::queue myQueue;
101+
102+
{
103+
zeroBuffer(bufC);
104+
auto cg = [&](sycl::handler& h) {
105+
auto a = bufA.get_access<read>(h);
106+
auto b = bufB.get_access<read>(h);
107+
auto c = bufC.get_access<write>(h);
108+
109+
h.parallel_for<VecAddKernel>(
110+
VecSize, [=](sycl::id<1> i) { vecAdd(&a[0], &b[0], &c[0], i[0]); });
111+
};
112+
myQueue.submit(cg);
113+
sumBuffer(bufC);
114+
}
115+
{
116+
zeroBuffer(bufC);
117+
auto cg = [&](sycl::handler& h) {
118+
auto a = bufA.get_access<read>(h);
119+
auto b = bufB.get_access<read>(h);
120+
auto c = bufC.get_access<write>(h);
121+
122+
h.parallel_for<VecAddKernelMasked>(VecSize, [=](sycl::id<1> i) {
123+
vecAddMasked(&a[0], &b[0], &c[0], i[0]);
124+
});
125+
};
126+
myQueue.submit(cg);
127+
sumBuffer(bufC);
128+
}
129+
{
130+
zeroBuffer(bufC);
131+
auto cg = [&](sycl::handler& h) {
132+
auto a = bufA.get_access<read>(h);
133+
auto b = bufB.get_access<read>(h);
134+
auto c = bufC.get_access<write>(h);
135+
136+
h.parallel_for<VecAddKernelPredicated>(VecSize, [=](sycl::id<1> i) {
137+
vecAddPredicated(&a[0], &b[0], &c[0], i[0]);
138+
});
139+
};
140+
myQueue.submit(cg);
141+
sumBuffer(bufC);
142+
}
143+
144+
return 0;
145+
}

0 commit comments

Comments
 (0)