Skip to content

Commit 6060827

Browse files
committed
Add interface for reduce
1 parent bced3e9 commit 6060827

File tree

6 files changed

+311
-0
lines changed

6 files changed

+311
-0
lines changed

include/algorithm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
#pragma once
33

44
#include "algorithm/extrema.h"
5+
#include "algorithm/reduce.h"
56
#include "algorithm/sort.h"

include/algorithm/reduce.h

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
2+
#pragma once
3+
4+
#include "internal/defines.h"
5+
6+
#if defined(XTD_CUDA_BACKEND)
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>
#elif defined(XTD_HIP_BACKEND)
// Kept for installations that provide a rocthrust/ include directory.
#if __has_include(<rocthrust/reduce.h>)
#include <rocthrust/reduce.h>
#endif
// rocThrust ships its public headers under thrust/.
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>
#elif defined(XTD_SYCL_BACKEND)
#include <oneapi/dpl/algorithm>
#include <oneapi/dpl/execution>
#include <oneapi/dpl/numeric>
#else
#include <algorithm>
#include <numeric>
#endif

#include <iterator>
#include <utility>
16+
17+
namespace xtd {
18+
19+
template <typename InputIterator>
20+
XTD_HOST_FUNCTION inline constexpr typename std::iterator_traits<InputIterator>::value_type
21+
reduce(InputIterator first, InputIterator last) {
22+
#if defined(XTD_CUDA_BACKEND)
23+
return thrust::reduce(thrust::device, first, last);
24+
#elif defined(XTD_HIP_BACKEND)
25+
return rocthrust::reduce(thrust::hip::par, first, last);
26+
#elif defined(XTD_SYCL_BACKEND)
27+
return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last);
28+
#else
29+
return std::reduce(first, last);
30+
#endif
31+
}
32+
33+
template <typename ExecutionPolicy, typename ForwardIterator>
34+
XTD_HOST_FUNCTION inline constexpr typename std::iterator_traits<ForwardIterator>::value_type
35+
reduce(ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) {
36+
#if defined(XTD_CUDA_BACKEND)
37+
return thrust::reduce(std::forward<ExecutionPolicy>(policy), first, last);
38+
#elif defined(XTD_HIP_BACKEND)
39+
return rocthrust::reduce(std::forward<ExecutionPolicy>(policy), first, last);
40+
#elif defined(XTD_SYCL_BACKEND)
41+
return oneapi::dpl::reduce(std::forward<ExecutionPolicy>(policy), first, last);
42+
#else
43+
return std::reduce(std::forward<ExecutionPolicy>(policy), first, last);
44+
#endif
45+
}
46+
47+
template <typename InputIterator, typename T>
48+
XTD_HOST_FUNCTION inline constexpr T reduce(InputIterator first, InputIterator last, T init) {
49+
#if defined(XTD_CUDA_BACKEND)
50+
return thrust::reduce(thrust::device, first, last, init);
51+
#elif defined(XTD_HIP_BACKEND)
52+
return rocthrust::reduce(thrust::hip::par, first, last, init);
53+
#elif defined(XTD_SYCL_BACKEND)
54+
return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last, init);
55+
#else
56+
return std::reduce(first, last, init);
57+
#endif
58+
}
59+
60+
template <typename ExecutionPolicy, typename ForwardIterator, typename T>
61+
XTD_HOST_FUNCTION inline constexpr T reduce(ExecutionPolicy&& policy,
62+
ForwardIterator first,
63+
ForwardIterator last,
64+
T init) {
65+
#if defined(XTD_CUDA_BACKEND)
66+
return thrust::reduce(std::forward<ExecutionPolicy>(policy), first, last, init);
67+
#elif defined(XTD_HIP_BACKEND)
68+
return rocthrust::reduce(std::forward<ExecutionPolicy>(policy), first, last, init);
69+
#elif defined(XTD_SYCL_BACKEND)
70+
return oneapi::dpl::reduce(std::forward<ExecutionPolicy>(policy), first, last, init);
71+
#else
72+
return std::reduce(std::forward<ExecutionPolicy>(policy), first, last, init);
73+
#endif
74+
}
75+
76+
template <typename InputIterator, typename T, typename BinaryOperation>
77+
XTD_HOST_FUNCTION inline constexpr T reduce(InputIterator first,
78+
InputIterator last,
79+
T init,
80+
BinaryOperation op) {
81+
#if defined(XTD_CUDA_BACKEND)
82+
return thrust::reduce(thrust::device, first, last, init, op);
83+
#elif defined(XTD_HIP_BACKEND)
84+
return rocthrust::reduce(thrust::hip::par, first, last, init, op);
85+
#elif defined(XTD_SYCL_BACKEND)
86+
return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last, init, op);
87+
#else
88+
return std::reduce(first, last, init, op);
89+
#endif
90+
}
91+
92+
template <typename ExecutionPolicy, typename ForwardIterator, typename T, typename BinaryOperation>
93+
XTD_HOST_FUNCTION inline constexpr T reduce(ExecutionPolicy&& policy,
94+
ForwardIterator first,
95+
ForwardIterator last,
96+
T init,
97+
BinaryOperation op) {
98+
#if defined(XTD_CUDA_BACKEND)
99+
return thrust::reduce(std::forward<ExecutionPolicy>(policy), first, last, init, op);
100+
#elif defined(XTD_HIP_BACKEND)
101+
return rocthrust::reduce(std::forward<ExecutionPolicy>(policy), first, last, init, op);
102+
#elif defined(XTD_SYCL_BACKEND)
103+
return oneapi::dpl::reduce(std::forward<ExecutionPolicy>(policy), first, last, init, op);
104+
#else
105+
return std::reduce(std::forward<ExecutionPolicy>(policy), first, last, init, op);
106+
#endif
107+
}
108+
109+
} // namespace xtd

test/reduce/reduce_t.cc

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
2+
#include <algorithm>
3+
#include <execution>
4+
#include <numeric>
5+
#include <ranges>
6+
#include <vector>
7+
8+
#define CATCH_CONFIG_MAIN
9+
#include <catch.hpp>
10+
11+
#include "algorithm.h"
12+
13+
// Checks that each host overload of xtd::reduce returns the same value as the
// corresponding std::reduce call on the sequence 0..N-1.
TEST_CASE("reduceCPU", "[reduce]") {
  const int N = 100;

  std::vector<int> values(N);
  std::iota(values.begin(), values.end(), 0);

  SECTION("Default reduction") {
    auto red = xtd::reduce(values.begin(), values.end());
    REQUIRE(red == std::reduce(values.begin(), values.end()));
  }

  // std::plus is used as the custom operation because a reduction operator has
  // to be associative and commutative; a comparison such as std::less is not.
  SECTION("Initial value and binary operation") {
    auto red = xtd::reduce(values.begin(), values.end(), -1, std::plus<int>());
    REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::plus<int>()));
  }

  SECTION("Unseq execution policy") {
    int red = xtd::reduce(std::execution::unseq, values.begin(), values.end());
    REQUIRE(red == std::reduce(values.begin(), values.end()));
  }

  SECTION("Unseq execution policy with initial value and binary operation") {
    auto red =
        xtd::reduce(std::execution::unseq, values.begin(), values.end(), -1, std::plus<int>());
    REQUIRE(red ==
            std::reduce(std::execution::unseq, values.begin(), values.end(), -1, std::plus<int>()));
  }

  SECTION("Reduction with initial value") {
    auto red = xtd::reduce(values.begin(), values.end(), 1);
    REQUIRE(red == std::reduce(values.begin(), values.end(), 1));
  }

  SECTION("Reduction with initial value and unseq policy") {
    auto red = xtd::reduce(std::execution::unseq, values.begin(), values.end(), 1);
    REQUIRE(red == std::reduce(std::execution::unseq, values.begin(), values.end(), 1));
  }
}

test/reduce/reduce_t.cu

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
2+
#include <algorithm>
3+
#include <execution>
4+
#include <numeric>
5+
#include <ranges>
6+
#include <vector>
7+
8+
#define CATCH_CONFIG_MAIN
9+
#include <catch.hpp>
10+
11+
#include "algorithm.h"
12+
13+
#include "common/cuda_check.h"
14+
#include <cuda_runtime.h>
15+
#include <thrust/copy.h>
16+
#include <thrust/execution_policy.h>
17+
18+
// Copies a shuffled 0..N-1 sequence to the device and checks that the
// policy-less xtd::reduce overloads match std::reduce on the host copy.
TEST_CASE("reduceCUDA", "[reduce]") {
  const int N = 100;
  std::random_device rd;
  std::mt19937 rng(rd());

  std::vector<int> values(N);
  std::iota(values.begin(), values.end(), 0);
  std::shuffle(values.begin(), values.end(), rng);

  cudaStream_t stream;
  CUDA_CHECK(cudaStreamCreate(&stream));

  int* d_values;
  CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream));
  CUDA_CHECK(
      cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream));
  // The reductions below are not given `stream`, so make sure the allocation
  // and the copy have completed before the data is read.
  CUDA_CHECK(cudaStreamSynchronize(stream));

  SECTION("Default reduction") {
    auto red = xtd::reduce(d_values, d_values + N);
    REQUIRE(red == std::reduce(values.begin(), values.end()));
  }

  // std::plus is used as the custom operation because a reduction operator has
  // to be associative and commutative; with std::less the host and device
  // results depend on the evaluation order and need not agree.
  SECTION("Initial value and binary operation") {
    auto red = xtd::reduce(d_values, d_values + N, -1, std::plus<int>());
    REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::plus<int>()));
  }

  SECTION("Reduction with initial value") {
    auto red = xtd::reduce(d_values, d_values + N, 1);
    REQUIRE(red == std::reduce(values.begin(), values.end(), 1));
  }

  // Release the device buffer and the stream created for this run.
  CUDA_CHECK(cudaFreeAsync(d_values, stream));
  CUDA_CHECK(cudaStreamSynchronize(stream));
  CUDA_CHECK(cudaStreamDestroy(stream));
}

test/reduce/reduce_t.hip.cc

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
2+
#include <algorithm>
3+
#include <execution>
4+
#include <numeric>
5+
#include <ranges>
6+
#include <vector>
7+
8+
#define CATCH_CONFIG_MAIN
9+
#include <catch.hpp>
10+
11+
#include "algorithm.h"
12+
13+
#include "common/hip_check.h"
14+
#include <hip_runtime.h>
15+
#include <rocthrust/copy.h>
16+
17+
// Copies a shuffled 0..N-1 sequence to the device and checks that the
// policy-less xtd::reduce overloads match std::reduce on the host copy.
TEST_CASE("reduceHIP", "[reduce]") {
  const int N = 100;
  std::random_device rd;
  std::mt19937 rng(rd());

  std::vector<int> values(N);
  std::iota(values.begin(), values.end(), 0);
  std::shuffle(values.begin(), values.end(), rng);

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  int* d_values;
  HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream));
  HIP_CHECK(
      hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream));
  // The reductions below are not given `stream`, so make sure the allocation
  // and the copy have completed before the data is read.
  HIP_CHECK(hipStreamSynchronize(stream));

  SECTION("Default reduction") {
    auto red = xtd::reduce(d_values, d_values + N);
    REQUIRE(red == std::reduce(values.begin(), values.end()));
  }

  // std::plus is used as the custom operation because a reduction operator has
  // to be associative and commutative; with std::less the host and device
  // results depend on the evaluation order and need not agree.
  SECTION("Initial value and binary operation") {
    auto red = xtd::reduce(d_values, d_values + N, -1, std::plus<int>());
    REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::plus<int>()));
  }

  SECTION("Reduction with initial value") {
    auto red = xtd::reduce(d_values, d_values + N, 1);
    REQUIRE(red == std::reduce(values.begin(), values.end(), 1));
  }

  // Release the device buffer and the stream created for this run.
  HIP_CHECK(hipFreeAsync(d_values, stream));
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
}

test/reduce/reduce_t.sycl.cc

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
2+
#include <algorithm>
3+
#include <numeric>
4+
#include <ranges>
5+
#include <vector>
6+
7+
#define CATCH_CONFIG_MAIN
8+
#include <catch.hpp>
9+
10+
#include "algorithm.h"
11+
12+
#include <sycl/sycl.hpp>
13+
14+
// Copies a shuffled 0..N-1 sequence into USM device memory and checks that the
// policy-less xtd::reduce overloads match std::reduce on the host copy.
TEST_CASE("reduceSYCL", "[reduce]") {
  const int N = 100;
  std::random_device rd;
  std::mt19937 rng(rd());

#ifdef ONEAPI_CPU
  auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()};
#else
  if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) {
    std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl;
    exit(EXIT_SUCCESS);
  }
  auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()};
#endif

  std::vector<int> values(N);
  std::iota(values.begin(), values.end(), 0);
  std::shuffle(values.begin(), values.end(), rng);

  auto* d_values = sycl::malloc_device<int>(N, queue);
  queue.memcpy(d_values, values.data(), N * sizeof(int)).wait();

  // NOTE(review): the policy-less xtd::reduce overloads use oneDPL's default
  // policy rather than `queue`; confirm both refer to the same device/context,
  // otherwise `d_values` may not be accessible to the reduction.

  SECTION("Default reduction") {
    auto red = xtd::reduce(d_values, d_values + N);
    REQUIRE(red == std::reduce(values.begin(), values.end()));
  }

  // std::plus is used as the custom operation because a reduction operator has
  // to be associative and commutative; with std::less the host and device
  // results depend on the evaluation order and need not agree.
  SECTION("Initial value and binary operation") {
    auto red = xtd::reduce(d_values, d_values + N, -1, std::plus<int>());
    REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::plus<int>()));
  }

  SECTION("Reduction with initial value") {
    auto red = xtd::reduce(d_values, d_values + N, 1);
    REQUIRE(red == std::reduce(values.begin(), values.end(), 1));
  }

  sycl::free(d_values, queue);
}

0 commit comments

Comments
 (0)