66
77#include < algorithm>
88
9+ #include " common/cuda_hip/base/config.hpp"
10+ #include " common/cuda_hip/base/runtime.hpp"
11+ #include " common/cuda_hip/base/types.hpp"
12+ #include " common/cuda_hip/components/thread_ids.hpp"
913#include " core/base/allocator.hpp"
1014#include " core/base/device_matrix_data_kernels.hpp"
1115#include " core/base/iterator_factory.hpp"
16+ #include " core/base/utils.hpp"
1217#include " core/components/prefix_sum_kernels.hpp"
1318#include " reference/distributed/partition_helpers.hpp"
1419
@@ -19,6 +24,51 @@ namespace GKO_DEVICE_NAMESPACE {
1924namespace bddc {
2025
2126
27+ constexpr int default_block_size = 512 ;
28+
29+
30+ namespace kernel {
31+
32+
33+ template <typename ValueType, typename IndexType>
34+ __global__ __launch_bounds__ (default_block_size) void generate_constraints(
35+ const IndexType* interface_offsets, size_type n_inactive_idxs,
36+ size_type n_edges_faces, IndexType* row_idxs, IndexType* col_idxs,
37+ ValueType* values)
38+ {
39+ const auto interface_idx = thread::get_thread_id_flat ();
40+ if (interface_idx < n_edges_faces) {
41+ const auto start = interface_offsets[interface_idx];
42+ const auto stop = interface_offsets[interface_idx + 1 ];
43+ const ValueType val =
44+ one<ValueType>() / static_cast <ValueType>(stop - start);
45+ for (size_type idx = start; idx < stop; idx++) {
46+ row_idxs[idx] = interface_idx;
47+ col_idxs[idx] = n_inactive_idxs + idx;
48+ values[idx] = val;
49+ }
50+ }
51+ }
52+
53+
54+ template <typename ValueType>
55+ __global__ __launch_bounds__ (default_block_size) void fill_coarse_data(
56+ size_type n_edges_faces, size_type n_corners, size_type lambda_stride,
57+ size_type phi_stride, ValueType* lambda, ValueType* phi)
58+ {
59+ const auto i = thread::get_thread_id_flat ();
60+ if (i < n_edges_faces) {
61+ lambda[i * lambda_stride + i] = one<ValueType>();
62+ }
63+ if (i < n_corners) {
64+ phi[i * phi_stride + n_edges_faces + i] = one<ValueType>();
65+ }
66+ }
67+
68+
69+ } // namespace kernel
70+
71+
2272template <typename ValueType, typename IndexType>
2373void classify_dofs (
2474 std::shared_ptr<const DefaultExecutor> exec,
@@ -38,11 +88,27 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE_BASE(
3888
3989
4090template <typename ValueType, typename IndexType>
41- void generate_constraints (
42- std::shared_ptr<const DefaultExecutor> exec,
43- const matrix::Dense<ValueType>* labels, size_type n_inner_idxs,
44- size_type n_edges_faces, const array<IndexType>& interface_sizes,
45- device_matrix_data<ValueType, IndexType>& constraints) GKO_NOT_IMPLEMENTED;
91+ void generate_constraints (std::shared_ptr<const DefaultExecutor> exec,
92+ const matrix::Dense<ValueType>* labels,
93+ size_type n_inactive_idxs, size_type n_edges_faces,
94+ const array<IndexType>& interface_sizes,
95+ device_matrix_data<ValueType, IndexType>& constraints)
96+ {
97+ array<IndexType> interface_offsets{exec, n_edges_faces + 1 };
98+ exec->copy (n_edges_faces, interface_sizes.get_const_data (),
99+ interface_offsets.get_data ());
100+ components::prefix_sum_nonnegative (exec, interface_offsets.get_data (),
101+ n_edges_faces + 1 );
102+
103+ const auto grid_dim = ceildiv (n_edges_faces, default_block_size);
104+ if (grid_dim > 0 ) {
105+ kernel::generate_constraints<<<grid_dim, default_block_size>>>(
106+ as_device_type (interface_offsets.get_const_data ()), n_inactive_idxs,
107+ n_edges_faces, as_device_type (constraints.get_row_idxs ()),
108+ as_device_type (constraints.get_col_idxs ()),
109+ as_device_type (constraints.get_values ()));
110+ }
111+ }
46112
47113GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE_BASE (
48114 GKO_DECLARE_GENERATE_CONSTRAINTS);
@@ -51,14 +117,27 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE_BASE(
51117template <typename ValueType>
52118void fill_coarse_data (std::shared_ptr<const DefaultExecutor> exec,
53119 matrix::Dense<ValueType>* phi_P,
54- matrix::Dense<ValueType>* lambda_rhs) GKO_NOT_IMPLEMENTED;
120+ matrix::Dense<ValueType>* lambda_rhs)
121+ {
122+ const auto n_edges_faces = lambda_rhs->get_size ()[0 ];
123+ const auto n_corners = phi_P->get_size ()[1 ];
124+ const auto grid_dim =
125+ ceildiv (std::max (n_edges_faces, n_corners), default_block_size);
126+ if (grid_dim > 0 ) {
127+ kernel::fill_coarse_data<<<grid_dim, default_block_size>>>(
128+ n_edges_faces, n_corners, lambda_rhs->get_stride (),
129+ phi_P->get_stride (), as_device_type (lambda_rhs->get_values ()),
130+ as_device_type (phi_P->get_values ()));
131+ }
132+ }
55133
56134GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_BASE (GKO_DECLARE_FILL_COARSE_DATA);
57135
58136
59137template <typename ValueType, typename IndexType>
60138void build_coarse_contribution (
61139 std::shared_ptr<const DefaultExecutor> exec,
140+ const array<experimental::distributed::preconditioner::dof_type>& dof_types,
62141 const array<remove_complex<ValueType>>& local_labels,
63142 const array<IndexType>& local_tags,
64143 const array<remove_complex<ValueType>>& global_labels,
0 commit comments