Skip to content

Commit 46d6b8e

Browse files
committed
WIP rougly fix and check
1 parent 57b3519 commit 46d6b8e

173 files changed

Lines changed: 3156 additions & 3127 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

benchmark/sparse_blas/operations.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -575,7 +575,8 @@ bool validate_symbolic_factorization(const Mtx* input, const Mtx* factors)
575575
const auto exec = factors->get_executor();
576576
bool valid = false;
577577
exec->run(make_symbolic_validate(
578-
input, factors, gko::matrix::csr::build_lookup(factors), valid));
578+
input->get_const_device_view(), factors->get_const_device_view(),
579+
gko::matrix::csr::build_lookup(factors), valid));
579580
return valid;
580581
}
581582

common/cuda_hip/factorization/cholesky_kernels.cpp

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -235,24 +235,24 @@ __global__ __launch_bounds__(default_block_size) void factorize(
235235
template <typename ValueType, typename IndexType>
236236
void symbolic_factorize(
237237
std::shared_ptr<const DefaultExecutor> exec,
238-
const matrix::Csr<ValueType, IndexType>* mtx,
238+
matrix::view::csr<const ValueType, const IndexType> mtx,
239239
const factorization::elimination_forest<IndexType>& forest,
240-
matrix::Csr<ValueType, IndexType>* l_factor,
240+
matrix::view::csr<ValueType, IndexType> l_factor,
241241
const array<IndexType>& tmp_storage)
242242
{
243-
const auto num_rows = static_cast<IndexType>(mtx->get_size()[0]);
243+
const auto num_rows = static_cast<IndexType>(mtx.size()[0]);
244244
if (num_rows == 0) {
245245
return;
246246
}
247-
const auto mtx_nnz = static_cast<IndexType>(mtx->get_num_stored_elements());
247+
const auto mtx_nnz = static_cast<IndexType>(mtx.num_stored_elements());
248248
const auto postorder_cols = tmp_storage.get_const_data();
249249
const auto lower_ends = postorder_cols + mtx_nnz;
250-
const auto row_ptrs = mtx->get_const_row_ptrs();
250+
const auto row_ptrs = mtx.row_ptrs();
251251
const auto postorder = forest.postorder.get_const_data();
252252
const auto inv_postorder = forest.inv_postorder.get_const_data();
253253
const auto postorder_parent = forest.postorder_parents.get_const_data();
254-
const auto out_row_ptrs = l_factor->get_const_row_ptrs();
255-
const auto out_cols = l_factor->get_col_idxs();
254+
const auto out_row_ptrs = l_factor.row_ptrs;
255+
const auto out_cols = l_factor.col_idxs;
256256
const auto num_blocks =
257257
ceildiv(num_rows, default_block_size / config::warp_size);
258258
kernel::symbolic_factorize<config::warp_size>
@@ -267,12 +267,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
267267

268268
template <typename ValueType, typename IndexType>
269269
void initialize(std::shared_ptr<const DefaultExecutor> exec,
270-
const matrix::Csr<ValueType, IndexType>* mtx,
270+
matrix::view::csr<const ValueType, const IndexType> mtx,
271271
const IndexType* factor_lookup_offsets,
272272
const int64* factor_lookup_descs,
273273
const int32* factor_lookup_storage, IndexType* diag_idxs,
274274
IndexType* transpose_idxs,
275-
matrix::Csr<ValueType, IndexType>* factors)
275+
matrix::view::csr<ValueType, IndexType> factors)
276276
{
277277
lu_factorization::initialize(exec, mtx, factor_lookup_offsets,
278278
factor_lookup_descs, factor_lookup_storage,
@@ -304,28 +304,26 @@ void factorize(std::shared_ptr<const DefaultExecutor> exec,
304304
const int32* lookup_storage, const IndexType* diag_idxs,
305305
const IndexType* transpose_idxs,
306306
const factorization::elimination_forest<IndexType>& forest,
307-
matrix::Csr<ValueType, IndexType>* factors, bool full_fillin,
308-
array<int>& tmp_storage)
307+
matrix::view::csr<ValueType, IndexType> factors,
308+
bool full_fillin, array<int>& tmp_storage)
309309
{
310-
const auto num_rows = factors->get_size()[0];
310+
const auto num_rows = factors.size[0];
311311
if (num_rows > 0) {
312312
syncfree_storage storage(exec, tmp_storage, num_rows);
313313
const auto num_blocks =
314314
ceildiv(num_rows, default_block_size / config::warp_size);
315315
if (!full_fillin) {
316316
kernel::factorize<false>
317317
<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
318-
factors->get_const_row_ptrs(),
319-
factors->get_const_col_idxs(), lookup_offsets,
318+
factors.row_ptrs, factors.col_idxs, lookup_offsets,
320319
lookup_storage, lookup_descs, diag_idxs, transpose_idxs,
321-
as_device_type(factors->get_values()), storage, num_rows);
320+
as_device_type(factors.values), storage, num_rows);
322321
} else {
323322
kernel::factorize<true>
324323
<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
325-
factors->get_const_row_ptrs(),
326-
factors->get_const_col_idxs(), lookup_offsets,
324+
factors.row_ptrs, factors.col_idxs, lookup_offsets,
327325
lookup_storage, lookup_descs, diag_idxs, transpose_idxs,
328-
as_device_type(factors->get_values()), storage, num_rows);
326+
as_device_type(factors.values), storage, num_rows);
329327
}
330328
}
331329
}
@@ -335,20 +333,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_FACTORIZE);
335333

336334
template <typename ValueType, typename IndexType>
337335
void symbolic_count(std::shared_ptr<const DefaultExecutor> exec,
338-
const matrix::Csr<ValueType, IndexType>* mtx,
336+
matrix::view::csr<const ValueType, const IndexType> mtx,
339337
const factorization::elimination_forest<IndexType>& forest,
340338
IndexType* row_nnz, array<IndexType>& tmp_storage)
341339
{
342-
const auto num_rows = static_cast<IndexType>(mtx->get_size()[0]);
340+
const auto num_rows = static_cast<IndexType>(mtx.size[0]);
343341
if (num_rows == 0) {
344342
return;
345343
}
346-
const auto mtx_nnz = static_cast<IndexType>(mtx->get_num_stored_elements());
344+
const auto mtx_nnz = static_cast<IndexType>(mtx.num_stored_elements);
347345
tmp_storage.resize_and_reset(mtx_nnz + num_rows);
348346
const auto postorder_cols = tmp_storage.get_data();
349347
const auto lower_ends = postorder_cols + mtx_nnz;
350-
const auto row_ptrs = mtx->get_const_row_ptrs();
351-
const auto cols = mtx->get_const_col_idxs();
348+
const auto row_ptrs = mtx.row_ptrs;
349+
const auto cols = mtx.col_idxs;
352350
const auto inv_postorder = forest.inv_postorder.get_const_data();
353351
const auto postorder_parent = forest.postorder_parents.get_const_data();
354352
// transform col indices to postorder indices

common/cuda_hip/factorization/elimination_forest_kernels.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -334,15 +334,14 @@ void build_children_from_parents(
334334

335335
template <typename ValueType, typename IndexType>
336336
void from_factor(std::shared_ptr<const DefaultExecutor> exec,
337-
const matrix::Csr<ValueType, IndexType>* factors,
337+
matrix::view::csr<const ValueType, const IndexType> factors,
338338
gko::factorization::elimination_forest<IndexType>& forest)
339339
{
340-
const auto num_rows = factors->get_size()[0];
340+
const auto num_rows = factors.size[0];
341341
const auto it = thrust::make_counting_iterator(IndexType{});
342342
thrust::transform(
343343
thrust_policy(exec), it, it + num_rows, forest.parents.get_data(),
344-
[row_ptrs = factors->get_const_row_ptrs(),
345-
col_idxs = factors->get_const_col_idxs(),
344+
[row_ptrs = factors.row_ptrs, col_idxs = factors.col_idxs,
346345
num_rows] __device__(IndexType l_col) {
347346
const auto llt_row_begin = row_ptrs[l_col];
348347
const auto llt_row_end = row_ptrs[l_col + 1];

common/cuda_hip/factorization/factorization_kernels.cpp

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -434,10 +434,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
434434
template <typename ValueType, typename IndexType>
435435
void initialize_row_ptrs_l_u(
436436
std::shared_ptr<const DefaultExecutor> exec,
437-
const matrix::Csr<ValueType, IndexType>* system_matrix,
437+
matrix::view::csr<const ValueType, const IndexType> system_matrix,
438438
IndexType* l_row_ptrs, IndexType* u_row_ptrs)
439439
{
440-
const size_type num_rows{system_matrix->get_size()[0]};
440+
const size_type num_rows{system_matrix.size[0]};
441441

442442
const auto block_size = default_block_size;
443443
const uint32 number_blocks =
@@ -447,10 +447,8 @@ void initialize_row_ptrs_l_u(
447447
if (grid_dim > 0) {
448448
kernel::count_nnz_per_l_u_row<<<grid_dim, block_size, 0,
449449
exec->get_stream()>>>(
450-
num_rows, system_matrix->get_const_row_ptrs(),
451-
system_matrix->get_const_col_idxs(),
452-
as_device_type(system_matrix->get_const_values()), l_row_ptrs,
453-
u_row_ptrs);
450+
num_rows, system_matrix.row_ptrs, system_matrix.col_idxs,
451+
as_device_type(system_matrix.values), l_row_ptrs, u_row_ptrs);
454452
}
455453

456454
components::prefix_sum_nonnegative(exec, l_row_ptrs, num_rows + 1);
@@ -462,12 +460,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
462460

463461

464462
template <typename ValueType, typename IndexType>
465-
void initialize_l_u(std::shared_ptr<const DefaultExecutor> exec,
466-
const matrix::Csr<ValueType, IndexType>* system_matrix,
467-
matrix::Csr<ValueType, IndexType>* csr_l,
468-
matrix::Csr<ValueType, IndexType>* csr_u)
463+
void initialize_l_u(
464+
std::shared_ptr<const DefaultExecutor> exec,
465+
matrix::view::csr<const ValueType, const IndexType> system_matrix,
466+
matrix::view::csr<ValueType, IndexType> csr_l,
467+
matrix::view::csr<ValueType, IndexType> csr_u)
469468
{
470-
const size_type num_rows{system_matrix->get_size()[0]};
469+
const size_type num_rows{system_matrix.size[0]};
471470
const auto block_size = helpers::default_block_size;
472471
const auto grid_dim = static_cast<uint32>(
473472
ceildiv(num_rows, static_cast<size_type>(block_size)));
@@ -480,13 +479,11 @@ void initialize_l_u(std::shared_ptr<const DefaultExecutor> exec,
480479
auto u_closure = triangular_mtx_closure(identity{}, identity{});
481480
helpers::
482481
initialize_l_u<<<grid_dim, block_size, 0, exec->get_stream()>>>(
483-
num_rows, system_matrix->get_const_row_ptrs(),
484-
system_matrix->get_const_col_idxs(),
485-
as_device_type(system_matrix->get_const_values()),
486-
csr_l->get_const_row_ptrs(), csr_l->get_col_idxs(),
487-
as_device_type(csr_l->get_values()),
488-
csr_u->get_const_row_ptrs(), csr_u->get_col_idxs(),
489-
as_device_type(csr_u->get_values()), l_closure, u_closure);
482+
num_rows, system_matrix.row_ptrs, system_matrix.col_idxs,
483+
as_device_type(system_matrix.values), csr_l.row_ptrs,
484+
csr_l.col_idxs, as_device_type(csr_l.get_values),
485+
csr_u.row_ptrs, csr_u.col_idxs,
486+
as_device_type(csr_u.get_values), l_closure, u_closure);
490487
}
491488
}
492489

@@ -497,10 +494,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
497494
template <typename ValueType, typename IndexType>
498495
void initialize_row_ptrs_l(
499496
std::shared_ptr<const DefaultExecutor> exec,
500-
const matrix::Csr<ValueType, IndexType>* system_matrix,
497+
matrix::view::csr<const ValueType, const IndexType> system_matrix,
501498
IndexType* l_row_ptrs)
502499
{
503-
const size_type num_rows{system_matrix->get_size()[0]};
500+
const size_type num_rows{system_matrix.size[0]};
504501

505502
const auto block_size = default_block_size;
506503
const uint32 number_blocks =
@@ -510,9 +507,8 @@ void initialize_row_ptrs_l(
510507
if (grid_dim > 0) {
511508
kernel::count_nnz_per_l_row<<<grid_dim, block_size, 0,
512509
exec->get_stream()>>>(
513-
num_rows, system_matrix->get_const_row_ptrs(),
514-
system_matrix->get_const_col_idxs(),
515-
as_device_type(system_matrix->get_const_values()), l_row_ptrs);
510+
num_rows, system_matrix.row_ptrs, system_matrix.col_idxs,
511+
as_device_type(system_matrix.values), l_row_ptrs);
516512
}
517513

518514
components::prefix_sum_nonnegative(exec, l_row_ptrs, num_rows + 1);
@@ -523,11 +519,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
523519

524520

525521
template <typename ValueType, typename IndexType>
526-
void initialize_l(std::shared_ptr<const DefaultExecutor> exec,
527-
const matrix::Csr<ValueType, IndexType>* system_matrix,
528-
matrix::Csr<ValueType, IndexType>* csr_l, bool diag_sqrt)
522+
void initialize_l(
523+
std::shared_ptr<const DefaultExecutor> exec,
524+
matrix::view::csr<const ValueType, const IndexType> system_matrix,
525+
matrix::view::csr<ValueType, IndexType> csr_l, bool diag_sqrt)
529526
{
530-
const size_type num_rows{system_matrix->get_size()[0]};
527+
const size_type num_rows{system_matrix.size[0]};
531528
const auto block_size = helpers::default_block_size;
532529
const auto grid_dim = static_cast<uint32>(
533530
ceildiv(num_rows, static_cast<size_type>(block_size)));
@@ -536,11 +533,9 @@ void initialize_l(std::shared_ptr<const DefaultExecutor> exec,
536533
using namespace gko::factorization;
537534

538535
helpers::initialize_l<<<grid_dim, block_size, 0, exec->get_stream()>>>(
539-
num_rows, system_matrix->get_const_row_ptrs(),
540-
system_matrix->get_const_col_idxs(),
541-
as_device_type(system_matrix->get_const_values()),
542-
csr_l->get_const_row_ptrs(), csr_l->get_col_idxs(),
543-
as_device_type(csr_l->get_values()),
536+
num_rows, system_matrix.row_ptrs, system_matrix.col_idxs,
537+
as_device_type(system_matrix.values), csr_l.row_ptrs,
538+
csr_l.col_idxs, as_device_type(csr_l.values),
544539
triangular_mtx_closure(
545540
[diag_sqrt] __device__(auto val) {
546541
if (diag_sqrt) {
@@ -562,18 +557,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
562557
template <typename ValueType, typename IndexType>
563558
void symbolic_validate(
564559
std::shared_ptr<const DefaultExecutor> exec,
565-
const matrix::Csr<ValueType, IndexType>* system_matrix,
566-
const matrix::Csr<ValueType, IndexType>* factors,
560+
matrix::view::csr<const ValueType, const IndexType> system_matrix,
561+
matrix::view::csr<const ValueType, const IndexType> factors,
567562
const matrix::csr::lookup_data<IndexType>& factors_lookup, bool& valid)
568563
{
569-
const auto size = system_matrix->get_size()[0];
570-
const auto row_ptrs = system_matrix->get_const_row_ptrs();
571-
const auto col_idxs = system_matrix->get_const_col_idxs();
572-
const auto factor_row_ptrs = factors->get_const_row_ptrs();
573-
const auto factor_col_idxs = factors->get_const_col_idxs();
564+
const auto size = system_matrix.size[0];
565+
const auto row_ptrs = system_matrix.row_ptrs;
566+
const auto col_idxs = system_matrix.col_idxs;
567+
const auto factor_row_ptrs = factors.row_ptrs;
568+
const auto factor_col_idxs = factors.col_idxs;
574569
// this stores for each factor nonzero whether it occurred as part of the
575570
// factorization.
576-
array<bool> found(exec, factors->get_num_stored_elements());
571+
array<bool> found(exec, factors.num_stored_elements);
577572
components::fill_array(exec, found.get_data(), found.get_size(), false);
578573
// this stores for each row whether there were any elements missing
579574
array<bool> missing(exec, size);

common/cuda_hip/factorization/ic_kernels.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -18,32 +18,30 @@ namespace ic_factorization {
1818

1919
template <typename ValueType, typename IndexType>
2020
void sparselib_ic(std::shared_ptr<const DefaultExecutor> exec,
21-
matrix::Csr<ValueType, IndexType>* m)
21+
matrix::view::csr<ValueType, IndexType> m)
2222
{
2323
const auto id = exec->get_device_id();
2424
auto handle = exec->get_sparselib_handle();
2525
auto desc = sparselib::create_mat_descr();
2626
auto info = sparselib::create_ic0_info();
2727

2828
// get buffer size for IC
29-
IndexType num_rows = m->get_size()[0];
30-
IndexType nnz = m->get_num_stored_elements();
29+
IndexType num_rows = m.size[0];
30+
IndexType nnz = m.num_stored_elements;
3131
size_type buffer_size{};
32-
sparselib::ic0_buffer_size(handle, num_rows, nnz, desc,
33-
m->get_const_values(), m->get_const_row_ptrs(),
34-
m->get_const_col_idxs(), info, buffer_size);
32+
sparselib::ic0_buffer_size(handle, num_rows, nnz, desc, m.values,
33+
m.row_ptrs, m.col_idxs, info, buffer_size);
3534

3635
array<char> buffer{exec, buffer_size};
3736

3837
// set up IC(0)
39-
sparselib::ic0_analysis(handle, num_rows, nnz, desc, m->get_const_values(),
40-
m->get_const_row_ptrs(), m->get_const_col_idxs(),
41-
info, SPARSELIB_SOLVE_POLICY_USE_LEVEL,
38+
sparselib::ic0_analysis(handle, num_rows, nnz, desc, m.values, m.row_ptrs,
39+
m.col_idxs, info, SPARSELIB_SOLVE_POLICY_USE_LEVEL,
4240
buffer.get_data());
4341

44-
sparselib::ic0(handle, num_rows, nnz, desc, m->get_values(),
45-
m->get_const_row_ptrs(), m->get_const_col_idxs(), info,
46-
SPARSELIB_SOLVE_POLICY_USE_LEVEL, buffer.get_data());
42+
sparselib::ic0(handle, num_rows, nnz, desc, m.values, m.row_ptrs,
43+
m.col_idxs, info, SPARSELIB_SOLVE_POLICY_USE_LEVEL,
44+
buffer.get_data());
4745

4846
// CUDA 11.4 has a use-after-free bug on Turing
4947
#if defined(GKO_COMPILING_CUDA) && (CUDA_VERSION >= 11040)

0 commit comments

Comments
 (0)