Skip to content

Commit 971ab2a

Browse files
authored
feat: 3-columns permutation matrix (#17)
# What ❔
This PR adds the possibility to generate a 3-column permutation matrix.
## Why ❔
This feature is needed for the fflonk prover implementation.
1 parent c66a9ba commit 971ab2a

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

src/pn.cu

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ cudaError_t generate_permutation_polynomials(const generate_permutation_polynomi
1919
cudaMemPool_t pool = cfg.mem_pool;
2020
cudaStream_t stream = cfg.stream;
2121
unsigned int columns_count = cfg.columns_count;
22-
assert(columns_count == 4);
2322
unsigned int log_rows_count = cfg.log_rows_count;
2423
const unsigned cells_count = columns_count << log_rows_count;
2524
const unsigned bits_count = log2_ceiling(columns_count) + log_rows_count;
@@ -31,7 +30,16 @@ cudaError_t generate_permutation_polynomials(const generate_permutation_polynomi
3130
unsigned_ints sorted_values;
3231

3332
HANDLE_CUDA_ERROR(allocate(unsorted_keys, cells_count, pool, stream));
34-
HANDLE_CUDA_ERROR(transpose<4>(unsorted_keys, cfg.indexes, log_rows_count, stream));
33+
switch (columns_count) {
34+
case 3:
35+
HANDLE_CUDA_ERROR(transpose<3>(unsorted_keys, cfg.indexes, log_rows_count, stream));
36+
break;
37+
case 4:
38+
HANDLE_CUDA_ERROR(transpose<4>(unsorted_keys, cfg.indexes, log_rows_count, stream));
39+
break;
40+
default:
41+
assert(columns_count == 3 || columns_count == 4);
42+
}
3543
HANDLE_CUDA_ERROR(allocate(unsorted_values, cells_count, pool, stream));
3644
HANDLE_CUDA_ERROR(fill_transposed_range(unsorted_values, columns_count, log_rows_count, stream));
3745
HANDLE_CUDA_ERROR(allocate(sorted_keys, cells_count, pool, stream));

src/pn_kernels.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ template <unsigned COL_COUNT> cudaError_t transpose(unsigned *dst, const unsigne
4444
return cudaGetLastError();
4545
}
4646

47+
template cudaError_t transpose<3>(unsigned *dst, const unsigned *src, unsigned log_rows_count, cudaStream_t stream);
4748
template cudaError_t transpose<4>(unsigned *dst, const unsigned *src, unsigned log_rows_count, cudaStream_t stream);
4849
#undef BLOCK_SIZE
4950

0 commit comments

Comments
 (0)