Skip to content

Commit e71f98f

Browse files
committed
Update
1 parent c2c9fb9 commit e71f98f

File tree

5 files changed: 24 additions and 18 deletions.

5 files changed: 24 additions and 18 deletions.

cp-algo/math/cvector.hpp

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,12 @@
44
#include "../util/checkpoint.hpp"
55
#include <experimental/simd>
66
#include <ranges>
7+
8+
namespace stdx = std::experimental;
79
namespace cp_algo::math::fft {
810
using ftype = double;
911
using point = complex<ftype>;
10-
using vftype = std::experimental::native_simd<ftype>;
12+
using vftype = stdx::native_simd<ftype>;
1113
using vpoint = complex<vftype>;
1214
static constexpr size_t flen = vftype::size();
1315

@@ -57,15 +59,15 @@ namespace cp_algo::math::fft {
5759
auto [Ax, Ay] = A.vget(k);
5860
alignas(32) ftype Bx[2 * flen];
5961
alignas(32) ftype By[2 * flen];
60-
Bvx.copy_to(Bx + flen, std::experimental::vector_aligned);
61-
Bvy.copy_to(By + flen, std::experimental::vector_aligned);
62-
Brvx.copy_to(Bx, std::experimental::vector_aligned);
63-
Brvy.copy_to(By, std::experimental::vector_aligned);
62+
Bvx.copy_to(Bx + flen, stdx::vector_aligned);
63+
Bvy.copy_to(By + flen, stdx::vector_aligned);
64+
Brvx.copy_to(Bx, stdx::vector_aligned);
65+
Brvy.copy_to(By, stdx::vector_aligned);
6466
vpoint res = {0, 0};
6567
for(size_t i = 0; i < flen; i++) {
6668
vftype Bsx, Bsy;
67-
Bsx.copy_from(Bx + flen - i, std::experimental::element_aligned);
68-
Bsy.copy_from(By + flen - i, std::experimental::element_aligned);
69+
Bsx.copy_from(Bx + flen - i, stdx::element_aligned);
70+
Bsy.copy_from(By + flen - i, stdx::element_aligned);
6971
res += vpoint(Ax[i], Ay[i]) * vpoint(Bsx, Bsy);
7072
}
7173
return res;

cp-algo/math/fft.hpp

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -13,13 +13,17 @@ namespace cp_algo::math::fft {
1313

1414
dft(auto const& a, size_t n): A(n), B(n) {
1515
split = int(std::sqrt(base::mod())) + 1;
16-
cvector::exec_on_roots(2 * n, size(a), [&](size_t i, point rt) {
17-
size_t ti = std::min(i, i - n);
18-
auto rem = std::remainder(a[i].rem(), split);
19-
auto quo = (ftype(a[i].rem()) - rem) / split;
20-
A.set(ti, A.get(ti) + rem * rt);
21-
B.set(ti, B.get(ti) + quo * rt);
22-
16+
cvector::exec_on_roots(2 * n, std::min(n, size(a)), [&](size_t i, auto rt) {
17+
auto splt = [&](size_t i) {
18+
ftype ai = i < size(a) ? a[i].rem() : 0;
19+
auto rem = std::remainder(ai, split);
20+
auto quo = (ai - rem) / split;
21+
return std::pair{rem, quo};
22+
};
23+
auto [rai, qai] = splt(i);
24+
auto [rani, qani] = splt(n + i);
25+
A.set(i, point(rai, rani) * rt);
26+
B.set(i, point(qai, qani) * rt);
2327
});
2428
checkpoint("dft init");
2529
if(n) {
@@ -154,8 +158,9 @@ namespace cp_algo::math::fft {
154158
auto n = std::max(flen, std::bit_ceil(
155159
std::min(k, size(a)) + std::min(k, size(b)) - 1
156160
) / 2);
157-
a.resize(k);
158161
auto A = dft<base>(a, n);
162+
a.resize(k);
163+
checkpoint("resize a");
159164
if(&a == &b) {
160165
A.mul(A, a, k);
161166
} else {

cp-algo/number_theory/factorize.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ namespace cp_algo::math {
1515
base x, y;
1616
base g = 1;
1717
while(g == 1) {
18-
for(int i = 1; i <= 64; i++) {
18+
for(int i = 0; i < 64; i++) {
1919
x = f(x);
2020
y = f(f(y));
2121
if(x == y) [[unlikely]] {

cp-algo/util/bump_alloc.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#define CP_ALGO_UTIL_BUMP_ALLOC_HPP
33
#include <cstddef>
44
namespace cp_algo {
5-
alignas(32) char buf[450 << 20];
5+
alignas(64) char buf[450 << 20];
66
size_t buf_ind = sizeof buf;
77
template<class T> struct bump_alloc {
88
typedef T value_type;

verify/number_theory/factorize.test.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,4 +27,3 @@ signed main() {
2727
solve();
2828
}
2929
}
30-

0 commit comments

Comments
 (0)