Skip to content

Commit 3cbf663

Browse files
committed
Refactor dot_block
1 parent 07f2eb1 commit 3cbf663

File tree

2 files changed: 13 additions and 30 deletions.

2 files changed: 13 additions and 30 deletions.

cp-algo/math/cvector.hpp

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -119,18 +119,13 @@ namespace cp_algo::math::fft {
119119
auto [Bvx, Bvy] = B.vget(k);
120120
auto [Brvx, Brvy] = vpoint(Bvx, Bvy) * vpoint(real(rt), imag(rt));
121121
auto [Ax, Ay] = A.vget(k);
122-
alignas(32) ftype Bx[2 * flen];
123-
alignas(32) ftype By[2 * flen];
124-
Bvx.copy_to(Bx + flen, stdx::vector_aligned);
125-
Bvy.copy_to(By + flen, stdx::vector_aligned);
126-
Brvx.copy_to(Bx, stdx::vector_aligned);
127-
Brvy.copy_to(By, stdx::vector_aligned);
122+
vftype Bx[2] = {Brvx, Bvx}, By[2] = {Brvy, Bvy};
128123
vpoint res = {0, 0};
129-
for(size_t i = 0; i < flen; i++) {
124+
for (size_t i = 0; i < flen; i++) {
130125
vftype Bsx, Bsy;
131-
Bsx.copy_from(Bx + flen - i, stdx::element_aligned);
132-
Bsy.copy_from(By + flen - i, stdx::element_aligned);
133-
res += vpoint(Ax[i], Ay[i]) * vpoint(Bsx, Bsy);
126+
Bsx.copy_from((ftype*)Bx + flen - i, stdx::element_aligned);
127+
Bsy.copy_from((ftype*)By + flen - i, stdx::element_aligned);
128+
res += vpoint(Ax[i], Ay[i]) * vpoint{Bsx, Bsy};
134129
}
135130
return res;
136131
}

cp-algo/math/fft.hpp

Lines changed: 8 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -50,32 +50,20 @@ namespace cp_algo::math::fft {
5050
auto [Dvx, Dvy] = D.vget(k);
5151
auto [Crvx, Crvy] = vpoint(Cvx, Cvy) * vpoint(real(rt), imag(rt));
5252
auto [Drvx, Drvy] = vpoint(Dvx, Dvy) * vpoint(real(rt), imag(rt));
53-
alignas(32) ftype Cx[2 * flen];
54-
alignas(32) ftype Cy[2 * flen];
55-
alignas(32) ftype Dx[2 * flen];
56-
alignas(32) ftype Dy[2 * flen];
57-
Cvx.copy_to(Cx + flen, std::experimental::vector_aligned);
58-
Cvy.copy_to(Cy + flen, std::experimental::vector_aligned);
59-
Dvx.copy_to(Dx + flen, std::experimental::vector_aligned);
60-
Dvy.copy_to(Dy + flen, std::experimental::vector_aligned);
61-
Crvx.copy_to(Cx, std::experimental::vector_aligned);
62-
Crvy.copy_to(Cy, std::experimental::vector_aligned);
63-
Drvx.copy_to(Dx, std::experimental::vector_aligned);
64-
Drvy.copy_to(Dy, std::experimental::vector_aligned);
53+
vftype Cx[2] = {Crvx, Cvx}, Cy[2] = {Crvy, Cvy};
54+
vftype Dx[2] = {Drvx, Dvx}, Dy[2] = {Drvy, Dvy};
6555
vpoint AC, AD, BC, BD;
6656
AC = AD = BC = BD = {0, 0};
6757
for(size_t i = 0; i < flen; i++) {
6858
vftype Csx, Csy, Dsx, Dsy;
69-
Csx.copy_from(Cx + flen - i, std::experimental::element_aligned);
70-
Csy.copy_from(Cy + flen - i, std::experimental::element_aligned);
71-
Dsx.copy_from(Dx + flen - i, std::experimental::element_aligned);
72-
Dsy.copy_from(Dy + flen - i, std::experimental::element_aligned);
59+
Csx.copy_from((ftype*)Cx + flen - i, stdx::element_aligned);
60+
Csy.copy_from((ftype*)Cy + flen - i, stdx::element_aligned);
61+
Dsx.copy_from((ftype*)Dx + flen - i, stdx::element_aligned);
62+
Dsy.copy_from((ftype*)Dy + flen - i, stdx::element_aligned);
7363
vpoint As = {Ax[i], Ay[i]}, Bs = {Bx[i], By[i]};
7464
vpoint Cs = {Csx, Csy}, Ds = {Dsx, Dsy};
75-
AC += As * Cs;
76-
AD += As * Ds;
77-
BC += Bs * Cs;
78-
BD += Bs * Ds;
65+
AC += As * Cs; AD += As * Ds;
66+
BC += Bs * Cs; BD += Bs * Ds;
7967
}
8068
A.set(k, AC);
8169
C.set(k, AD + BC);

0 commit comments

Comments
 (0)