@@ -50,32 +50,20 @@ namespace cp_algo::math::fft {
50
50
auto [Dvx, Dvy] = D.vget (k);
51
51
auto [Crvx, Crvy] = vpoint (Cvx, Cvy) * vpoint (real (rt), imag (rt));
52
52
auto [Drvx, Drvy] = vpoint (Dvx, Dvy) * vpoint (real (rt), imag (rt));
53
- alignas (32 ) ftype Cx[2 * flen];
54
- alignas (32 ) ftype Cy[2 * flen];
55
- alignas (32 ) ftype Dx[2 * flen];
56
- alignas (32 ) ftype Dy[2 * flen];
57
- Cvx.copy_to (Cx + flen, std::experimental::vector_aligned);
58
- Cvy.copy_to (Cy + flen, std::experimental::vector_aligned);
59
- Dvx.copy_to (Dx + flen, std::experimental::vector_aligned);
60
- Dvy.copy_to (Dy + flen, std::experimental::vector_aligned);
61
- Crvx.copy_to (Cx, std::experimental::vector_aligned);
62
- Crvy.copy_to (Cy, std::experimental::vector_aligned);
63
- Drvx.copy_to (Dx, std::experimental::vector_aligned);
64
- Drvy.copy_to (Dy, std::experimental::vector_aligned);
53
+ vftype Cx[2 ] = {Crvx, Cvx}, Cy[2 ] = {Crvy, Cvy};
54
+ vftype Dx[2 ] = {Drvx, Dvx}, Dy[2 ] = {Drvy, Dvy};
65
55
vpoint AC, AD, BC, BD;
66
56
AC = AD = BC = BD = {0 , 0 };
67
57
for (size_t i = 0 ; i < flen; i++) {
68
58
vftype Csx, Csy, Dsx, Dsy;
69
- Csx.copy_from (Cx + flen - i, std::experimental ::element_aligned);
70
- Csy.copy_from (Cy + flen - i, std::experimental ::element_aligned);
71
- Dsx.copy_from (Dx + flen - i, std::experimental ::element_aligned);
72
- Dsy.copy_from (Dy + flen - i, std::experimental ::element_aligned);
59
+ Csx.copy_from ((ftype*) Cx + flen - i, stdx ::element_aligned);
60
+ Csy.copy_from ((ftype*) Cy + flen - i, stdx ::element_aligned);
61
+ Dsx.copy_from ((ftype*) Dx + flen - i, stdx ::element_aligned);
62
+ Dsy.copy_from ((ftype*) Dy + flen - i, stdx ::element_aligned);
73
63
vpoint As = {Ax[i], Ay[i]}, Bs = {Bx[i], By[i]};
74
64
vpoint Cs = {Csx, Csy}, Ds = {Dsx, Dsy};
75
- AC += As * Cs;
76
- AD += As * Ds;
77
- BC += Bs * Cs;
78
- BD += Bs * Ds;
65
+ AC += As * Cs; AD += As * Ds;
66
+ BC += Bs * Cs; BD += Bs * Ds;
79
67
}
80
68
A.set (k, AC);
81
69
C.set (k, AD + BC);
0 commit comments