Skip to content

Commit 91de307

Browse files
Fix for the |m| == 1 case of the spin-weighted transforms
JuliaApproximation/FastTransforms.jl#139
1 parent ecababf commit 91de307

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

src/drivers.c

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -718,10 +718,10 @@ void execute_spinsph_hi2lo_AVX(const ft_spin_rotation_plan * SRP, ft_complex * A
718718
int N = SRP->n;
719719
double * AD = (double *) A;
720720
double * BD = (double *) B;
721+
kernel_spinsph_hi2lo_default(SRP, 0, A, 1);
722+
kernel_spinsph_hi2lo_default(SRP, -1, A + N, 1);
723+
kernel_spinsph_hi2lo_default(SRP, 1, A + 2*N, 1);
721724
permute_sph(AD, BD, 2*N, M, 2);
722-
kernel_spinsph_hi2lo_default(SRP, 0, B, 1);
723-
kernel_spinsph_hi2lo_default(SRP, -1, B + N, 1);
724-
kernel_spinsph_hi2lo_default(SRP, 1, B + 2*N, 1);
725725
#pragma omp parallel
726726
for (int m = 2+FT_GET_THREAD_NUM(); m <= M/2; m += FT_GET_NUM_THREADS())
727727
kernel_spinsph_hi2lo_AVX(SRP, m, B + N*(2*m-1), 2);
@@ -732,10 +732,10 @@ void execute_spinsph_lo2hi_AVX(const ft_spin_rotation_plan * SRP, ft_complex * A
732732
int N = SRP->n;
733733
double * AD = (double *) A;
734734
double * BD = (double *) B;
735+
kernel_spinsph_lo2hi_default(SRP, 0, A, 1);
736+
kernel_spinsph_lo2hi_default(SRP, -1, A + N, 1);
737+
kernel_spinsph_lo2hi_default(SRP, 1, A + 2*N, 1);
735738
permute_sph(AD, BD, 2*N, M, 2);
736-
kernel_spinsph_lo2hi_default(SRP, 0, B, 1);
737-
kernel_spinsph_lo2hi_default(SRP, -1, B + N, 1);
738-
kernel_spinsph_lo2hi_default(SRP, 1, B + 2*N, 1);
739739
#pragma omp parallel
740740
for (int m = 2+FT_GET_THREAD_NUM(); m <= M/2; m += FT_GET_NUM_THREADS())
741741
kernel_spinsph_lo2hi_AVX(SRP, m, B + N*(2*m-1), 2);
@@ -746,10 +746,10 @@ void execute_spinsph_hi2lo_AVX_FMA(const ft_spin_rotation_plan * SRP, ft_complex
746746
int N = SRP->n;
747747
double * AD = (double *) A;
748748
double * BD = (double *) B;
749+
kernel_spinsph_hi2lo_default(SRP, 0, A, 1);
750+
kernel_spinsph_hi2lo_default(SRP, -1, A + N, 1);
751+
kernel_spinsph_hi2lo_default(SRP, 1, A + 2*N, 1);
749752
permute_sph(AD, BD, 2*N, M, 2);
750-
kernel_spinsph_hi2lo_default(SRP, 0, B, 1);
751-
kernel_spinsph_hi2lo_default(SRP, -1, B + N, 1);
752-
kernel_spinsph_hi2lo_default(SRP, 1, B + 2*N, 1);
753753
#pragma omp parallel
754754
for (int m = 2+FT_GET_THREAD_NUM(); m <= M/2; m += FT_GET_NUM_THREADS())
755755
kernel_spinsph_hi2lo_AVX_FMA(SRP, m, B + N*(2*m-1), 2);
@@ -760,10 +760,10 @@ void execute_spinsph_lo2hi_AVX_FMA(const ft_spin_rotation_plan * SRP, ft_complex
760760
int N = SRP->n;
761761
double * AD = (double *) A;
762762
double * BD = (double *) B;
763+
kernel_spinsph_lo2hi_default(SRP, 0, A, 1);
764+
kernel_spinsph_lo2hi_default(SRP, -1, A + N, 1);
765+
kernel_spinsph_lo2hi_default(SRP, 1, A + 2*N, 1);
763766
permute_sph(AD, BD, 2*N, M, 2);
764-
kernel_spinsph_lo2hi_default(SRP, 0, B, 1);
765-
kernel_spinsph_lo2hi_default(SRP, -1, B + N, 1);
766-
kernel_spinsph_lo2hi_default(SRP, 1, B + 2*N, 1);
767767
#pragma omp parallel
768768
for (int m = 2+FT_GET_THREAD_NUM(); m <= M/2; m += FT_GET_NUM_THREADS())
769769
kernel_spinsph_lo2hi_AVX_FMA(SRP, m, B + N*(2*m-1), 2);

0 commit comments

Comments (0)