@@ -718,10 +718,10 @@ void execute_spinsph_hi2lo_AVX(const ft_spin_rotation_plan * SRP, ft_complex * A
718718 int N = SRP -> n ;
719719 double * AD = (double * ) A ;
720720 double * BD = (double * ) B ;
721+ kernel_spinsph_hi2lo_default (SRP , 0 , A , 1 );
722+ kernel_spinsph_hi2lo_default (SRP , -1 , A + N , 1 );
723+ kernel_spinsph_hi2lo_default (SRP , 1 , A + 2 * N , 1 );
721724 permute_sph (AD , BD , 2 * N , M , 2 );
722- kernel_spinsph_hi2lo_default (SRP , 0 , B , 1 );
723- kernel_spinsph_hi2lo_default (SRP , -1 , B + N , 1 );
724- kernel_spinsph_hi2lo_default (SRP , 1 , B + 2 * N , 1 );
725725 #pragma omp parallel
726726 for (int m = 2 + FT_GET_THREAD_NUM (); m <= M /2 ; m += FT_GET_NUM_THREADS ())
727727 kernel_spinsph_hi2lo_AVX (SRP , m , B + N * (2 * m - 1 ), 2 );
@@ -732,10 +732,10 @@ void execute_spinsph_lo2hi_AVX(const ft_spin_rotation_plan * SRP, ft_complex * A
732732 int N = SRP -> n ;
733733 double * AD = (double * ) A ;
734734 double * BD = (double * ) B ;
735+ kernel_spinsph_lo2hi_default (SRP , 0 , A , 1 );
736+ kernel_spinsph_lo2hi_default (SRP , -1 , A + N , 1 );
737+ kernel_spinsph_lo2hi_default (SRP , 1 , A + 2 * N , 1 );
735738 permute_sph (AD , BD , 2 * N , M , 2 );
736- kernel_spinsph_lo2hi_default (SRP , 0 , B , 1 );
737- kernel_spinsph_lo2hi_default (SRP , -1 , B + N , 1 );
738- kernel_spinsph_lo2hi_default (SRP , 1 , B + 2 * N , 1 );
739739 #pragma omp parallel
740740 for (int m = 2 + FT_GET_THREAD_NUM (); m <= M /2 ; m += FT_GET_NUM_THREADS ())
741741 kernel_spinsph_lo2hi_AVX (SRP , m , B + N * (2 * m - 1 ), 2 );
@@ -746,10 +746,10 @@ void execute_spinsph_hi2lo_AVX_FMA(const ft_spin_rotation_plan * SRP, ft_complex
746746 int N = SRP -> n ;
747747 double * AD = (double * ) A ;
748748 double * BD = (double * ) B ;
749+ kernel_spinsph_hi2lo_default (SRP , 0 , A , 1 );
750+ kernel_spinsph_hi2lo_default (SRP , -1 , A + N , 1 );
751+ kernel_spinsph_hi2lo_default (SRP , 1 , A + 2 * N , 1 );
749752 permute_sph (AD , BD , 2 * N , M , 2 );
750- kernel_spinsph_hi2lo_default (SRP , 0 , B , 1 );
751- kernel_spinsph_hi2lo_default (SRP , -1 , B + N , 1 );
752- kernel_spinsph_hi2lo_default (SRP , 1 , B + 2 * N , 1 );
753753 #pragma omp parallel
754754 for (int m = 2 + FT_GET_THREAD_NUM (); m <= M /2 ; m += FT_GET_NUM_THREADS ())
755755 kernel_spinsph_hi2lo_AVX_FMA (SRP , m , B + N * (2 * m - 1 ), 2 );
@@ -760,10 +760,10 @@ void execute_spinsph_lo2hi_AVX_FMA(const ft_spin_rotation_plan * SRP, ft_complex
760760 int N = SRP -> n ;
761761 double * AD = (double * ) A ;
762762 double * BD = (double * ) B ;
763+ kernel_spinsph_lo2hi_default (SRP , 0 , A , 1 );
764+ kernel_spinsph_lo2hi_default (SRP , -1 , A + N , 1 );
765+ kernel_spinsph_lo2hi_default (SRP , 1 , A + 2 * N , 1 );
763766 permute_sph (AD , BD , 2 * N , M , 2 );
764- kernel_spinsph_lo2hi_default (SRP , 0 , B , 1 );
765- kernel_spinsph_lo2hi_default (SRP , -1 , B + N , 1 );
766- kernel_spinsph_lo2hi_default (SRP , 1 , B + 2 * N , 1 );
767767 #pragma omp parallel
768768 for (int m = 2 + FT_GET_THREAD_NUM (); m <= M /2 ; m += FT_GET_NUM_THREADS ())
769769 kernel_spinsph_lo2hi_AVX_FMA (SRP , m , B + N * (2 * m - 1 ), 2 );
0 commit comments