@@ -57,7 +57,16 @@ namespace FastNoise
5757 float32v a = u * FS::SelectHighBit ( index, float32v ( 2 ), float32v ( kRoot3f ) );
5858 float32v b = v ^ FS::Cast<float >( ( index >> 30 ) << 31 );
5959
60- return FS::MaskedAdd ( index >= int32v ( 0 ), a, b ) ^ FS::Cast<float >( ( index >> 28 ) << 31 );
60+ if constexpr ( SIMD & FastSIMD::FeatureFlag::x86 )
61+ {
62+ auto indexNegativeMask = FS::Cast<FS::Mask<32 , false >>( index >> 31 );
63+
64+ return FS::InvMaskedAdd ( indexNegativeMask, a, b ) ^ FS::Cast<float >( ( index >> 28 ) << 31 );
65+ }
66+ else
67+ {
68+ return FS::MaskedAdd ( index >= int32v ( 0 ), a, b ) ^ FS::Cast<float >( ( index >> 28 ) << 31 );
69+ }
6170 }
6271 }
6372
@@ -454,8 +463,11 @@ namespace FastNoise
454463
455464 if constexpr ( SIMD & FastSIMD::FeatureFlag::AVX512_F )
456465 {
457- // indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC( _mm512_rol_epi32 ), indexFacetBasisWithPermute2, 2 );
466+ #if defined( _MSC_VER ) && !defined( __clang__ )
467+ indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC ( _mm512_rol_epi32 ), indexFacetBasisWithPermute2, std::integral_constant<int , 2 >() );
468+ #else
458469 indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC ( _mm512_rolv_epi32 ), indexFacetBasisWithPermute2, int32v ( 2 ) );
470+ #endif
459471
460472 const auto tableA_gX = FS::Constant<float >( kComponentA , kComponentA , kComponentC , kComponentC , -kComponentA , -kComponentA , kComponentC , kComponentC , kComponentA , kComponentA , kComponentC , kComponentC , -kComponentA , -kComponentA , kComponentC , kComponentC );
461473 const auto tableA_gY = FS::Constant<float >( kComponentC , kComponentB , kComponentA , kComponentA , kComponentC , kComponentB , -kComponentA , -kComponentA , kComponentC , -kComponentB , kComponentA , kComponentA , kComponentC , -kComponentB , -kComponentA , -kComponentA );
@@ -489,22 +501,22 @@ namespace FastNoise
489501 float32v sign0 = FS::Cast<float >( indexFacetBasisWithPermute2 << 31 );
490502 float32v sign1 = FS::Cast<float >( ( indexFacetBasisWithPermute2 << 30 ) & int32v ( 1 << 31 ) );
491503
492- auto notYZ = indexFacetBasisWithPermute2 >= int32v ( 0 ) ;
493- auto notXY = ( indexFacetBasisWithPermute2 << 1 ) >= int32v ( 0 ) ;
504+ auto notYZ = indexFacetBasisWithPermute2;
505+ auto notXY = indexFacetBasisWithPermute2 << 1 ;
494506
495- float32v valueA_gX = FS::Select ( notYZ, float32v ( kComponentA ) ^ sign0 , float32v ( kComponentC ) );
496- float32v valueA_gY = FS::Select ( notYZ & notXY, float32v ( kComponentC ), FS::Select ( notXY, float32v ( kComponentA ) ^ sign0 , float32v ( kComponentB ) ^ sign1 ) );
497- float32v valueA_gZ = FS::Select ( notXY, float32v ( kComponentB ) ^ sign1 , float32v ( kComponentC ) );
507+ float32v valueA_gX = FS::SelectHighBit ( notYZ, float32v ( kComponentC ) , float32v ( kComponentA ) ^ sign0 );
508+ float32v valueA_gY = FS::SelectHighBit ( notYZ | notXY, FS::SelectHighBit ( notXY, float32v ( kComponentB ) ^ sign1 , float32v ( kComponentA ) ^ sign0 ), float32v ( kComponentC ) );
509+ float32v valueA_gZ = FS::SelectHighBit ( notXY, float32v ( kComponentC ) , float32v ( kComponentB ) ^ sign1 );
498510 float32v valueA = FS::FMulAdd ( valueA_gZ, fZ , FS::FMulAdd ( fY , valueA_gY, fX * valueA_gX ) );
499511
500- float32v valueB_gX = FS::Select ( notYZ, float32v ( kComponentB ) ^ sign0 , float32v ( kComponentC ) );
501- float32v valueB_gY = FS::Select ( notYZ & notXY, float32v ( kComponentC ), FS::Select ( notXY, float32v ( kComponentB ) ^ sign0 , float32v ( kComponentA ) ^ sign1 ) );
502- float32v valueB_gZ = FS::Select ( notXY, float32v ( kComponentA ) ^ sign1 , float32v ( kComponentC ) );
512+ float32v valueB_gX = FS::SelectHighBit ( notYZ, float32v ( kComponentC ) , float32v ( kComponentB ) ^ sign0 );
513+ float32v valueB_gY = FS::SelectHighBit ( notYZ | notXY, FS::SelectHighBit ( notXY, float32v ( kComponentA ) ^ sign1 , float32v ( kComponentB ) ^ sign0 ), float32v ( kComponentC ) );
514+ float32v valueB_gZ = FS::SelectHighBit ( notXY, float32v ( kComponentC ) , float32v ( kComponentA ) ^ sign1 );
503515 float32v valueB = FS::FMulAdd ( valueB_gZ, fZ , FS::FMulAdd ( fY , valueB_gY, fX * valueB_gX ) );
504516
505- float32v valueC_gX = FS::Select ( notYZ, float32v ( kComponentsDE ) ^ sign0 , float32v ( kComponentF ) );
506- float32v valueC_gY = FS::Select ( notYZ & notXY, float32v ( kComponentF ), FS::Select ( notXY, float32v ( kComponentsDE ) ^ sign0 , float32v ( kComponentsDE ) ^ sign1 ) );
507- float32v valueC_gZ = FS::Select ( notXY, float32v ( kComponentsDE ) ^ sign1 , float32v ( kComponentF ) );
517+ float32v valueC_gX = FS::SelectHighBit ( notYZ, float32v ( kComponentF ) , float32v ( kComponentsDE ) ^ sign0 );
518+ float32v valueC_gY = FS::SelectHighBit ( notYZ | notXY, FS::SelectHighBit ( notXY, float32v ( kComponentsDE ) ^ sign1 , float32v ( kComponentsDE ) ^ sign0 ), float32v ( kComponentF ) );
519+ float32v valueC_gZ = FS::SelectHighBit ( notXY, float32v ( kComponentF ) , float32v ( kComponentsDE ) ^ sign1 );
508520 valueC = FS::FMulAdd ( valueC_gZ, fZ , FS::FMulAdd ( fY , valueC_gY, fX * valueC_gX ) );
509521
510522 valueAB = FS::SelectHighBit ( indexPermutation2HighBit, valueB, valueA );
0 commit comments