Skip to content

Commit c55d555

Browse files
DomainWarpSimplex 3D inv masked perf bump (#154)
1 parent 1084e0d commit c55d555

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

include/FastNoise/Generators/DomainWarpSimplex.inl

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -164,19 +164,19 @@ protected:
164164

165165
mask32v maskX1 = xGreaterEqualY & xGreaterEqualZ;
166166
mask32v maskY1 = FS::BitwiseAndNot( yGreaterEqualZ, xGreaterEqualY );
167-
mask32v maskZ1 = FS::BitwiseAndNot( ~xGreaterEqualZ, yGreaterEqualZ );
167+
mask32v maskZ1 = xGreaterEqualZ | yGreaterEqualZ; // Inv masked
168168

169-
mask32v nMaskX2 = ~( xGreaterEqualY | xGreaterEqualZ );
170-
mask32v nMaskY2 = xGreaterEqualY & ~yGreaterEqualZ;
169+
mask32v nMaskX2 = xGreaterEqualY | xGreaterEqualZ; // Inv masked
170+
mask32v nMaskY2 = FS::BitwiseAndNot( xGreaterEqualY, yGreaterEqualZ );
171171
mask32v nMaskZ2 = xGreaterEqualZ & yGreaterEqualZ;
172172

173173
float32v dx3 = dx0 - float32v( kReflectUnskew3 * 3 + 1 );
174174
float32v dy3 = dy0 - float32v( kReflectUnskew3 * 3 + 1 );
175175
float32v dz3 = dz0 - float32v( kReflectUnskew3 * 3 + 1 );
176176
float32v dx1 = FS::MaskedSub( maskX1, dx3, float32v( 1 ) ); // kReflectUnskew3 * 3 + 1 = kReflectUnskew3, so dx0 - kReflectUnskew3 = dx3
177177
float32v dy1 = FS::MaskedSub( maskY1, dy3, float32v( 1 ) );
178-
float32v dz1 = FS::MaskedSub( maskZ1, dz3, float32v( 1 ) );
179-
float32v dx2 = FS::MaskedIncrement( nMaskX2, dx0 ); // kReflectUnskew3 * 2 - 1 = 0, so dx0 + ( kReflectUnskew3 * 2 - 1 ) = dx0
178+
float32v dz1 = FS::InvMaskedSub( maskZ1, dz3, float32v( 1 ) );
179+
float32v dx2 = FS::MaskedIncrement( ~nMaskX2, dx0 ); // kReflectUnskew3 * 2 - 1 = 0, so dx0 + ( kReflectUnskew3 * 2 - 1 ) = dx0
180180
float32v dy2 = FS::MaskedIncrement( nMaskY2, dy0 );
181181
float32v dz2 = FS::MaskedIncrement( nMaskZ2, dz0 );
182182

@@ -200,8 +200,8 @@ protected:
200200
float32v valueZ( 0 );
201201

202202
ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase ), dx0, dy0, dz0, falloff0, valueX, valueY, valueZ );
203-
ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ), FS::MaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ) ), dx1, dy1, dz1, falloff1, valueX, valueY, valueZ );
204-
ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::InvMaskedAdd( nMaskX2, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( nMaskY2, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( nMaskZ2, zPrimedBase, int32v( Primes::Z ) ) ), dx2, dy2, dz2, falloff2, valueX, valueY, valueZ );
203+
ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ) ), dx1, dy1, dz1, falloff1, valueX, valueY, valueZ );
204+
ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::MaskedAdd( nMaskX2, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( nMaskY2, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( nMaskZ2, zPrimedBase, int32v( Primes::Z ) ) ), dx2, dy2, dz2, falloff2, valueX, valueY, valueZ );
205205
ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ) ), dx3, dy3, dz3, falloff3, valueX, valueY, valueZ );
206206

207207
if constexpr( Scheme != VectorizationScheme::OrthogonalGradientMatrix )

0 commit comments

Comments (0)