From 039134d43dc202cfbf11473d5ae80c57a0b31527 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sun, 12 Jun 2022 14:56:41 +0100
Subject: [PATCH 001/139] Remove old FastSIMD

---
 CMakeLists.txt                          |   3 +-
 include/FastSIMD/FastSIMD.h             |  51 --
 include/FastSIMD/FastSIMD_Config.h      |  33 -
 include/FastSIMD/FastSIMD_Export.h      |  11 -
 include/FastSIMD/FunctionList.h         | 856 ------------------------
 include/FastSIMD/InlInclude.h           |  10 -
 include/FastSIMD/SIMDTypeList.h         |  37 -
 src/CMakeLists.txt                      |  76 +--
 src/FastSIMD/Example/Example.h          |  17 -
 src/FastSIMD/Example/Example.inl        | 125 ----
 src/FastSIMD/FastSIMD.cpp               | 237 -------
 src/FastSIMD/FastSIMD_BuildList.inl     |  10 -
 src/FastSIMD/FastSIMD_Level_AVX2.cpp    |  17 -
 src/FastSIMD/FastSIMD_Level_AVX512.cpp  |  17 -
 src/FastSIMD/FastSIMD_Level_NEON.cpp    |   7 -
 src/FastSIMD/FastSIMD_Level_SSE2.cpp    |   7 -
 src/FastSIMD/FastSIMD_Level_SSE3.cpp    |   7 -
 src/FastSIMD/FastSIMD_Level_SSE41.cpp   |   7 -
 src/FastSIMD/FastSIMD_Level_SSE42.cpp   |   7 -
 src/FastSIMD/FastSIMD_Level_SSSE3.cpp   |   7 -
 src/FastSIMD/FastSIMD_Level_Scalar.cpp  |   7 -
 src/FastSIMD/Internal/AVX.h             | 474 -------------
 src/FastSIMD/Internal/AVX512.h          | 540 ---------------
 src/FastSIMD/Internal/NEON.h            | 424 ------------
 src/FastSIMD/Internal/SSE.h             | 574 ----------------
 src/FastSIMD/Internal/Scalar.h          | 451 -------------
 src/FastSIMD/Internal/SourceBuilder.inl |  29 -
 src/FastSIMD/Internal/VecTools.h        |  66 --
 28 files changed, 24 insertions(+), 4083 deletions(-)
 delete mode 100644 include/FastSIMD/FastSIMD.h
 delete mode 100644 include/FastSIMD/FastSIMD_Config.h
 delete mode 100644 include/FastSIMD/FastSIMD_Export.h
 delete mode 100644 include/FastSIMD/FunctionList.h
 delete mode 100644 include/FastSIMD/InlInclude.h
 delete mode 100644 include/FastSIMD/SIMDTypeList.h
 delete mode 100644 src/FastSIMD/Example/Example.h
 delete mode 100644 src/FastSIMD/Example/Example.inl
 delete mode 100644 src/FastSIMD/FastSIMD.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_BuildList.inl
 delete mode 100644 src/FastSIMD/FastSIMD_Level_AVX2.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_Level_AVX512.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_Level_NEON.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_Level_SSE2.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_Level_SSE3.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_Level_SSE41.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_Level_SSE42.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_Level_SSSE3.cpp
 delete mode 100644 src/FastSIMD/FastSIMD_Level_Scalar.cpp
 delete mode 100644 src/FastSIMD/Internal/AVX.h
 delete mode 100644 src/FastSIMD/Internal/AVX512.h
 delete mode 100644 src/FastSIMD/Internal/NEON.h
 delete mode 100644 src/FastSIMD/Internal/SSE.h
 delete mode 100644 src/FastSIMD/Internal/Scalar.h
 delete mode 100644 src/FastSIMD/Internal/SourceBuilder.inl
 delete mode 100644 src/FastSIMD/Internal/VecTools.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5af45e17..325ee9b5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,15 +36,14 @@ endif()
 include(GNUInstallDirs) 
 set(install_targets "")
 
+include(cmake/CPM.cmake)
 add_subdirectory(src)
 
 if(FASTNOISE2_NOISETOOL)
-    include(cmake/CPM.cmake)
     add_subdirectory(NoiseTool)
 endif()
 
 if(FASTNOISE2_TESTS)
-    include(cmake/CPM.cmake)  
     add_subdirectory(tests)
 endif()
 
diff --git a/include/FastSIMD/FastSIMD.h b/include/FastSIMD/FastSIMD.h
deleted file mode 100644
index 4111a82d..00000000
--- a/include/FastSIMD/FastSIMD.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-#include "FastSIMD_Config.h"
-
-namespace FastSIMD
-{
-    typedef uint32_t Level_BitFlags;
-
-    enum eLevel : Level_BitFlags
-    {
-        Level_Null   = 0,       // Uninitilised
-        Level_Scalar = 1 <<  0, // 80386 instruction set (Not SIMD)
-        Level_SSE    = 1 <<  1, // SSE (XMM) supported by CPU (not testing for O.S. support)
-        Level_SSE2   = 1 <<  2, // SSE2
-        Level_SSE3   = 1 <<  3, // SSE3
-        Level_SSSE3  = 1 <<  4, // Supplementary SSE3 (SSSE3)
-        Level_SSE41  = 1 <<  5, // SSE4.1
-        Level_SSE42  = 1 <<  6, // SSE4.2
-        Level_AVX    = 1 <<  7, // AVX supported by CPU and operating system
-        Level_AVX2   = 1 <<  8, // AVX2
-        Level_AVX512 = 1 <<  9, // AVX512, AVX512DQ supported by CPU and operating system
-
-        Level_NEON   = 1 << 16, // ARM NEON
-    };
-
-    const Level_BitFlags COMPILED_SIMD_LEVELS =
-        (FASTSIMD_COMPILE_SCALAR     ? Level_Scalar : 0) |
-        (FASTSIMD_COMPILE_SSE        ? Level_SSE    : 0) |
-        (FASTSIMD_COMPILE_SSE2       ? Level_SSE2   : 0) |
-        (FASTSIMD_COMPILE_SSE3       ? Level_SSE3   : 0) |
-        (FASTSIMD_COMPILE_SSSE3      ? Level_SSSE3  : 0) |
-        (FASTSIMD_COMPILE_SSE41      ? Level_SSE41  : 0) |
-        (FASTSIMD_COMPILE_SSE42      ? Level_SSE42  : 0) |
-        (FASTSIMD_COMPILE_AVX        ? Level_AVX    : 0) |
-        (FASTSIMD_COMPILE_AVX2       ? Level_AVX2   : 0) |
-        (FASTSIMD_COMPILE_AVX512     ? Level_AVX512 : 0) |
-        (FASTSIMD_COMPILE_NEON       ? Level_NEON   : 0) ;
-    
-    typedef void* (*MemoryAllocator)( size_t size, size_t align );
-
-    FASTSIMD_API eLevel CPUMaxSIMDLevel();
-
-    template<typename T>
-    T* New( eLevel maxSIMDLevel = Level_Null, MemoryAllocator allocator = nullptr );
-
-    template<typename T, eLevel SIMD_LEVEL>
-    T* ClassFactory( MemoryAllocator allocator = nullptr );
-
-#define FASTSIMD_LEVEL_SUPPORT( ... ) \
-    static const FastSIMD::Level_BitFlags Supported_SIMD_Levels = __VA_ARGS__
-
-}
diff --git a/include/FastSIMD/FastSIMD_Config.h b/include/FastSIMD/FastSIMD_Config.h
deleted file mode 100644
index b823d67e..00000000
--- a/include/FastSIMD/FastSIMD_Config.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#pragma once
-#include <cstdint>
-#include <cstddef>
-
-#include "FastSIMD_Export.h"
-
-#if defined(__arm__) || defined(__aarch64__)
-#define FASTSIMD_x86 false
-#define FASTSIMD_ARM true
-#else
-#define FASTSIMD_x86 true
-#define FASTSIMD_ARM false
-#endif
-
-#define FASTSIMD_64BIT (INTPTR_MAX == INT64_MAX)
-
-#define FASTSIMD_COMPILE_SCALAR (!(FASTSIMD_x86 && FASTSIMD_64BIT)) // Don't compile for x86 64bit since CPU is guaranteed SSE2 support 
-
-#define FASTSIMD_COMPILE_SSE    (FASTSIMD_x86 & false) // Not supported
-#define FASTSIMD_COMPILE_SSE2   (FASTSIMD_x86 & true )
-#define FASTSIMD_COMPILE_SSE3   (FASTSIMD_x86 & true )
-#define FASTSIMD_COMPILE_SSSE3  (FASTSIMD_x86 & true )
-#define FASTSIMD_COMPILE_SSE41  (FASTSIMD_x86 & true )
-#define FASTSIMD_COMPILE_SSE42  (FASTSIMD_x86 & true )
-#define FASTSIMD_COMPILE_AVX    (FASTSIMD_x86 & false) // Not supported
-#define FASTSIMD_COMPILE_AVX2   (FASTSIMD_x86 & true )
-#define FASTSIMD_COMPILE_AVX512 (FASTSIMD_x86 & true )
-
-#define FASTSIMD_COMPILE_NEON   (FASTSIMD_ARM & true )
-
-#define FASTSIMD_USE_FMA                   true
-#define FASTSIMD_CONFIG_GENERATE_CONSTANTS false
-
diff --git a/include/FastSIMD/FastSIMD_Export.h b/include/FastSIMD/FastSIMD_Export.h
deleted file mode 100644
index d81950aa..00000000
--- a/include/FastSIMD/FastSIMD_Export.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-#if !defined( FASTNOISE_STATIC_LIB ) && ( defined( _WIN32 ) || defined( __CYGWIN__ ) )
-#ifdef FASTNOISE_EXPORT // CHANGE ME
-#define FASTSIMD_API __declspec( dllexport )
-#else
-#define FASTSIMD_API __declspec( dllimport )
-#endif
-#else
-#define FASTSIMD_API
-#endif
\ No newline at end of file
diff --git a/include/FastSIMD/FunctionList.h b/include/FastSIMD/FunctionList.h
deleted file mode 100644
index 5135f461..00000000
--- a/include/FastSIMD/FunctionList.h
+++ /dev/null
@@ -1,856 +0,0 @@
-#pragma once
-#include <cinttypes>
-#include <type_traits>
-#include <memory>
-
-#include "FastSIMD/FastSIMD.h"
-
-#ifdef _MSC_VER
-#if defined( _M_IX86_FP ) && _M_IX86_FP < 2
-#define FS_VECTORCALL
-#else
-#define FS_VECTORCALL __vectorcall
-#endif
-#define FS_INLINE __forceinline
-#else
-#define FS_VECTORCALL 
-#define FS_INLINE __attribute__((always_inline)) inline
-#endif
-
-#ifndef NDEBUG
-#undef FS_INLINE
-#define FS_INLINE inline
-#endif
-
-/// <summary>
-/// Number of 32 width elements that will fit into a vector
-/// </summary>
-/// <remarks>
-/// Compile time constant
-/// </remarks>
-/// <code>
-/// size_t FS_Size_32()
-/// </code>
-#define FS_Size_32() FS::template VectorSize<sizeof( int32_t )>
-
-
-// Vector builders
-
-/// <summary>
-/// Vector with values incrementing from 0 based on element index {0, 1, 2, 3...}
-/// </summary>
-/// <code>
-/// example: int32v::FS_Incremented()
-/// </code>
-#define FS_Incremented() Incremented()
-
-
-// Load
-
-/// <summary>
-/// Copies sizeof(float32v) bytes from given memory location into float32v
-/// </summary>
-/// <remarks>
-/// Memory does not need to be aligned
-/// </remarks>
-/// <code>
-/// float32v FS_Load_f32( void const* ptr )
-/// </code>
-#define FS_Load_f32( ... ) FS::Load_f32( __VA_ARGS__ )
-
-
-/// <summary>
-/// Copies sizeof(int32v) bytes from given memory location into int32v
-/// </summary>
-/// <remarks>
-/// Memory does not need to be aligned
-/// </remarks>
-/// <code>
-/// int32v FS_Load_i32( void const* ptr )
-/// </code>
-#define FS_Load_i32( ... ) FS::Load_i32( __VA_ARGS__ )
-
-
-// Store
-
-/// <summary>
-/// Copies all elements of float32v to given memory location
-/// </summary>
-/// <code>
-/// void FS_Store_f32( void* ptr, float32v f )
-/// </code>
-#define FS_Store_f32( ... ) FS::Store_f32( __VA_ARGS__ )
-
-/// <summary>
-/// Copies all elements of int32v to given memory location
-/// </summary>
-/// <code>
-/// void FS_Store_i32( void* ptr, int32v i )
-/// </code>
-#define FS_Store_i32( ... ) FS::Store_i32( __VA_ARGS__ )
-
-
-// Extract
-
-/// <summary>
-/// Retreive element 0 from vector
-/// </summary>
-/// <code>
-/// float FS_Extract0_f32( float32v f )
-/// </code>
-#define FS_Extract0_f32( ... ) FS::Extract0_f32( __VA_ARGS__ )
-
-/// <summary>
-/// Retreive element 0 from vector
-/// </summary>
-/// <code>
-/// int32_t FS_Extract0_i32( int32v i )
-/// </code>
-#define FS_Extract0_i32( ... ) FS::Extract0_i32( __VA_ARGS__ )
-
-/// <summary>
-/// Retreive element from vector at position
-/// </summary>
-/// <code>
-/// float FS_Extract_f32( float32v f, size_t idx )
-/// </code>
-#define FS_Extract_f32( ... ) FS::Extract_f32( __VA_ARGS__ )
-
-/// <summary>
-/// Retreive element from vector at position
-/// </summary>
-/// <code>
-/// int32_t FS_Extract_i32( int32v i, size_t idx )
-/// </code>
-#define FS_Extract_i32( ... ) FS::Extract_i32( __VA_ARGS__ )
-
-
-// Cast
-
-/// <summary>
-/// Bitwise cast int to float
-/// </summary>
-/// <code>
-/// float32v FS_Casti32_f32( int32v i )
-/// </code>
-#define FS_Casti32_f32( ... ) FS::Casti32_f32( __VA_ARGS__ )
-
-/// <summary>
-/// Bitwise cast float to int
-/// </summary>
-/// <code>
-/// int32v FS_Castf32_i32( float32v f )
-/// </code>
-#define FS_Castf32_i32( ... ) FS::Castf32_i32( __VA_ARGS__ )
-
-
-// Convert
-
-/// <summary>
-/// Convert int to float 
-/// </summary>
-/// <remarks>
-/// Rounding: truncate
-/// </remarks>
-/// <code>
-/// float32v FS_Converti32_f32( int32v i )
-/// </code>
-#define FS_Converti32_f32( ... ) FS::Converti32_f32( __VA_ARGS__ )
-
-/// <summary>
-/// Convert float to int
-/// </summary>
-/// <code>
-/// int32v FS_Convertf32_i32( float32v f )
-/// </code>
-#define FS_Convertf32_i32( ... ) FS::Convertf32_i32( __VA_ARGS__ )
-
-
-// Select
-
-/// <summary>
-/// return ( m ? a : b )
-/// </summary>
-/// <code>
-/// float32v FS_Select_f32( mask32v m, float32v a, float32v b )
-/// </code>
-#define FS_Select_f32( ... ) FS::Select_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? a : b )
-/// </summary>
-/// <code>
-/// int32v FS_Select_i32( mask32v m, int32v a, int32v b )
-/// </code>
-#define FS_Select_i32( ... ) FS::Select_i32( __VA_ARGS__ )
-
-
-// Min, Max
-
-/// <summary>
-/// return ( a < b ? a : b )
-/// </summary>
-/// <code>
-/// float32v FS_Min_f32( float32v a, float32v b )
-/// </code>
-#define FS_Min_f32( ... ) FS::Min_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( a > b ? a : b )
-/// </summary>
-/// <code>
-/// float32v FS_Max_f32( float32v a, float32v b )
-/// </code>
-#define FS_Max_f32( ... ) FS::Max_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( a < b ? a : b )
-/// </summary>
-/// <code>
-/// int32v FS_Min_i32( int32v a, int32v b )
-/// </code>
-#define FS_Min_i32( ... ) FS::Min_i32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( a > b ? a : b )
-/// </summary>
-/// <code>
-/// int32v FS_Max_i32( int32v a, int32v b )
-/// </code>
-#define FS_Max_i32( ... ) FS::Max_i32( __VA_ARGS__ )
-
-
-// Bitwise
-
-/// <summary>
-/// return ( a & ~b )
-/// </summary>
-/// <code>
-/// float32v FS_BitwiseAndNot_f32( float32v a, float32v b )
-/// </code>
-#define FS_BitwiseAndNot_f32( ... ) FS::BitwiseAndNot_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( a & ~b )
-/// </summary>
-/// <code>
-/// int32v FS_BitwiseAndNot_i32( int32v a, int32v b )
-/// </code>
-#define FS_BitwiseAndNot_i32( ... ) FS::BitwiseAndNot_i32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( a & ~b )
-/// </summary>
-/// <code>
-/// mask32v FS_BitwiseAndNot_m32( mask32v a, mask32v b )
-/// </code>
-#define FS_BitwiseAndNot_m32( ... ) FastSIMD::BitwiseAndNot_m32<FS>( __VA_ARGS__ )
-
-
-/// <summary>
-/// return ZeroExtend( a >> b )
-/// </summary>
-/// <code>
-/// float32v FS_BitwiseShiftRightZX_f32( float32v a, int32_t b )
-/// </code>
-#define FS_BitwiseShiftRightZX_f32( ... ) FS::BitwiseShiftRightZX_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return ZeroExtend( a >> b )
-/// </summary>
-/// <code>
-/// float32v FS_BitwiseShiftRightZX_i32( int32v a, int32_t b )
-/// </code>
-#define FS_BitwiseShiftRightZX_i32( ... ) FS::BitwiseShiftRightZX_i32( __VA_ARGS__ )
-
-// Abs
-
-/// <summary>
-/// return ( a < 0 ? -a : a )
-/// </summary>
-/// <code>
-/// float32v FS_Abs_f32( float32v a )
-/// </code>
-#define FS_Abs_f32( ... ) FS::Abs_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( a < 0 ? -a : a )
-/// </summary>
-/// <code>
-/// int32v FS_Abs_i32( int32v a )
-/// </code>
-#define FS_Abs_i32( ... ) FS::Abs_i32( __VA_ARGS__ )
-
-
-// Float math
-
-/// <summary>
-/// return sqrt( a )
-/// </summary>
-/// <code>
-/// float32v FS_Sqrt_f32( float32v a )
-/// </code>
-#define FS_Sqrt_f32( ... ) FS::Sqrt_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return APPROXIMATE( 1.0 / sqrt( a ) )
-/// </summary>
-/// <code>
-/// float32v FS_InvSqrt_f32( float32v a )
-/// </code>
-#define FS_InvSqrt_f32( ... ) FS::InvSqrt_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return APPROXIMATE( 1.0 / a )
-/// </summary>
-/// <code>
-/// float32v FS_Reciprocal_f32( float32v a )
-/// </code>
-#define FS_Reciprocal_f32( ... ) FS::Reciprocal_f32( __VA_ARGS__ )
-
-// Floor, Ceil, Round
-
-/// <summary>
-/// return floor( a )
-/// </summary>
-/// <remarks>
-/// Rounding: Towards negative infinity
-/// </remarks>
-/// <code>
-/// float32v FS_Floor_f32( float32v a )
-/// </code>
-#define FS_Floor_f32( ... ) FS::Floor_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return ceil( a )
-/// </summary>
-/// <remarks>
-/// Rounding: Towards positive infinity
-/// </remarks>
-/// <code>
-/// float32v FS_Ceil_f32( float32v a )
-/// </code>
-#define FS_Ceil_f32( ... ) FS::Ceil_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return round( a )
-/// </summary>
-/// <remarks>
-/// Rounding: Banker's rounding
-/// </remarks>
-/// <code>
-/// float32v FS_Round_f32( float32v a )
-/// </code>
-#define FS_Round_f32( ... ) FS::Round_f32( __VA_ARGS__ )
-
-// Trig
-
-/// <summary>
-/// return APPROXIMATE( cos( a ) )
-/// </summary>
-/// <code>
-/// float32v FS_Cos_f32( float32v a )
-/// </code>
-#define FS_Cos_f32( ... ) FastSIMD::Cos_f32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return APPROXIMATE( sin( a ) )
-/// </summary>
-/// <code>
-/// float32v FS_Sin_f32( float32v a )
-/// </code>
-#define FS_Sin_f32( ... ) FastSIMD::Sin_f32<FS>( __VA_ARGS__ )
-
-// Math
-
-/// <summary>
-/// return pow( v, pow )
-/// </summary>
-/// <code>
-/// float32v FS_Pow_f32( float32v v, float32v pow )
-/// </code>
-#define FS_Pow_f32( ... ) FastSIMD::Pow_f32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return log( a )
-/// </summary>
-/// <remarks>
-/// a <= 0 returns 0
-/// </remarks>
-/// <code>
-/// float32v FS_Log_f32( float32v a )
-/// </code>
-#define FS_Log_f32( ... ) FastSIMD::Log_f32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return exp( a )
-/// </summary>
-/// <remarks>
-/// a will be clamped to -88.376, 88.376
-/// </remarks>
-/// <code>
-/// float32v FS_Exp_f32( float32v a )
-/// </code>
-#define FS_Exp_f32( ... ) FastSIMD::Exp_f32<FS>( __VA_ARGS__ )
-
-
-// Mask
-
-/// <summary>
-/// return ( m ? a : 0 )
-/// </summary>
-/// <code>
-/// int32v FS_Mask_i32( int32v a, mask32v m )
-/// </code>
-#define FS_Mask_i32( ... ) FS::Mask_i32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? a : 0 )
-/// </summary>
-/// <code>
-/// float32v FS_Mask_f32( float32v a, mask32v m )
-/// </code>
-#define FS_Mask_f32( ... ) FS::Mask_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? 0 : a )
-/// </summary>
-/// <code>
-/// int32v FS_NMask_i32( int32v a, mask32v m )
-/// </code>
-#define FS_NMask_i32( ... ) FS::NMask_i32( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? 0 : a )
-/// </summary>
-/// <code>
-/// float32v FS_NMask_f32( float32v a, mask32v m )
-/// </code>
-#define FS_NMask_f32( ... ) FS::NMask_f32( __VA_ARGS__ )
-
-/// <summary>
-/// return m.contains( true )
-/// </summary>
-/// <code>
-/// bool FS_AnyMask_bool( mask32v m )
-/// </code>
-#define FS_AnyMask_bool( ... ) FS::AnyMask_bool( __VA_ARGS__ )
-
-
-// FMA
-
-/// <summary>
-/// return ( (a * b) + c )
-/// </summary>
-/// <code>
-/// float32v FS_FMulAdd_f32( float32v a, float32v b, float32v c )
-/// </code>
-#define FS_FMulAdd_f32( ... ) FastSIMD::FMulAdd_f32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( -(a * b) + c )
-/// </summary>
-/// <code>
-/// float32v FS_FNMulAdd_f32( float32v a, float32v b, float32v c )
-/// </code>
-#define FS_FNMulAdd_f32( ... ) FastSIMD::FNMulAdd_f32<FS>( __VA_ARGS__ )
-
-
-// Masked float
-
-/// <summary>
-/// return ( m ? (a + b) : a )
-/// </summary>
-/// <code>
-/// float32v FS_MaskedAdd_f32( float32v a, float32v b, mask32v m )
-/// </code>
-#define FS_MaskedAdd_f32( ... ) FastSIMD::MaskedAdd_f32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? (a - b) : a )
-/// </summary>
-/// <code>
-/// float32v FS_MaskedSub_f32( float32v a, float32v b, mask32v m )
-/// </code>
-#define FS_MaskedSub_f32( ... ) FastSIMD::MaskedSub_f32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? (a * b) : a )
-/// </summary>
-/// <code>
-/// float32v FS_MaskedMul_f32( float32v a, float32v b, mask32v m )
-/// </code>
-#define FS_MaskedMul_f32( ... ) FastSIMD::MaskedMul_f32<FS>( __VA_ARGS__ )
-
-
-// Masked int32
-
-/// <summary>
-/// return ( m ? (a + b) : a )
-/// </summary>
-/// <code>
-/// int32v FS_MaskedAdd_i32( int32v a, int32v b, mask32v m )
-/// </code>
-#define FS_MaskedAdd_i32( ... ) FastSIMD::MaskedAdd_i32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? (a - b) : a )
-/// </summary>
-/// <code>
-/// int32v FS_MaskedSub_i32( int32v a, int32v b, mask32v m )
-/// </code>
-#define FS_MaskedSub_i32( ... ) FastSIMD::MaskedSub_i32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? (a * b) : a )
-/// </summary>
-/// <code>
-/// int32v FS_MaskedMul_i32( int32v a, int32v b, mask32v m )
-/// </code>
-#define FS_MaskedMul_i32( ... ) FastSIMD::MaskedMul_i32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? (a + 1) : a )
-/// </summary>
-/// <code>
-/// int32v FS_MaskedIncrement_i32( int32v a, mask32v m )
-/// </code>
-#define FS_MaskedIncrement_i32( ... ) FastSIMD::MaskedIncrement_i32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? (a - 1) : a )
-/// </summary>
-/// <code>
-/// int32v FS_MaskedDecrement_i32( int32v a, mask32v m )
-/// </code>
-#define FS_MaskedDecrement_i32( ... ) FastSIMD::MaskedDecrement_i32<FS>( __VA_ARGS__ )
-
-
-// NMasked float
-
-/// <summary>
-/// return ( m ? a : (a + b) )
-/// </summary>
-/// <code>
-/// float32v FS_NMaskedAdd_f32( float32v a, float32v b, mask32v m )
-/// </code>
-#define FS_NMaskedAdd_f32( ... ) FastSIMD::NMaskedAdd_f32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? a : (a - b) )
-/// </summary>
-/// <code>
-/// float32v FS_NMaskedSub_f32( float32v a, float32v b, mask32v m )
-/// </code>
-#define FS_NMaskedSub_f32( ... ) FastSIMD::NMaskedSub_f32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? a : (a * b) )
-/// </summary>
-/// <code>
-/// float32v FS_NMaskedMul_f32( float32v a, float32v b, mask32v m )
-/// </code>
-#define FS_NMaskedMul_f32( ... ) FastSIMD::NMaskedMul_f32<FS>( __VA_ARGS__ )
-
-
-// NMasked int32
-
-/// <summary>
-/// return ( m ? a : (a + b) )
-/// </summary>
-/// <code>
-/// int32v FS_NMaskedAdd_i32( int32v a, int32v b, mask32v m )
-/// </code>
-#define FS_NMaskedAdd_i32( ... ) FastSIMD::NMaskedAdd_i32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? a : (a - b) )
-/// </summary>
-/// <code>
-/// int32v FS_NMaskedSub_i32( int32v a, int32v b, mask32v m )
-/// </code>
-#define FS_NMaskedSub_i32( ... ) FastSIMD::NMaskedSub_i32<FS>( __VA_ARGS__ )
-
-/// <summary>
-/// return ( m ? a : (a * b) )
-/// </summary>
-/// <code>
-/// int32v FS_NMaskedMul_i32( int32v a, int32v b, mask32v m )
-/// </code>
-#define FS_NMaskedMul_i32( ... ) FastSIMD::NMaskedMul_i32<FS>( __VA_ARGS__ )
-
-
-namespace FastSIMD
-{
-    //FMA
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v FMulAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::float32v c )
-    {
-        return (a * b) + c;
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v FNMulAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::float32v c )
-    {
-        return -(a * b) + c;
-    }
-
-    // Masked float
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v MaskedAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
-    {
-        return a + FS::Mask_f32( b, m );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v MaskedSub_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
-    {
-        return a - FS::Mask_f32( b, m );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v MaskedMul_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
-    {
-        return a * FS::Mask_f32( b, m );
-    }
-
-    // Masked int32
-
-    template<typename FS>
-    FS_INLINE typename FS::int32v MaskedAdd_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
-    {
-        return a + FS::Mask_i32( b, m );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::int32v MaskedSub_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
-    {
-        return a - FS::Mask_i32( b, m );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::int32v MaskedMul_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
-    {
-        return a * FS::Mask_i32( b, m );
-    }
-
-    // NMasked float
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v NMaskedAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
-    {
-        return a + FS::NMask_f32( b, m );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v NMaskedSub_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
-    {
-        return a - FS::NMask_f32( b, m );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v NMaskedMul_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
-    {
-        return a * FS::NMask_f32( b, m );
-    }
-
-    // NMasked int32
-
-    template<typename FS>
-    FS_INLINE typename FS::int32v NMaskedAdd_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
-    {
-        return a + FS::NMask_i32( b, m );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::int32v NMaskedSub_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
-    {
-        return a - FS::NMask_i32( b, m );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::int32v NMaskedMul_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
-    {
-        return a * FS::NMask_i32( b, m );
-    }
-
-    template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
-    FS_INLINE typename FS::int32v MaskedIncrement_i32( typename FS::int32v a, typename FS::mask32v m )
-    {
-        return a - m;
-    }
-
-    template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
-    FS_INLINE typename FS::int32v MaskedIncrement_i32( typename FS::int32v a, typename FS::mask32v m )
-    {
-        return MaskedSub_i32<FS>( a, typename FS::int32v( -1 ), m );
-    }
-    template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
-    FS_INLINE typename FS::int32v MaskedDecrement_i32( typename FS::int32v a, typename FS::mask32v m )
-    {
-        return a + m;
-    }
-
-    template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
-    FS_INLINE typename FS::int32v MaskedDecrement_i32( typename FS::int32v a, typename FS::mask32v m )
-    {
-        return MaskedAdd_i32<FS>( a, typename FS::int32v( -1 ), m );
-    }
-
-    // Bitwise
-
-    template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
-    FS_INLINE  typename FS::mask32v BitwiseAndNot_m32( typename FS::mask32v a, typename FS::mask32v b )
-    {
-        return FS::BitwiseAndNot_i32( a, b );
-    }
-
-    template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
-    FS_INLINE typename FS::mask32v BitwiseAndNot_m32( typename FS::mask32v a, typename FS::mask32v b )
-    {
-        return a & (~b);
-    }
-
-    // Trig
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v Cos_f32( typename FS::float32v value )
-    {
-        typedef typename FS::int32v int32v;
-        typedef typename FS::float32v float32v;
-        typedef typename FS::mask32v mask32v;
-
-        value = FS_Abs_f32( value );
-        value -= FS_Floor_f32( value * float32v( 0.1591549f ) ) * float32v( 6.283185f );
-
-        mask32v geHalfPi  = value >= float32v( 1.570796f );
-        mask32v geHalfPi2 = value >= float32v( 3.141593f );
-        mask32v geHalfPi3 = value >= float32v( 4.7123889f );
-
-        float32v cosAngle = value ^ FS_Mask_f32( ( value ^ float32v( 3.141593f ) - value ), geHalfPi );
-        cosAngle = cosAngle ^ FS_Mask_f32( FS_Casti32_f32( int32v( 0x80000000 ) ), geHalfPi2 );
-        cosAngle = cosAngle ^ FS_Mask_f32( cosAngle ^ ( float32v( 6.283185f ) - value ), geHalfPi3 );
-
-        cosAngle *= cosAngle;
-
-        cosAngle = FS_FMulAdd_f32( cosAngle, FS_FMulAdd_f32( cosAngle, float32v( 0.03679168f ), float32v( -0.49558072f ) ), float32v( 0.99940307f ) );
-
-        return cosAngle ^ FS_Mask_f32( FS_Casti32_f32( int32v( 0x80000000 ) ), FS_BitwiseAndNot_m32( geHalfPi, geHalfPi3 ) );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v Sin_f32( typename FS::float32v value )
-    {
-        return Cos_f32<FS>( typename FS::float32v( 1.570796f ) - value );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v Exp_f32( typename FS::float32v x )
-    {
-        typedef typename FS::int32v int32v;
-        typedef typename FS::float32v float32v;
-
-        x = FS_Min_f32( x, float32v( 88.3762626647949f ) );
-        x = FS_Max_f32( x, float32v( -88.3762626647949f ) );
-
-        /* express exp(x) as exp(g + n*log(2)) */
-        float32v fx = x * float32v( 1.44269504088896341f );
-        fx += float32v( 0.5f );
-
-        float32v flr = FS_Floor_f32( fx );  
-        fx = FS_MaskedSub_f32( flr, float32v( 1 ), flr > fx );
-
-        x -= fx * float32v( 0.693359375f );
-        x -= fx * float32v( -2.12194440e-4f );
-
-        float32v y( 1.9875691500E-4f );
-        y *= x;
-        y += float32v( 1.3981999507E-3f );
-        y *= x;
-        y += float32v( 8.3334519073E-3f );
-        y *= x;
-        y += float32v( 4.1665795894E-2f );
-        y *= x;
-        y += float32v( 1.6666665459E-1f );
-        y *= x;
-        y += float32v( 5.0000001201E-1f );
-        y *= x * x;
-        y += x + float32v( 1 );        
-
-        /* build 2^n */
-        int32v i = FS_Convertf32_i32( fx );
-        // another two AVX2 instructions
-        i += int32v( 0x7f );
-        i <<= 23;
-        float32v pow2n = FS_Casti32_f32( i );
-        
-        return y * pow2n;        
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v Log_f32( typename FS::float32v x )
-    {
-        typedef typename FS::int32v int32v;
-        typedef typename FS::float32v float32v;
-        typedef typename FS::mask32v mask32v;
-                
-        mask32v validMask = x > float32v( 0 );
-
-        x = FS_Max_f32( x, FS_Casti32_f32( int32v( 0x00800000 ) ) );  /* cut off denormalized stuff */
-
-        // can be done with AVX2
-        int32v i = FS_BitwiseShiftRightZX_i32( FS_Castf32_i32( x ), 23 );
-
-        /* keep only the fractional part */
-        x &= FS_Casti32_f32( int32v( ~0x7f800000 ) );
-        x |= float32v( 0.5f );
-
-        // this is again another AVX2 instruction
-        i -= int32v( 0x7f );
-        float32v e = FS_Converti32_f32( i );
-
-        e += float32v( 1 );
-
-        mask32v mask = x < float32v( 0.707106781186547524f );
-        x = FS_MaskedAdd_f32( x, x, mask );
-        x -= float32v( 1 );
-        e = FS_MaskedSub_f32( e, float32v( 1 ), mask );
-
-        float32v y = float32v( 7.0376836292E-2f );
-        y *= x;
-        y += float32v( -1.1514610310E-1f );
-        y *= x;
-        y += float32v( 1.1676998740E-1f );
-        y *= x;
-        y += float32v( -1.2420140846E-1f );
-        y *= x;
-        y += float32v( 1.4249322787E-1f );
-        y *= x;
-        y += float32v( -1.6668057665E-1f );
-        y *= x;
-        y += float32v( 2.0000714765E-1f );
-        y *= x;
-        y += float32v( -2.4999993993E-1f );
-        y *= x;
-        y += float32v( 3.3333331174E-1f );
-        y *= x;
-
-        float32v xx = x * x;
-        y *= xx;
-        y *= e * float32v( -2.12194440e-4f );
-        y -= xx * float32v( 0.5f );
-
-        x += y;
-        x += e * float32v( 0.693359375f );
-
-        return FS_Mask_f32( x, validMask );
-    }
-
-    template<typename FS>
-    FS_INLINE typename FS::float32v Pow_f32( typename FS::float32v value, typename FS::float32v pow )
-    {
-        return Exp_f32<FS>( pow * Log_f32<FS>( value ) );
-    }
-}
diff --git a/include/FastSIMD/InlInclude.h b/include/FastSIMD/InlInclude.h
deleted file mode 100644
index b4f4ae16..00000000
--- a/include/FastSIMD/InlInclude.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#pragma once
-#include "FunctionList.h"
-
-template<typename CLASS, typename FS>
-class FS_T;
-
-#define FASTSIMD_DECLARE_FS_TYPES \
-using float32v = typename FS::float32v;\
-using int32v   = typename FS::int32v;\
-using mask32v  = typename FS::mask32v
diff --git a/include/FastSIMD/SIMDTypeList.h b/include/FastSIMD/SIMDTypeList.h
deleted file mode 100644
index bb624b2d..00000000
--- a/include/FastSIMD/SIMDTypeList.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#include "FastSIMD.h"
-
-namespace FastSIMD
-{
-    template<eLevel... T>
-    struct SIMDTypeContainer
-    {
-        static constexpr eLevel MinimumCompiled = Level_Null;
-
-        template<eLevel L>
-        static constexpr eLevel GetNextCompiledAfter = Level_Null;
-    };
-
-    template<eLevel HEAD, eLevel... TAIL>
-    struct SIMDTypeContainer<HEAD, TAIL...>
-    {
-        static constexpr eLevel MinimumCompiled = (HEAD & COMPILED_SIMD_LEVELS) != 0 ? HEAD : SIMDTypeContainer<TAIL...>::MinimumCompiled;
-
-        template<eLevel L>
-        static constexpr eLevel GetNextCompiledAfter = (L == HEAD) ? SIMDTypeContainer<TAIL...>::MinimumCompiled : SIMDTypeContainer<TAIL...>::template GetNextCompiledAfter<L>;
-    };
-
-    using SIMDTypeList = SIMDTypeContainer<
-        Level_Scalar,
-        Level_SSE,
-        Level_SSE2,
-        Level_SSE3,
-        Level_SSSE3,
-        Level_SSE41,
-        Level_SSE42,
-        Level_AVX,
-        Level_AVX2,
-        Level_AVX512,
-        Level_NEON>;
-}
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 07a751f4..a6a43cba 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,32 +1,22 @@
 set(CMAKE_CXX_STANDARD 17)
 
-set(install_targets ${install_targets} FastNoise PARENT_SCOPE)
-
-file(GLOB_RECURSE FastSIMD_headers "../include/FastSIMD/*.h")
-file(GLOB_RECURSE FastSIMD_include_inl "../include/FastSIMD/*.inl")
-file(GLOB FastSIMD_inline "FastSIMD/*.inl")
-file(GLOB_RECURSE FastSIMD_internal_headers "FastSIMD/Internal/*.h")
-file(GLOB_RECURSE FastSIMD_internal_inl "FastSIMD/Internal/*.inl")
-
-set(install_fastsimd_headers ${FastSIMD_headers} PARENT_SCOPE)
-
-list(APPEND FastSIMD_headers ${FastSIMD_inline})
-list(APPEND FastSIMD_headers ${FastSIMD_include_inl})
-list(APPEND FastSIMD_internal_headers ${FastSIMD_internal_inl})
-
-set(FastSIMD_sources
-    FastSIMD/FastSIMD.cpp
-    FastSIMD/FastSIMD_Level_AVX2.cpp
-    FastSIMD/FastSIMD_Level_AVX512.cpp
-    FastSIMD/FastSIMD_Level_NEON.cpp
-    FastSIMD/FastSIMD_Level_Scalar.cpp
-    FastSIMD/FastSIMD_Level_SSE2.cpp
-    FastSIMD/FastSIMD_Level_SSE3.cpp
-    FastSIMD/FastSIMD_Level_SSE41.cpp
-    FastSIMD/FastSIMD_Level_SSE42.cpp
-    FastSIMD/FastSIMD_Level_SSSE3.cpp
+CPMAddPackage(
+    NAME FastSIMD
+    GITHUB_REPOSITORY Auburn/FastSIMD
+    GIT_TAG 0ee3529e6e455264e206d7ed554b90105bbe716b
+    EXCLUDE_FROM_ALL YES
+    OPTIONS
+        "BUILD_SHARED_LIBS OFF"
 )
 
+set(install_targets ${install_targets}
+    FastNoise
+    FastSIMD    
+    FastSIMD_FastNoise  
+    FastSIMD_DispatchClass 
+    PARENT_SCOPE)
+
+
 file(GLOB FastNoise_headers "../include/FastNoise/*.h")
 file(GLOB FastNoise_inl "../include/FastNoise/*.inl")
 file(GLOB_RECURSE FastNoise_generators_headers "../include/FastNoise/Generators/*.h")
@@ -43,10 +33,6 @@ set(FastNoise_source
     FastNoise/SmartNode.cpp
     FastNoise/FastNoise_C.cpp)
 
-source_group("FastSIMD" FILES ${FastSIMD_headers})
-source_group("FastSIMD" FILES ${FastSIMD_sources})
-source_group("FastSIMD\\internals" FILES ${FastSIMD_internal_headers})
-
 source_group("FastNoise" FILES ${FastNoise_headers})
 source_group("FastNoise" FILES ${FastNoise_source})
 source_group("FastNoise\\Generators" FILES ${FastNoise_generators_headers})
@@ -76,36 +62,20 @@ set_target_properties(FastNoise PROPERTIES
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
+
+fastsimd_create_simd_library(FastSIMD_FastNoise "../include/FastNoise/FastNoise_BuildList.inl")
+
+target_link_libraries(FastNoise PUBLIC FastSIMD_FastNoise)
+
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
-    target_compile_options(FastNoise PRIVATE /GL- /GS- /fp:fast /wd4251)
+    target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast /wd4251)
     
-    if(CMAKE_SIZEOF_VOID_P EQUAL 4)
-        set_source_files_properties(FastSIMD/FastSIMD_Level_Scalar.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE")
-        set_source_files_properties(FastSIMD/FastSIMD_Level_SSE2.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
-        set_source_files_properties(FastSIMD/FastSIMD_Level_SSE3.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
-        set_source_files_properties(FastSIMD/FastSIMD_Level_SSSE3.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
-        set_source_files_properties(FastSIMD/FastSIMD_Level_SSE41.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
-        set_source_files_properties(FastSIMD/FastSIMD_Level_SSE42.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
-    endif()
-    set_source_files_properties(FastSIMD/FastSIMD_Level_AVX2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2")
-    set_source_files_properties(FastSIMD/FastSIMD_Level_AVX512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512")
-
 elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
     if(MSVC)
-        target_compile_options(FastNoise PRIVATE /GL- /GS- /fp:fast)
+        target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast)
     else()
-        target_compile_options(FastNoise PRIVATE -ffast-math -fno-stack-protector)        
+        target_compile_options(FastSIMD_FastNoise PRIVATE -ffast-math -fno-stack-protector)        
     endif()
 
-    if(CMAKE_SIZEOF_VOID_P EQUAL 4 OR "${CMAKE_CXX_FLAGS}" MATCHES "-m32")
-        set_source_files_properties(FastSIMD/FastSIMD_Level_Scalar.cpp PROPERTIES COMPILE_FLAGS "-msse")
-        set_source_files_properties(FastSIMD/FastSIMD_Level_SSE2.cpp PROPERTIES COMPILE_FLAGS "-msse2")
-    endif()
-    set_source_files_properties(FastSIMD/FastSIMD_Level_SSE3.cpp PROPERTIES COMPILE_FLAGS "-msse3")
-    set_source_files_properties(FastSIMD/FastSIMD_Level_SSSE3.cpp PROPERTIES COMPILE_FLAGS "-mssse3")
-    set_source_files_properties(FastSIMD/FastSIMD_Level_SSE41.cpp PROPERTIES COMPILE_FLAGS "-msse4.1")
-    set_source_files_properties(FastSIMD/FastSIMD_Level_SSE42.cpp PROPERTIES COMPILE_FLAGS "-msse4.2")
-    set_source_files_properties(FastSIMD/FastSIMD_Level_AVX2.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
-    set_source_files_properties(FastSIMD/FastSIMD_Level_AVX512.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512dq -mfma")
 endif()
 
diff --git a/src/FastSIMD/Example/Example.h b/src/FastSIMD/Example/Example.h
deleted file mode 100644
index f64ed155..00000000
--- a/src/FastSIMD/Example/Example.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#include "FS_Class.inl"
-#ifdef FASTSIMD_INCLUDE_CHECK
-#include __FILE__
-#endif
-#include "FS_Class.inl"
-#pragma once
-
-FASTSIMD_CLASS_DECLARATION( Example )
-{
-    FASTSIMD_CLASS_SETUP( FastSIMD::Level_AVX2 | FastSIMD::Level_SSE41 | FastSIMD::Level_SSE2 | FastSIMD::Level_Scalar );
-
-public:
-
-    FS_EXTERNAL_FUNC( void DoStuff( int* data ) );
-
-    FS_EXTERNAL_FUNC( void DoArray( int* data0, int* data1, int size ) );
-};
diff --git a/src/FastSIMD/Example/Example.inl b/src/FastSIMD/Example/Example.inl
deleted file mode 100644
index c640a018..00000000
--- a/src/FastSIMD/Example/Example.inl
+++ /dev/null
@@ -1,125 +0,0 @@
-#define FASTSIMD_INTELLISENSE
-#include "Example.h"
-
-//template<typename T>// Generic function, used if no specialised function found
-//FS_CLASS( Example ) < T, FS_SIMD_CLASS::SIMD_Level >::FS_CLASS( Example )()
-//{
-//    int test = 1;
-//
-//    test += test;
-//}
-
-template<typename F, FastSIMD::ELevel S> // Generic function, used if no specialised function found
-void FS_CLASS( Example )<F, S>::DoStuff( int* data )
-{
-    int32v a = int32v( 1 );
-
-    FS_Store_i32( data, a );
-}
-
-//template<typename CLASS_T, typename SIMD_T> // Different function for level SSE2 or AVX2
-//void FS_CLASS( Example )::DoStuff( int* data )
-//{
-//    int32v a = _mm_loadu_si128( reinterpret_cast<__m128i const*>(data) );
-//
-//    a += _mm_set_epi32( 2, 3, 4, 5 );
-//
-//    a -= _mm_castps_si128( FS_VecZero_f32( ) );
-//
-//    FS_Store_i32( data, a );
-//}
-//
-//
-//template<typename CLASS_T, FastSIMD::Level LEVEL_T>
-//void FS_CLASS( Example )::DoArray( int* data0, int* data1, int size )
-//{
-//    for ( int i = 0; i < size; i += FS_VectorSize_i32() )
-//    {
-//        int32v a = FS_Load_i32( &data0[i] );
-//        int32v b = FS_Load_i32( &data1[i] );
-//        
-//        a *= b;
-//
-//        a <<= 1;
-//
-//        a -= FS_VecZero_i32();
-//
-//        (~a);
-//
-//        FS_Store_i32( &data0[i], a );
-//    }
-//}
-
-template<typename F, FastSIMD::ELevel S>
-void FS_CLASS( Example )<F, S>::DoArray( int* data0, int* data1, int size )
-{
-    for ( size_t i = 0; i < size; i += int32v::FS_Size() )
-    {
-        int32v a = FS_Load_i32( &data0[i] );
-        int32v b = FS_Load_i32( &data1[i] );
-
-        a += b;
-
-        a <<= 1;
-
-        a *= b;
-
-        a -= int32v::FS_Zero();
-
-        (~a);
-
-        FS_Store_i32( &data0[i], a );
-    }
-}
-
-template<typename T_FS>
-class FS_CLASS( Example )<T_FS, FastSIMD::Level_AVX2> : public FS_CLASS( Example )<T_FS, FastSIMD::Level_Null>
-{
-    //typedef FastSIMD_AVX2 T_FS;
-    FASTSIMD_CLASS_SETUP( FastSIMD::COMPILED_SIMD_LEVELS );
-
-public:
-    void DoArray( int* data0, int* data1, int size )
-    {
-        for ( size_t i = 0; i < size; i += int32v::FS_Size() )
-        {
-            int32v a = FS_Load_i32( &data0[i] );
-            int32v b = FS_Load_i32( &data1[i] );
-
-            //a += gfhfdghdfgh();
-
-            a += b;
-
-            a <<= 2;
-
-            a *= b;
-
-            a -= int32v::FS_Zero();
-
-            (~a);
-
-            FS_Store_i32( &data0[i], a );
-        }
-    }
-};
-
-//
-//template<typename T>
-//typename std::enable_if<(T::SIMD_Level <= 1)>::type FS_CLASS( Example )<T, FS_SIMD_CLASS::SIMD_Level>::DoArray( int* data0, int* data1, int size )
-//{
-//    for ( int i = 0; i < size; i += FS_VectorSize_i32() )
-//    {
-//        int32v a = FS_Load_i32( &data0[i] );
-//        int32v b = FS_Load_i32( &data1[i] );
-//
-//        a += b;
-//
-//        a <<= 1;
-//
-//        a -= FS_VecZero_i32();
-//
-//        (~a);
-//
-//        FS_Store_i32( &data0[i], a );
-//    }
-//}
diff --git a/src/FastSIMD/FastSIMD.cpp b/src/FastSIMD/FastSIMD.cpp
deleted file mode 100644
index 558d3998..00000000
--- a/src/FastSIMD/FastSIMD.cpp
+++ /dev/null
@@ -1,237 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#include <algorithm>
-#include <cstdint>
-
-#ifdef __GNUG__
-#include <x86intrin.h>
-#else
-#include <intrin.h>
-#endif
-
-#include "FastSIMD/SIMDTypeList.h"
-
-static_assert(FastSIMD::SIMDTypeList::MinimumCompiled & FastSIMD::COMPILED_SIMD_LEVELS, "FASTSIMD_FALLBACK_SIMD_LEVEL is not a compiled SIMD level, check FastSIMD_Config.h");
-
-#if FASTSIMD_x86
-// Define interface to cpuid instruction.
-// input:  eax = functionnumber, ecx = 0
-// output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3]
-static void cpuid( int output[4], int functionnumber )
-{
-#if defined( __GNUC__ ) || defined( __clang__ )              // use inline assembly, Gnu/AT&T syntax
-
-    int a, b, c, d;
-    __asm("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(functionnumber), "c"(0) : );
-    output[0] = a;
-    output[1] = b;
-    output[2] = c;
-    output[3] = d;
-
-#elif defined( _MSC_VER ) || defined ( __INTEL_COMPILER )     // Microsoft or Intel compiler, intrin.h included
-
-    __cpuidex( output, functionnumber, 0 ); // intrinsic function for CPUID
-
-#else                                                      // unknown platform. try inline assembly with masm/intel syntax
-
-    __asm
-    {
-        mov eax, functionnumber
-        xor ecx, ecx
-        cpuid;
-        mov esi, output
-            mov[esi], eax
-            mov[esi + 4], ebx
-            mov[esi + 8], ecx
-            mov[esi + 12], edx
-    }
-
-#endif
-}
-
-// Define interface to xgetbv instruction
-static int64_t xgetbv( int ctr )
-{
-#if (defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000) || (defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic
-
-    return _xgetbv( ctr ); // intrinsic function for XGETBV
-
-#elif defined( __GNUC__ )                                    // use inline assembly, Gnu/AT&T syntax
-
-    uint32_t a, d;
-    __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(ctr) : );
-    return a | (uint64_t( d ) << 32);
-
-#else  // #elif defined (_WIN32)                           // other compiler. try inline assembly with masm/intel/MS syntax
-
-    uint32_t a, d;
-    __asm {
-        mov ecx, ctr
-        _emit 0x0f
-        _emit 0x01
-        _emit 0xd0; // xgetbv
-        mov a, eax
-            mov d, edx
-    }
-    return a | (uint64_t( d ) << 32);
-
-#endif
-}
-#endif
-
-FASTSIMD_API FastSIMD::eLevel FastSIMD::CPUMaxSIMDLevel()
-{
-    static eLevel simdLevel = Level_Null;
-
-    if ( simdLevel > Level_Null )
-    {
-        return simdLevel;
-    }
-
-#if FASTSIMD_x86
-    int abcd[4] = { 0,0,0,0 }; // cpuid results
-
-#if !FASTSIMD_64BIT
-    simdLevel = Level_Scalar; // default value
-
-    cpuid( abcd, 0 ); // call cpuid function 0
-    if ( abcd[0] == 0 )
-        return simdLevel; // no further cpuid function supported
-
-    cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
-    if ( (abcd[3] & (1 << 0)) == 0 )
-        return simdLevel; // no floating point
-    if ( (abcd[3] & (1 << 23)) == 0 )
-        return simdLevel; // no MMX
-    if ( (abcd[3] & (1 << 15)) == 0 )
-        return simdLevel; // no conditional move
-    if ( (abcd[3] & (1 << 24)) == 0 )
-        return simdLevel; // no FXSAVE
-    if ( (abcd[3] & (1 << 25)) == 0 )
-        return simdLevel; // no SSE
-    simdLevel = Level_SSE;
-    // 1: SSE supported
-
-    if ( (abcd[3] & (1 << 26)) == 0 )
-        return simdLevel; // no SSE2
-#else
-    cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
-#endif
-
-    simdLevel = Level_SSE2; // default value for 64bit
-    // 2: SSE2 supported
-
-    if ( (abcd[2] & (1 << 0)) == 0 )
-        return simdLevel; // no SSE3
-    simdLevel = Level_SSE3;
-    // 3: SSE3 supported
-
-    if ( (abcd[2] & (1 << 9)) == 0 )
-        return simdLevel; // no SSSE3
-    simdLevel = Level_SSSE3;
-    // 4: SSSE3 supported
-
-    if ( (abcd[2] & (1 << 19)) == 0 )
-        return simdLevel; // no SSE4.1
-    simdLevel = Level_SSE41;
-    // 5: SSE4.1 supported
-
-    if ( (abcd[2] & (1 << 23)) == 0 )
-        return simdLevel; // no POPCNT
-    if ( (abcd[2] & (1 << 20)) == 0 )
-        return simdLevel; // no SSE4.2
-    simdLevel = Level_SSE42;
-    // 6: SSE4.2 supported
-
-    if ( (abcd[2] & (1 << 26)) == 0 )
-        return simdLevel; // no XSAVE
-    if ( (abcd[2] & (1 << 27)) == 0 )
-        return simdLevel; // no OSXSAVE
-    if ( (abcd[2] & (1 << 28)) == 0 )
-        return simdLevel; // no AVX
-
-    uint64_t osbv = xgetbv( 0 );
-    if ( (osbv & 6) != 6 )
-        return simdLevel; // AVX not enabled in O.S.
-    simdLevel = Level_AVX;
-    // 7: AVX supported
-
-    cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
-    if ( (abcd[1] & (1 << 5)) == 0 )
-        return simdLevel; // no AVX2
-    simdLevel = Level_AVX2;
-    // 8: AVX2 supported
-
-    if( (osbv & (0xE0)) != 0xE0 )
-        return simdLevel; // AVX512 not enabled in O.S.
-    if ( (abcd[1] & (1 << 16)) == 0 )
-        return simdLevel; // no AVX512
-    cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
-    if ( (abcd[0] & 0x60) != 0x60 )
-        return simdLevel; // no AVX512
-    // 9: AVX512 supported
-
-    cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
-    if ( (abcd[1] & (1 << 31)) == 0 )
-        return simdLevel; // no AVX512VL
-    // 10: AVX512VL supported
-
-    if ( (abcd[1] & 0x40020000) != 0x40020000 )
-        return simdLevel; // no AVX512BW, AVX512DQ
-    simdLevel = Level_AVX512;
-    // 11: AVX512BW & AVX512DQ supported
-#endif
-
-#if FASTSIMD_ARM
-    simdLevel = Level_NEON;
-#endif
-
-    return simdLevel;
-}
-
-template<typename CLASS_T, FastSIMD::eLevel SIMD_LEVEL>
-CLASS_T* SIMDLevelSelector( FastSIMD::eLevel maxSIMDLevel, FastSIMD::MemoryAllocator allocator )
-{
-    if constexpr( ( CLASS_T::Supported_SIMD_Levels & SIMD_LEVEL ) != 0 )
-    {
-        CLASS_T* newClass = SIMDLevelSelector<CLASS_T, FastSIMD::SIMDTypeList::GetNextCompiledAfter<SIMD_LEVEL>>( maxSIMDLevel, allocator );
-
-        if( !newClass && SIMD_LEVEL <= maxSIMDLevel )
-        {
-            return FastSIMD::ClassFactory<CLASS_T, SIMD_LEVEL>( allocator );
-        }
-
-        return newClass;
-    }
-    else
-    {
-        if constexpr( SIMD_LEVEL == FastSIMD::Level_Null )
-        {
-            return nullptr;
-        }
-
-        return SIMDLevelSelector<CLASS_T, FastSIMD::SIMDTypeList::GetNextCompiledAfter<SIMD_LEVEL>>( maxSIMDLevel, allocator );        
-    }
-}
-
-template<typename CLASS_T>
-CLASS_T* FastSIMD::New( eLevel maxSIMDLevel, FastSIMD::MemoryAllocator allocator )
-{
-    if( maxSIMDLevel == Level_Null )
-    {
-        maxSIMDLevel = CPUMaxSIMDLevel();
-    }
-    else
-    {
-        maxSIMDLevel = std::min( maxSIMDLevel, CPUMaxSIMDLevel() );        
-    }
-
-    static_assert(( CLASS_T::Supported_SIMD_Levels & FastSIMD::SIMDTypeList::MinimumCompiled ), "MinimumCompiled SIMD Level must be supported by this class" );
-    return SIMDLevelSelector<CLASS_T, SIMDTypeList::MinimumCompiled>( maxSIMDLevel, allocator );
-}
-
-#define FASTSIMD_BUILD_CLASS( CLASS ) \
-template FASTSIMD_API CLASS* FastSIMD::New( FastSIMD::eLevel, FastSIMD::MemoryAllocator );
-
-#define FASTSIMD_INCLUDE_HEADER_ONLY
-#include "FastSIMD_BuildList.inl"
diff --git a/src/FastSIMD/FastSIMD_BuildList.inl b/src/FastSIMD/FastSIMD_BuildList.inl
deleted file mode 100644
index 8e65ff25..00000000
--- a/src/FastSIMD/FastSIMD_BuildList.inl
+++ /dev/null
@@ -1,10 +0,0 @@
-#pragma once
-
-#ifndef FASTSIMD_BUILD_CLASS
-#error Do not include this file
-#endif
-
-//#include "Example/Example.inl"
-//FASTSIMD_BUILD_CLASS( Example )
-
-#include "FastNoise/FastNoise_BuildList.inl"
\ No newline at end of file
diff --git a/src/FastSIMD/FastSIMD_Level_AVX2.cpp b/src/FastSIMD/FastSIMD_Level_AVX2.cpp
deleted file mode 100644
index c8ae3ed8..00000000
--- a/src/FastSIMD/FastSIMD_Level_AVX2.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_AVX2
-
-// To compile AVX2 support enable AVX(2) code generation compiler flags for this file
-#ifndef __AVX__
-#ifdef _MSC_VER
-#error To compile AVX set C++ code generation to use /arch:AVX on FastSIMD_Level_AVX2.cpp, or change "#define FASTSIMD_COMPILE_AVX2" in FastSIMD_Config.h
-#else
-#error To compile AVX add build command "-march=core-avx" on FastSIMD_Level_AVX2.cpp, or change "#define FASTSIMD_COMPILE_AVX2" in FastSIMD_Config.h
-#endif
-#endif
-
-#include "Internal/AVX.h"
-#define FS_SIMD_CLASS FastSIMD::AVX2
-#include "Internal/SourceBuilder.inl"
-#endif
\ No newline at end of file
diff --git a/src/FastSIMD/FastSIMD_Level_AVX512.cpp b/src/FastSIMD/FastSIMD_Level_AVX512.cpp
deleted file mode 100644
index 1472d656..00000000
--- a/src/FastSIMD/FastSIMD_Level_AVX512.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_AVX512 
-
-// To compile AVX512 support enable AVX512 code generation compiler flags for this file
-#ifndef __AVX512DQ__ 
-#ifdef _MSC_VER
-#error To compile AVX512 set C++ code generation to use /arch:AVX512 on FastSIMD_Level_AVX512.cpp, or change "#define FASTSIMD_COMPILE_AVX512" in FastSIMD_Config.h
-#else
-#error To compile AVX512 add build command "-mavx512f -mavx512dq" on FastSIMD_Level_AVX512.cpp, or change "#define FASTSIMD_COMPILE_AVX512" in FastSIMD_Config.h
-#endif
-#endif
-
-#include "Internal/AVX512.h"
-#define FS_SIMD_CLASS FastSIMD::AVX512
-#include "Internal/SourceBuilder.inl"
-#endif
\ No newline at end of file
diff --git a/src/FastSIMD/FastSIMD_Level_NEON.cpp b/src/FastSIMD/FastSIMD_Level_NEON.cpp
deleted file mode 100644
index e804ace1..00000000
--- a/src/FastSIMD/FastSIMD_Level_NEON.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_NEON
-#include "Internal/NEON.h"
-#define FS_SIMD_CLASS FastSIMD::NEON
-#include "Internal/SourceBuilder.inl"
-#endif
\ No newline at end of file
diff --git a/src/FastSIMD/FastSIMD_Level_SSE2.cpp b/src/FastSIMD/FastSIMD_Level_SSE2.cpp
deleted file mode 100644
index a36c4f66..00000000
--- a/src/FastSIMD/FastSIMD_Level_SSE2.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_SSE2
-#include "Internal/SSE.h"
-#define FS_SIMD_CLASS FastSIMD::SSE2
-#include "Internal/SourceBuilder.inl"
-#endif
diff --git a/src/FastSIMD/FastSIMD_Level_SSE3.cpp b/src/FastSIMD/FastSIMD_Level_SSE3.cpp
deleted file mode 100644
index a633767d..00000000
--- a/src/FastSIMD/FastSIMD_Level_SSE3.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_SSE3
-#include "Internal/SSE.h"
-#define FS_SIMD_CLASS FastSIMD::SSE3
-#include "Internal/SourceBuilder.inl"
-#endif
diff --git a/src/FastSIMD/FastSIMD_Level_SSE41.cpp b/src/FastSIMD/FastSIMD_Level_SSE41.cpp
deleted file mode 100644
index b33ba482..00000000
--- a/src/FastSIMD/FastSIMD_Level_SSE41.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_SSE41
-#include "Internal/SSE.h"
-#define FS_SIMD_CLASS FastSIMD::SSE41
-#include "Internal/SourceBuilder.inl"
-#endif
diff --git a/src/FastSIMD/FastSIMD_Level_SSE42.cpp b/src/FastSIMD/FastSIMD_Level_SSE42.cpp
deleted file mode 100644
index 140065e0..00000000
--- a/src/FastSIMD/FastSIMD_Level_SSE42.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_SSE42
-#include "Internal/SSE.h"
-#define FS_SIMD_CLASS FastSIMD::SSE42
-#include "Internal/SourceBuilder.inl"
-#endif
diff --git a/src/FastSIMD/FastSIMD_Level_SSSE3.cpp b/src/FastSIMD/FastSIMD_Level_SSSE3.cpp
deleted file mode 100644
index f91de069..00000000
--- a/src/FastSIMD/FastSIMD_Level_SSSE3.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_SSSE3
-#include "Internal/SSE.h"
-#define FS_SIMD_CLASS FastSIMD::SSSE3
-#include "Internal/SourceBuilder.inl"
-#endif
diff --git a/src/FastSIMD/FastSIMD_Level_Scalar.cpp b/src/FastSIMD/FastSIMD_Level_Scalar.cpp
deleted file mode 100644
index 87aff72a..00000000
--- a/src/FastSIMD/FastSIMD_Level_Scalar.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "FastSIMD/FastSIMD.h"
-
-#if FASTSIMD_COMPILE_SCALAR
-#include "Internal/Scalar.h"
-#define FS_SIMD_CLASS FastSIMD::Scalar
-#include "Internal/SourceBuilder.inl"
-#endif
\ No newline at end of file
diff --git a/src/FastSIMD/Internal/AVX.h b/src/FastSIMD/Internal/AVX.h
deleted file mode 100644
index b46375cd..00000000
--- a/src/FastSIMD/Internal/AVX.h
+++ /dev/null
@@ -1,474 +0,0 @@
-#pragma once
-
-#ifdef __GNUG__
-#include <x86intrin.h>
-#else
-#include <intrin.h>
-#endif
-
-#include "VecTools.h"
-
-namespace FastSIMD
-{
-    struct AVX_f32x8
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( AVX_f32x8, __m256 );
-
-        FS_INLINE static AVX_f32x8 Incremented()
-        {
-            return _mm256_set_ps( 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f );
-        }
-
-        FS_INLINE explicit AVX_f32x8( float f )
-        {
-            *this = _mm256_set1_ps( f );
-        }
-
-        FS_INLINE explicit AVX_f32x8( float f0, float f1, float f2, float f3, float f4, float f5, float f6, float f7 )
-        {
-            *this = _mm256_set_ps( f7, f6, f5, f4, f3, f2, f1, f0 );
-        }
-
-        FS_INLINE AVX_f32x8& operator+=( const AVX_f32x8& rhs )
-        {
-            *this = _mm256_add_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX_f32x8& operator-=( const AVX_f32x8& rhs )
-        {
-            *this = _mm256_sub_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX_f32x8& operator*=( const AVX_f32x8& rhs )
-        {
-            *this = _mm256_mul_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX_f32x8& operator/=( const AVX_f32x8& rhs )
-        {
-            *this = _mm256_div_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX_f32x8& operator&=( const AVX_f32x8& rhs )
-        {
-            *this = _mm256_and_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX_f32x8& operator|=( const AVX_f32x8& rhs )
-        {
-            *this = _mm256_or_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX_f32x8& operator^=( const AVX_f32x8& rhs )
-        {
-            *this = _mm256_xor_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX_f32x8 operator~() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m256i neg1 = _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() );
-#else
-            const __m256i neg1 = _mm256_set1_epi32( -1 );
-#endif
-            return _mm256_xor_ps( *this, _mm256_castsi256_ps( neg1 ) );
-        }
-
-        FS_INLINE AVX_f32x8 operator-() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m256i minInt = _mm256_slli_epi32( _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() ), 31 );
-#else
-            const __m256i minInt = _mm256_set1_epi32( 0x80000000 );
-#endif
-            return _mm256_xor_ps( *this, _mm256_castsi256_ps( minInt ) );
-        }
-
-        FS_INLINE __m256i operator==( const AVX_f32x8& rhs )
-        {
-            return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_EQ_OS ) );
-        }
-
-        FS_INLINE __m256i operator!=( const AVX_f32x8& rhs )
-        {
-            return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_NEQ_OS ) );
-        }
-
-        FS_INLINE __m256i operator>( const AVX_f32x8& rhs )
-        {
-            return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_GT_OS ) );
-        }
-
-        FS_INLINE __m256i operator<( const AVX_f32x8& rhs )
-        {
-            return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_LT_OS ) );
-        }
-
-        FS_INLINE __m256i operator>=( const AVX_f32x8& rhs )
-        {
-            return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_GE_OS ) );
-        }
-
-        FS_INLINE __m256i operator<=( const AVX_f32x8& rhs )
-        {
-            return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_LE_OS ) );
-        }
-    };
-
-    FASTSIMD_INTERNAL_OPERATORS_FLOAT( AVX_f32x8 )
-
-
-    struct AVX2_i32x8
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( AVX2_i32x8, __m256i );
-
-        FS_INLINE static AVX2_i32x8 Incremented()
-        {
-            return _mm256_set_epi32( 7, 6, 5, 4, 3, 2, 1, 0 );
-        }
-
-        FS_INLINE explicit AVX2_i32x8( int32_t f )
-        {
-            *this = _mm256_set1_epi32( f );
-        }
-
-        FS_INLINE explicit AVX2_i32x8( int32_t i0, int32_t i1, int32_t i2, int32_t i3, int32_t i4, int32_t i5, int32_t i6, int32_t i7 )
-        {
-            *this = _mm256_set_epi32( i7, i6, i5, i4, i3, i2, i1, i0 );
-        }
-
-        FS_INLINE AVX2_i32x8& operator+=( const AVX2_i32x8& rhs )
-        {
-            *this = _mm256_add_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX2_i32x8& operator-=( const AVX2_i32x8& rhs )
-        {
-            *this = _mm256_sub_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX2_i32x8& operator*=( const AVX2_i32x8& rhs )
-        {
-            *this = _mm256_mullo_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX2_i32x8& operator&=( const AVX2_i32x8& rhs )
-        {
-            *this = _mm256_and_si256( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX2_i32x8& operator|=( const AVX2_i32x8& rhs )
-        {
-            *this = _mm256_or_si256( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX2_i32x8& operator^=( const AVX2_i32x8& rhs )
-        {
-            *this = _mm256_xor_si256( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX2_i32x8& operator>>=( int32_t rhs )
-        {
-            *this = _mm256_srai_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX2_i32x8& operator<<=( int32_t rhs )
-        {
-            *this = _mm256_slli_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX2_i32x8 operator~() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m256i neg1 = _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() );
-#else
-            const __m256i neg1 = _mm256_set1_epi32( -1 );
-#endif
-            return _mm256_xor_si256( *this, neg1 );
-        }
-
-        FS_INLINE AVX2_i32x8 operator-() const
-        {
-            return _mm256_sub_epi32( _mm256_setzero_si256(), *this );
-        }
-
-        FS_INLINE AVX2_i32x8 operator==( const AVX2_i32x8& rhs )
-        {
-            return _mm256_cmpeq_epi32( *this, rhs );
-        }
-
-        FS_INLINE AVX2_i32x8 operator>( const AVX2_i32x8& rhs )
-        {
-            return _mm256_cmpgt_epi32( *this, rhs );
-        }
-
-        FS_INLINE AVX2_i32x8 operator<( const AVX2_i32x8& rhs )
-        {
-            return _mm256_cmpgt_epi32( rhs, *this );
-        }
-    };
-
-    FASTSIMD_INTERNAL_OPERATORS_INT( AVX2_i32x8, int32_t )
-
-    template<eLevel LEVEL_T>
-    class AVX_T
-    {
-    public:
-        static_assert( LEVEL_T >= Level_AVX && LEVEL_T <= Level_AVX2, "Cannot create template with unsupported SIMD level" );
-
-        static constexpr eLevel SIMD_Level = LEVEL_T;
-
-        template<size_t ElementSize>
-        static constexpr size_t VectorSize = (256 / 8) / ElementSize;
-
-        typedef AVX_f32x8  float32v;
-        typedef AVX2_i32x8 int32v;
-        typedef AVX2_i32x8 mask32v;
-
-        // Load
-
-        FS_INLINE static float32v Load_f32( void const* p )
-        {
-            return _mm256_loadu_ps( reinterpret_cast<float const*>(p) );
-        }
-
-        FS_INLINE static int32v Load_i32( void const* p )
-        {
-            return _mm256_loadu_si256( reinterpret_cast<__m256i const*>(p) );
-        }
-
-        // Store
-
-        FS_INLINE static void Store_f32( void* p, float32v a )
-        {
-            _mm256_storeu_ps( reinterpret_cast<float*>(p), a );
-        }
-
-        FS_INLINE static void Store_i32( void* p, int32v a )
-        {
-            _mm256_storeu_si256( reinterpret_cast<__m256i*>(p), a );
-        }
-
-        // Extract
-
-        FS_INLINE static float Extract0_f32( float32v a )
-        {
-            return _mm256_cvtss_f32( a );
-        }
-
-        FS_INLINE static int32_t Extract0_i32( int32v a )
-        {
-            return _mm_cvtsi128_si32(_mm256_castsi256_si128( a ));
-        }
-
-        FS_INLINE static float Extract_f32( float32v a, size_t idx )
-        {
-            float f[8];
-            Store_f32( &f, a );
-            return f[idx & 7];
-        }
-
-        FS_INLINE static int32_t Extract_i32( int32v a, size_t idx )
-        {
-            int32_t i[8];
-            Store_i32( &i, a );
-            return i[idx & 7];
-        }
-
-        // Cast
-
-        FS_INLINE static float32v Casti32_f32( int32v a )
-        {
-            return _mm256_castsi256_ps( a );
-        }
-
-        FS_INLINE static int32v Castf32_i32( float32v a )
-        {
-            return _mm256_castps_si256( a );
-        }
-
-        // Convert
-
-        FS_INLINE static float32v Converti32_f32( int32v a )
-        {
-            return _mm256_cvtepi32_ps( a );
-        }
-
-        FS_INLINE static int32v Convertf32_i32( float32v a )
-        {
-            return _mm256_cvtps_epi32( a );
-        }
-
-        // Select
-
-        FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
-        {
-            return  _mm256_blendv_ps( b, a, _mm256_castsi256_ps( m ) );
-        }
-
-        FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
-        {
-            return _mm256_castps_si256( _mm256_blendv_ps( _mm256_castsi256_ps( b ), _mm256_castsi256_ps( a ), _mm256_castsi256_ps( m ) ) );
-        }
-
-        // Min, Max
-
-        FS_INLINE static float32v Min_f32( float32v a, float32v b )
-        {
-            return _mm256_min_ps( a, b );
-        }
-
-        FS_INLINE static float32v Max_f32( float32v a, float32v b )
-        {
-            return _mm256_max_ps( a, b );
-        }
-
-        FS_INLINE static int32v Min_i32( int32v a, int32v b )
-        {
-            return _mm256_min_epi32( a, b );
-        }
-
-        FS_INLINE static int32v Max_i32( int32v a, int32v b )
-        {
-            return _mm256_max_epi32( a, b );
-        }
-
-        // Bitwise
-
-        FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
-        {
-            return _mm256_andnot_ps( b, a );
-        }
-
-        FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
-        {
-            return _mm256_andnot_si256( b, a );
-        }
-
-        FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
-        {
-            return Casti32_f32( _mm256_srli_epi32( Castf32_i32( a ), b ) );
-        }
-
-        FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
-        {
-            return _mm256_srli_epi32( a, b );
-        }
-
-        // Abs
-
-        FS_INLINE static float32v Abs_f32( float32v a )
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m256i intMax = _mm256_srli_epi32( _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() ), 1 );
-#else
-            const __m256i intMax = _mm256_set1_epi32( 0x7FFFFFFF );
-#endif
-            return _mm256_and_ps( a, _mm256_castsi256_ps( intMax ) );
-        }
-
-        FS_INLINE static int32v Abs_i32( int32v a )
-        {
-            return _mm256_abs_epi32( a );
-        }
-
-        // Float math
-
-        FS_INLINE static float32v Sqrt_f32( float32v a )
-        {
-            return _mm256_sqrt_ps( a );
-        }
-
-        FS_INLINE static float32v InvSqrt_f32( float32v a )
-        {
-            return _mm256_rsqrt_ps( a );
-        }
-
-        FS_INLINE static float32v Reciprocal_f32( float32v a )
-        {
-            return _mm256_rcp_ps( a );
-        }
-
-        // Floor, Ceil, Round
-
-        FS_INLINE static float32v Floor_f32( float32v a )
-        {
-            return _mm256_round_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
-        }
-
-        FS_INLINE static float32v Ceil_f32( float32v a )
-        {
-            return _mm256_round_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
-        }
-
-        FS_INLINE static float32v Round_f32( float32v a )
-        {
-            return _mm256_round_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
-        }
-
-        //Mask
-
-        FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
-        {
-            return a & m;
-        }
-
-        FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
-        {
-            return _mm256_and_ps( a, _mm256_castsi256_ps( m ) );
-        }
-
-        FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
-        {
-            return _mm256_andnot_si256( m, a );
-        }
-
-        FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
-        {
-            return _mm256_andnot_ps( _mm256_castsi256_ps( m ), a );
-        }
-
-        FS_INLINE static bool AnyMask_bool( mask32v m )
-        {
-            return !_mm256_testz_si256( m, m );
-        }
-    };
-
-#if FASTSIMD_COMPILE_AVX
-    typedef AVX_T<Level_AVX>  AVX;
-#endif
-
-#if FASTSIMD_COMPILE_AVX2
-    typedef AVX_T<Level_AVX2> AVX2;
-
-#if FASTSIMD_USE_FMA
-    template<>
-    FS_INLINE AVX2::float32v FMulAdd_f32<AVX2>( AVX2::float32v a, AVX2::float32v b, AVX2::float32v c )
-    {
-        return _mm256_fmadd_ps( a, b, c );
-    }
-
-    template<>
-    FS_INLINE AVX2::float32v FNMulAdd_f32<AVX2>( AVX2::float32v a, AVX2::float32v b, AVX2::float32v c )
-    {
-        return _mm256_fnmadd_ps( a, b, c );
-    }
-#endif
-#endif
-    
-}
diff --git a/src/FastSIMD/Internal/AVX512.h b/src/FastSIMD/Internal/AVX512.h
deleted file mode 100644
index fd378eda..00000000
--- a/src/FastSIMD/Internal/AVX512.h
+++ /dev/null
@@ -1,540 +0,0 @@
-#pragma once
-
-#include <immintrin.h>
-
-#include "VecTools.h"
-
-namespace FastSIMD
-{
-
-    struct AVX512_f32x16
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( AVX512_f32x16, __m512 );
-
-        FS_INLINE static AVX512_f32x16 Incremented()
-        {
-            return _mm512_set_ps( 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f );
-        }
-
-        FS_INLINE explicit AVX512_f32x16( float f )
-        {
-            *this = _mm512_set1_ps( f );
-        }
-
-        FS_INLINE explicit AVX512_f32x16( float f0, float f1, float f2, float f3, float f4, float f5, float f6, float f7, float f8, float f9, float f10, float f11, float f12, float f13, float f14, float f15 )
-        {
-            *this = _mm512_set_ps( f15, f14, f13, f12, f11, f10, f9, f8, f7, f6, f5, f4, f3, f2, f1, f0 );
-        }
-
-        FS_INLINE AVX512_f32x16& operator+=( const AVX512_f32x16& rhs )
-        {
-            *this = _mm512_add_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_f32x16& operator-=( const AVX512_f32x16& rhs )
-        {
-            *this = _mm512_sub_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_f32x16& operator*=( const AVX512_f32x16& rhs )
-        {
-            *this = _mm512_mul_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_f32x16& operator/=( const AVX512_f32x16& rhs )
-        {
-            *this = _mm512_div_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_f32x16& operator&=( const AVX512_f32x16& rhs )
-        {
-            *this = _mm512_and_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_f32x16& operator|=( const AVX512_f32x16& rhs )
-        {
-            *this = _mm512_or_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_f32x16& operator^=( const AVX512_f32x16& rhs )
-        {
-            *this = _mm512_xor_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_f32x16 operator~() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m512i neg1 = _mm512_cmpeq_epi32( _mm512_setzero_si512(), _mm512_setzero_si512() );
-#else
-            const __m512i neg1 = _mm512_set1_epi32( -1 );
-#endif
-            return _mm512_xor_ps( *this, _mm512_castsi512_ps( neg1 ) );
-        }
-
-        FS_INLINE AVX512_f32x16 operator-() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m512i minInt = _mm512_slli_epi32( _mm512_cmpeq_epi32( _mm512_setzero_si512(), _mm512_setzero_si512() ), 31 );
-#else
-            const __m512i minInt = _mm512_set1_epi32( 0x80000000 );
-#endif
-            return _mm512_xor_ps( *this, _mm512_castsi512_ps( minInt ) );
-        }
-
-        FS_INLINE __mmask16 operator==( const AVX512_f32x16& rhs )
-        {
-            return _mm512_cmp_ps_mask( *this, rhs, _CMP_EQ_OS );
-        }
-
-        FS_INLINE __mmask16 operator!=( const AVX512_f32x16& rhs )
-        {
-            return _mm512_cmp_ps_mask( *this, rhs, _CMP_NEQ_OS );
-        }
-
-        FS_INLINE __mmask16 operator>( const AVX512_f32x16& rhs )
-        {
-            return _mm512_cmp_ps_mask( *this, rhs, _CMP_GT_OS );
-        }
-
-        FS_INLINE __mmask16 operator<( const AVX512_f32x16& rhs )
-        {
-            return _mm512_cmp_ps_mask( *this, rhs, _CMP_LT_OS );
-        }
-
-        FS_INLINE __mmask16 operator>=( const AVX512_f32x16& rhs )
-        {
-            return _mm512_cmp_ps_mask( *this, rhs, _CMP_GE_OS );
-        }
-
-        FS_INLINE __mmask16 operator<=( const AVX512_f32x16& rhs )
-        {
-            return _mm512_cmp_ps_mask( *this, rhs, _CMP_LE_OS );
-        }
-    };
-
-    FASTSIMD_INTERNAL_OPERATORS_FLOAT( AVX512_f32x16 )
-
-
-    struct AVX512_i32x16
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( AVX512_i32x16, __m512i );
-
-        FS_INLINE static AVX512_i32x16 Incremented()
-        {
-            return _mm512_set_epi32( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 );
-        }
-
-        FS_INLINE explicit AVX512_i32x16( int32_t i )
-        {
-            *this = _mm512_set1_epi32( i );
-        }
-
-        FS_INLINE explicit AVX512_i32x16( int32_t i0, int32_t i1, int32_t i2, int32_t i3, int32_t i4, int32_t i5, int32_t i6, int32_t i7, int32_t i8, int32_t i9, int32_t i10, int32_t i11, int32_t i12, int32_t i13, int32_t i14, int32_t i15 )
-        {
-            *this = _mm512_set_epi32( i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0 );
-        }
-
-        FS_INLINE AVX512_i32x16& operator+=( const AVX512_i32x16& rhs )
-        {
-            *this = _mm512_add_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_i32x16& operator-=( const AVX512_i32x16& rhs )
-        {
-            *this = _mm512_sub_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_i32x16& operator*=( const AVX512_i32x16& rhs )
-        {
-            *this = _mm512_mullo_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_i32x16& operator&=( const AVX512_i32x16& rhs )
-        {
-            *this = _mm512_and_si512( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_i32x16& operator|=( const AVX512_i32x16& rhs )
-        {
-            *this = _mm512_or_si512( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_i32x16& operator^=( const AVX512_i32x16& rhs )
-        {
-            *this = _mm512_xor_si512( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_i32x16& operator>>=( int32_t rhs )
-        {
-            *this = _mm512_srai_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_i32x16& operator<<=( int32_t rhs )
-        {
-            *this = _mm512_slli_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE AVX512_i32x16 operator~() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m512i neg1 = _mm512_cmpeq_epi32( _mm512_setzero_si512(), _mm512_setzero_si512() );
-#else
-            const __m512i neg1 = _mm512_set1_epi32( -1 );
-#endif
-            return _mm512_xor_si512( *this, neg1 );
-        }
-
-        FS_INLINE AVX512_i32x16 operator-() const
-        {
-            return _mm512_sub_epi32( _mm512_setzero_si512(), *this );
-        }
-
-        FS_INLINE __mmask16 operator==( const AVX512_i32x16& rhs )
-        {
-            return _mm512_cmpeq_epi32_mask( *this, rhs );
-        }
-
-        FS_INLINE __mmask16 operator>( const AVX512_i32x16& rhs )
-        {
-            return _mm512_cmpgt_epi32_mask( *this, rhs );
-        }
-
-        FS_INLINE __mmask16 operator<( const AVX512_i32x16& rhs )
-        {
-            return _mm512_cmplt_epi32_mask( *this, rhs );
-        }
-    };
-
-    FASTSIMD_INTERNAL_OPERATORS_INT( AVX512_i32x16, int32_t )
-
-    template<eLevel LEVEL_T>
-    class AVX512_T
-    {
-    public:
-        static_assert( LEVEL_T == Level_AVX512, "Cannot create template with unsupported SIMD level" );
-
-        static constexpr eLevel SIMD_Level = LEVEL_T;
-
-        template<size_t ElementSize>
-        static constexpr size_t VectorSize = (512 / 8) / ElementSize;
-
-        typedef AVX512_f32x16  float32v;
-        typedef AVX512_i32x16  int32v;
-        typedef __mmask16      mask32v;
-
-        // Load
-
-        FS_INLINE static float32v Load_f32( void const* p )
-        {
-            return _mm512_loadu_ps( p );
-        }
-
-        FS_INLINE static int32v Load_i32( void const* p )
-        {
-            return _mm512_loadu_si512( p );
-        }
-
-        // Store
-
-        FS_INLINE static void Store_f32( void* p, float32v a )
-        {
-            _mm512_storeu_ps( p, a );
-        }
-
-        FS_INLINE static void Store_i32( void* p, int32v a )
-        {
-            _mm512_storeu_si512( p, a );
-        }
-
-        // Cast
-
-        FS_INLINE static float32v Casti32_f32( int32v a )
-        {
-            return _mm512_castsi512_ps( a );
-        }
-
-        FS_INLINE static int32v Castf32_i32( float32v a )
-        {
-            return _mm512_castps_si512( a );
-        }
-
-        // Extract
-
-        FS_INLINE static float Extract0_f32( float32v a )
-        {
-            return _mm512_cvtss_f32( a );
-        }
-
-        FS_INLINE static int32_t Extract0_i32( int32v a )
-        {
-            return _mm_cvtsi128_si32( _mm512_castsi512_si128( a ) );
-        }
-
-        FS_INLINE static float Extract_f32( float32v a, size_t idx )
-        {
-            float32v x = _mm512_maskz_compress_ps( mask32v( 1u << (idx & 15) ), a );
-            return _mm512_cvtss_f32( x );
-        }
-
-        FS_INLINE static int32_t Extract_i32( int32v a, size_t idx )
-        {
-            int32v x = _mm512_maskz_compress_epi32( mask32v( 1u << (idx & 15) ), a );
-            return _mm_cvtsi128_si32( _mm512_castsi512_si128( x ) );
-        }
-
-        // Convert
-
-        FS_INLINE static float32v Converti32_f32( int32v a )
-        {
-            return _mm512_cvtepi32_ps( a );
-        }
-
-        FS_INLINE static int32v Convertf32_i32( float32v a )
-        {
-            return _mm512_cvtps_epi32( a );
-        }
-
-        // Select
-
-        FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
-        {
-            return _mm512_mask_blend_ps( m, b, a );
-        }
-
-        FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
-        {
-            return _mm512_mask_blend_epi32( m, b, a );
-        }
-
-        // Min, Max
-
-        FS_INLINE static float32v Min_f32( float32v a, float32v b )
-        {
-            return _mm512_min_ps( a, b );
-        }
-
-        FS_INLINE static float32v Max_f32( float32v a, float32v b )
-        {
-            return _mm512_max_ps( a, b );
-        }
-
-        FS_INLINE static int32v Min_i32( int32v a, int32v b )
-        {
-            return _mm512_min_epi32( a, b );
-        }
-
-        FS_INLINE static int32v Max_i32( int32v a, int32v b )
-        {
-            return _mm512_max_epi32( a, b );
-        }
-
-        // Bitwise
-
-        FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
-        {
-            return _mm512_andnot_ps( b, a );
-        }
-
-        FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
-        {
-            return _mm512_andnot_si512( b, a );
-        }
-
-        FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
-        {
-            return Casti32_f32( _mm512_srli_epi32( Castf32_i32( a ), b ) );
-        }
-
-        FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
-        {
-            return _mm512_srli_epi32( a, b );
-        }
-
-        // Abs
-
-        FS_INLINE static float32v Abs_f32( float32v a )
-        {
-            return _mm512_abs_ps( a );
-        }
-
-        FS_INLINE static int32v Abs_i32( int32v a )
-        {
-            return _mm512_abs_epi32( a );
-        }
-
-        // Float math
-
-        FS_INLINE static float32v Sqrt_f32( float32v a )
-        {
-            return _mm512_sqrt_ps( a );
-        }
-
-        FS_INLINE static float32v InvSqrt_f32( float32v a )
-        {
-            return _mm512_rsqrt14_ps( a );
-        }
-
-        FS_INLINE static float32v Reciprocal_f32( float32v a )
-        {
-            return _mm512_rcp14_ps( a );
-        }
-
-        // Floor, Ceil, Round
-
-        FS_INLINE static float32v Floor_f32( float32v a )
-        {
-            return _mm512_roundscale_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
-        }
-
-        FS_INLINE static float32v Ceil_f32( float32v a )
-        {
-            return _mm512_roundscale_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
-        }
-
-        FS_INLINE static float32v Round_f32( float32v a )
-        {
-            return _mm512_roundscale_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
-        }
-
-        //Mask
-
-        FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
-        {
-            return _mm512_maskz_mov_epi32( m, a );
-        }
-
-        FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
-        {
-            return _mm512_maskz_mov_ps( m, a );
-        }
-
-        FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
-        {
-            return _mm512_maskz_mov_epi32( ~m, a );
-        }
-
-        FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
-        {
-            return _mm512_maskz_mov_ps( ~m, a );
-        }
-
-        FS_INLINE static bool AnyMask_bool( mask32v m )
-        {
-            return m;
-        }
-    };
-
-#if FASTSIMD_COMPILE_AVX512
-    typedef AVX512_T<Level_AVX512> AVX512;
-
-#if FASTSIMD_USE_FMA
-    template<>
-    FS_INLINE AVX512::float32v FMulAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::float32v c )
-    {
-        return _mm512_fmadd_ps( a, b, c );
-    }
-
-    template<>
-    FS_INLINE AVX512::float32v FNMulAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::float32v c )
-    {
-        return _mm512_fnmadd_ps( a, b, c );
-    }
-#endif
-
-    // Masked float
-
-    template<>
-    FS_INLINE AVX512::float32v MaskedAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_add_ps( a, m, a, b );
-    }
-
-    template<>
-    FS_INLINE AVX512::float32v MaskedSub_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_sub_ps( a, m, a, b );
-    }
-
-    template<>
-    FS_INLINE AVX512::float32v MaskedMul_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_mul_ps( a, m, a, b );
-    }
-
-    // Masked int32
-
-    template<>
-    FS_INLINE AVX512::int32v MaskedAdd_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_add_epi32( a, m, a, b );
-    }
-
-    template<>
-    FS_INLINE AVX512::int32v MaskedSub_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_sub_epi32( a, m, a, b );
-    }
-
-    template<>
-    FS_INLINE AVX512::int32v MaskedMul_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_mullo_epi32( a, m, a, b );
-    }
-
-    // NMasked float
-
-    template<>
-    FS_INLINE AVX512::float32v NMaskedAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_add_ps( a, ~m, a, b );
-    }
-
-    template<>
-    FS_INLINE AVX512::float32v NMaskedSub_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_sub_ps( a, ~m, a, b );
-    }
-
-    template<>
-    FS_INLINE AVX512::float32v NMaskedMul_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_mul_ps( a, ~m, a, b );
-    }
-
-    // NMasked int32
-
-    template<>
-    FS_INLINE AVX512::int32v NMaskedAdd_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_add_epi32( a, ~m, a, b );
-    }
-
-    template<>
-    FS_INLINE AVX512::int32v NMaskedSub_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_sub_epi32( a, ~m, a, b );
-    }
-
-    template<>
-    FS_INLINE AVX512::int32v NMaskedMul_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
-    {
-        return _mm512_mask_mul_epi32( a, ~m, a, b );
-    }
-#endif
-    
-}
diff --git a/src/FastSIMD/Internal/NEON.h b/src/FastSIMD/Internal/NEON.h
deleted file mode 100644
index 344f9faf..00000000
--- a/src/FastSIMD/Internal/NEON.h
+++ /dev/null
@@ -1,424 +0,0 @@
-#pragma once
-
-#include <arm_neon.h>
-
-#include "VecTools.h"
-
-struct NEON_f32x4
-{
-    FASTSIMD_INTERNAL_TYPE_SET( NEON_f32x4, float32x4_t );
-
-    constexpr FS_INLINE static uint8_t Size()
-    {
-        return 4;
-    }
-
-    FS_INLINE static NEON_f32x4 Zero()
-    {
-        return vdupq_n_f32( 0 );
-    }
-
-    FS_INLINE static NEON_f32x4 Incremented()
-    {
-        alignas(16) const float f[4]{ 0.0f, 1.0f, 2.0f, 3.0f };
-        return vld1q_f32( f );
-    }
-
-    FS_INLINE explicit NEON_f32x4( float f )
-    {
-        *this = vdupq_n_f32( f );
-    }
-
-    FS_INLINE explicit NEON_f32x4( float f0, float f1, float f2, float f3 )
-    {
-        alignas(16) const float f[4]{ f0, f1, f2, f3 };
-        *this = vld1q_f32( f );
-    }
-
-    FS_INLINE NEON_f32x4& operator+=( const NEON_f32x4& rhs )
-    {
-        *this = vaddq_f32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_f32x4& operator-=( const NEON_f32x4& rhs )
-    {
-        *this = vsubq_f32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_f32x4& operator*=( const NEON_f32x4& rhs )
-    {
-        *this = vmulq_f32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_f32x4& operator/=( const NEON_f32x4& rhs )
-    {
-        float32x4_t reciprocal = vrecpeq_f32( rhs );
-        // use a couple Newton-Raphson steps to refine the estimate.  Depending on your
-        // application's accuracy requirements, you may be able to get away with only
-        // one refinement (instead of the two used here).  Be sure to test!
-        reciprocal = vmulq_f32( vrecpsq_f32( rhs, reciprocal ), reciprocal );
-        reciprocal = vmulq_f32( vrecpsq_f32( rhs, reciprocal ), reciprocal );
-
-        // and finally, compute a/b = a*(1/b)
-        *this = vmulq_f32( *this, reciprocal );
-        return *this;
-    }
-
-    FS_INLINE NEON_f32x4 operator-() const
-    {
-        return vnegq_f32( *this );
-    }
-};
-
-FASTSIMD_INTERNAL_OPERATORS_FLOAT( NEON_f32x4 )
-
-
-struct NEON_i32x4
-{
-    FASTSIMD_INTERNAL_TYPE_SET( NEON_i32x4, int32x4_t );
-
-    constexpr FS_INLINE static uint8_t Size()
-    {
-        return 4;
-    }
-
-    FS_INLINE static NEON_i32x4 Zero()
-    {
-        return vdupq_n_s32( 0 );
-    }
-
-    FS_INLINE static NEON_i32x4 Incremented()
-    {
-        alignas(16) const int32_t f[4]{ 0, 1, 2, 3 };
-        return vld1q_s32( f );
-    }
-
-    FS_INLINE explicit NEON_i32x4( int32_t i )
-    {
-        *this = vdupq_n_s32( i );
-    }
-
-    FS_INLINE explicit NEON_i32x4( int32_t i0, int32_t i1, int32_t i2, int32_t i3 )
-    {
-        alignas(16) const int32_t f[4]{ i0, i1, i2, i3 };
-        *this = vld1q_s32( f );
-    }
-
-    FS_INLINE NEON_i32x4& operator+=( const NEON_i32x4& rhs )
-    {
-        *this = vaddq_s32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_i32x4& operator-=( const NEON_i32x4& rhs )
-    {
-        *this = vsubq_s32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_i32x4& operator*=( const NEON_i32x4& rhs )
-    {
-        *this = vmulq_s32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_i32x4& operator&=( const NEON_i32x4& rhs )
-    {
-        *this = vandq_s32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_i32x4& operator|=( const NEON_i32x4& rhs )
-    {
-        *this = vorrq_s32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_i32x4& operator^=( const NEON_i32x4& rhs )
-    {
-        *this = veorq_s32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_i32x4& operator>>=( const int32_t rhs )
-    {
-        *this = vshrq_n_s32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_i32x4& operator<<=( const int32_t rhs )
-    {
-        *this = vshlq_n_s32( *this, rhs );
-        return *this;
-    }
-
-    FS_INLINE NEON_i32x4 operator~() const
-    {
-        return vmvnq_s32( *this );
-    }
-
-    FS_INLINE NEON_i32x4 operator-() const
-    {
-        return vnegq_s32( *this );
-    }
-};
-
-FASTSIMD_INTERNAL_OPERATORS_INT( NEON_i32x4, int32_t )
-
-template<FastSIMD::eLevel LEVEL_T>
-class FastSIMD_NEON_T
-{
-public:
-    static const FastSIMD::eLevel SIMD_Level = LEVEL_T;
-    static const size_t VectorSize = 128 / 8;
-
-    typedef NEON_f32x4 float32v;
-    typedef NEON_i32x4 int32v;
-    typedef NEON_i32x4 mask32v;
-
-    // Load
-
-    FS_INLINE static float32v Load_f32( void const* p )
-    {
-        return vld1q_f32( reinterpret_cast<float const*>(p) );
-    }
-
-    FS_INLINE static int32v Load_i32( void const* p )
-    {
-        return vld1q_s32( reinterpret_cast<int32_t const*>(p) );
-    }
-
-    // Store
-
-    FS_INLINE static void Store_f32( void* p, float32v a )
-    {
-        vst1q_f32( reinterpret_cast<float*>(p), a );
-    }
-
-    FS_INLINE static void Store_i32( void* p, int32v a )
-    {
-        vst1q_s32( reinterpret_cast<int32_t*>(p), a );
-    }
-
-    // Cast
-
-    FS_INLINE static float32v Casti32_f32( int32v a )
-    {
-        return vreinterpretq_f32_s32( a );
-    }
-
-    FS_INLINE static int32v Castf32_i32( float32v a )
-    {
-        return vreinterpretq_s32_f32( a );
-    }
-
-    // Convert
-
-    FS_INLINE static float32v Converti32_f32( int32v a )
-    {
-        return vcvtq_f32_s32( a );
-    }
-
-    FS_INLINE static int32v Convertf32_i32( float32v a )
-    {
-        return vcvtq_s32_f32( a );
-    }
-
-    // Comparisons
-
-    FS_INLINE static mask32v Equal_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_s32_u32( vceq_f32( a, b ) );
-    }
-
-    FS_INLINE static mask32v GreaterThan_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_s32_u32( vcgtq_f32( a, b ) );
-    }
-
-    FS_INLINE static mask32v LessThan_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_s32_u32( vcltq_f32( a, b ) );
-    }
-
-    FS_INLINE static mask32v GreaterEqualThan_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_s32_u32( vcgeq_f32( a, b ) );
-    }
-
-    FS_INLINE static mask32v LessEqualThan_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_s32_u32( vcleq_f32( a, b ) );
-    }
-
-    FS_INLINE static mask32v Equal_i32( int32v a, int32v b )
-    {
-        return vceq_s32( a, b );
-    }
-
-    FS_INLINE static mask32v GreaterThan_i32( int32v a, int32v b )
-    {
-        return vcgtq_s32( a, b );
-    }
-
-    FS_INLINE static mask32v LessThan_i32( int32v a, int32v b )
-    {
-        return vcltq_s32( a, b );
-    }
-
-    // Select
-
-    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
-    {
-        return vbslq_f32( vreinterpretq_u32_s32( mask ), b, a );
-    }
-
-    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
-    {
-        return vbslq_s32( vreinterpretq_u32_s32( mask ), b, a );
-    }
-
-    // Min, Max
-
-    FS_INLINE static float32v Min_f32( float32v a, float32v b )
-    {
-        return vminq_f32( a, b );
-    }
-
-    FS_INLINE static float32v Max_f32( float32v a, float32v b )
-    {
-        return vmaxq_f32( a, b );
-    }
-
-    FS_INLINE static int32v Min_i32( int32v a, int32v b )
-    {
-        return vminq_s32( a, b );
-    }
-
-    FS_INLINE static int32v Max_i32( int32v a, int32v b )
-    {
-        return vmaxq_s32( a, b );
-    }
-    
-    // Bitwise
-
-    FS_INLINE static float32v BitwiseAnd_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_f32_s32( vandq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
-    }
-
-    FS_INLINE static float32v BitwiseOr_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_f32_s32( vorrq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
-    }
-
-    FS_INLINE static float32v BitwiseXor_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_f32_s32( veorq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
-    }
-
-    FS_INLINE static float32v BitwiseNot_f32( float32v a )
-    {
-        return vreinterpretq_f32_s32( vmvn_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
-    }
-
-    FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
-    {
-        return vreinterpretq_f32_s32( vandq_s32( vreinterpretq_s32_f32( a ), vmvn_s32( vreinterpretq_s32_f32( b ) ) ) );
-    }
-
-    FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
-    {
-        return vandq_s32( a , vmvn_s32( b ) );
-    }
-
-    // Abs
-
-    FS_INLINE static float32v Abs_f32( float32v a )
-    {
-        return vabsq_f32( a );
-    }
-
-    FS_INLINE static int32v Abs_i32( int32v a )
-    {
-        return vabsq_s32( a );
-    }
-
-    // Float math
-
-    FS_INLINE static float32v Sqrt_f32( float32v a )
-    {
-        return vsqrtq_f32( a );
-    }
-
-    FS_INLINE static float32v InvSqrt_f32( float32v a )
-    {
-        return vrsqrteq_f32( a );
-    }
-
-    // Floor, Ceil, Round: http://dss.stephanierct.com/DevBlog/?p=8
-
-    FS_INLINE static float32v Floor_f32( float32v a )
-    {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-        const float32x4_t f1 = vdupq_n_f32( 1.0f ); //_mm_castsi128_ps( _mm_slli_epi32( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 25 ), 23 ) );
-#else
-        const float32x4_t f1 = vdupq_n_f32( 1.0f );
-#endif
-        float32x4_t fval = vrndmq_f32( a );
-
-        return vsubq_f32( fval, BitwiseAnd_f32( vcltq_f32( a, fval ), f1 ) );
-    }
-
-    FS_INLINE static float32v Ceil_f32( float32v a )
-    {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-        const __m128 f1 = vdupq_n_f32( 1.0f ); //_mm_castsi128_ps( _mm_slli_epi32( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 25 ), 23 ) );
-#else
-        const __m128 f1 = vdupq_n_f32( 1.0f );
-#endif
-        float32x4_t fval = vrndmq_f32( a );
-
-        return vaddq_f32( fval, BitwiseAnd_f32( vcltq_f32( a, fval ), f1 ) );
-    }
-
-    template<FastSIMD::eLevel L = LEVEL_T>
-    FS_INLINE static FS_ENABLE_IF( L < FastSIMD::ELevel_SSE41, float32v ) Round_f32( float32v a )
-    {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-        const __m128 nearest2 = _mm_castsi128_ps( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 2 ) );
-#else
-        const __m128 nearest2 = vdupq_n_f32( 1.99999988079071044921875f );
-#endif
-        __m128 aTrunc = _mm_cvtepi32_ps( _mm_cvttps_epi32( a ) );       // truncate a
-        __m128 rmd = _mm_sub_ps( a, aTrunc );                           // get remainder
-        __m128 rmd2 = _mm_mul_ps( rmd, nearest2 );                      // mul remainder by near 2 will yield the needed offset
-        __m128 rmd2Trunc = _mm_cvtepi32_ps( _mm_cvttps_epi32( rmd2 ) ); // after being truncated of course
-        return _mm_add_ps( aTrunc, rmd2Trunc );
-    }
-
-    template<FastSIMD::eLevel L = LEVEL_T>
-    FS_INLINE static FS_ENABLE_IF( L >= FastSIMD::ELevel_SSE41, float32v ) Round_f32( float32v a )
-    {
-        return vrndnq_f32( a );
-    }
-
-    // Mask
-
-    FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
-    {
-        return a & m;
-    }
-
-    FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
-    {
-        return BitwiseAnd_f32( a, vreinterpretq_f32_s32( m ) );
-    }
-};
-
-#if FASTSIMD_COMPILE_NEON
-typedef FastSIMD_SSE_T<FastSIMD::ELevel_NEON> FastSIMD_NEON;
-#endif
diff --git a/src/FastSIMD/Internal/SSE.h b/src/FastSIMD/Internal/SSE.h
deleted file mode 100644
index 664a005b..00000000
--- a/src/FastSIMD/Internal/SSE.h
+++ /dev/null
@@ -1,574 +0,0 @@
-#pragma once
-
-#ifdef __GNUG__
-#include <x86intrin.h>
-#else
-#include <intrin.h>
-#endif
-
-#include "VecTools.h"
-
-namespace FastSIMD
-{
-    struct SSE_f32x4
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( SSE_f32x4, __m128 );
-
-        FS_INLINE static SSE_f32x4 Incremented()
-        {
-            return _mm_set_ps( 3.0f, 2.0f, 1.0f, 0.0f );
-        }
-
-        FS_INLINE explicit SSE_f32x4( float f )
-        {
-            *this = _mm_set1_ps( f );
-        }
-
-        FS_INLINE explicit SSE_f32x4( float f0, float f1, float f2, float f3 )
-        {
-            *this = _mm_set_ps( f3, f2, f1, f0 );
-        }
-
-        FS_INLINE SSE_f32x4& operator+=( const SSE_f32x4& rhs )
-        {
-            *this = _mm_add_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_f32x4& operator-=( const SSE_f32x4& rhs )
-        {
-            *this = _mm_sub_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_f32x4& operator*=( const SSE_f32x4& rhs )
-        {
-            *this = _mm_mul_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_f32x4& operator/=( const SSE_f32x4& rhs )
-        {
-            *this = _mm_div_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_f32x4& operator&=( const SSE_f32x4& rhs )
-        {
-            *this = _mm_and_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_f32x4& operator|=( const SSE_f32x4& rhs )
-        {
-            *this = _mm_or_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_f32x4& operator^=( const SSE_f32x4& rhs )
-        {
-            *this = _mm_xor_ps( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_f32x4 operator~() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m128i neg1 = _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() );
-#else
-            const __m128i neg1 = _mm_set1_epi32( -1 );
-#endif
-            return _mm_xor_ps( *this, _mm_castsi128_ps( neg1 ) );
-        }
-
-        FS_INLINE SSE_f32x4 operator-() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m128i minInt = _mm_slli_epi32( _mm_cmpeq_epi32( _mm_undefined_si128(), _mm_setzero_si128() ), 31 );
-#else
-            const __m128i minInt = _mm_set1_epi32( 0x80000000 );
-#endif
-            return _mm_xor_ps( *this, _mm_castsi128_ps( minInt ) );
-        }
-
-        FS_INLINE __m128i operator==( const SSE_f32x4& rhs )
-        {
-            return _mm_castps_si128( _mm_cmpeq_ps( *this, rhs ) );
-        }
-
-        FS_INLINE __m128i operator!=( const SSE_f32x4& rhs )
-        {
-            return _mm_castps_si128( _mm_cmpneq_ps( *this, rhs ) );
-        }
-
-        FS_INLINE __m128i operator>( const SSE_f32x4& rhs )
-        {
-            return _mm_castps_si128( _mm_cmpgt_ps( *this, rhs ) );
-        }
-
-        FS_INLINE __m128i operator<( const SSE_f32x4& rhs )
-        {
-            return _mm_castps_si128( _mm_cmplt_ps( *this, rhs ) );
-        }
-
-        FS_INLINE __m128i operator>=( const SSE_f32x4& rhs )
-        {
-            return _mm_castps_si128( _mm_cmpge_ps( *this, rhs ) );
-        }
-
-        FS_INLINE __m128i operator<=( const SSE_f32x4& rhs )
-        {
-            return _mm_castps_si128( _mm_cmple_ps( *this, rhs ) );
-        }
-    };
-
-    FASTSIMD_INTERNAL_OPERATORS_FLOAT( SSE_f32x4 )
-
-
-    template<eLevel LEVEL_T>
-    struct SSE_i32x4
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( SSE_i32x4, __m128i );
-
-        FS_INLINE static SSE_i32x4 Incremented()
-        {
-            return _mm_set_epi32( 3, 2, 1, 0 );
-        }
-
-        FS_INLINE explicit SSE_i32x4( int32_t i )
-        {
-            *this = _mm_set1_epi32( i );
-        }
-
-        FS_INLINE explicit SSE_i32x4( int32_t i0, int32_t i1, int32_t i2, int32_t i3 )
-        {
-            *this = _mm_set_epi32( i3, i2, i1, i0 );
-        }
-
-        FS_INLINE SSE_i32x4& operator+=( const SSE_i32x4& rhs )
-        {
-            *this = _mm_add_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_i32x4& operator-=( const SSE_i32x4& rhs )
-        {
-            *this = _mm_sub_epi32( *this, rhs );
-            return *this;
-        }
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
-        FS_INLINE SSE_i32x4& operator*=( const SSE_i32x4& rhs )
-        {
-            __m128i tmp1 = _mm_mul_epu32( *this, rhs ); /* mul 2,0*/
-            __m128i tmp2 = _mm_mul_epu32( _mm_srli_si128( *this, 4 ), _mm_srli_si128( rhs, 4 ) ); /* mul 3,1 */
-            *this = _mm_unpacklo_epi32( _mm_shuffle_epi32( tmp1, _MM_SHUFFLE( 0, 0, 2, 0 ) ), _mm_shuffle_epi32( tmp2, _MM_SHUFFLE( 0, 0, 2, 0 ) ) ); /* shuffle results to [63..0] and pack */
-            return *this;
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
-        FS_INLINE SSE_i32x4& operator*=( const SSE_i32x4& rhs )
-        {
-            *this = _mm_mullo_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_i32x4& operator&=( const SSE_i32x4& rhs )
-        {
-            *this = _mm_and_si128( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_i32x4& operator|=( const SSE_i32x4& rhs )
-        {
-            *this = _mm_or_si128( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_i32x4& operator^=( const SSE_i32x4& rhs )
-        {
-            *this = _mm_xor_si128( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_i32x4& operator>>=( int32_t rhs )
-        {
-            *this = _mm_srai_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_i32x4& operator<<=( int32_t rhs )
-        {
-            *this = _mm_slli_epi32( *this, rhs );
-            return *this;
-        }
-
-        FS_INLINE SSE_i32x4 operator~() const
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m128i neg1 = _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() );
-#else
-            const __m128i neg1 = _mm_set1_epi32( -1 );
-#endif
-            return _mm_xor_si128( *this, neg1 );
-        }
-
-        FS_INLINE SSE_i32x4 operator-() const
-        {
-            return _mm_sub_epi32( _mm_setzero_si128(), *this );
-        }
-
-        FS_INLINE SSE_i32x4 operator==( const SSE_i32x4& rhs )
-        {
-            return _mm_cmpeq_epi32( *this, rhs );
-        }
-
-        FS_INLINE SSE_i32x4 operator>( const SSE_i32x4& rhs )
-        {
-            return _mm_cmpgt_epi32( *this, rhs );
-        }
-
-        FS_INLINE SSE_i32x4 operator<( const SSE_i32x4& rhs )
-        {
-            return _mm_cmplt_epi32( *this, rhs );
-        }
-    };
-
-    FASTSIMD_INTERNAL_OPERATORS_INT_TEMPLATED( SSE_i32x4, int32_t )
-
-    template<eLevel LEVEL_T>
-    class SSE_T
-    {
-    public:
-        static_assert( LEVEL_T >= Level_SSE && LEVEL_T <= Level_SSE42, "Cannot create template with unsupported SIMD level" );
-
-        static constexpr eLevel SIMD_Level = LEVEL_T;
-
-        template<size_t ElementSize>
-        static constexpr size_t VectorSize = (128 / 8) / ElementSize;
-
-        typedef SSE_f32x4          float32v;
-        typedef SSE_i32x4<LEVEL_T> int32v;
-        typedef SSE_i32x4<LEVEL_T> mask32v;
-
-        // Load
-
-        FS_INLINE static float32v Load_f32( void const* p )
-        {
-            return _mm_loadu_ps( reinterpret_cast<float const*>(p) );
-        }
-
-        FS_INLINE static int32v Load_i32( void const* p )
-        {
-            return _mm_loadu_si128( reinterpret_cast<__m128i const*>(p) );
-        }
-
-        // Store
-
-        FS_INLINE static void Store_f32( void* p, float32v a )
-        {
-            _mm_storeu_ps( reinterpret_cast<float*>(p), a );
-        }
-
-        FS_INLINE static void Store_i32( void* p, int32v a )
-        {
-            _mm_storeu_si128( reinterpret_cast<__m128i*>(p), a );
-        }
-
-        // Extract
-
-        FS_INLINE static float Extract0_f32( float32v a )
-        {
-            return _mm_cvtss_f32( a );
-        }
-
-        FS_INLINE static int32_t Extract0_i32( int32v a )
-        {
-            return _mm_cvtsi128_si32( a );
-        }
-
-        FS_INLINE static float Extract_f32( float32v a, size_t idx )
-        {
-            float f[4];
-            Store_f32( &f, a );
-            return f[idx & 3];
-        }
-
-        FS_INLINE static int32_t Extract_i32( int32v a, size_t idx )
-        {
-            int32_t i[4];
-            Store_i32( &i, a );
-            return i[idx & 3];
-        }
-
-        // Cast
-
-        FS_INLINE static float32v Casti32_f32( int32v a )
-        {
-            return _mm_castsi128_ps( a );
-        }
-
-        FS_INLINE static int32v Castf32_i32( float32v a )
-        {
-            return _mm_castps_si128( a );
-        }
-
-        // Convert
-
-        FS_INLINE static float32v Converti32_f32( int32v a )
-        {
-            return _mm_cvtepi32_ps( a );
-        }
-
-        FS_INLINE static int32v Convertf32_i32( float32v a )
-        {
-            return _mm_cvtps_epi32( a );
-        }
-
-        // Select
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
-        FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
-        {
-            __m128 mf = _mm_castsi128_ps( m );
-
-            return _mm_xor_ps( b, _mm_and_ps( mf, _mm_xor_ps( a, b ) ) );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
-        FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
-        {
-            return  _mm_blendv_ps( b, a, _mm_castsi128_ps( m ) );
-        }
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
-        FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
-        {
-            return _mm_xor_si128( b, _mm_and_si128( m, _mm_xor_si128( a, b ) ) );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
-        FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
-        {
-            return _mm_castps_si128( _mm_blendv_ps( _mm_castsi128_ps( b ), _mm_castsi128_ps( a ), _mm_castsi128_ps( m ) ) );
-        }
-
-        // Min, Max
-
-        FS_INLINE static float32v Min_f32( float32v a, float32v b )
-        {
-            return _mm_min_ps( a, b );
-        }
-
-        FS_INLINE static float32v Max_f32( float32v a, float32v b )
-        {
-            return _mm_max_ps( a, b );
-        }
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
-        FS_INLINE static int32v Min_i32( int32v a, int32v b )
-        {
-            return Select_i32( a < b, a, b );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
-        FS_INLINE static int32v Min_i32( int32v a, int32v b )
-        {
-            return _mm_min_epi32( a, b );
-        }
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
-        FS_INLINE static int32v Max_i32( int32v a, int32v b )
-        {
-            return Select_i32( a > b, a, b );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
-        FS_INLINE static int32v Max_i32( int32v a, int32v b )
-        {
-            return _mm_max_epi32( a, b );
-        }
-
-        // Bitwise
-
-        FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
-        {
-            return _mm_andnot_ps( b, a );
-        }
-
-        FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
-        {
-            return _mm_andnot_si128( b, a );
-        }
-
-        FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
-        {
-            return Casti32_f32( _mm_srli_epi32( Castf32_i32( a ), b ) );
-        }
-
-        FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
-        {
-            return _mm_srli_epi32( a, b );
-        }
-
-        // Abs
-
-        FS_INLINE static float32v Abs_f32( float32v a )
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m128i intMax = _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 1 );
-#else
-            const __m128i intMax = _mm_set1_epi32( 0x7FFFFFFF );
-#endif
-            return _mm_and_ps( a, _mm_castsi128_ps( intMax ) );
-        }
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSSE3)>* = nullptr>
-        FS_INLINE static int32v Abs_i32( int32v a )
-        {
-            __m128i signMask = _mm_srai_epi32( a, 31 );
-            return _mm_sub_epi32( _mm_xor_si128( a, signMask ), signMask );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSSE3)>* = nullptr>
-        FS_INLINE static int32v Abs_i32( int32v a )
-        {
-            return _mm_abs_epi32( a );
-        }
-
-        // Float math
-
-        FS_INLINE static float32v Sqrt_f32( float32v a )
-        {
-            return _mm_sqrt_ps( a );
-        }
-
-        FS_INLINE static float32v InvSqrt_f32( float32v a )
-        {
-            return _mm_rsqrt_ps( a );
-        }
-
-        FS_INLINE static float32v Reciprocal_f32( float32v a )
-        {
-            return _mm_rcp_ps( a );
-        }
-
-        // Floor, Ceil, Round: http://dss.stephanierct.com/DevBlog/?p=8
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
-        FS_INLINE static float32v Floor_f32( float32v a )
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m128 f1 = _mm_castsi128_ps( _mm_slli_epi32( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 25 ), 23 ) );
-#else
-            const __m128 f1 = _mm_set1_ps( 1.0f );
-#endif
-            __m128 fval = _mm_cvtepi32_ps( _mm_cvttps_epi32( a ) );
-
-            return _mm_sub_ps( fval, _mm_and_ps( _mm_cmplt_ps( a, fval ), f1 ) );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
-        FS_INLINE static float32v Floor_f32( float32v a )
-        {
-            return _mm_round_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
-        FS_INLINE static float32v Ceil_f32( float32v a )
-        {
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m128 f1 = _mm_castsi128_ps( _mm_slli_epi32( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 25 ), 23 ) );
-#else
-            const __m128 f1 = _mm_set1_ps( 1.0f );
-#endif
-            __m128 fval = _mm_cvtepi32_ps( _mm_cvttps_epi32( a ) );
-            __m128 cmp = _mm_cmplt_ps( fval, a );
-            return _mm_add_ps( fval, _mm_and_ps( cmp, f1 ) );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
-        FS_INLINE static float32v Ceil_f32( float32v a )
-        {
-            return _mm_round_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
-        FS_INLINE static float32v Round_f32( float32v a )
-        {
-            __m128 aSign = _mm_and_ps( a, _mm_castsi128_ps( int32v( 0x80000000 ) ) );
-
-            return _mm_cvtepi32_ps( _mm_cvttps_epi32( a + float32v(_mm_or_ps( aSign, float32v( 0.5f ) ) ) ) );
-
-#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
-            const __m128 nearest2 = _mm_castsi128_ps( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 2 ) );
-#else
-            const __m128 nearest2 = _mm_set1_ps( 1.99999988079071044921875f );
-#endif
-            __m128 aTrunc = _mm_cvtepi32_ps( _mm_cvttps_epi32( a ) );       // truncate a
-            __m128 rmd = _mm_sub_ps( a, aTrunc );                           // get remainder
-            __m128 rmd2 = _mm_mul_ps( rmd, nearest2 );                   // mul remainder by near 2 will yield the needed offset
-            __m128 rmd2Trunc = _mm_cvtepi32_ps( _mm_cvttps_epi32( rmd2 ) ); // after being truncated of course
-            return _mm_add_ps( aTrunc, rmd2Trunc );
-        }
-        
-        template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
-        FS_INLINE static float32v Round_f32( float32v a )
-        {
-            return _mm_round_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
-        }
-
-        // Mask
-
-        FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
-        {
-            return a & m;
-        }
-
-        FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
-        {
-            return _mm_and_ps( a, _mm_castsi128_ps( m ) );
-        }
-
-        FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
-        {
-            return _mm_andnot_si128( m, a );
-        }
-
-        FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
-        {
-            return _mm_andnot_ps( _mm_castsi128_ps( m ), a );
-        }
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<( L < Level_SSE41 )>* = nullptr>
-        FS_INLINE static bool AnyMask_bool( mask32v m )
-        {
-            return _mm_movemask_ps( _mm_castsi128_ps( m ) );
-        }
-
-        template<eLevel L = LEVEL_T, std::enable_if_t<( L >= Level_SSE41 )>* = nullptr>
-        FS_INLINE static bool AnyMask_bool( mask32v m )
-        {
-            return !_mm_testz_si128( m, m );
-        }
-    };
-
-#if FASTSIMD_COMPILE_SSE
-    typedef SSE_T<Level_SSE>   SSE;
-#endif
-#if FASTSIMD_COMPILE_SSE2
-    typedef SSE_T<Level_SSE2>  SSE2;
-#endif
-#if FASTSIMD_COMPILE_SSE3
-    typedef SSE_T<Level_SSE3>  SSE3;
-#endif
-#if FASTSIMD_COMPILE_SSSE3
-    typedef SSE_T<Level_SSSE3> SSSE3;
-#endif
-#if FASTSIMD_COMPILE_SSE41
-    typedef SSE_T<Level_SSE41> SSE41;
-#endif
-#if FASTSIMD_COMPILE_SSE42
-    typedef SSE_T<Level_SSE42> SSE42;
-#endif
-}
diff --git a/src/FastSIMD/Internal/Scalar.h b/src/FastSIMD/Internal/Scalar.h
deleted file mode 100644
index 7831c7e5..00000000
--- a/src/FastSIMD/Internal/Scalar.h
+++ /dev/null
@@ -1,451 +0,0 @@
-#pragma once
-
-#include "VecTools.h"
-#include <algorithm>
-#include <cmath>
-
-namespace FastSIMD
-{
-    template<typename OUT, typename IN>
-    OUT ScalarCast( IN a )
-    {
-        union
-        {
-            OUT o;
-            IN  i;
-        } u;
-
-        u.i = a;
-        return u.o;
-    }
-
-    struct Scalar_Float
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( Scalar_Float, float );
-
-        FS_INLINE static Scalar_Float Incremented()
-        {
-            return 0.0f;
-        }
-
-        FS_INLINE Scalar_Float& operator+=( const Scalar_Float& rhs )
-        {
-            vector += rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Float& operator-=( const Scalar_Float& rhs )
-        {
-            vector -= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Float& operator*=( const Scalar_Float& rhs )
-        {
-            vector *= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Float& operator/=( const Scalar_Float& rhs )
-        {
-            vector /= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Float& operator&=( const Scalar_Float& rhs )
-        {
-            *this = ScalarCast<float>( ScalarCast<int32_t, float>( *this ) & ScalarCast<int32_t, float>( rhs ) );
-            return *this;
-        }
-
-        FS_INLINE Scalar_Float& operator|=( const Scalar_Float& rhs )
-        {
-            *this = ScalarCast<float>( ScalarCast<int32_t, float>( *this ) | ScalarCast<int32_t, float>( rhs ) );
-            return *this;
-        }
-
-        FS_INLINE Scalar_Float& operator^=( const Scalar_Float& rhs )
-        {
-            *this = ScalarCast<float>( ScalarCast<int32_t, float>( *this ) ^ ScalarCast<int32_t, float>( rhs ) );
-            return *this;
-        }
-
-        FS_INLINE Scalar_Float operator~() const
-        {
-            return ScalarCast<float>( ~ScalarCast<int32_t, float>( *this ) );
-        }
-
-        FS_INLINE Scalar_Float operator-() const
-        {
-            return -vector;
-        }
-
-        FS_INLINE bool operator==( const Scalar_Float& rhs )
-        {
-            return vector == rhs;
-        }
-
-        FS_INLINE bool operator!=( const Scalar_Float& rhs )
-        {
-            return vector != rhs;
-        }
-
-        FS_INLINE bool operator>( const Scalar_Float& rhs )
-        {
-            return vector > rhs;
-        }
-
-        FS_INLINE bool operator<( const Scalar_Float& rhs )
-        {
-            return vector < rhs;
-        }
-
-        FS_INLINE bool operator>=( const Scalar_Float& rhs )
-        {
-            return vector >= rhs;
-        }
-
-        FS_INLINE bool operator<=( const Scalar_Float& rhs )
-        {
-            return vector <= rhs;
-        }
-    };
-
-    FASTSIMD_INTERNAL_OPERATORS_FLOAT( Scalar_Float )
-
-
-    struct Scalar_Int
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( Scalar_Int, int32_t );
-
-        FS_INLINE static Scalar_Int Incremented()
-        {
-            return 0;
-        }
-
-        FS_INLINE Scalar_Int& operator+=( const Scalar_Int& rhs )
-        {
-            vector += rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Int& operator-=( const Scalar_Int& rhs )
-        {
-            vector -= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Int& operator*=( const Scalar_Int& rhs )
-        {
-            vector *= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Int& operator&=( const Scalar_Int& rhs )
-        {
-            vector &= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Int& operator|=( const Scalar_Int& rhs )
-        {
-            vector |= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Int& operator^=( const Scalar_Int& rhs )
-        {
-            vector ^= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Int& operator>>=( int32_t rhs )
-        {
-            vector >>= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Int& operator<<=( int32_t rhs )
-        {
-            vector <<= rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Int operator~() const
-        {
-            return ~vector;
-        }
-
-        FS_INLINE Scalar_Int operator-() const
-        {
-            return -vector;
-        }
-
-        FS_INLINE bool operator==( const Scalar_Int& rhs )
-        {
-            return vector == rhs;
-        }
-
-        FS_INLINE bool operator>( const Scalar_Int& rhs )
-        {
-            return vector > rhs;
-        }
-
-        FS_INLINE bool operator<( const Scalar_Int& rhs )
-        {
-            return vector < rhs;
-        }
-    };
-
-    FASTSIMD_INTERNAL_OPERATORS_INT( Scalar_Int, int32_t )
-
-
-    struct Scalar_Mask
-    {
-        FASTSIMD_INTERNAL_TYPE_SET( Scalar_Mask, bool );
-
-        FS_INLINE Scalar_Mask operator~() const
-        {
-            return !vector;
-        }
-
-        FS_INLINE Scalar_Mask& operator&=( const Scalar_Mask& rhs )
-        {
-            vector = vector && rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Mask& operator|=( const Scalar_Mask& rhs )
-        {
-            vector = vector || rhs;
-            return *this;
-        }
-
-        FS_INLINE Scalar_Mask operator&( const Scalar_Mask& rhs )
-        {
-            return vector && rhs;
-        }
-
-        FS_INLINE Scalar_Mask operator|( const Scalar_Mask& rhs )
-        {
-            return vector || rhs;
-        }
-    };
-
-    class Scalar
-    {
-    public:
-        static constexpr eLevel SIMD_Level = FastSIMD::Level_Scalar;
-
-        template<size_t ElementSize = 8>
-        static constexpr size_t VectorSize = sizeof(int32_t) / ElementSize;
-
-        typedef Scalar_Float float32v;
-        typedef Scalar_Int   int32v;
-        typedef Scalar_Mask  mask32v;
-
-        // Load
-
-        FS_INLINE static float32v Load_f32( void const* p )
-        {
-            return *reinterpret_cast<float32v const*>(p);
-        }
-
-        FS_INLINE static int32v Load_i32( void const* p )
-        {
-            return *reinterpret_cast<int32v const*>(p);
-        }
-
-        // Store
-
-        FS_INLINE static void Store_f32( void* p, float32v a )
-        {
-            *reinterpret_cast<float32v*>(p) = a;
-        }
-
-        FS_INLINE static void Store_i32( void* p, int32v a )
-        {
-            *reinterpret_cast<int32v*>(p) = a;
-        }
-
-        // Extract
-
-        FS_INLINE static float Extract0_f32( float32v a )
-        {
-            return a;
-        }
-
-        FS_INLINE static int32_t Extract0_i32( int32v a )
-        {
-            return a;
-        }
-
-        FS_INLINE static float Extract_f32( float32v a, size_t idx )
-        {
-            return a;
-        }
-
-        FS_INLINE static int32_t Extract_i32( int32v a, size_t idx )
-        {
-            return a;
-        }
-
-        // Cast
-
-        FS_INLINE static float32v Casti32_f32( int32v a )
-        {
-            return ScalarCast<float, int32_t>( a );
-        }
-
-        FS_INLINE static int32v Castf32_i32( float32v a )
-        {
-            return ScalarCast<int32_t, float>( a );
-        }
-
-        // Convert
-
-        FS_INLINE static float32v Converti32_f32( int32v a )
-        {
-            return static_cast<float>(a);
-        }
-
-        FS_INLINE static int32v Convertf32_i32( float32v a )
-        {
-            return static_cast<int32_t>(nearbyint( a ));
-        }
-
-        // Select
-
-        FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
-        {
-            return m ? a : b;
-        }
-
-        FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
-        {
-            return m ? a : b;
-        }
-
-        // Min, Max
-
-        FS_INLINE static float32v Min_f32( float32v a, float32v b )
-        {
-            return fminf( a, b );
-        }
-
-        FS_INLINE static float32v Max_f32( float32v a, float32v b )
-        {
-            return fmaxf( a, b );
-        }
-
-        FS_INLINE static int32v Min_i32( int32v a, int32v b )
-        {
-            return std::min( a, b );
-        }
-
-        FS_INLINE static int32v Max_i32( int32v a, int32v b )
-        {
-            return std::max( a, b );
-        }
-
-        // Bitwise       
-
-        FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
-        {
-            return Casti32_f32( Castf32_i32( a ) & ~Castf32_i32( b ) );
-        }
-
-        FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
-        {
-            return a & ~b;
-        }
-
-        FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
-        {
-            return Casti32_f32( int32_t( uint32_t( Castf32_i32( a ) ) >> b ) );
-        }
-
-        FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
-        {
-            return int32_t( uint32_t( a ) >> b );
-        }
-
-        // Abs
-
-        FS_INLINE static float32v Abs_f32( float32v a )
-        {
-            return fabsf( a );
-        }
-
-        FS_INLINE static int32v Abs_i32( int32v a )
-        {
-            return abs( a );
-        }
-
-        // Float math
-
-        FS_INLINE static float32v Sqrt_f32( float32v a )
-        {
-            return sqrtf( a );
-        }
-
-        FS_INLINE static float32v InvSqrt_f32( float32v a )
-        {
-            float xhalf = 0.5f * (float)a;
-            a = Casti32_f32( 0x5f3759df - ((int32_t)Castf32_i32( a ) >> 1) );
-            a *= (1.5f - xhalf * (float)a * (float)a);
-            return a;
-        }
-
-        FS_INLINE static float32v Reciprocal_f32( float32v a )
-        {
-            // pow( pow(x,-0.5), 2 ) = pow( x, -1 ) = 1.0 / x
-            a = Casti32_f32( (0xbe6eb3beU - (int32_t)Castf32_i32( a )) >> 1 );
-            return a * a;
-        }
-
-        // Floor, Ceil, Round
-
-        FS_INLINE static float32v Floor_f32( float32v a )
-        {
-            return floorf( a );
-        }
-
-        FS_INLINE static float32v Ceil_f32( float32v a )
-        {
-            return ceilf( a );
-        }
-
-        FS_INLINE static float32v Round_f32( float32v a )
-        {
-            return nearbyintf( a );
-        }
-
-        // Mask
-
-        FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
-        {
-            return m ? a : int32v(0);
-        }
-
-        FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
-        {
-            return m ? a : float32v(0);
-        }
-
-        FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
-        {
-            return m ? int32v(0) : a;
-        }
-
-        FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
-        {
-            return m ? float32v(0) : a;
-        }
-
-        FS_INLINE static bool AnyMask_bool( mask32v m )
-        {
-            return m;
-        }
-    };
-}
diff --git a/src/FastSIMD/Internal/SourceBuilder.inl b/src/FastSIMD/Internal/SourceBuilder.inl
deleted file mode 100644
index 34adda5e..00000000
--- a/src/FastSIMD/Internal/SourceBuilder.inl
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-#include "FastSIMD/FastSIMD.h"
-
-template<typename CLASS, typename FS>
-class FS_T;
-
-template<typename CLASS, FastSIMD::eLevel LEVEL>
-CLASS* FastSIMD::ClassFactory( FastSIMD::MemoryAllocator allocator ) 
-{
-    if constexpr( ( CLASS::Supported_SIMD_Levels & LEVEL & FastSIMD::COMPILED_SIMD_LEVELS ) != 0 )
-    {
-        static_assert( std::is_base_of_v<CLASS, FS_T<CLASS, FS_SIMD_CLASS>> );
-
-        if( allocator )
-        {
-            void* alloc = allocator( sizeof( FS_T<CLASS, FS_SIMD_CLASS> ), alignof( FS_T<CLASS, FS_SIMD_CLASS> ) );
-            
-            return new( alloc ) FS_T<CLASS, FS_SIMD_CLASS>;
-        }
-
-        return new FS_T<CLASS, FS_SIMD_CLASS>;        
-    }
-    return nullptr; 
-}
-
-#define FASTSIMD_BUILD_CLASS( CLASS ) \
-template FASTSIMD_API CLASS* FastSIMD::ClassFactory<CLASS, FS_SIMD_CLASS::SIMD_Level>( FastSIMD::MemoryAllocator );
-
-#include "../FastSIMD_BuildList.inl"
diff --git a/src/FastSIMD/Internal/VecTools.h b/src/FastSIMD/Internal/VecTools.h
deleted file mode 100644
index d3c42581..00000000
--- a/src/FastSIMD/Internal/VecTools.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#pragma once
-
-#include <cinttypes>
-
-#include "FastSIMD/FastSIMD.h"
-#include "FastSIMD/FunctionList.h"
-
-#define FASTSIMD_INTERNAL_TYPE_SET( CLASS, TYPE )                           \
-TYPE vector;									                            \
-FS_INLINE CLASS() { }                                                       \
-FS_INLINE CLASS( const TYPE& v ) : vector(v) {};	                        \
-FS_INLINE CLASS& operator = ( const TYPE& v ) { vector = v; return *this; } \
-FS_INLINE operator TYPE() const { return vector; }
-
-#define FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, OPERATOR, OPERATOREQUALS )	\
-FS_INLINE static TYPE operator OPERATOR ( TYPE lhs, TYPE2 rhs )             \
-{											                                \
-    lhs OPERATOREQUALS rhs;								                    \
-    return lhs;								                                \
-}
-
-#define FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE, TYPE2, OPERATOR, OPERATOREQUALS ) \
-template<FastSIMD::eLevel L>                                                          \
-FS_INLINE static TYPE operator OPERATOR ( TYPE lhs, TYPE2 rhs )                       \
-{											                                          \
-    lhs OPERATOREQUALS rhs;								                              \
-    return lhs;								                                          \
-}
-
-#define FASTSIMD_INTERNAL_OPERATORS_FLOAT( TYPE )      \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, +, += ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, -, -= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, *, *= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, /, /= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, &, &= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, |, |= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, ^, ^= ) 
-
-#define FASTSIMD_INTERNAL_OPERATORS_FLOAT_TEMPLATED( TYPE )            \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, +, += ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, -, -= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, *, *= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, /, /= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, &, &= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, |, |= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, ^, ^= ) 
-
-#define FASTSIMD_INTERNAL_OPERATORS_INT( TYPE, TYPE2 ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, +, += ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, -, -= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, *, *= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, &, &= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, |, |= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, ^, ^= ) \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, >>, >>= )     \
-FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, <<, <<= )
-
-#define FASTSIMD_INTERNAL_OPERATORS_INT_TEMPLATED( TYPE, TYPE2 )       \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, +, += ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, -, -= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, *, *= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, &, &= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, |, |= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, ^, ^= ) \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, TYPE2, >>, >>= )        \
-FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, TYPE2, <<, <<= )

From 4d95b02a3e48c1eb5b64bdb3700459e9b040dcdb Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Mon, 13 Jun 2022 20:11:04 +0100
Subject: [PATCH 002/139] Latest FastSIMD

---
 src/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a6a43cba..d7795c17 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 0ee3529e6e455264e206d7ed554b90105bbe716b
+    GIT_TAG 0cac980919a2ca8aa19b1bb95e468784ef15c6fb
     EXCLUDE_FROM_ALL YES
     OPTIONS
         "BUILD_SHARED_LIBS OFF"

From dc5475395953a44d6a91ba164d1117defacfd118 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sat, 18 Jun 2022 12:46:54 +0100
Subject: [PATCH 003/139] Convert build list for nuFastSIMD

---
 include/FastNoise/FastNoise_BuildList.inl | 105 +++++++++++-----------
 src/FastNoise/Metadata.cpp                |   6 +-
 2 files changed, 52 insertions(+), 59 deletions(-)

diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index d00d547c..f59d14aa 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -1,11 +1,8 @@
 #pragma once
 
-#ifndef FASTSIMD_BUILD_CLASS
-#error Do not include this file
-#endif
-
-#ifndef FASTNOISE_CLASS
-#define FASTNOISE_CLASS( CLASS ) FastNoise::CLASS
+#ifndef FASTNOISE_REGISTER_NODE
+#define FASTNOISE_REGISTER_NODE( CLASS ) \
+template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
@@ -79,51 +76,51 @@
 // Always add to bottom of list,
 // inserting will break existing encoded node trees
 
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Constant ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( White ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Checkerboard ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( SineWave ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PositionOutput ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DistanceToPoint ) )
-                       
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Value ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Perlin ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Simplex ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( OpenSimplex2 ) )
-                       
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularValue ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularDistance ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularLookup ) )
-                       
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalFBm ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalPingPong ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalRidged ) )
-                       
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpGradient ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpFractalProgressive ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpFractalIndependant ) )
-                       
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainScale ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainOffset ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainRotate ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( SeedOffset ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Remap ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( ConvertRGBA8 ) )
-                       
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Add ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Subtract ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Multiply ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Divide ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Min ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Max ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( MinSmooth ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( MaxSmooth ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Fade ) )
-                       
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Terrace ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PowFloat ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PowInt ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainAxisScale ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( AddDimension ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( RemoveDimension ) )
-FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( GeneratorCache ) )
+FASTNOISE_REGISTER_NODE( Constant );
+FASTNOISE_REGISTER_NODE( White );
+FASTNOISE_REGISTER_NODE( Checkerboard );
+FASTNOISE_REGISTER_NODE( SineWave );
+FASTNOISE_REGISTER_NODE( PositionOutput );
+FASTNOISE_REGISTER_NODE( DistanceToPoint );
+                    
+FASTNOISE_REGISTER_NODE( Value );
+FASTNOISE_REGISTER_NODE( Perlin );
+FASTNOISE_REGISTER_NODE( Simplex );
+FASTNOISE_REGISTER_NODE( OpenSimplex2 );
+                    
+FASTNOISE_REGISTER_NODE( CellularValue );
+FASTNOISE_REGISTER_NODE( CellularDistance );
+FASTNOISE_REGISTER_NODE( CellularLookup );
+                    
+FASTNOISE_REGISTER_NODE( FractalFBm );
+FASTNOISE_REGISTER_NODE( FractalPingPong );
+FASTNOISE_REGISTER_NODE( FractalRidged );
+                    
+FASTNOISE_REGISTER_NODE( DomainWarpGradient );
+FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );
+FASTNOISE_REGISTER_NODE( DomainWarpFractalIndependant );
+                    
+FASTNOISE_REGISTER_NODE( DomainScale );
+FASTNOISE_REGISTER_NODE( DomainOffset );
+FASTNOISE_REGISTER_NODE( DomainRotate );
+FASTNOISE_REGISTER_NODE( SeedOffset );
+FASTNOISE_REGISTER_NODE( Remap );
+FASTNOISE_REGISTER_NODE( ConvertRGBA8 );
+                    
+FASTNOISE_REGISTER_NODE( Add );
+FASTNOISE_REGISTER_NODE( Subtract );
+FASTNOISE_REGISTER_NODE( Multiply );
+FASTNOISE_REGISTER_NODE( Divide );
+FASTNOISE_REGISTER_NODE( Min );
+FASTNOISE_REGISTER_NODE( Max );
+FASTNOISE_REGISTER_NODE( MinSmooth );
+FASTNOISE_REGISTER_NODE( MaxSmooth );
+FASTNOISE_REGISTER_NODE( Fade );
+                    
+FASTNOISE_REGISTER_NODE( Terrace );
+FASTNOISE_REGISTER_NODE( PowFloat );
+FASTNOISE_REGISTER_NODE( PowInt );
+FASTNOISE_REGISTER_NODE( DomainAxisScale );
+FASTNOISE_REGISTER_NODE( AddDimension );
+FASTNOISE_REGISTER_NODE( RemoveDimension );
+FASTNOISE_REGISTER_NODE( GeneratorCache );
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 0f796e95..bea7c23b 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -459,7 +459,7 @@ std::unique_ptr<const MetadataT<T>> CreateMetadataInstance( const char* classNam
 #define FASTNOISE_GET_MEMORY_ALLOCATOR() , &SmartNodeManager::Allocate
 #endif
 
-#define FASTSIMD_BUILD_CLASS2( CLASS ) \
+#define FASTNOISE_REGISTER_NODE( CLASS ) \
 const std::unique_ptr<const FastNoise::MetadataT<CLASS>> g ## CLASS ## Metadata = CreateMetadataInstance<CLASS>( #CLASS );\
 template<> FASTNOISE_API const FastNoise::Metadata& FastNoise::Impl::GetMetadata<CLASS>()\
 {\
@@ -474,9 +474,5 @@ SmartNode<> FastNoise::MetadataT<CLASS>::CreateNode( FastSIMD::eLevel l ) const\
     return SmartNode<>( FastSIMD::New<CLASS>( l FASTNOISE_GET_MEMORY_ALLOCATOR() ) );\
 }
 
-#define FASTSIMD_BUILD_CLASS( CLASS ) FASTSIMD_BUILD_CLASS2( CLASS )
-
-#define FASTNOISE_CLASS( CLASS ) CLASS
-
 #define FASTSIMD_INCLUDE_HEADER_ONLY
 #include "FastNoise/FastNoise_BuildList.inl"
\ No newline at end of file

From 57c4a35204a354995d9f7972b30c460f6ab6ded6 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Fri, 8 Jul 2022 22:29:23 +0100
Subject: [PATCH 004/139] Basics working with new FastSIMD

---
 CMakePresets.json                             |    6 +
 NoiseTool/FastNoiseNodeEditor.cpp             |   32 +-
 NoiseTool/FastNoiseNodeEditor.h               |    8 +-
 NoiseTool/MeshNoisePreview.cpp                |    8 +-
 NoiseTool/NoiseTexture.cpp                    |    4 +-
 NoiseTool/NoiseToolApp.cpp                    |   12 +-
 NoiseTool/NoiseToolApp.h                      |    2 +-
 include/FastNoise/FastNoise.h                 |   12 +-
 include/FastNoise/FastNoise_BuildList.inl     |  188 +--
 include/FastNoise/FastNoise_C.h               |    4 +-
 include/FastNoise/FastNoise_Config.h          |   13 +-
 .../FastNoise/Generators/BasicGenerators.h    |   22 +-
 .../FastNoise/Generators/BasicGenerators.inl  |   64 +-
 include/FastNoise/Generators/Blends.h         |  564 ++++---
 include/FastNoise/Generators/Blends.inl       |  324 ++--
 include/FastNoise/Generators/Cellular.h       |  252 ++--
 include/FastNoise/Generators/Cellular.inl     | 1311 ++++++++---------
 include/FastNoise/Generators/DomainWarp.h     |   94 +-
 include/FastNoise/Generators/DomainWarp.inl   |  405 +++--
 .../FastNoise/Generators/DomainWarpFractal.h  |   88 +-
 .../Generators/DomainWarpFractal.inl          |  146 +-
 include/FastNoise/Generators/Fractal.h        |  220 ++-
 include/FastNoise/Generators/Fractal.inl      |  212 ++-
 include/FastNoise/Generators/Generator.h      |    2 +-
 include/FastNoise/Generators/Generator.inl    |  235 ++-
 include/FastNoise/Generators/Modifiers.h      |   33 +-
 include/FastNoise/Generators/Modifiers.inl    |   99 +-
 include/FastNoise/Generators/Perlin.h         |   48 +-
 include/FastNoise/Generators/Perlin.inl       |  215 ++-
 include/FastNoise/Generators/Simplex.h        |   86 +-
 include/FastNoise/Generators/Simplex.inl      |  740 +++++-----
 include/FastNoise/Generators/Utils.inl        |  469 +++---
 include/FastNoise/Generators/Value.h          |   48 +-
 include/FastNoise/Generators/Value.inl        |  173 ++-
 include/FastNoise/Metadata.h                  |    2 +-
 include/FastNoise/SmartNode.h                 |    8 +-
 src/CMakeLists.txt                            |    4 +-
 src/FastNoise/FastNoise_C.cpp                 |    4 +-
 src/FastNoise/Metadata.cpp                    |    8 +-
 tests/FastNoiseBenchmark.cpp                  |    6 +-
 tests/SIMDUnitTest.cpp                        |   66 +-
 41 files changed, 3041 insertions(+), 3196 deletions(-)

diff --git a/CMakePresets.json b/CMakePresets.json
index 5054a60f..32aba147 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -8,6 +8,8 @@
       "binaryDir": "${sourceDir}/out/build/${presetName}",
       "installDir": "${sourceDir}/out/install/${presetName}",
       "cacheVariables": {
+        //"CMAKE_C_COMPILER": "clang-cl",
+        //"CMAKE_CXX_COMPILER": "clang-cl",
         "CPM_SOURCE_CACHE": {
           "value": "${sourceDir}/out/cpm-cache",
           "type": "PATH"
@@ -15,6 +17,10 @@
         "BUILD_SHARED_LIBS": {
           "value": "True",
           "type": "BOOL"
+        },
+        "CPM_Fast_SIMD_SOURCE": {
+          "value": "../../../../../FastSIMD",
+          "type": "PATH"
         }
       }
     },
diff --git a/NoiseTool/FastNoiseNodeEditor.cpp b/NoiseTool/FastNoiseNodeEditor.cpp
index c1a19846..955af253 100644
--- a/NoiseTool/FastNoiseNodeEditor.cpp
+++ b/NoiseTool/FastNoiseNodeEditor.cpp
@@ -88,7 +88,9 @@ void FastNoiseNodeEditor::Node::GeneratePreview( bool nodeTreeChanged, bool benc
     if( generator )
     {
         auto genRGB = FastNoise::New<FastNoise::ConvertRGBA8>( editor.mMaxSIMDLevel );
-        genRGB->SetSource( generator );
+        //genRGB->SetSource( generator );
+
+        FastNoise::SmartNode<FastNoise::ConvertRGBA8> l(nullptr);
         
         auto startTime = std::chrono::high_resolution_clock::now();
 
@@ -749,7 +751,7 @@ void FastNoiseNodeEditor::UpdateSelected()
     }
 }
 
-void FastNoiseNodeEditor::SetSIMDLevel( FastSIMD::eLevel lvl )
+void FastNoiseNodeEditor::SetSIMDLevel( FastSIMD::FeatureSet lvl )
 {
     mMaxSIMDLevel = lvl;
 
@@ -1246,22 +1248,22 @@ void FastNoiseNodeEditor::ChangeSelectedNode( FastNoise::NodeData* newId )
     }
 }
 
-const char* FastNoiseNodeEditor::GetSIMDLevelName( FastSIMD::eLevel lvl )
+const char* FastNoiseNodeEditor::GetSIMDLevelName( FastSIMD::FeatureSet lvl )
 {
     switch( lvl )
     {
     default:
-    case FastSIMD::Level_Null:   return "NULL";
-    case FastSIMD::Level_Scalar: return "Scalar";
-    case FastSIMD::Level_SSE:    return "SSE";
-    case FastSIMD::Level_SSE2:   return "SSE2";
-    case FastSIMD::Level_SSE3:   return "SSE3";
-    case FastSIMD::Level_SSSE3:  return "SSSE3";
-    case FastSIMD::Level_SSE41:  return "SSE4.1";
-    case FastSIMD::Level_SSE42:  return "SSE4.2";
-    case FastSIMD::Level_AVX:    return "AVX";
-    case FastSIMD::Level_AVX2:   return "AVX2";
-    case FastSIMD::Level_AVX512: return "AVX512";
-    case FastSIMD::Level_NEON:   return "NEON";
+    case FastSIMD::FeatureSet::Null:   return "NULL";
+    case FastSIMD::FeatureSet::Scalar: return "Scalar";
+    case FastSIMD::FeatureSet::SSE:    return "SSE";
+    case FastSIMD::FeatureSet::SSE2:   return "SSE2";
+    case FastSIMD::FeatureSet::SSE3:   return "SSE3";
+    case FastSIMD::FeatureSet::SSSE3:  return "SSSE3";
+    case FastSIMD::FeatureSet::SSE41:  return "SSE4.1";
+    case FastSIMD::FeatureSet::SSE42:  return "SSE4.2";
+    case FastSIMD::FeatureSet::AVX:    return "AVX";
+    case FastSIMD::FeatureSet::AVX2_FMA:   return "AVX2";
+    case FastSIMD::FeatureSet::AVX512_Baseline_FMA: return "AVX512";
+    case FastSIMD::FeatureSet::NEON:   return "NEON";
     }
 }
diff --git a/NoiseTool/FastNoiseNodeEditor.h b/NoiseTool/FastNoiseNodeEditor.h
index 7aad4236..3943f262 100644
--- a/NoiseTool/FastNoiseNodeEditor.h
+++ b/NoiseTool/FastNoiseNodeEditor.h
@@ -23,9 +23,9 @@ namespace Magnum
     public:
         FastNoiseNodeEditor();
         void Draw( const Matrix4& transformation, const Matrix4& projection, const Vector3& cameraPosition );
-        void SetSIMDLevel( FastSIMD::eLevel lvl );
+        void SetSIMDLevel( FastSIMD::FeatureSet lvl );
 
-        static const char* GetSIMDLevelName( FastSIMD::eLevel lvl );
+        static const char* GetSIMDLevelName( FastSIMD::FeatureSet lvl );
 
     private:
         struct Node
@@ -138,7 +138,7 @@ namespace Magnum
         int mNodeSeed = 1337;
         NoiseTexture::GenType mNodeGenType = NoiseTexture::GenType_2D;
 
-        FastSIMD::eLevel mMaxSIMDLevel    = FastSIMD::Level_Null;
-        FastSIMD::eLevel mActualSIMDLevel = FastSIMD::Level_Null;
+        FastSIMD::FeatureSet mMaxSIMDLevel    = FastSIMD::FeatureSet::Max;
+        FastSIMD::FeatureSet mActualSIMDLevel = FastSIMD::FeatureSet::Null;
     };
 }
\ No newline at end of file
diff --git a/NoiseTool/MeshNoisePreview.cpp b/NoiseTool/MeshNoisePreview.cpp
index 1d8094c7..c7fb69f4 100644
--- a/NoiseTool/MeshNoisePreview.cpp
+++ b/NoiseTool/MeshNoisePreview.cpp
@@ -561,7 +561,7 @@ MeshNoisePreview::Chunk::Chunk( MeshData& meshData )
 {
     mPos = meshData.pos;
 
-    if( !meshData.vertexData.empty() )
+    if( !meshData.vertexData.isEmpty() )
     {
         //https://doc.magnum.graphics/magnum/classMagnum_1_1GL_1_1Mesh.html
 
@@ -569,7 +569,7 @@ MeshNoisePreview::Chunk::Chunk( MeshData& meshData )
 
         mMesh->addVertexBuffer( GL::Buffer( GL::Buffer::TargetHint::Array, meshData.vertexData ), 0, VertexLightShader::PositionLight{} );
 
-        if( meshData.indicies.empty() )
+        if( meshData.indicies.isEmpty() )
         {
             mMesh->setCount( (int)meshData.vertexData.size() );
         }
@@ -597,9 +597,9 @@ MeshNoisePreview::VertexLightShader::VertexLightShader()
     GL::Shader frag = CreateShader( version, GL::Shader::Type::Fragment );
     
     CORRADE_INTERNAL_ASSERT_OUTPUT(
-        vert.addSource( noiseToolResources.get( "VertexLight.vert" ) ).compile() );
+        vert.addSource( noiseToolResources.getString( "VertexLight.vert" ) ).compile() );
     CORRADE_INTERNAL_ASSERT_OUTPUT( 
-        frag.addSource( noiseToolResources.get( "VertexLight.frag" ) ).compile() );
+        frag.addSource( noiseToolResources.getString( "VertexLight.frag" ) ).compile() );
 
     attachShader( vert );
     attachShader( frag );
diff --git a/NoiseTool/NoiseTexture.cpp b/NoiseTool/NoiseTexture.cpp
index b0d5174d..547aba69 100644
--- a/NoiseTool/NoiseTexture.cpp
+++ b/NoiseTool/NoiseTexture.cpp
@@ -320,8 +320,8 @@ NoiseTexture::TextureData NoiseTexture::BuildTexture( const BuildData& buildData
     static thread_local std::vector<float> noiseData;
     noiseData.resize( (size_t)buildData.size.x() * buildData.size.y() );
 
-    auto gen = FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetSIMDLevel() );
-    gen->SetSource( buildData.generator );
+    auto gen = buildData.generator;// FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetSIMDLevel() );
+    //gen->SetSource( buildData.generator );
 
     FastNoise::OutputMinMax minMax;
 
diff --git a/NoiseTool/NoiseToolApp.cpp b/NoiseTool/NoiseToolApp.cpp
index ff01a3fd..78c424f7 100644
--- a/NoiseTool/NoiseToolApp.cpp
+++ b/NoiseTool/NoiseToolApp.cpp
@@ -62,19 +62,19 @@ NoiseToolApp::NoiseToolApp( const Arguments& arguments ) :
     GL::Renderer::setBlendEquation( GL::Renderer::BlendEquation::Add, GL::Renderer::BlendEquation::Add );
     GL::Renderer::setBlendFunction( GL::Renderer::BlendFunction::SourceAlpha, GL::Renderer::BlendFunction::OneMinusSourceAlpha );
 
-    Debug{} << "FastSIMD detected max CPU SIMD Level:" << FastNoiseNodeEditor::GetSIMDLevelName( FastSIMD::CPUMaxSIMDLevel() );
+    Debug{} << "FastSIMD detected max CPU SIMD Level:" << FastNoiseNodeEditor::GetSIMDLevelName( FastSIMD::DetectCpuMaxFeatureSet() );
 
     mLevelNames = { "Auto" };
-    mLevelEnums = { FastSIMD::Level_Null };
+    mLevelEnums = { FastSIMD::FeatureSet::Null };
 
     for( int i = 1; i > 0; i <<= 1 )
     {
-        FastSIMD::eLevel lvl = (FastSIMD::eLevel)i;
-        if( lvl & FastNoise::SUPPORTED_SIMD_LEVELS & FastSIMD::COMPILED_SIMD_LEVELS )
+        FastSIMD::FeatureSet lvl = (FastSIMD::FeatureSet)i;
+        /*if( lvl & FastNoise::SUPPORTED_SIMD_LEVELS & FastSIMD::COMPILED_SIMD_LEVELS )
         {
             mLevelNames.emplace_back( FastNoiseNodeEditor::GetSIMDLevelName( lvl ) );
             mLevelEnums.emplace_back( lvl );
-        }
+        }*/
     }
 }
 
@@ -117,7 +117,7 @@ void NoiseToolApp::drawEvent()
         if( ImGui::Combo( "Max SIMD Level", &mMaxSIMDLevel, mLevelNames.data(), (int)mLevelEnums.size() ) ||
             ImGuiExtra::ScrollCombo( &mMaxSIMDLevel, (int)mLevelEnums.size() ) )
         {   
-            FastSIMD::eLevel newLevel = mLevelEnums[mMaxSIMDLevel];
+            FastSIMD::FeatureSet newLevel = mLevelEnums[mMaxSIMDLevel];
             mNodeEditor.SetSIMDLevel( newLevel );
         }
     }
diff --git a/NoiseTool/NoiseToolApp.h b/NoiseTool/NoiseToolApp.h
index 12610606..b18b6cb1 100644
--- a/NoiseTool/NoiseToolApp.h
+++ b/NoiseTool/NoiseToolApp.h
@@ -42,7 +42,7 @@ namespace Magnum
         bool mBackFaceCulling = false;
         int mMaxSIMDLevel = 0;
         std::vector<const char*> mLevelNames;
-        std::vector<FastSIMD::eLevel> mLevelEnums;
+        std::vector<FastSIMD::FeatureSet> mLevelEnums;
 
         ImGuiIntegration::Context mImGuiIntegrationContext;
         ImGuiContext* mImGuiContext;
diff --git a/include/FastNoise/FastNoise.h b/include/FastNoise/FastNoise.h
index 332cff00..4c78faa7 100644
--- a/include/FastNoise/FastNoise.h
+++ b/include/FastNoise/FastNoise.h
@@ -25,15 +25,15 @@ namespace FastNoise
     /// <param name="maxSimdLevel">Max SIMD level, Null = Auto</param>
     /// <returns>SmartNode<T> is guaranteed not nullptr</returns>
     template<typename T>
-    SmartNode<T> New( FastSIMD::eLevel maxSimdLevel /*= FastSIMD::Level_Null*/ )
+    SmartNode<T> New( FastSIMD::FeatureSet maxFeatureSet /*= FastSIMD::FeatureSet::Max*/ )
     {
-        static_assert( std::is_base_of<Generator, T>::value, "This function should only be used for FastNoise node classes, for example FastNoise::Simplex" );
-        static_assert( std::is_member_function_pointer<decltype(&T::GetMetadata)>::value, "Cannot create abstract node class, use a derived class, for example: Fractal -> FractalFBm" );
+        //static_assert( std::is_base_of<Generator, T>::value, "This function should only be used for FastNoise node classes, for example FastNoise::Simplex" );
+        //static_assert( std::is_member_function_pointer<decltype(&T::GetMetadata)>::value, "Cannot create abstract node class, use a derived class, for example: Fractal -> FractalFBm" );
 
 #if FASTNOISE_USE_SHARED_PTR
-        return SmartNode<T>( FastSIMD::New<T>( maxSimdLevel ) );
+        return SmartNode<T>( FastSIMD::NewDispatchClass<T>( maxFeatureSet ) );
 #else
-        return SmartNode<T>( FastSIMD::New<T>( maxSimdLevel, &SmartNodeManager::Allocate ) );
+        return SmartNode<T>( FastSIMD::NewDispatchClass<T>( maxFeatureSet, &SmartNodeManager::Allocate ) );
 #endif
     }
 
@@ -46,5 +46,5 @@ namespace FastNoise
     /// <param name="encodedNodeTreeString">Can be generated using the NoiseTool</param>
     /// <param name="maxSimdLevel">Max SIMD level, Null = Auto</param>
     /// <returns>Root node of the tree, nullptr for invalid strings</returns>
-    FASTNOISE_API SmartNode<> NewFromEncodedNodeTree( const char* encodedNodeTreeString, FastSIMD::eLevel maxSimdLevel = FastSIMD::Level_Null );
+    FASTNOISE_API SmartNode<> NewFromEncodedNodeTree( const char* encodedNodeTreeString, FastSIMD::FeatureSet maxFeatureSet = FastSIMD::FeatureSet::Max );
 }
diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index f59d14aa..cb4a52d3 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -16,60 +16,60 @@ template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
 #else
 #include "Generators/BasicGenerators.inl"
 #endif
-
-#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Value.h"
-#else
-#include "Generators/Value.inl"
-#endif
-
-#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Perlin.h"
-#else
-#include "Generators/Perlin.inl"
-#endif
-
-#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Simplex.h"
-#else
-#include "Generators/Simplex.inl"
-#endif
-
-#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Cellular.h"
-#else
-#include "Generators/Cellular.inl"
-#endif
-
-#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Fractal.h"
-#else
-#include "Generators/Fractal.inl"
-#endif
-
-#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/DomainWarp.h"
-#else
-#include "Generators/DomainWarp.inl"
-#endif
-
-#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/DomainWarpFractal.h"
-#else
-#include "Generators/DomainWarpFractal.inl"
-#endif
-
+//
+//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+//#include "Generators/Value.h"
+//#else
+//#include "Generators/Value.inl"
+//#endif
+//
+//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+//#include "Generators/Perlin.h"
+//#else
+//#include "Generators/Perlin.inl"
+//#endif
+//
+//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+//#include "Generators/Simplex.h"
+//#else
+//#include "Generators/Simplex.inl"
+//#endif
+//
+//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+//#include "Generators/Cellular.h"
+//#else
+//#include "Generators/Cellular.inl"
+//#endif
+//
+//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+//#include "Generators/Fractal.h"
+//#else
+//#include "Generators/Fractal.inl"
+//#endif
+//
+//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+//#include "Generators/DomainWarp.h"
+//#else
+//#include "Generators/DomainWarp.inl"
+//#endif
+//
+//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+//#include "Generators/DomainWarpFractal.h"
+//#else
+//#include "Generators/DomainWarpFractal.inl"
+//#endif
+//
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
 #include "Generators/Modifiers.h"
 #else
 #include "Generators/Modifiers.inl"
 #endif
-
-#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Blends.h"
-#else
-#include "Generators/Blends.inl"
-#endif
+//
+//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+//#include "Generators/Blends.h"
+//#else
+//#include "Generators/Blends.inl"
+//#endif
 
 // Nodes
 // Order is important!
@@ -77,50 +77,50 @@ template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
 // inserting will break existing encoded node trees
 
 FASTNOISE_REGISTER_NODE( Constant );
-FASTNOISE_REGISTER_NODE( White );
-FASTNOISE_REGISTER_NODE( Checkerboard );
+//FASTNOISE_REGISTER_NODE( White );
+//FASTNOISE_REGISTER_NODE( Checkerboard );
 FASTNOISE_REGISTER_NODE( SineWave );
-FASTNOISE_REGISTER_NODE( PositionOutput );
-FASTNOISE_REGISTER_NODE( DistanceToPoint );
-                    
-FASTNOISE_REGISTER_NODE( Value );
-FASTNOISE_REGISTER_NODE( Perlin );
-FASTNOISE_REGISTER_NODE( Simplex );
-FASTNOISE_REGISTER_NODE( OpenSimplex2 );
-                    
-FASTNOISE_REGISTER_NODE( CellularValue );
-FASTNOISE_REGISTER_NODE( CellularDistance );
-FASTNOISE_REGISTER_NODE( CellularLookup );
-                    
-FASTNOISE_REGISTER_NODE( FractalFBm );
-FASTNOISE_REGISTER_NODE( FractalPingPong );
-FASTNOISE_REGISTER_NODE( FractalRidged );
-                    
-FASTNOISE_REGISTER_NODE( DomainWarpGradient );
-FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );
-FASTNOISE_REGISTER_NODE( DomainWarpFractalIndependant );
-                    
-FASTNOISE_REGISTER_NODE( DomainScale );
-FASTNOISE_REGISTER_NODE( DomainOffset );
-FASTNOISE_REGISTER_NODE( DomainRotate );
-FASTNOISE_REGISTER_NODE( SeedOffset );
-FASTNOISE_REGISTER_NODE( Remap );
+//FASTNOISE_REGISTER_NODE( PositionOutput );
+//FASTNOISE_REGISTER_NODE( DistanceToPoint );
+//                    
+//FASTNOISE_REGISTER_NODE( Value );
+//FASTNOISE_REGISTER_NODE( Perlin );
+//FASTNOISE_REGISTER_NODE( Simplex );
+//FASTNOISE_REGISTER_NODE( OpenSimplex2 );
+//                    
+//FASTNOISE_REGISTER_NODE( CellularValue );
+//FASTNOISE_REGISTER_NODE( CellularDistance );
+//FASTNOISE_REGISTER_NODE( CellularLookup );
+//                    
+//FASTNOISE_REGISTER_NODE( FractalFBm );
+//FASTNOISE_REGISTER_NODE( FractalPingPong );
+//FASTNOISE_REGISTER_NODE( FractalRidged );
+//                    
+//FASTNOISE_REGISTER_NODE( DomainWarpGradient );
+//FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );
+//FASTNOISE_REGISTER_NODE( DomainWarpFractalIndependant );
+//                    
+//FASTNOISE_REGISTER_NODE( DomainScale );
+//FASTNOISE_REGISTER_NODE( DomainOffset );
+//FASTNOISE_REGISTER_NODE( DomainRotate );
+//FASTNOISE_REGISTER_NODE( SeedOffset );
+//FASTNOISE_REGISTER_NODE( Remap );
 FASTNOISE_REGISTER_NODE( ConvertRGBA8 );
-                    
-FASTNOISE_REGISTER_NODE( Add );
-FASTNOISE_REGISTER_NODE( Subtract );
-FASTNOISE_REGISTER_NODE( Multiply );
-FASTNOISE_REGISTER_NODE( Divide );
-FASTNOISE_REGISTER_NODE( Min );
-FASTNOISE_REGISTER_NODE( Max );
-FASTNOISE_REGISTER_NODE( MinSmooth );
-FASTNOISE_REGISTER_NODE( MaxSmooth );
-FASTNOISE_REGISTER_NODE( Fade );
-                    
-FASTNOISE_REGISTER_NODE( Terrace );
-FASTNOISE_REGISTER_NODE( PowFloat );
-FASTNOISE_REGISTER_NODE( PowInt );
-FASTNOISE_REGISTER_NODE( DomainAxisScale );
-FASTNOISE_REGISTER_NODE( AddDimension );
-FASTNOISE_REGISTER_NODE( RemoveDimension );
-FASTNOISE_REGISTER_NODE( GeneratorCache );
+//                    
+//FASTNOISE_REGISTER_NODE( Add );
+//FASTNOISE_REGISTER_NODE( Subtract );
+//FASTNOISE_REGISTER_NODE( Multiply );
+//FASTNOISE_REGISTER_NODE( Divide );
+//FASTNOISE_REGISTER_NODE( Min );
+//FASTNOISE_REGISTER_NODE( Max );
+//FASTNOISE_REGISTER_NODE( MinSmooth );
+//FASTNOISE_REGISTER_NODE( MaxSmooth );
+//FASTNOISE_REGISTER_NODE( Fade );
+//                    
+//FASTNOISE_REGISTER_NODE( Terrace );
+//FASTNOISE_REGISTER_NODE( PowFloat );
+//FASTNOISE_REGISTER_NODE( PowInt );
+//FASTNOISE_REGISTER_NODE( DomainAxisScale );
+//FASTNOISE_REGISTER_NODE( AddDimension );
+//FASTNOISE_REGISTER_NODE( RemoveDimension );
+//FASTNOISE_REGISTER_NODE( GeneratorCache );
diff --git a/include/FastNoise/FastNoise_C.h b/include/FastNoise/FastNoise_C.h
index 4a923ded..1ee53e58 100644
--- a/include/FastNoise/FastNoise_C.h
+++ b/include/FastNoise/FastNoise_C.h
@@ -7,7 +7,7 @@
 extern "C" {
 #endif
 
-FASTNOISE_API void* fnNewFromEncodedNodeTree( const char* encodedString, unsigned /*FastSIMD::eLevel*/ simdLevel /*0 = Auto*/ );
+FASTNOISE_API void* fnNewFromEncodedNodeTree( const char* encodedString, unsigned /*FastSIMD::FeatureSet*/ simdLevel /*0 = Auto*/ );
 
 FASTNOISE_API void fnDeleteNodeRef( void* node );
 
@@ -54,7 +54,7 @@ FASTNOISE_API float fnGenSingle4D( const void* node, float x, float y, float z,
 
 FASTNOISE_API int fnGetMetadataCount();
 FASTNOISE_API const char* fnGetMetadataName( int id ); // valid IDs up to `fnGetMetadataCount() - 1`
-FASTNOISE_API void* fnNewFromMetadata( int id, unsigned /*FastSIMD::eLevel*/ simdLevel /*0 = Auto*/ );
+FASTNOISE_API void* fnNewFromMetadata( int id, unsigned /*FastSIMD::FeatureSet*/ simdLevel /*0 = Auto*/ );
 
 FASTNOISE_API int fnGetMetadataVariableCount( int id );
 FASTNOISE_API const char* fnGetMetadataVariableName( int id, int variableIndex );
diff --git a/include/FastNoise/FastNoise_Config.h b/include/FastNoise/FastNoise_Config.h
index f3b1d715..22a88f25 100644
--- a/include/FastNoise/FastNoise_Config.h
+++ b/include/FastNoise/FastNoise_Config.h
@@ -1,5 +1,5 @@
 #pragma once
-#include <FastSIMD/FastSIMD.h>
+#include <FastSIMD/DispatchClass.h>
 #include "FastNoise_Export.h"
 
 #define FASTNOISE_CALC_MIN_MAX true
@@ -10,14 +10,7 @@
 #endif
 
 namespace FastNoise
-{
-    const FastSIMD::Level_BitFlags SUPPORTED_SIMD_LEVELS =
-        FastSIMD::Level_Scalar |
-        FastSIMD::Level_SSE2   |
-        FastSIMD::Level_SSE41  |
-        FastSIMD::Level_AVX2   |
-        FastSIMD::Level_AVX512 ;
-    
+{    
     class Generator;
     struct Metadata;
 
@@ -36,7 +29,7 @@ namespace FastNoise
     using SmartNodeArg = const SmartNode<const T>&;
 
     template<typename T>
-    SmartNode<T> New( FastSIMD::eLevel maxSimdLevel = FastSIMD::Level_Null );
+    SmartNode<T> New( FastSIMD::FeatureSet maxFeatureSet = FastSIMD::FeatureSet::Max );
 } // namespace FastNoise
 
 #if !FASTNOISE_USE_SHARED_PTR
diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 2684f100..a30f6ea5 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -6,7 +6,6 @@ namespace FastNoise
     class Constant : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetValue( float value ) { mValue = value; }
@@ -19,7 +18,7 @@ namespace FastNoise
     template<>
     struct MetadataT<Constant> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -32,7 +31,6 @@ namespace FastNoise
     class White : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
     };
 
@@ -40,7 +38,7 @@ namespace FastNoise
     template<>
     struct MetadataT<White> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -52,7 +50,6 @@ namespace FastNoise
     class Checkerboard : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSize( float value ) { mSize = value; }
@@ -65,7 +62,7 @@ namespace FastNoise
     template<>
     struct MetadataT<Checkerboard> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -78,20 +75,19 @@ namespace FastNoise
     class SineWave : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
-        void SetScale( float value ) { mScale = value; }
+        void SetScale( float value ) { mScaleInv = 1 / value; } // stores the reciprocal of the scale; a value of 0 is not guarded against
 
     protected:
-        float mScale = 1.0f;
+        float mScaleInv = 1.0f; // reciprocal of the scale passed to SetScale (defaults to a scale of 1)
     };
 
 #ifdef FASTNOISE_METADATA
     template<>
     struct MetadataT<SineWave> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -104,7 +100,6 @@ namespace FastNoise
     class PositionOutput : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         template<Dim D>
@@ -122,7 +117,7 @@ namespace FastNoise
     template<>
     struct MetadataT<PositionOutput> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -136,7 +131,6 @@ namespace FastNoise
     class DistanceToPoint : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -158,7 +152,7 @@ namespace FastNoise
     template<>
     struct MetadataT<DistanceToPoint> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index fcd8c471..3def3541 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -1,94 +1,84 @@
-#include "FastSIMD/InlInclude.h"
-
 #include "BasicGenerators.h"
-#include "Utils.inl"
+//#include "Utils.inl"
 
-template<typename FS>
-class FS_T<FastNoise::Constant, FS> : public virtual FastNoise::Constant, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Constant, SIMD> : public virtual FastNoise::Constant, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         return float32v( mValue );
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::White, FS> : public virtual FastNoise::White, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::White, SIMD> : public virtual FastNoise::White, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         size_t idx = 0;
-        ((pos = FS_Casti32_f32( (FS_Castf32_i32( pos ) ^ (FS_Castf32_i32( pos ) >> 16)) * int32v( FnPrimes::Lookup[idx++] ) )), ...);
+        ((pos = SIMD_Casti32_f32( (SIMD_Castf32_i32( pos ) ^ (SIMD_Castf32_i32( pos ) >> 16)) * int32v( FnPrimes::Lookup[idx++] ) )), ...);
 
-        return FnUtils::GetValueCoord( seed, FS_Castf32_i32( pos )... );
+        return FnUtils::GetValueCoord( seed, SIMD_Castf32_i32( pos )... );
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::Checkerboard, FS> : public virtual FastNoise::Checkerboard, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> : public virtual FastNoise::Checkerboard, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        float32v multiplier = FS_Reciprocal_f32( float32v( mSize ) );
+        float32v multiplier = SIMD_Reciprocal_f32( float32v( mSize ) );
 
-        int32v value = (FS_Convertf32_i32( pos * multiplier ) ^ ...);
+        int32v value = (SIMD_Convertf32_i32( pos * multiplier ) ^ ...);
 
-        return float32v( 1.0f ) ^ FS_Casti32_f32( value << 31 );
+        return float32v( 1.0f ) ^ SIMD_Casti32_f32( value << 31 );
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::SineWave, FS> : public virtual FastNoise::SineWave, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> : public virtual FastNoise::SineWave, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        float32v multiplier = FS_Reciprocal_f32( float32v( mScale ) );
-
-        return (FS_Sin_f32( pos * multiplier ) * ...);
+        return (FS::Sin( pos * float32v( mScaleInv ) ) * ...); // fold: product over every axis of FS::Sin( axis * mScaleInv ); the reciprocal is precomputed by SetScale
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::PositionOutput, FS> : public virtual FastNoise::PositionOutput, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::PositionOutput, SIMD> : public virtual FastNoise::PositionOutput, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        size_t offsetIdx = 0;
+        size_t ofSIMDetIdx = 0;
         size_t multiplierIdx = 0;
 
-        (((pos += float32v( mOffset[offsetIdx++] )) *= float32v( mMultiplier[multiplierIdx++] )), ...);
+        (((pos += float32v( mOfSIMDet[ofSIMDetIdx++] )) *= float32v( mMultiplier[multiplierIdx++] )), ...);
         return (pos + ...);
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::DistanceToPoint, FS> : public virtual FastNoise::DistanceToPoint, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DistanceToPoint, SIMD> : public virtual FastNoise::DistanceToPoint, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         size_t pointIdx = 0;
 
diff --git a/include/FastNoise/Generators/Blends.h b/include/FastNoise/Generators/Blends.h
index 1ad8f447..c22c2ad0 100644
--- a/include/FastNoise/Generators/Blends.h
+++ b/include/FastNoise/Generators/Blends.h
@@ -1,293 +1,271 @@
-#pragma once
-#include "Generator.h"
-
-#include <climits>
-
-namespace FastNoise
-{
-    class OperatorSourceLHS : public virtual Generator
-    {
-    public:
-        void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); }
-        void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); }
-        void SetRHS( float value ) { mRHS = value; }
-
-    protected:
-        GeneratorSource mLHS;
-        HybridSource mRHS = 0.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<OperatorSourceLHS> : MetadataT<Generator>
-    {
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddGeneratorSource( "LHS", &OperatorSourceLHS::SetLHS );
-            this->AddHybridSource( "RHS", 0.0f, &OperatorSourceLHS::SetRHS, &OperatorSourceLHS::SetRHS );
-        }
-    };
-#endif
-
-    class OperatorHybridLHS : public virtual Generator
-    {
-    public:
-        void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); }
-        void SetLHS( float value ) { mLHS = value; }
-        void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); }
-        void SetRHS( float value ) { mRHS = value; }
-
-    protected:
-        HybridSource mLHS = 0.0f;
-        HybridSource mRHS = 0.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<OperatorHybridLHS> : MetadataT<Generator>
-    {
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddHybridSource( "LHS", 0.0f, &OperatorHybridLHS::SetLHS, &OperatorHybridLHS::SetLHS );
-            this->AddHybridSource( "RHS", 0.0f, &OperatorHybridLHS::SetRHS, &OperatorHybridLHS::SetRHS );
-        }
-    };
-#endif
-
-    class Add : public virtual OperatorSourceLHS
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Add> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-
-    class Subtract : public virtual OperatorHybridLHS
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Subtract> : MetadataT<OperatorHybridLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-
-    class Multiply : public virtual OperatorSourceLHS
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Multiply> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-
-    class Divide : public virtual OperatorHybridLHS
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Divide> : MetadataT<OperatorHybridLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-
-    class Min : public virtual OperatorSourceLHS
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Min> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-
-    class Max : public virtual OperatorSourceLHS
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Max> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-
-    class PowFloat : public virtual Generator
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-
-        void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
-        void SetValue( float value ) { mValue = value; }
-        void SetPow( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPow, gen ); }
-        void SetPow( float value ) { mPow = value; }
-
-    protected:
-        HybridSource mValue = 2.0f;
-        HybridSource mPow = 2.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<PowFloat> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddHybridSource( "Value", 2.0f, &PowFloat::SetValue, &PowFloat::SetValue );
-            this->AddHybridSource( "Pow", 2.0f, &PowFloat::SetPow, &PowFloat::SetPow );
-        }
-    };
-#endif
-
-    class PowInt : public virtual Generator
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-
-        void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
-        void SetPow( int value ) { mPow = value; }
-
-    protected:
-        GeneratorSource mValue;
-        int mPow = 2;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<PowInt> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddGeneratorSource( "Value", &PowInt::SetValue );
-            this->AddVariable( "Pow", 2, &PowInt::SetPow, 2, INT_MAX );
-        }
-    };
-#endif
-
-    class MinSmooth : public virtual OperatorSourceLHS
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-
-        void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); }
-        void SetSmoothness( float value ) { mSmoothness = value; }
-
-    protected:
-        HybridSource mSmoothness = 0.1f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<MinSmooth> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            this->AddHybridSource( "Smoothness", 0.1f, &MinSmooth::SetSmoothness, &MinSmooth::SetSmoothness );
-        }
-    };
-#endif
-
-    class MaxSmooth : public virtual OperatorSourceLHS
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-
-        void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); }
-        void SetSmoothness( float value ) { mSmoothness = value; }
-
-    protected:
-        HybridSource mSmoothness = 0.1f;  
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<MaxSmooth> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            this->AddHybridSource( "Smoothness", 0.1f, &MaxSmooth::SetSmoothness, &MaxSmooth::SetSmoothness );
-        }
-    };
-#endif
-
-    class Fade : public virtual Generator
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-        void SetA( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mA, gen ); }
-        void SetB( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mB, gen ); }
-
-        void SetFade( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFade, gen ); }
-        void SetFade( float value ) { mFade = value; }
-
-    protected:
-        GeneratorSource mA;
-        GeneratorSource mB;
-        HybridSource mFade = 0.5f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Fade> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddGeneratorSource( "A", &Fade::SetA );
-            this->AddGeneratorSource( "B", &Fade::SetB );
-            this->AddHybridSource( "Fade", 0.5f, &Fade::SetFade, &Fade::SetFade );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+#include <climits>
+
+namespace FastNoise
+{
+    class OperatorSourceLHS : public virtual Generator
+    {
+    public:
+        void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); }
+        void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); }
+        void SetRHS( float value ) { mRHS = value; }
+
+    protected:
+        GeneratorSource mLHS;
+        HybridSource mRHS = 0.0f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<OperatorSourceLHS> : MetadataT<Generator>
+    {
+        MetadataT()
+        {
+            groups.push_back( "Blends" );
+            this->AddGeneratorSource( "LHS", &OperatorSourceLHS::SetLHS );
+            this->AddHybridSource( "RHS", 0.0f, &OperatorSourceLHS::SetRHS, &OperatorSourceLHS::SetRHS );
+        }
+    };
+#endif
+
+    class OperatorHybridLHS : public virtual Generator
+    {
+    public:
+        void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); }
+        void SetLHS( float value ) { mLHS = value; }
+        void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); }
+        void SetRHS( float value ) { mRHS = value; }
+
+    protected:
+        HybridSource mLHS = 0.0f;
+        HybridSource mRHS = 0.0f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<OperatorHybridLHS> : MetadataT<Generator>
+    {
+        MetadataT()
+        {
+            groups.push_back( "Blends" );
+            this->AddHybridSource( "LHS", 0.0f, &OperatorHybridLHS::SetLHS, &OperatorHybridLHS::SetLHS );
+            this->AddHybridSource( "RHS", 0.0f, &OperatorHybridLHS::SetRHS, &OperatorHybridLHS::SetRHS );
+        }
+    };
+#endif
+
+    class Add : public virtual OperatorSourceLHS
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Add> : MetadataT<OperatorSourceLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class Subtract : public virtual OperatorHybridLHS
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Subtract> : MetadataT<OperatorHybridLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class Multiply : public virtual OperatorSourceLHS
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Multiply> : MetadataT<OperatorSourceLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class Divide : public virtual OperatorHybridLHS
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Divide> : MetadataT<OperatorHybridLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class Min : public virtual OperatorSourceLHS
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Min> : MetadataT<OperatorSourceLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class Max : public virtual OperatorSourceLHS
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Max> : MetadataT<OperatorSourceLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class PowFloat : public virtual Generator
+    {
+    public:        const Metadata& GetMetadata() const override;
+
+        void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
+        void SetValue( float value ) { mValue = value; }
+        void SetPow( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPow, gen ); }
+        void SetPow( float value ) { mPow = value; }
+
+    protected:
+        HybridSource mValue = 2.0f;
+        HybridSource mPow = 2.0f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<PowFloat> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Blends" );
+            this->AddHybridSource( "Value", 2.0f, &PowFloat::SetValue, &PowFloat::SetValue );
+            this->AddHybridSource( "Pow", 2.0f, &PowFloat::SetPow, &PowFloat::SetPow );
+        }
+    };
+#endif
+
+    class PowInt : public virtual Generator
+    {
+    public:        const Metadata& GetMetadata() const override;
+
+        void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
+        void SetPow( int value ) { mPow = value; }
+
+    protected:
+        GeneratorSource mValue;
+        int mPow = 2;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<PowInt> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Blends" );
+            this->AddGeneratorSource( "Value", &PowInt::SetValue );
+            this->AddVariable( "Pow", 2, &PowInt::SetPow, 2, INT_MAX );
+        }
+    };
+#endif
+
+    class MinSmooth : public virtual OperatorSourceLHS
+    {
+    public:        const Metadata& GetMetadata() const override;
+
+        void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); }
+        void SetSmoothness( float value ) { mSmoothness = value; }
+
+    protected:
+        HybridSource mSmoothness = 0.1f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<MinSmooth> : MetadataT<OperatorSourceLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            this->AddHybridSource( "Smoothness", 0.1f, &MinSmooth::SetSmoothness, &MinSmooth::SetSmoothness );
+        }
+    };
+#endif
+
+    class MaxSmooth : public virtual OperatorSourceLHS
+    {
+    public:        const Metadata& GetMetadata() const override;
+
+        void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); }
+        void SetSmoothness( float value ) { mSmoothness = value; }
+
+    protected:
+        HybridSource mSmoothness = 0.1f;  
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<MaxSmooth> : MetadataT<OperatorSourceLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            this->AddHybridSource( "Smoothness", 0.1f, &MaxSmooth::SetSmoothness, &MaxSmooth::SetSmoothness );
+        }
+    };
+#endif
+
+    class Fade : public virtual Generator
+    {
+    public:        const Metadata& GetMetadata() const override;
+        void SetA( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mA, gen ); }
+        void SetB( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mB, gen ); }
+
+        void SetFade( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFade, gen ); }
+        void SetFade( float value ) { mFade = value; }
+
+    protected:
+        GeneratorSource mA;
+        GeneratorSource mB;
+        HybridSource mFade = 0.5f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Fade> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Blends" );
+            this->AddGeneratorSource( "A", &Fade::SetA );
+            this->AddGeneratorSource( "B", &Fade::SetB );
+            this->AddHybridSource( "Fade", 0.5f, &Fade::SetFade, &Fade::SetFade );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Blends.inl b/include/FastNoise/Generators/Blends.inl
index 7f631d36..f01d0e9a 100644
--- a/include/FastNoise/Generators/Blends.inl
+++ b/include/FastNoise/Generators/Blends.inl
@@ -1,173 +1,151 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Blends.h"
-
-template<typename FS>
-class FS_T<FastNoise::Add, FS> : public virtual FastNoise::Add, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return this->GetSourceValue( mLHS, seed, pos... ) + this->GetSourceValue( mRHS, seed, pos... );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::Subtract, FS> : public virtual FastNoise::Subtract, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return this->GetSourceValue( mLHS, seed, pos... ) - this->GetSourceValue( mRHS, seed, pos... );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::Multiply, FS> : public virtual FastNoise::Multiply, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return this->GetSourceValue( mLHS, seed, pos... ) * this->GetSourceValue( mRHS, seed, pos... );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::Divide, FS> : public virtual FastNoise::Divide, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return this->GetSourceValue( mLHS, seed, pos... ) / this->GetSourceValue( mRHS, seed, pos... );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::PowFloat, FS> : public virtual FastNoise::PowFloat, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return FS_Pow_f32( this->GetSourceValue( mValue, seed, pos... ), this->GetSourceValue( mPow, seed, pos... ) );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::PowInt, FS> : public virtual FastNoise::PowInt, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v value = this->GetSourceValue( mValue, seed, pos... );
-        float32v pow = value * value;
-
-        for( int i = 2; i < mPow; i++ )
-        {
-            pow *= value;
-        }
-
-        return pow;
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::Min, FS> : public virtual FastNoise::Min, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return FS_Min_f32( this->GetSourceValue( mLHS, seed, pos... ), this->GetSourceValue( mRHS, seed, pos... ) );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::Max, FS> : public virtual FastNoise::Max, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return FS_Max_f32( this->GetSourceValue( mLHS, seed, pos... ), this->GetSourceValue( mRHS, seed, pos... ) );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::MinSmooth, FS> : public virtual FastNoise::MinSmooth, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v a = this->GetSourceValue( mLHS, seed, pos... );
-        float32v b = this->GetSourceValue( mRHS, seed, pos... );
-        float32v smoothness = FS_Max_f32( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
-
-        float32v h = FS_Max_f32( smoothness - FS_Abs_f32( a - b ), float32v( 0.0f ) );
-
-        h *= FS_Reciprocal_f32( smoothness );
-
-        return FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS_Min_f32( a, b ) );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::MaxSmooth, FS> : public virtual FastNoise::MaxSmooth, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v a = -this->GetSourceValue( mLHS, seed, pos... );
-        float32v b = -this->GetSourceValue( mRHS, seed, pos... );
-        float32v smoothness = FS_Max_f32( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
-
-        float32v h = FS_Max_f32( smoothness - FS_Abs_f32( a - b ), float32v( 0.0f ) );
-
-        h *= FS_Reciprocal_f32( smoothness );
-
-        return -FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS_Min_f32( a, b ) );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::Fade, FS> : public virtual FastNoise::Fade, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v fade = FS_Abs_f32( this->GetSourceValue( mFade, seed, pos... ) );
-
-        return FS_FMulAdd_f32( this->GetSourceValue( mA, seed, pos... ), float32v( 1 ) - fade, this->GetSourceValue( mB, seed, pos... ) * fade );
-    }
-};
-
+#include "FastSIMD/InlInclude.h"
+
+#include "Blends.h"
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Add, FS> : public virtual FastNoise::Add, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // Add node for feature set FS: the output is the LHS source value plus the RHS source value.
+    template<typename... P> // seed and pos... are forwarded unchanged to both GetSourceValue calls
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return this->GetSourceValue( mLHS, seed, pos... ) + this->GetSourceValue( mRHS, seed, pos... );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Subtract, FS> : public virtual FastNoise::Subtract, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // Subtract node for feature set FS: the output is the LHS source value minus the RHS source value.
+    template<typename... P> // seed and pos... are forwarded unchanged to both GetSourceValue calls
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return this->GetSourceValue( mLHS, seed, pos... ) - this->GetSourceValue( mRHS, seed, pos... );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Multiply, FS> : public virtual FastNoise::Multiply, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // Multiply node for feature set FS: the output is the product of the LHS and RHS source values.
+    template<typename... P> // seed and pos... are forwarded unchanged to both GetSourceValue calls
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return this->GetSourceValue( mLHS, seed, pos... ) * this->GetSourceValue( mRHS, seed, pos... );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Divide, FS> : public virtual FastNoise::Divide, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // Divide node for feature set FS: the output is the LHS source value divided by the RHS source value.
+    template<typename... P> // no zero check is applied to the RHS (divisor) value in this function
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return this->GetSourceValue( mLHS, seed, pos... ) / this->GetSourceValue( mRHS, seed, pos... );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::PowFloat, FS> : public virtual FastNoise::PowFloat, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // PowFloat node for feature set FS: passes the mValue source value and the mPow source value, in that order, to FS_Pow_f32.
+    template<typename... P> // NOTE(review): FS_Pow_f32 is defined outside this file; presumably value raised to pow - confirm
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return FS_Pow_f32( this->GetSourceValue( mValue, seed, pos... ), this->GetSourceValue( mPow, seed, pos... ) );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::PowInt, FS> : public virtual FastNoise::PowInt, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // PowInt node for feature set FS: raises the mValue source value to the integer power mPow by repeated multiplication.
+    template<typename... P> // seed and pos... are forwarded unchanged to GetSourceValue
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v value = this->GetSourceValue( mValue, seed, pos... );
+        float32v pow = value * value; // start from the square
+        // One extra multiply for each i in [2, mPow); when mPow < 2 the loop does not run and the square is returned.
+        for( int i = 2; i < mPow; i++ )
+        {
+            pow *= value;
+        }
+        // NOTE(review): whether the mPow setter enforces mPow >= 2 is not visible from this file - confirm.
+        return pow;
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Min, FS> : public virtual FastNoise::Min, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // Min node for feature set FS: passes the LHS and RHS source values to FS::Min and returns the result.
+    template<typename... P> // seed and pos... are forwarded unchanged to both GetSourceValue calls
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return FS::Min( this->GetSourceValue( mLHS, seed, pos... ), this->GetSourceValue( mRHS, seed, pos... ) );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Max, FS> : public virtual FastNoise::Max, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // Max node for feature set FS: passes the LHS and RHS source values to FS::Max and returns the result.
+    template<typename... P> // seed and pos... are forwarded unchanged to both GetSourceValue calls
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return FS::Max( this->GetSourceValue( mLHS, seed, pos... ), this->GetSourceValue( mRHS, seed, pos... ) );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::MinSmooth, FS> : public virtual FastNoise::MinSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // MinSmooth node for feature set FS: minimum of the LHS and RHS source values with a correction term sized by mSmoothness.
+    template<typename... P> // seed and pos... are forwarded unchanged to every GetSourceValue call
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v a = this->GetSourceValue( mLHS, seed, pos... );
+        float32v b = this->GetSourceValue( mRHS, seed, pos... );
+        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
+        // smoothness is the absolute source value floored at 1.175494351e-38f (FLT_MIN), so it is never zero. h is 0 once |a - b| >= smoothness.
+        float32v h = FS::Max( smoothness - FS_Abs_f32( a - b ), float32v( 0.0f ) );
+
+        h *= FS_Reciprocal_f32( smoothness );
+        // NOTE(review): assuming FS_FNMulAdd_f32( x, y, z ) evaluates z - x * y, this returns min( a, b ) - h^3 * smoothness / 6 - confirm.
+        return FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::MaxSmooth, FS> : public virtual FastNoise::MaxSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // MaxSmooth node for feature set FS: same arithmetic as MinSmooth, applied to the negated inputs, with the result negated again.
+    template<typename... P> // seed and pos... are forwarded unchanged to every GetSourceValue call
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v a = -this->GetSourceValue( mLHS, seed, pos... );
+        float32v b = -this->GetSourceValue( mRHS, seed, pos... );
+        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
+        // smoothness is the absolute source value floored at 1.175494351e-38f (FLT_MIN), so it is never zero. h is 0 once |a - b| >= smoothness.
+        float32v h = FS::Max( smoothness - FS_Abs_f32( a - b ), float32v( 0.0f ) );
+
+        h *= FS_Reciprocal_f32( smoothness );
+        // NOTE(review): assuming FS_FNMulAdd_f32( x, y, z ) evaluates z - x * y, this returns -( min( a, b ) - h^3 * smoothness / 6 ) - confirm.
+        return -FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Fade, FS> : public virtual FastNoise::Fade, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // Fade node for feature set FS: mixes the A and B source values using the absolute value of the mFade source value.
+    template<typename... P> // seed and pos... are forwarded unchanged to every GetSourceValue call
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v fade = FS_Abs_f32( this->GetSourceValue( mFade, seed, pos... ) ); // sign is discarded; no upper clamp is applied here
+        // NOTE(review): assuming FS_FMulAdd_f32( x, y, z ) evaluates x * y + z, this returns A * ( 1 - fade ) + B * fade - confirm.
+        return FS_FMulAdd_f32( this->GetSourceValue( mA, seed, pos... ), float32v( 1 ) - fade, this->GetSourceValue( mB, seed, pos... ) * fade );
+    }
+};
+
diff --git a/include/FastNoise/Generators/Cellular.h b/include/FastNoise/Generators/Cellular.h
index 81a50fed..f4e28082 100644
--- a/include/FastNoise/Generators/Cellular.h
+++ b/include/FastNoise/Generators/Cellular.h
@@ -1,129 +1,123 @@
-#pragma once
-#include "Generator.h"
-
-#include <algorithm>
-
-namespace FastNoise
-{
-    class Cellular : public virtual Generator
-    {
-    public:
-        void SetJitterModifier( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mJitterModifier, gen ); }
-        void SetJitterModifier( float value ) { mJitterModifier = value; }
-        void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
-
-    protected:
-        HybridSource mJitterModifier = 1.0f;
-        DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Cellular> : MetadataT<Generator>
-    {
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
-            this->AddHybridSource( "Jitter Modifier", 1.0f, &Cellular::SetJitterModifier, &Cellular::SetJitterModifier );
-            this->AddVariableEnum( "Distance Function", DistanceFunction::EuclideanSquared, &Cellular::SetDistanceFunction, kDistanceFunction_Strings );
-        }
-    };
-#endif
-
-    class CellularValue : public virtual Cellular
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-
-        static const int kMaxDistanceCount = 4;
-
-        void SetValueIndex( int value ) { mValueIndex = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
-
-    protected:
-        int mValueIndex = 0;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<CellularValue> : MetadataT<Cellular>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            this->AddVariable( "Value Index", 0, &CellularValue::SetValueIndex, 0, CellularValue::kMaxDistanceCount - 1 );
-        }
-    };
-#endif
-
-    class CellularDistance : public virtual Cellular
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-
-        enum class ReturnType
-        {
-            Index0,
-            Index0Add1,
-            Index0Sub1,
-            Index0Mul1,
-            Index0Div1
-        };
-
-        static const int kMaxDistanceCount = 4;
-
-        void SetDistanceIndex0( int value ) { mDistanceIndex0 = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
-        void SetDistanceIndex1( int value ) { mDistanceIndex1 = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
-        void SetReturnType( ReturnType value ) { mReturnType = value; }
-
-    protected:
-        ReturnType mReturnType = ReturnType::Index0;
-        int mDistanceIndex0 = 0;
-        int mDistanceIndex1 = 1;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<CellularDistance> : MetadataT<Cellular>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            this->AddVariable( "Distance Index 0", 0, &CellularDistance::SetDistanceIndex0, 0, CellularDistance::kMaxDistanceCount - 1 );
-            this->AddVariable( "Distance Index 1", 1, &CellularDistance::SetDistanceIndex1, 0, CellularDistance::kMaxDistanceCount - 1 );
-            this->AddVariableEnum( "Return Type", CellularDistance::ReturnType::Index0, &CellularDistance::SetReturnType, "Index0", "Index0Add1", "Index0Sub1", "Index0Mul1", "Index0Div1" );
-        }
-    };
-#endif
-
-    class CellularLookup : public virtual Cellular
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-
-        void SetLookup( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLookup, gen ); }
-        void SetLookupFrequency( float freq ) { mLookupFreq = freq; }
-
-    protected:
-        GeneratorSource mLookup;
-        float mLookupFreq = 0.1f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<CellularLookup> : MetadataT<Cellular>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            this->AddGeneratorSource( "Lookup", &CellularLookup::SetLookup );
-            this->AddVariable( "Lookup Frequency", 0.1f, &CellularLookup::SetLookupFrequency );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+#include <algorithm>
+
+namespace FastNoise
+{
+    class Cellular : public virtual Generator // Settings shared by CellularValue, CellularDistance and CellularLookup, which derive from it.
+    {
+    public:
+        void SetJitterModifier( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mJitterModifier, gen ); } // drive the modifier from a generator
+        void SetJitterModifier( float value ) { mJitterModifier = value; } // or fix it to a constant
+        void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
+        // Defaults: jitter modifier constant 1.0f, distance function EuclideanSquared.
+    protected:
+        HybridSource mJitterModifier = 1.0f;
+        DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Cellular> : MetadataT<Generator> // Metadata for the Cellular base; no CreateNode is declared at this level.
+    {
+        MetadataT() // appends the "Coherent Noise" group, then registers the jitter modifier and distance function settings
+        {
+            groups.push_back( "Coherent Noise" );
+            this->AddHybridSource( "Jitter Modifier", 1.0f, &Cellular::SetJitterModifier, &Cellular::SetJitterModifier );
+            this->AddVariableEnum( "Distance Function", DistanceFunction::EuclideanSquared, &Cellular::SetDistanceFunction, kDistanceFunction_Strings ); // kDistanceFunction_Strings is defined outside this header
+        }
+    };
+#endif
+
+    class CellularValue : public virtual Cellular // Cellular node with a configurable value index.
+    {
+    public:        const Metadata& GetMetadata() const override;
+
+        static const int kMaxDistanceCount = 4; // mValueIndex is kept strictly below this
+        // Clamps the requested index into [0, kMaxDistanceCount - 1] before storing it.
+        void SetValueIndex( int value ) { mValueIndex = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
+
+    protected:
+        int mValueIndex = 0;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<CellularValue> : MetadataT<Cellular> // Metadata specialisation for the CellularValue node.
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override; // declaration only; the definition is not in this header
+        // Registers "Value Index" with default 0 and the bounds 0 and kMaxDistanceCount - 1, the same range SetValueIndex clamps to.
+        MetadataT()
+        {
+            this->AddVariable( "Value Index", 0, &CellularValue::SetValueIndex, 0, CellularValue::kMaxDistanceCount - 1 );
+        }
+    };
+#endif
+
+    class CellularDistance : public virtual Cellular // Cellular node configured by two distance indices and a return type.
+    {
+    public:        const Metadata& GetMetadata() const override;
+        // Output mode selector; how each mode combines the two distance indices is decided by the implementation, not in this header.
+        enum class ReturnType
+        {
+            Index0,
+            Index0Add1,
+            Index0Sub1,
+            Index0Mul1,
+            Index0Div1
+        };
+
+        static const int kMaxDistanceCount = 4; // both distance indices are kept strictly below this
+        // Both index setters clamp their argument into [0, kMaxDistanceCount - 1] before storing it.
+        void SetDistanceIndex0( int value ) { mDistanceIndex0 = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
+        void SetDistanceIndex1( int value ) { mDistanceIndex1 = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
+        void SetReturnType( ReturnType value ) { mReturnType = value; }
+
+    protected:
+        ReturnType mReturnType = ReturnType::Index0;
+        int mDistanceIndex0 = 0;
+        int mDistanceIndex1 = 1;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<CellularDistance> : MetadataT<Cellular> // Metadata specialisation for the CellularDistance node.
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override; // declaration only; the definition is not in this header
+        // Defaults (0, 1, Index0) match the member initialisers in CellularDistance; index bounds match the setters' clamp range.
+        MetadataT()
+        {
+            this->AddVariable( "Distance Index 0", 0, &CellularDistance::SetDistanceIndex0, 0, CellularDistance::kMaxDistanceCount - 1 );
+            this->AddVariable( "Distance Index 1", 1, &CellularDistance::SetDistanceIndex1, 0, CellularDistance::kMaxDistanceCount - 1 );
+            this->AddVariableEnum( "Return Type", CellularDistance::ReturnType::Index0, &CellularDistance::SetReturnType, "Index0", "Index0Add1", "Index0Sub1", "Index0Mul1", "Index0Div1" ); // names listed in enum declaration order
+        }
+    };
+#endif
+
+    class CellularLookup : public virtual Cellular // Cellular node with an attached lookup generator and a lookup frequency.
+    {
+    public:        const Metadata& GetMetadata() const override;
+        // How the lookup generator and frequency are used is decided by the implementation, not in this header.
+        void SetLookup( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLookup, gen ); }
+        void SetLookupFrequency( float freq ) { mLookupFreq = freq; } // stored as given; no validation here
+
+    protected:
+        GeneratorSource mLookup;
+        float mLookupFreq = 0.1f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<CellularLookup> : MetadataT<Cellular> // Metadata specialisation for the CellularLookup node.
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override; // declaration only; the definition is not in this header
+        // Registers the "Lookup" generator source and the "Lookup Frequency" variable with their setters.
+        MetadataT()
+        {
+            this->AddGeneratorSource( "Lookup", &CellularLookup::SetLookup );
+            this->AddVariable( "Lookup Frequency", 0.1f, &CellularLookup::SetLookupFrequency ); // 0.1f matches the mLookupFreq initialiser
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index 472b17e0..39b1af8e 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -1,660 +1,651 @@
-#include "FastSIMD/InlInclude.h"
-
-#include <cfloat>
-#include <array>
-
-#include "Cellular.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FS_T<FastNoise::Cellular, FS> : public virtual FastNoise::Cellular, public FS_T<FastNoise::Generator, FS>
-{
-protected:
-    const float kJitter2D = 0.437016f;
-    const float kJitter3D = 0.396144f;
-    const float kJitter4D = 0.366025f;
-    const float kJitterIdx23 = 0.190983f;
-};
-
-template<typename FS>
-class FS_T<FastNoise::CellularValue, FS> : public virtual FastNoise::CellularValue, public FS_T<FastNoise::Cellular, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
-        std::array<float32v, kMaxDistanceCount> value;
-        std::array<float32v, kMaxDistanceCount> distance;
-        
-        value.fill( float32v( INFINITY ) );
-        distance.fill( float32v( INFINITY ) );
-
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
-                float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-
-                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
-                xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                yd = FS_FMulAdd_f32( yd, invMag, ycf );
-
-                float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
-                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
-
-                for( int i = 0; ; i++ )
-                {
-                    mask32v closer = newDistance < distance[i];
-
-                    float32v localDistance = distance[i];
-                    float32v localCellValue = value[i];
-
-                    distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
-                    value[i] = FS_Select_f32( closer, newCellValue, value[i] );
-
-                    if( i > mValueIndex )
-                    {
-                        break;
-                    }
-
-                    newDistance = FS_Select_f32( closer, localDistance, newDistance );
-                    newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
-                }
-
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-
-        return value[mValueIndex];
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
-        std::array<float32v, kMaxDistanceCount> value;
-        std::array<float32v, kMaxDistanceCount> distance;
-        
-        value.fill( float32v( INFINITY ) );
-        distance.fill( float32v( INFINITY ) );
-        
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-        
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
-    
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-    
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
-                    float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS_Converti32_f32( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS_Converti32_f32( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
-                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
-                
-                    float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
-                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
-                
-                    for( int i = 0; ; i++ )
-                    {
-                        mask32v closer = newDistance < distance[i];
-
-                        float32v localDistance = distance[i];
-                        float32v localCellValue = value[i];
-
-                        distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
-                        value[i] = FS_Select_f32( closer, newCellValue, value[i] );
-
-                        if( i > mValueIndex )
-                        {
-                            break;
-                        }
-
-                        newDistance = FS_Select_f32( closer, localDistance, newDistance );
-                        newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
-                    }
-            
-                    zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-    
-        return value[mValueIndex];
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z , float32v w ) const final
-    {
-        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
-        std::array<float32v, kMaxDistanceCount> value;
-        std::array<float32v, kMaxDistanceCount> distance;
-        
-        value.fill( float32v( INFINITY ) );
-        distance.fill( float32v( INFINITY ) );
-        
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
-        
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
-        float32v wcfBase = FS_Converti32_f32( wcBase ) - w;
-    
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-        wcBase *= int32v( FnPrimes::W );
-    
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    float32v wcf = wcfBase;
-                    int32v wc = wcBase;
-                    for( int wi = 0; wi < 3; wi++ )
-                    {
-                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
-                        float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
-                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
-                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
-
-                        float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
-                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
-
-                        for( int i = 0; ; i++ )
-                        {
-                            mask32v closer = newDistance < distance[i];
-
-                            float32v localDistance = distance[i];
-                            float32v localCellValue = value[i];
-
-                            distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
-                            value[i] = FS_Select_f32( closer, newCellValue, value[i] );
-
-                            if( i > mValueIndex )
-                            {
-                                break;
-                            }
-
-                            newDistance = FS_Select_f32( closer, localDistance, newDistance );
-                            newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
-                        }
-
-                        wcf += float32v( 1 );
-                        wc += int32v( FnPrimes::W );
-                    }
-                    zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-    
-        return value[mValueIndex];
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::CellularDistance, FS> : public virtual FastNoise::CellularDistance, public FS_T<FastNoise::Cellular, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
-
-        std::array<float32v, kMaxDistanceCount> distance;
-        distance.fill( float32v( INFINITY ) );
-
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for ( int yi = 0; yi < 3; yi++ )
-            {
-                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
-                float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-
-                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
-                xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                yd = FS_FMulAdd_f32( yd, invMag, ycf );
-
-                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
-
-                for( int i = kMaxDistanceCount - 1; i > 0; i-- )
-                {
-                    distance[i] = FS_Max_f32( FS_Min_f32( distance[i], newDistance ), distance[i - 1] );
-                }
-
-                distance[0] = FS_Min_f32( distance[0], newDistance );
-
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-
-        return GetReturn( distance );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
-
-        std::array<float32v, kMaxDistanceCount> distance;
-        distance.fill( float32v( INFINITY ) );
-
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
-
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
-                    float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS_Converti32_f32( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS_Converti32_f32( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
-                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
-
-                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
-
-                    for( int i = kMaxDistanceCount - 1; i > 0; i-- )
-                    {
-                        distance[i] = FS_Max_f32( FS_Min_f32( distance[i], newDistance ), distance[i - 1] );
-                    }
-
-                    distance[0] = FS_Min_f32( distance[0], newDistance );
-
-                    zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-
-        return GetReturn( distance );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
-
-        std::array<float32v, kMaxDistanceCount> distance;
-        distance.fill( float32v( INFINITY ) );
-
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
-
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
-        float32v wcfBase = FS_Converti32_f32( wcBase ) - w;
-
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-        wcBase *= int32v( FnPrimes::W );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    float32v wcf = wcfBase;
-                    int32v wc = wcBase;
-                    for( int wi = 0; wi < 3; wi++ )
-                    {
-                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
-                        float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
-                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
-                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
-
-                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
-
-                        for( int i = kMaxDistanceCount - 1; i > 0; i-- )
-                        {
-                            distance[i] = FS_Max_f32( FS_Min_f32( distance[i], newDistance ), distance[i - 1] );
-                        }
-
-                        distance[0] = FS_Min_f32( distance[0], newDistance );
-
-                        wcf += float32v( 1 );
-                        wc += int32v( FnPrimes::W );
-                    }
-                    zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-
-        return GetReturn( distance );
-    }
-
-    FS_INLINE float32v GetReturn( std::array<float32v, kMaxDistanceCount>& distance ) const
-    {
-        if( mDistanceFunction == FastNoise::DistanceFunction::Euclidean )
-        {
-            distance[mDistanceIndex0] *= FS_InvSqrt_f32( distance[mDistanceIndex0] );
-            distance[mDistanceIndex1] *= FS_InvSqrt_f32( distance[mDistanceIndex1] );
-        }
-
-        switch( mReturnType )
-        {
-        default:
-        case ReturnType::Index0:
-        {
-            return distance[mDistanceIndex0];
-        }
-        case ReturnType::Index0Add1:
-        {
-            return distance[mDistanceIndex0] + distance[mDistanceIndex1];
-        }
-        case ReturnType::Index0Sub1:
-        {
-            return distance[mDistanceIndex0] - distance[mDistanceIndex1];
-        }
-        case ReturnType::Index0Mul1:
-        {
-            return distance[mDistanceIndex0] * distance[mDistanceIndex1];
-        }
-        case ReturnType::Index0Div1:
-        {
-            return distance[mDistanceIndex0] * FS_Reciprocal_f32( distance[mDistanceIndex1] );
-        }
-        }
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::CellularLookup, FS> : public virtual FastNoise::CellularLookup, public FS_T<FastNoise::Cellular, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
-        float32v distance( FLT_MAX );
-        float32v cellX, cellY;
-
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
-                float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-
-                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
-                xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                yd = FS_FMulAdd_f32( yd, invMag, ycf );
-
-                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
-
-                mask32v closer = newDistance < distance;
-                distance = FS_Min_f32( newDistance, distance );
-
-                cellX = FS_Select_f32( closer, xd + x, cellX );
-                cellY = FS_Select_f32( closer, yd + y, cellY );
-
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
-        float32v distance( FLT_MAX );
-        float32v cellX, cellY, cellZ;
-
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
-
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
-                    float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS_Converti32_f32( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS_Converti32_f32( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
-                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
-
-                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
-
-                    mask32v closer = newDistance < distance;
-                    distance = FS_Min_f32( newDistance, distance );
-
-                    cellX = FS_Select_f32( closer, xd + x, cellX );
-                    cellY = FS_Select_f32( closer, yd + y, cellY );
-                    cellZ = FS_Select_f32( closer, zd + z, cellZ );
-
-                    zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
-        float32v distance( FLT_MAX );
-        float32v cellX, cellY, cellZ, cellW;
-
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
-
-        float32v xcf = FS_Converti32_f32( xc ) - x;
-        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
-        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
-        float32v wcfBase = FS_Converti32_f32( wcBase ) - w;
-
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-        wcBase *= int32v( FnPrimes::W );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    float32v wcf = wcfBase;
-                    int32v wc = wcBase;
-                    for( int wi = 0; wi < 3; wi++ )
-                    {
-                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
-                        float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
-                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
-                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
-
-                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
-
-                        mask32v closer = newDistance < distance;
-                        distance = FS_Min_f32( newDistance, distance );
-
-                        cellX = FS_Select_f32( closer, xd + x, cellX );
-                        cellY = FS_Select_f32( closer, yd + y, cellY );
-                        cellZ = FS_Select_f32( closer, zd + z, cellZ );
-                        cellW = FS_Select_f32( closer, wd + w, cellW );
-
-                        wcf += float32v( 1 );
-                        wc += int32v( FnPrimes::W );
-                    }
-                    zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
-        }
-
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ), cellW * float32v( mLookupFreq ) );
-    }
-};
+#include "FastSIMD/InlInclude.h"
+
+#include <cfloat>
+#include <array>
+
+#include "Cellular.h"
+#include "Utils.inl"
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Cellular, FS> : public virtual FastNoise::Cellular, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{ // Common base of the cellular dispatch classes in this file; it only holds the jitter constants they read via this->.
+protected:
+    const float kJitter2D = 0.437016f; // multiplied with the jitter modifier source value in the 2D Gen() overloads
+    const float kJitter3D = 0.396144f; // multiplied with the jitter modifier source value in the 3D Gen() overloads
+    const float kJitter4D = 0.366025f; // multiplied with the jitter modifier source value in the 4D Gen() overloads
+    const float kJitterIdx23 = 0.190983f; // NOTE(review): no use is visible in this part of the file - confirm where it is read
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual FastNoise::CellularValue, public FastSIMD::DispatchClass<FastNoise::Cellular, FS>
+{ // Cellular value noise: each Gen() visits 3 cells per axis and returns the hash-derived value of the cell point ranked mValueIndex by distance.
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    {
+        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
+        std::array<float32v, kMaxDistanceCount> value; // value[i] is the cell value paired with distance[i]
+        std::array<float32v, kMaxDistanceCount> distance; // candidate distances, closest first
+        value.fill( float32v( INFINITY ) );
+        distance.fill( float32v( INFINITY ) );
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x;
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        // Pre-multiply the cell coordinates by the hash primes so that stepping to the next cell is a single add.
+        xc *= int32v( FnPrimes::X );
+        ycBase *= int32v( FnPrimes::Y );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase;
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
+                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+                // Rescale the hash-derived offset to length `jitter`, then add the cell position relative to the sample.
+                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
+                xd = FS_FMulAdd_f32( xd, invMag, xcf );
+                yd = FS_FMulAdd_f32( yd, invMag, ycf );
+
+                float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
+                // Per-lane insertion: a closer candidate takes slot i and the displaced entry is carried on to slot i + 1.
+                for( int i = 0; ; i++ )
+                {
+                    mask32v closer = newDistance < distance[i];
+
+                    float32v localDistance = distance[i];
+                    float32v localCellValue = value[i];
+
+                    distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
+                    value[i] = FS_Select_f32( closer, newCellValue, value[i] );
+                    // Slot mValueIndex is the last one the return reads; going one further would index past the arrays when mValueIndex == kMaxDistanceCount - 1.
+                    if( i >= mValueIndex )
+                    {
+                        break;
+                    }
+
+                    newDistance = FS_Select_f32( closer, localDistance, newDistance );
+                    newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
+                }
+
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return value[mValueIndex];
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    {
+        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
+        std::array<float32v, kMaxDistanceCount> value;
+        std::array<float32v, kMaxDistanceCount> distance;
+
+        value.fill( float32v( INFINITY ) );
+        distance.fill( float32v( INFINITY ) );
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x;
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+
+        xc *= int32v( FnPrimes::X );
+        ycBase *= int32v( FnPrimes::Y );
+        zcBase *= int32v( FnPrimes::Z );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase;
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase;
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
+                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v yd = FS::Convert<float>( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v zd = FS::Convert<float>( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+
+                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
+                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
+                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
+
+                    float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
+
+                    for( int i = 0; ; i++ )
+                    {
+                        mask32v closer = newDistance < distance[i];
+
+                        float32v localDistance = distance[i];
+                        float32v localCellValue = value[i];
+
+                        distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
+                        value[i] = FS_Select_f32( closer, newCellValue, value[i] );
+                        // Same stopping rule as the 2D overload: slot mValueIndex + 1 is never needed and may not exist.
+                        if( i >= mValueIndex )
+                        {
+                            break;
+                        }
+
+                        newDistance = FS_Select_f32( closer, localDistance, newDistance );
+                        newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
+                    }
+
+                    zcf += float32v( 1 );
+                    zc += int32v( FnPrimes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return value[mValueIndex];
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    {
+        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
+        std::array<float32v, kMaxDistanceCount> value;
+        std::array<float32v, kMaxDistanceCount> distance;
+
+        value.fill( float32v( INFINITY ) );
+        distance.fill( float32v( INFINITY ) );
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x;
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
+
+        xc *= int32v( FnPrimes::X );
+        ycBase *= int32v( FnPrimes::Y );
+        zcBase *= int32v( FnPrimes::Z );
+        wcBase *= int32v( FnPrimes::W );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase;
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase;
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    float32v wcf = wcfBase;
+                    int32v wc = wcBase;
+                    for( int wi = 0; wi < 3; wi++ )
+                    {
+                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
+                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+
+                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
+                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
+                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
+                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
+
+                        float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+
+                        for( int i = 0; ; i++ )
+                        {
+                            mask32v closer = newDistance < distance[i];
+
+                            float32v localDistance = distance[i];
+                            float32v localCellValue = value[i];
+
+                            distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
+                            value[i] = FS_Select_f32( closer, newCellValue, value[i] );
+                            // Same stopping rule as the 2D overload: slot mValueIndex + 1 is never needed and may not exist.
+                            if( i >= mValueIndex )
+                            {
+                                break;
+                            }
+
+                            newDistance = FS_Select_f32( closer, localDistance, newDistance );
+                            newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
+                        }
+
+                        wcf += float32v( 1 );
+                        wc += int32v( FnPrimes::W );
+                    }
+                    zcf += float32v( 1 );
+                    zc += int32v( FnPrimes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return value[mValueIndex];
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual FastNoise::CellularDistance, public FastSIMD::DispatchClass<FastNoise::Cellular, FS>
+{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    {
+        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
+
+        std::array<float32v, kMaxDistanceCount> distance;
+        distance.fill( float32v( INFINITY ) );
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x;
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+
+        xc *= int32v( FnPrimes::X );
+        ycBase *= int32v( FnPrimes::Y );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase;
+            int32v yc = ycBase;
+            for ( int yi = 0; yi < 3; yi++ )
+            {
+                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
+                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+
+                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
+                xd = FS_FMulAdd_f32( xd, invMag, xcf );
+                yd = FS_FMulAdd_f32( yd, invMag, ycf );
+
+                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
+
+                for( int i = kMaxDistanceCount - 1; i > 0; i-- )
+                {
+                    distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] );
+                }
+
+                distance[0] = FS::Min( distance[0], newDistance );
+
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return GetReturn( distance );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final // 3D overload: visits 3x3x3 cells around the sample, keeps the nearest distances sorted, returns GetReturn( distance )
+    {
+        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z ); // kJitter3D scaled by the mJitterModifier source value
+
+        std::array<float32v, kMaxDistanceCount> distance;
+        distance.fill( float32v( INFINITY ) ); // every slot starts at +inf so the Min/Max insertion below replaces it with real candidates
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 ); // first cell on each axis is one below the integer-converted coordinate; the loops then step +1 three times
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // cell coordinate minus sample coordinate
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+
+        xc *= int32v( FnPrimes::X ); // pre-scale by the axis prime so advancing one cell is a single add of that prime
+        ycBase *= int32v( FnPrimes::Y );
+        zcBase *= int32v( FnPrimes::Z );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // restart the y walk for every x step
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase; // restart the z walk for every y step
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc ); // hash of the seed and the prime-scaled cell coordinates
+                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // hash bits 0-9 as a value in 0..0x3ff, recentred by subtracting 0x3ff / 2
+                    float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // hash bits 10-19
+                    float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // hash bits 20-29
+
+                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) ); // jitter times FS_InvSqrt_f32 of the squared length of (xd, yd, zd)
+                    xd = FS_FMulAdd_f32( xd, invMag, xcf ); // hashed direction scaled by invMag plus the cell offset (assumes FS_FMulAdd_f32( a, b, c ) is a * b + c - TODO confirm)
+                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
+
+                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd ); // metric is chosen by mDistanceFunction
+
+                    for( int i = kMaxDistanceCount - 1; i > 0; i-- ) // sorted insert, highest slot first so distance[i - 1] still holds its pre-insert value
+                    {
+                        distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] ); // keep distance[i], or take newDistance, or shift distance[i - 1] up, whichever keeps the array ascending
+                    }
+
+                    distance[0] = FS::Min( distance[0], newDistance ); // slot 0 just tracks the minimum
+
+                    zcf += float32v( 1 );
+                    zc += int32v( FnPrimes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return GetReturn( distance ); // combine the selected slots according to mReturnType
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final // 4D overload: visits 3x3x3x3 cells around the sample, keeps the nearest distances sorted, returns GetReturn( distance )
+    {
+        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w ); // kJitter4D scaled by the mJitterModifier source value
+
+        std::array<float32v, kMaxDistanceCount> distance;
+        distance.fill( float32v( INFINITY ) ); // every slot starts at +inf so the Min/Max insertion below replaces it with real candidates
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 ); // first cell on each axis is one below the integer-converted coordinate; the loops then step +1 three times
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // cell coordinate minus sample coordinate
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
+
+        xc *= int32v( FnPrimes::X ); // pre-scale by the axis prime so advancing one cell is a single add of that prime
+        ycBase *= int32v( FnPrimes::Y );
+        zcBase *= int32v( FnPrimes::Z );
+        wcBase *= int32v( FnPrimes::W );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // restart the y walk for every x step
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase; // restart the z walk for every y step
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    float32v wcf = wcfBase; // restart the w walk for every z step
+                    int32v wc = wcBase;
+                    for( int wi = 0; wi < 3; wi++ )
+                    {
+                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc ); // hash of the seed and the prime-scaled cell coordinates
+                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // hash bits 0-7 as a value in 0..0xff, recentred by subtracting 0xff / 2
+                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // hash bits 8-15
+                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // hash bits 16-23
+                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // hash bits 24-31; the mask discards whatever the shift brings in from above
+
+                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) ); // jitter times FS_InvSqrt_f32 of the squared length of (xd, yd, zd, wd)
+                        xd = FS_FMulAdd_f32( xd, invMag, xcf ); // hashed direction scaled by invMag plus the cell offset (assumes FS_FMulAdd_f32( a, b, c ) is a * b + c - TODO confirm)
+                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
+                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
+
+                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd ); // metric is chosen by mDistanceFunction
+
+                        for( int i = kMaxDistanceCount - 1; i > 0; i-- ) // sorted insert, highest slot first so distance[i - 1] still holds its pre-insert value
+                        {
+                            distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] ); // keep distance[i], or take newDistance, or shift distance[i - 1] up, whichever keeps the array ascending
+                        }
+
+                        distance[0] = FS::Min( distance[0], newDistance ); // slot 0 just tracks the minimum
+
+                        wcf += float32v( 1 );
+                        wc += int32v( FnPrimes::W );
+                    }
+                    zcf += float32v( 1 );
+                    zc += int32v( FnPrimes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return GetReturn( distance ); // combine the selected slots according to mReturnType
+    }
+
+    FS_FORCEINLINE float32v GetReturn( std::array<float32v, kMaxDistanceCount>& distance ) const // Combines distance[mDistanceIndex0] and distance[mDistanceIndex1] as selected by mReturnType; may rewrite those two slots in place
+    {
+        if( mDistanceFunction == FastNoise::DistanceFunction::Euclidean ) // only the Euclidean metric gets the d * FS_InvSqrt_f32( d ) conversion (presumably its values arrive squared - TODO confirm against CalcDistance)
+        {
+            distance[mDistanceIndex0] *= FS_InvSqrt_f32( distance[mDistanceIndex0] );
+            if( mDistanceIndex1 != mDistanceIndex0 ) distance[mDistanceIndex1] *= FS_InvSqrt_f32( distance[mDistanceIndex1] ); // guard: when both indices name the same slot a second in-place conversion would turn sqrt(d) into d^(1/4)
+        }
+
+        switch( mReturnType )
+        {
+        default: // any unlisted value behaves like Index0
+        case ReturnType::Index0:
+        {
+            return distance[mDistanceIndex0];
+        }
+        case ReturnType::Index0Add1:
+        {
+            return distance[mDistanceIndex0] + distance[mDistanceIndex1];
+        }
+        case ReturnType::Index0Sub1:
+        {
+            return distance[mDistanceIndex0] - distance[mDistanceIndex1];
+        }
+        case ReturnType::Index0Mul1:
+        {
+            return distance[mDistanceIndex0] * distance[mDistanceIndex1];
+        }
+        case ReturnType::Index0Div1:
+        {
+            return distance[mDistanceIndex0] * FS_Reciprocal_f32( distance[mDistanceIndex1] ); // division written as a multiply by FS_Reciprocal_f32
+        }
+        }
+    }
+};
+
+template<typename FS> // CellularLookup dispatch: finds the nearest jittered cell point, then samples mLookup at that point's position
+class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual FastNoise::CellularLookup, public FastSIMD::DispatchClass<FastNoise::Cellular, FS>
+{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final // 2D overload: 3x3 cell scan
+    {
+        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y ); // kJitter2D scaled by the mJitterModifier source value
+        float32v distance( FLT_MAX ); // best distance so far; starts at the largest finite float
+        float32v cellX, cellY; // position of the closest point so far; no initial value, written through FS_Select_f32 below
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 ); // first cell on each axis is one below the integer-converted coordinate; the loops then step +1 three times
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // cell coordinate minus sample coordinate
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+
+        xc *= int32v( FnPrimes::X ); // pre-scale by the axis prime so advancing one cell is a single add of that prime
+        ycBase *= int32v( FnPrimes::Y );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // restart the y walk for every x step
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc ); // hash of the seed and the prime-scaled cell coordinates
+                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f ); // low 16 hash bits, recentred by subtracting 0xffff / 2
+                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f ); // high 16 hash bits
+
+                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) ); // jitter times FS_InvSqrt_f32 of the squared length of (xd, yd)
+                xd = FS_FMulAdd_f32( xd, invMag, xcf ); // hashed direction scaled by invMag plus the cell offset (assumes FS_FMulAdd_f32( a, b, c ) is a * b + c - TODO confirm)
+                yd = FS_FMulAdd_f32( yd, invMag, ycf );
+
+                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd ); // metric is chosen by mDistanceFunction
+
+                mask32v closer = newDistance < distance; // lanes where this candidate is strictly closer than the best so far
+                distance = FS::Min( newDistance, distance );
+
+                cellX = FS_Select_f32( closer, xd + x, cellX ); // xd is relative to the sample point, so xd + x is the candidate's own coordinate
+                cellY = FS_Select_f32( closer, yd + y, cellY );
+
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ) ); // sample mLookup at the closest point scaled by mLookupFreq; the seed passed on is seed - (-1)
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final // 3D overload: 3x3x3 cell scan
+    {
+        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z ); // kJitter3D scaled by the mJitterModifier source value
+        float32v distance( FLT_MAX ); // best distance so far; starts at the largest finite float
+        float32v cellX, cellY, cellZ; // position of the closest point so far; no initial value, written through FS_Select_f32 below
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 ); // first cell on each axis is one below the integer-converted coordinate
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // cell coordinate minus sample coordinate
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+
+        xc *= int32v( FnPrimes::X ); // pre-scale by the axis prime so advancing one cell is a single add of that prime
+        ycBase *= int32v( FnPrimes::Y );
+        zcBase *= int32v( FnPrimes::Z );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // restart the y walk for every x step
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase; // restart the z walk for every y step
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc ); // hash of the seed and the prime-scaled cell coordinates
+                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // hash bits 0-9, recentred by subtracting 0x3ff / 2
+                    float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // hash bits 10-19
+                    float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // hash bits 20-29
+
+                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) ); // jitter times FS_InvSqrt_f32 of the squared length of (xd, yd, zd)
+                    xd = FS_FMulAdd_f32( xd, invMag, xcf ); // hashed direction scaled by invMag plus the cell offset
+                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
+
+                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd ); // metric is chosen by mDistanceFunction
+
+                    mask32v closer = newDistance < distance; // lanes where this candidate is strictly closer than the best so far
+                    distance = FS::Min( newDistance, distance );
+
+                    cellX = FS_Select_f32( closer, xd + x, cellX ); // xd is relative to the sample point, so xd + x is the candidate's own coordinate
+                    cellY = FS_Select_f32( closer, yd + y, cellY );
+                    cellZ = FS_Select_f32( closer, zd + z, cellZ );
+
+                    zcf += float32v( 1 );
+                    zc += int32v( FnPrimes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ) ); // sample mLookup at the closest point scaled by mLookupFreq; the seed passed on is seed - (-1)
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final // 4D overload: 3x3x3x3 cell scan
+    {
+        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w ); // kJitter4D scaled by the mJitterModifier source value
+        float32v distance( FLT_MAX ); // best distance so far; starts at the largest finite float
+        float32v cellX, cellY, cellZ, cellW; // position of the closest point so far; no initial value, written through FS_Select_f32 below
+
+        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 ); // first cell on each axis is one below the integer-converted coordinate
+        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // cell coordinate minus sample coordinate
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
+
+        xc *= int32v( FnPrimes::X ); // pre-scale by the axis prime so advancing one cell is a single add of that prime
+        ycBase *= int32v( FnPrimes::Y );
+        zcBase *= int32v( FnPrimes::Z );
+        wcBase *= int32v( FnPrimes::W );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // restart the y walk for every x step
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase; // restart the z walk for every y step
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    float32v wcf = wcfBase; // restart the w walk for every z step
+                    int32v wc = wcBase;
+                    for( int wi = 0; wi < 3; wi++ )
+                    {
+                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc ); // hash of the seed and the prime-scaled cell coordinates
+                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // hash bits 0-7, recentred by subtracting 0xff / 2
+                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // hash bits 8-15
+                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // hash bits 16-23
+                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // hash bits 24-31; the mask discards whatever the shift brings in from above
+
+                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) ); // jitter times FS_InvSqrt_f32 of the squared length of (xd, yd, zd, wd)
+                        xd = FS_FMulAdd_f32( xd, invMag, xcf ); // hashed direction scaled by invMag plus the cell offset
+                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
+                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
+
+                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd ); // metric is chosen by mDistanceFunction
+
+                        mask32v closer = newDistance < distance; // lanes where this candidate is strictly closer than the best so far
+                        distance = FS::Min( newDistance, distance );
+
+                        cellX = FS_Select_f32( closer, xd + x, cellX ); // xd is relative to the sample point, so xd + x is the candidate's own coordinate
+                        cellY = FS_Select_f32( closer, yd + y, cellY );
+                        cellZ = FS_Select_f32( closer, zd + z, cellZ );
+                        cellW = FS_Select_f32( closer, wd + w, cellW );
+
+                        wcf += float32v( 1 );
+                        wc += int32v( FnPrimes::W );
+                    }
+                    zcf += float32v( 1 );
+                    zc += int32v( FnPrimes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( FnPrimes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( FnPrimes::X );
+        }
+
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ), cellW * float32v( mLookupFreq ) ); // sample mLookup at the closest point scaled by mLookupFreq; the seed passed on is seed - (-1)
+    }
+};
diff --git a/include/FastNoise/Generators/DomainWarp.h b/include/FastNoise/Generators/DomainWarp.h
index 42b529c3..1bbfe095 100644
--- a/include/FastNoise/Generators/DomainWarp.h
+++ b/include/FastNoise/Generators/DomainWarp.h
@@ -1,48 +1,46 @@
-#pragma once
-#include "Generator.h"
-
-namespace FastNoise
-{
-    class DomainWarp : public virtual Generator
-    {
-    public:
-        void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
-        void SetWarpAmplitude( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWarpAmplitude, gen ); }
-        void SetWarpAmplitude( float value ) { mWarpAmplitude = value; } 
-        void SetWarpFrequency( float value ) { mWarpFrequency = value; }
-
-    protected:
-        GeneratorSource mSource;
-        HybridSource mWarpAmplitude = 1.0f;
-        float mWarpFrequency = 0.5f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarp> : MetadataT<Generator>
-    {
-        MetadataT()
-        {
-            groups.push_back( "Domain Warp" );
-            this->AddGeneratorSource( "Source", &DomainWarp::SetSource );
-            this->AddHybridSource( "Warp Amplitude", 1.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude );
-            this->AddVariable( "Warp Frequency", 0.5f, &DomainWarp::SetWarpFrequency );
-        }
-    };
-#endif
-
-    class DomainWarpGradient : public virtual DomainWarp
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarpGradient> : MetadataT<DomainWarp>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+namespace FastNoise
+{
+    class DomainWarp : public virtual Generator // Holds the settings shared by domain-warp nodes: a source generator, a warp amplitude and a warp frequency
+    {
+    public:
+        void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); } // stores gen in mSource
+        void SetWarpAmplitude( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWarpAmplitude, gen ); } // amplitude supplied by another generator
+        void SetWarpAmplitude( float value ) { mWarpAmplitude = value; } // amplitude supplied as a constant
+        void SetWarpFrequency( float value ) { mWarpFrequency = value; }
+
+    protected:
+        GeneratorSource mSource;
+        HybridSource mWarpAmplitude = 1.0f; // settable from a generator or a float (see the two SetWarpAmplitude overloads); starts at 1.0f
+        float mWarpFrequency = 0.5f; // starts at 0.5f
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<> // metadata specialisation for DomainWarp, extending the Generator metadata
+    struct MetadataT<DomainWarp> : MetadataT<Generator>
+    {
+        MetadataT()
+        {
+            groups.push_back( "Domain Warp" ); // appended to the groups list of this metadata object
+            this->AddGeneratorSource( "Source", &DomainWarp::SetSource );
+            this->AddHybridSource( "Warp Amplitude", 1.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude ); // both SetWarpAmplitude overloads are passed; 1.0f equals the mWarpAmplitude initialiser
+            this->AddVariable( "Warp Frequency", 0.5f, &DomainWarp::SetWarpFrequency ); // 0.5f equals the mWarpFrequency initialiser
+        }
+    };
+#endif
+
+    class DomainWarpGradient : public virtual DomainWarp // DomainWarp node type; the only member declared here is the GetMetadata() override
+    {
+    public:        const Metadata& GetMetadata() const override; // declaration only, no definition in this header
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<> // metadata specialisation for DomainWarpGradient; inherits everything registered by MetadataT<DomainWarp>
+    struct MetadataT<DomainWarpGradient> : MetadataT<DomainWarp>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override; // declaration only; takes a FastSIMD::FeatureSet and returns a SmartNode<>
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/DomainWarp.inl b/include/FastNoise/Generators/DomainWarp.inl
index cb006bd0..887ef655 100644
--- a/include/FastNoise/Generators/DomainWarp.inl
+++ b/include/FastNoise/Generators/DomainWarp.inl
@@ -1,205 +1,200 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "DomainWarp.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FS_T<FastNoise::DomainWarp, FS> : public virtual FastNoise::DomainWarp, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        Warp( seed, this->GetSourceValue( mWarpAmplitude, seed, pos... ), (pos * float32v( mWarpFrequency ))..., pos... );
-
-        return this->GetSourceValue( mSource, seed, pos...);
-    }
-
-public:
-    float GetWarpFrequency() const { return mWarpFrequency; }
-    const FastNoise::HybridSource& GetWarpAmplitude() const { return mWarpAmplitude; }
-    const FastNoise::GeneratorSource& GetWarpSource() const { return mSource; }
-
-    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const = 0;
-    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const = 0;
-    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const = 0;
-};
-
-template<typename FS>
-class FS_T<FastNoise::DomainWarpGradient, FS> : public virtual FastNoise::DomainWarpGradient, public FS_T<FastNoise::DomainWarp, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-
-public:
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-
-    #define GRADIENT_COORD( _x, _y )\
-        int32v hash##_x##_y = FnUtils::HashPrimesHB(seed, x##_x, y##_y );\
-        float32v x##_x##_y = FS_Converti32_f32( hash##_x##_y & int32v( 0xffff ) );\
-        float32v y##_x##_y = FS_Converti32_f32( (hash##_x##_y >> 16) & int32v( 0xffff ) );
-
-        GRADIENT_COORD( 0, 0 );
-        GRADIENT_COORD( 1, 0 );
-        GRADIENT_COORD( 0, 1 );
-        GRADIENT_COORD( 1, 1 );
-
-    #undef GRADIENT_COORD
-
-        float32v normalise = float32v( 1.0f / (0xffff / 2.0f) );
-
-        float32v xWarp = (FnUtils::Lerp( FnUtils::Lerp( x00, x10, xs ), FnUtils::Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
-        float32v yWarp = (FnUtils::Lerp( FnUtils::Lerp( y00, y10, xs ), FnUtils::Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
-
-        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut );
-        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
-
-        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, yWarp * yWarp );
-
-        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
-    }
-            
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
-
-    #define GRADIENT_COORD( _x, _y, _z )\
-        int32v hash##_x##_y##_z = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z );\
-        float32v x##_x##_y##_z = FS_Converti32_f32( hash##_x##_y##_z & int32v( 0x3ff ) );\
-        float32v y##_x##_y##_z = FS_Converti32_f32( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
-        float32v z##_x##_y##_z = FS_Converti32_f32( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
-
-        GRADIENT_COORD( 0, 0, 0 );
-        GRADIENT_COORD( 1, 0, 0 );
-        GRADIENT_COORD( 0, 1, 0 );
-        GRADIENT_COORD( 1, 1, 0 );
-        GRADIENT_COORD( 0, 0, 1 );
-        GRADIENT_COORD( 1, 0, 1 );
-        GRADIENT_COORD( 0, 1, 1 );
-        GRADIENT_COORD( 1, 1, 1 );
-
-    #undef GRADIENT_COORD
-
-        float32v x0z = FnUtils::Lerp( FnUtils::Lerp( x000, x100, xs ), FnUtils::Lerp( x010, x110, xs ), ys );
-        float32v y0z = FnUtils::Lerp( FnUtils::Lerp( y000, y100, xs ), FnUtils::Lerp( y010, y110, xs ), ys );
-        float32v z0z = FnUtils::Lerp( FnUtils::Lerp( z000, z100, xs ), FnUtils::Lerp( z010, z110, xs ), ys );
-                   
-        float32v x1z = FnUtils::Lerp( FnUtils::Lerp( x001, x101, xs ), FnUtils::Lerp( x011, x111, xs ), ys );
-        float32v y1z = FnUtils::Lerp( FnUtils::Lerp( y001, y101, xs ), FnUtils::Lerp( y011, y111, xs ), ys );
-        float32v z1z = FnUtils::Lerp( FnUtils::Lerp( z001, z101, xs ), FnUtils::Lerp( z011, z111, xs ), ys );
-
-        float32v normalise = float32v( 1.0f / (0x3ff / 2.0f) );
-
-        float32v xWarp = (FnUtils::Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v yWarp = (FnUtils::Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v zWarp = (FnUtils::Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-
-        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut );
-        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
-        zOut = FS_FMulAdd_f32( zWarp, warpAmp, zOut );
-
-        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, FS_FMulAdd_f32( yWarp, yWarp, zWarp * zWarp ) );
-
-        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
-    }
-            
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-        float32v ws = FS_Floor_f32( w );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-        int32v w1 = w0 + int32v( FnPrimes::W );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
-        ws = FnUtils::InterpHermite( w - ws );
-
-    #define GRADIENT_COORD( _x, _y, _z, _w )\
-        int32v hash##_x##_y##_z##_w = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
-        float32v x##_x##_y##_z##_w = FS_Converti32_f32( hash##_x##_y##_z##_w & int32v( 0xff ) );\
-        float32v y##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
-        float32v z##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
-        float32v w##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
-
-        GRADIENT_COORD( 0, 0, 0, 0 );
-        GRADIENT_COORD( 1, 0, 0, 0 );
-        GRADIENT_COORD( 0, 1, 0, 0 );
-        GRADIENT_COORD( 1, 1, 0, 0 );
-        GRADIENT_COORD( 0, 0, 1, 0 );
-        GRADIENT_COORD( 1, 0, 1, 0 );
-        GRADIENT_COORD( 0, 1, 1, 0 );
-        GRADIENT_COORD( 1, 1, 1, 0 );
-        GRADIENT_COORD( 0, 0, 0, 1 );
-        GRADIENT_COORD( 1, 0, 0, 1 );
-        GRADIENT_COORD( 0, 1, 0, 1 );
-        GRADIENT_COORD( 1, 1, 0, 1 );
-        GRADIENT_COORD( 0, 0, 1, 1 );
-        GRADIENT_COORD( 1, 0, 1, 1 );
-        GRADIENT_COORD( 0, 1, 1, 1 );
-        GRADIENT_COORD( 1, 1, 1, 1 );
-
-    #undef GRADIENT_COORD
-
-        float32v x0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0000, x1000, xs ), FnUtils::Lerp( x0100, x1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0010, x1010, xs ), FnUtils::Lerp( x0110, x1110, xs ), ys ), zs );
-        float32v y0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0000, y1000, xs ), FnUtils::Lerp( y0100, y1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0010, y1010, xs ), FnUtils::Lerp( y0110, y1110, xs ), ys ), zs );
-        float32v z0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0000, z1000, xs ), FnUtils::Lerp( z0100, z1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0010, z1010, xs ), FnUtils::Lerp( z0110, z1110, xs ), ys ), zs );
-        float32v w0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0000, w1000, xs ), FnUtils::Lerp( w0100, w1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0010, w1010, xs ), FnUtils::Lerp( w0110, w1110, xs ), ys ), zs );
-
-        float32v x1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0001, x1001, xs ), FnUtils::Lerp( x0101, x1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0011, x1011, xs ), FnUtils::Lerp( x0111, x1111, xs ), ys ), zs );
-        float32v y1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0001, y1001, xs ), FnUtils::Lerp( y0101, y1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0011, y1011, xs ), FnUtils::Lerp( y0111, y1111, xs ), ys ), zs );
-        float32v z1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0001, z1001, xs ), FnUtils::Lerp( z0101, z1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0011, z1011, xs ), FnUtils::Lerp( z0111, z1111, xs ), ys ), zs );
-        float32v w1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0001, w1001, xs ), FnUtils::Lerp( w0101, w1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0011, w1011, xs ), FnUtils::Lerp( w0111, w1111, xs ), ys ), zs );                        
-
-        float32v normalise = float32v( 1.0f / (0xff / 2.0f) );
-
-        float32v xWarp = (FnUtils::Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v yWarp = (FnUtils::Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v zWarp = (FnUtils::Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v wWarp = (FnUtils::Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-
-        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut );
-        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
-        zOut = FS_FMulAdd_f32( zWarp, warpAmp, zOut );
-        wOut = FS_FMulAdd_f32( wWarp, warpAmp, wOut );
-
-        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, FS_FMulAdd_f32( yWarp, yWarp, FS_FMulAdd_f32( zWarp, zWarp, wWarp * wWarp ) ) );
-
-        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
-    }
-};
-
+#include "FastSIMD/InlInclude.h"
+
+#include "DomainWarp.h"
+#include "Utils.inl"
+
+template<typename FS> // DomainWarp dispatch: warps the input position through Warp(), then samples mSource at the warped position
+class FastSIMD::DispatchClass<FastNoise::DomainWarp, FS> : public virtual FastNoise::DomainWarp, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    FASTNOISE_IMPL_GEN_T; // macro defined outside this file; presumably declares the Gen overrides that forward to GenT - TODO confirm
+
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const // dimension-generic body; pos... is the sample position, one value per axis
+    {
+        Warp( seed, this->GetSourceValue( mWarpAmplitude, seed, pos... ), (pos * float32v( mWarpFrequency ))..., pos... ); // first pack: position scaled by mWarpFrequency; second pack binds to Warp's reference out-parameters, so pos is updated in place; return value unused
+
+        return this->GetSourceValue( mSource, seed, pos...); // pos now holds whatever Warp wrote through its out-parameters
+    }
+
+public:
+    float GetWarpFrequency() const { return mWarpFrequency; }
+    const FastNoise::HybridSource& GetWarpAmplitude() const { return mWarpAmplitude; }
+    const FastNoise::GeneratorSource& GetWarpSource() const { return mSource; }
+
+    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const = 0; // 2D: pure virtual; result is written through xOut / yOut
+    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const = 0; // 3D
+    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const = 0; // 4D
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtual FastNoise::DomainWarpGradient, public FastSIMD::DispatchClass<FastNoise::DomainWarp, FS>
+{public:
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
+    { // 2D warp: accumulates a hash-derived offset, scaled by warpAmp, into xOut/yOut and returns a strength value for that offset.
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        // Corner hash keys: the cell coordinate times its per-axis prime (x0/y0) and one prime step further (x1/y1).
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        // xs/ys are reused as blend weights: InterpHermite of the offset from the floored coordinate.
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+        // GRADIENT_COORD( i, j ) hashes corner (i, j) and declares x<i><j> / y<i><j> from the low / high 16 bits of that hash.
+    #define GRADIENT_COORD( _x, _y )\
+        int32v hash##_x##_y = FnUtils::HashPrimesHB(seed, x##_x, y##_y );\
+        float32v x##_x##_y = FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) );\
+        float32v y##_x##_y = FS::Convert<float>( (hash##_x##_y >> 16) & int32v( 0xffff ) );
+
+        GRADIENT_COORD( 0, 0 );
+        GRADIENT_COORD( 1, 0 );
+        GRADIENT_COORD( 0, 1 );
+        GRADIENT_COORD( 1, 1 );
+
+    #undef GRADIENT_COORD
+
+        float32v normalise = float32v( 1.0f / (0xffff / 2.0f) );
+        // Blend the four corner values with Lerp, then recentre and rescale them from [0, 0xffff] to [-1, 1].
+        float32v xWarp = (FnUtils::Lerp( FnUtils::Lerp( x00, x10, xs ), FnUtils::Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+        float32v yWarp = (FnUtils::Lerp( FnUtils::Lerp( y00, y10, xs ), FnUtils::Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+
+        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut ); // presumably xWarp * warpAmp + xOut — confirm FS_FMulAdd_f32 operand order
+        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
+
+        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, yWarp * yWarp );
+        // NOTE(review): equals sqrt( warpLengthSq ) if FS_InvSqrt_f32 is a reciprocal square root; a zero warp would then give 0 * inf = NaN — confirm.
+        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
+    }
+            
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
+    { // 3D warp: accumulates a hash-derived offset, scaled by warpAmp, into xOut/yOut/zOut and returns a strength value for that offset.
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        float32v zs = FS_Floor_f32( z );
+        // Corner hash keys: the cell coordinate times its per-axis prime (x0/y0/z0) and one prime step further (x1/y1/z1).
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v z1 = z0 + int32v( FnPrimes::Z );
+        // xs/ys/zs are reused as blend weights: InterpHermite of the offset from the floored coordinate.
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+        zs = FnUtils::InterpHermite( z - zs );
+        // GRADIENT_COORD( i, j, k ) hashes corner (i, j, k) and declares x/y/z<i><j><k> from bits 0-9, 10-19 and 20-29 of that hash.
+    #define GRADIENT_COORD( _x, _y, _z )\
+        int32v hash##_x##_y##_z = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z );\
+        float32v x##_x##_y##_z = FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) );\
+        float32v y##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
+        float32v z##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
+
+        GRADIENT_COORD( 0, 0, 0 );
+        GRADIENT_COORD( 1, 0, 0 );
+        GRADIENT_COORD( 0, 1, 0 );
+        GRADIENT_COORD( 1, 1, 0 );
+        GRADIENT_COORD( 0, 0, 1 );
+        GRADIENT_COORD( 1, 0, 1 );
+        GRADIENT_COORD( 0, 1, 1 );
+        GRADIENT_COORD( 1, 1, 1 );
+
+    #undef GRADIENT_COORD
+        // Blend in x then y: the *0z values use the four corners with z index 0, the *1z values those with z index 1.
+        float32v x0z = FnUtils::Lerp( FnUtils::Lerp( x000, x100, xs ), FnUtils::Lerp( x010, x110, xs ), ys );
+        float32v y0z = FnUtils::Lerp( FnUtils::Lerp( y000, y100, xs ), FnUtils::Lerp( y010, y110, xs ), ys );
+        float32v z0z = FnUtils::Lerp( FnUtils::Lerp( z000, z100, xs ), FnUtils::Lerp( z010, z110, xs ), ys );
+                   
+        float32v x1z = FnUtils::Lerp( FnUtils::Lerp( x001, x101, xs ), FnUtils::Lerp( x011, x111, xs ), ys );
+        float32v y1z = FnUtils::Lerp( FnUtils::Lerp( y001, y101, xs ), FnUtils::Lerp( y011, y111, xs ), ys );
+        float32v z1z = FnUtils::Lerp( FnUtils::Lerp( z001, z101, xs ), FnUtils::Lerp( z011, z111, xs ), ys );
+
+        float32v normalise = float32v( 1.0f / (0x3ff / 2.0f) );
+        // Final blend along z, then recentre and rescale from [0, 0x3ff] to [-1, 1].
+        float32v xWarp = (FnUtils::Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        float32v yWarp = (FnUtils::Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        float32v zWarp = (FnUtils::Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+
+        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut ); // presumably xWarp * warpAmp + xOut — confirm FS_FMulAdd_f32 operand order
+        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
+        zOut = FS_FMulAdd_f32( zWarp, warpAmp, zOut );
+
+        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, FS_FMulAdd_f32( yWarp, yWarp, zWarp * zWarp ) );
+        // NOTE(review): equals sqrt( warpLengthSq ) if FS_InvSqrt_f32 is a reciprocal square root; a zero warp would then give 0 * inf = NaN — confirm.
+        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
+    }
+            
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
+    { // 4D warp: accumulates a hash-derived offset, scaled by warpAmp, into xOut/yOut/zOut/wOut and returns a strength value for that offset.
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        float32v zs = FS_Floor_f32( z );
+        float32v ws = FS_Floor_f32( w );
+        // Corner hash keys: the cell coordinate times its per-axis prime (x0..w0) and one prime step further (x1..w1).
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
+        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v z1 = z0 + int32v( FnPrimes::Z );
+        int32v w1 = w0 + int32v( FnPrimes::W );
+        // xs/ys/zs/ws are reused as blend weights: InterpHermite of the offset from the floored coordinate.
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+        zs = FnUtils::InterpHermite( z - zs );
+        ws = FnUtils::InterpHermite( w - ws );
+        // GRADIENT_COORD( i, j, k, l ) hashes corner (i, j, k, l) and declares x/y/z/w<i><j><k><l> from the four bytes of that hash.
+    #define GRADIENT_COORD( _x, _y, _z, _w )\
+        int32v hash##_x##_y##_z##_w = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
+        float32v x##_x##_y##_z##_w = FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) );\
+        float32v y##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
+        float32v z##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
+        float32v w##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
+
+        GRADIENT_COORD( 0, 0, 0, 0 );
+        GRADIENT_COORD( 1, 0, 0, 0 );
+        GRADIENT_COORD( 0, 1, 0, 0 );
+        GRADIENT_COORD( 1, 1, 0, 0 );
+        GRADIENT_COORD( 0, 0, 1, 0 );
+        GRADIENT_COORD( 1, 0, 1, 0 );
+        GRADIENT_COORD( 0, 1, 1, 0 );
+        GRADIENT_COORD( 1, 1, 1, 0 );
+        GRADIENT_COORD( 0, 0, 0, 1 );
+        GRADIENT_COORD( 1, 0, 0, 1 );
+        GRADIENT_COORD( 0, 1, 0, 1 );
+        GRADIENT_COORD( 1, 1, 0, 1 );
+        GRADIENT_COORD( 0, 0, 1, 1 );
+        GRADIENT_COORD( 1, 0, 1, 1 );
+        GRADIENT_COORD( 0, 1, 1, 1 );
+        GRADIENT_COORD( 1, 1, 1, 1 );
+
+    #undef GRADIENT_COORD
+        // Blend in x, y, then z: the *0w values use the eight corners with w index 0, the *1w values those with w index 1.
+        float32v x0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0000, x1000, xs ), FnUtils::Lerp( x0100, x1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0010, x1010, xs ), FnUtils::Lerp( x0110, x1110, xs ), ys ), zs );
+        float32v y0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0000, y1000, xs ), FnUtils::Lerp( y0100, y1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0010, y1010, xs ), FnUtils::Lerp( y0110, y1110, xs ), ys ), zs );
+        float32v z0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0000, z1000, xs ), FnUtils::Lerp( z0100, z1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0010, z1010, xs ), FnUtils::Lerp( z0110, z1110, xs ), ys ), zs );
+        float32v w0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0000, w1000, xs ), FnUtils::Lerp( w0100, w1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0010, w1010, xs ), FnUtils::Lerp( w0110, w1110, xs ), ys ), zs );
+
+        float32v x1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0001, x1001, xs ), FnUtils::Lerp( x0101, x1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0011, x1011, xs ), FnUtils::Lerp( x0111, x1111, xs ), ys ), zs );
+        float32v y1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0001, y1001, xs ), FnUtils::Lerp( y0101, y1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0011, y1011, xs ), FnUtils::Lerp( y0111, y1111, xs ), ys ), zs );
+        float32v z1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0001, z1001, xs ), FnUtils::Lerp( z0101, z1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0011, z1011, xs ), FnUtils::Lerp( z0111, z1111, xs ), ys ), zs );
+        float32v w1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0001, w1001, xs ), FnUtils::Lerp( w0101, w1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0011, w1011, xs ), FnUtils::Lerp( w0111, w1111, xs ), ys ), zs );                        
+
+        float32v normalise = float32v( 1.0f / (0xff / 2.0f) );
+        // Final blend along w, then recentre and rescale from [0, 0xff] to [-1, 1].
+        float32v xWarp = (FnUtils::Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v yWarp = (FnUtils::Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v zWarp = (FnUtils::Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v wWarp = (FnUtils::Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+
+        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut ); // presumably xWarp * warpAmp + xOut — confirm FS_FMulAdd_f32 operand order
+        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
+        zOut = FS_FMulAdd_f32( zWarp, warpAmp, zOut );
+        wOut = FS_FMulAdd_f32( wWarp, warpAmp, wOut );
+
+        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, FS_FMulAdd_f32( yWarp, yWarp, FS_FMulAdd_f32( zWarp, zWarp, wWarp * wWarp ) ) );
+        // NOTE(review): equals sqrt( warpLengthSq ) if FS_InvSqrt_f32 is a reciprocal square root; a zero warp would then give 0 * inf = NaN — confirm.
+        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
+    }
+};
+
diff --git a/include/FastNoise/Generators/DomainWarpFractal.h b/include/FastNoise/Generators/DomainWarpFractal.h
index 73dc3c47..34a9b0e7 100644
--- a/include/FastNoise/Generators/DomainWarpFractal.h
+++ b/include/FastNoise/Generators/DomainWarpFractal.h
@@ -1,46 +1,42 @@
-#pragma once
-#include "Fractal.h"
-#include "DomainWarp.h"
-
-namespace FastNoise
-{
-    class DomainWarpFractalProgressive : public virtual Fractal<DomainWarp>
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarpFractalProgressive> : MetadataT<Fractal<DomainWarp>>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source"  )
-        {
-            groups.push_back( "Domain Warp" );
-        }
-    };
-#endif
-
-    class DomainWarpFractalIndependant : public virtual Fractal<DomainWarp>
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarpFractalIndependant> : MetadataT<Fractal<DomainWarp>>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source"  )
-        {
-            groups.push_back( "Domain Warp" );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Fractal.h"
+#include "DomainWarp.h"
+
+namespace FastNoise
+{
+    class DomainWarpFractalProgressive : public virtual Fractal<DomainWarp> // Fractal over a DomainWarp source; GenT is in DomainWarpFractal.inl
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<DomainWarpFractalProgressive> : MetadataT<Fractal<DomainWarp>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+        // Passes "Domain Warp Source" to the base as its sourceName and adds the "Domain Warp" group.
+        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source"  )
+        {
+            groups.push_back( "Domain Warp" );
+        }
+    };
+#endif
+
+    class DomainWarpFractalIndependant : public virtual Fractal<DomainWarp> // Fractal over a DomainWarp source; GenT is in DomainWarpFractal.inl
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<DomainWarpFractalIndependant> : MetadataT<Fractal<DomainWarp>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+        // Passes "Domain Warp Source" to the base as its sourceName and adds the "Domain Warp" group.
+        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source"  )
+        {
+            groups.push_back( "Domain Warp" );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/DomainWarpFractal.inl b/include/FastNoise/Generators/DomainWarpFractal.inl
index ad7057e8..6bcd5503 100644
--- a/include/FastNoise/Generators/DomainWarpFractal.inl
+++ b/include/FastNoise/Generators/DomainWarpFractal.inl
@@ -1,75 +1,71 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "DomainWarpFractal.h"
-
-template<typename FS>
-class FS_T<FastNoise::DomainWarpFractalProgressive, FS> : public virtual FastNoise::DomainWarpFractalProgressive, public FS_T<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        auto* warp = this->GetSourceSIMD( mSource );
-
-        float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, pos... );
-        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
-        float32v freq = float32v( warp->GetWarpFrequency() );
-        int32v seedInc = seed;
-
-        float32v gain = this->GetSourceValue( mGain, seed, pos... );
-        float32v lacunarity( mLacunarity );
-
-        float32v strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
-
-        for (int i = 1; i < mOctaves; i++)
-        {
-            seedInc -= int32v( -1 );
-            freq *= lacunarity;
-            amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
-            amp *= gain;
-            strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
-        }
-
-        return this->GetSourceValue( warp->GetWarpSource(), seed, pos... );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::DomainWarpFractalIndependant, FS> : public virtual FastNoise::DomainWarpFractalIndependant, public FS_T<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return [this, seed] ( std::remove_reference_t<P>... noisePos, std::remove_reference_t<P>... warpPos )
-        {
-            auto* warp = this->GetSourceSIMD( mSource );
-
-            float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, noisePos... );
-            float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, noisePos... );
-            float32v freq = float32v( warp->GetWarpFrequency() );
-            int32v seedInc = seed;
-
-            float32v gain = this->GetSourceValue( mGain, seed, noisePos... );
-            float32v lacunarity( mLacunarity );
-        
-            float32v strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
-    
-            for( int i = 1; i < mOctaves; i++ )
-            {
-                seedInc -= int32v( -1 );
-                freq *= lacunarity;
-                amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
-                amp *= gain;
-                strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
-            }
-    
-            return this->GetSourceValue( warp->GetWarpSource(), seed, warpPos... );
-
-        } ( pos..., pos... );
-    }
-};
+#include "FastSIMD/InlInclude.h"
+
+#include "DomainWarpFractal.h"
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalProgressive, FS> : public virtual FastNoise::DomainWarpFractalProgressive, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // GenT warps pos once per octave, writing the result back into pos, then samples the warp node's own source at the warped position.
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        auto* warp = this->GetSourceSIMD( mSource );
+
+        float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, pos... );
+        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
+        float32v freq = float32v( warp->GetWarpFrequency() );
+        int32v seedInc = seed;
+
+        float32v gain = this->GetSourceValue( mGain, seed, pos... );
+        float32v lacunarity( mLacunarity );
+        // pos is both the warp input (scaled by freq) and the output, so every later octave starts from the already warped position.
+        float32v strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
+
+        for (int i = 1; i < mOctaves; i++)
+        {
+            seedInc -= int32v( -1 ); // subtracting -1 adds 1 to the per-octave seed
+            freq *= lacunarity;
+            amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength ); // previous octave's strength damps this amplitude, weighted by weightedStrength
+            amp *= gain;
+            strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
+        }
+
+        return this->GetSourceValue( warp->GetWarpSource(), seed, pos... );
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalIndependant, FS> : public virtual FastNoise::DomainWarpFractalIndependant, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // GenT warps a separate copy of the position once per octave, always feeding the unwarped position in, then samples the warp node's own source.
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return [this, seed] ( std::remove_reference_t<P>... noisePos, std::remove_reference_t<P>... warpPos )
+        {
+            auto* warp = this->GetSourceSIMD( mSource );
+            // noisePos is never written to; warpPos receives every octave's offset.
+            float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, noisePos... );
+            float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, noisePos... );
+            float32v freq = float32v( warp->GetWarpFrequency() );
+            int32v seedInc = seed;
+
+            float32v gain = this->GetSourceValue( mGain, seed, noisePos... );
+            float32v lacunarity( mLacunarity );
+        
+            float32v strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
+    
+            for( int i = 1; i < mOctaves; i++ )
+            {
+                seedInc -= int32v( -1 ); // subtracting -1 adds 1 to the per-octave seed
+                freq *= lacunarity;
+                amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength ); // previous octave's strength damps this amplitude, weighted by weightedStrength
+                amp *= gain;
+                strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
+            }
+    
+            return this->GetSourceValue( warp->GetWarpSource(), seed, warpPos... );
+
+        } ( pos..., pos... ); // the lambda is invoked immediately with two copies of pos: one as noisePos, one as warpPos
+    }
+};
diff --git a/include/FastNoise/Generators/Fractal.h b/include/FastNoise/Generators/Fractal.h
index a26677fe..dbad20d9 100644
--- a/include/FastNoise/Generators/Fractal.h
+++ b/include/FastNoise/Generators/Fractal.h
@@ -1,113 +1,107 @@
-#pragma once
-#include "Generator.h"
-
-namespace FastNoise
-{
-    template<typename T = Generator>
-    class Fractal : public virtual Generator
-    {
-    public:
-        void SetSource( SmartNodeArg<T> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
-        void SetGain( float value ) { mGain = value; CalculateFractalBounding(); } 
-        void SetGain( SmartNodeArg<> gen ) { mGain = 1.0f; this->SetSourceMemberVariable( mGain, gen ); CalculateFractalBounding(); }
-        void SetWeightedStrength( float value ) { mWeightedStrength = value; } 
-        void SetWeightedStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWeightedStrength, gen ); }
-        void SetOctaveCount( int value ) { mOctaves = value; CalculateFractalBounding(); } 
-        void SetLacunarity( float value ) { mLacunarity = value; } 
-
-    protected:
-        GeneratorSourceT<T> mSource;
-        HybridSource mGain = 0.5f;
-        HybridSource mWeightedStrength = 0.0f;
-
-        int   mOctaves = 3;
-        float mLacunarity = 2.0f;
-        float mFractalBounding = 1.0f / 1.75f;
-
-        virtual void CalculateFractalBounding()
-        {
-            float gain = std::abs( mGain.constant );
-            float amp = gain;
-            float ampFractal = 1.0f;
-            for( int i = 1; i < mOctaves; i++ )
-            {
-                ampFractal += amp;
-                amp *= gain;
-            }
-            mFractalBounding = 1.0f / ampFractal;
-        }     
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<typename T>
-    struct MetadataT<Fractal<T>> : MetadataT<Generator>
-    {
-        MetadataT( const char* sourceName = "Source" )
-        {
-            groups.push_back( "Fractal" );
-
-            this->AddGeneratorSource( sourceName, &Fractal<T>::SetSource );
-            this->AddHybridSource( "Gain", 0.5f, &Fractal<T>::SetGain, &Fractal<T>::SetGain );
-            this->AddHybridSource( "Weighted Strength", 0.0f, &Fractal<T>::SetWeightedStrength, &Fractal<T>::SetWeightedStrength );
-            this->AddVariable( "Octaves", 3, &Fractal<T>::SetOctaveCount, 2, 16 );
-            this->AddVariable( "Lacunarity", 2.0f, &Fractal<T>::SetLacunarity );
-        }
-    };
-#endif
-
-    class FractalFBm : public virtual Fractal<>
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<FractalFBm> : MetadataT<Fractal<>>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-
-    class FractalRidged : public virtual Fractal<>
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<FractalRidged> : MetadataT<Fractal<>>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-    };
-#endif
-
-    class FractalPingPong : public virtual Fractal<>
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-
-        void SetPingPongStrength( float value ) { mPingPongStrength = value; }
-        void SetPingPongStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPingPongStrength, gen ); }
-
-    protected:
-        HybridSource mPingPongStrength = 0.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<FractalPingPong> : MetadataT<Fractal<>>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            this->AddHybridSource( "Ping Pong Strength", 2.0f, &FractalPingPong::SetPingPongStrength, &FractalPingPong::SetPingPongStrength );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+namespace FastNoise
+{
+    template<typename T = Generator>
+    class Fractal : public virtual Generator // Shared state for fractal nodes: a source of type T plus gain, weighted strength, octave count and lacunarity
+    {
+    public:
+        void SetSource( SmartNodeArg<T> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
+        void SetGain( float value ) { mGain = value; CalculateFractalBounding(); } 
+        void SetGain( SmartNodeArg<> gen ) { mGain = 1.0f; this->SetSourceMemberVariable( mGain, gen ); CalculateFractalBounding(); } // constant is set to 1.0f first; presumably the bounding is then computed with gain 1 — confirm SetSourceMemberVariable leaves mGain.constant alone
+        void SetWeightedStrength( float value ) { mWeightedStrength = value; } 
+        void SetWeightedStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWeightedStrength, gen ); }
+        void SetOctaveCount( int value ) { mOctaves = value; CalculateFractalBounding(); } 
+        void SetLacunarity( float value ) { mLacunarity = value; } 
+
+    protected:
+        GeneratorSourceT<T> mSource;
+        HybridSource mGain = 0.5f;
+        HybridSource mWeightedStrength = 0.0f;
+
+        int   mOctaves = 3;
+        float mLacunarity = 2.0f;
+        float mFractalBounding = 1.0f / 1.75f; // 1 / (1 + 0.5 + 0.25): the value CalculateFractalBounding() yields for the defaults above
+        // Sets mFractalBounding = 1 / (1 + g + g^2 + ... + g^(mOctaves - 1)) with g = |mGain.constant|.
+        virtual void CalculateFractalBounding()
+        {
+            float gain = std::abs( mGain.constant );
+            float amp = gain;
+            float ampFractal = 1.0f;
+            for( int i = 1; i < mOctaves; i++ )
+            {
+                ampFractal += amp;
+                amp *= gain;
+            }
+            mFractalBounding = 1.0f / ampFractal;
+        }     
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<typename T>
+    struct MetadataT<Fractal<T>> : MetadataT<Generator>
+    {
+        MetadataT( const char* sourceName = "Source" ) // sourceName is the label passed to AddGeneratorSource for the source slot
+        {
+            groups.push_back( "Fractal" );
+            // The literal defaults below match the in-class initialisers of Fractal<T> (0.5f, 0.0f, 3, 2.0f).
+            this->AddGeneratorSource( sourceName, &Fractal<T>::SetSource );
+            this->AddHybridSource( "Gain", 0.5f, &Fractal<T>::SetGain, &Fractal<T>::SetGain );
+            this->AddHybridSource( "Weighted Strength", 0.0f, &Fractal<T>::SetWeightedStrength, &Fractal<T>::SetWeightedStrength );
+            this->AddVariable( "Octaves", 3, &Fractal<T>::SetOctaveCount, 2, 16 ); // presumably 2 and 16 are the allowed min/max — confirm against AddVariable
+            this->AddVariable( "Lacunarity", 2.0f, &Fractal<T>::SetLacunarity );
+        }
+    };
+#endif
+
+    class FractalFBm : public virtual Fractal<> // Fractal that sums source octaves weighted by amplitude; GenT is in Fractal.inl
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<FractalFBm> : MetadataT<Fractal<>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override; // declaration only; all other metadata is inherited from MetadataT<Fractal<>>
+    };
+#endif
+
+    class FractalRidged : public virtual Fractal<> // Fractal that sums 1 - 2 * |source| per octave; GenT is in Fractal.inl
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<FractalRidged> : MetadataT<Fractal<>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override; // declaration only; all other metadata is inherited from MetadataT<Fractal<>>
+    };
+#endif
+
+    class FractalPingPong : public virtual Fractal<> // Fractal that scales each source sample by a ping pong strength before folding it; GenT is in Fractal.inl
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+        void SetPingPongStrength( float value ) { mPingPongStrength = value; }
+        void SetPingPongStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPingPongStrength, gen ); }
+
+    protected:
+        HybridSource mPingPongStrength = 2.0f; // same default as MetadataT<FractalPingPong>; with 0.0f every sample is multiplied by 0 before PingPong()
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<FractalPingPong> : MetadataT<Fractal<>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+        // Adds the one parameter FractalPingPong has beyond the inherited Fractal<> ones.
+        MetadataT()
+        {
+            this->AddHybridSource( "Ping Pong Strength", 2.0f, &FractalPingPong::SetPingPongStrength, &FractalPingPong::SetPingPongStrength );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Fractal.inl b/include/FastNoise/Generators/Fractal.inl
index 243d87ec..9f147746 100644
--- a/include/FastNoise/Generators/Fractal.inl
+++ b/include/FastNoise/Generators/Fractal.inl
@@ -1,109 +1,103 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Fractal.h"
-
-template<typename FS, typename T>
-class FS_T<FastNoise::Fractal<T>, FS> : public virtual FastNoise::Fractal<T>, public FS_T<FastNoise::Generator, FS>
-{
-
-};
-
-template<typename FS>
-class FS_T<FastNoise::FractalFBm, FS> : public virtual FastNoise::FractalFBm, public FS_T<FastNoise::Fractal<>, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
-        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
-        float32v lacunarity( mLacunarity );
-        float32v amp( mFractalBounding );
-        float32v noise = this->GetSourceValue( mSource, seed, pos... );
-
-        float32v sum = noise * amp;
-
-        for( int i = 1; i < mOctaves; i++ )
-        {
-            seed -= int32v( -1 );
-            amp *= FnUtils::Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
-            amp *= gain;
-
-            noise = this->GetSourceValue( mSource, seed, (pos *= lacunarity)... );
-            sum += noise * amp;
-        }
-
-        return sum;
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::FractalRidged, FS> : public virtual FastNoise::FractalRidged, public FS_T<FastNoise::Fractal<>, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_INLINE float32v GenT(int32v seed, P... pos) const
-    {
-        float32v gain = this->GetSourceValue( mGain, seed, pos... );
-        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
-        float32v lacunarity( mLacunarity );
-        float32v amp( mFractalBounding );
-        float32v noise = FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) );
-
-        float32v sum = (noise * float32v( -2 ) + float32v( 1 )) * amp;
-
-        for( int i = 1; i < mOctaves; i++ )
-        {
-            seed -= int32v( -1 );
-            amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - noise, weightedStrength );
-            amp *= gain;
-
-            noise = FS_Abs_f32( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) );
-            sum += (noise * float32v( -2 ) + float32v( 1 )) * amp;
-        }
-
-        return sum;
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::FractalPingPong, FS> : public virtual FastNoise::FractalPingPong, public FS_T<FastNoise::Fractal<>, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-    FASTNOISE_IMPL_GEN_T;
-
-    static float32v PingPong( float32v t )
-    {
-        t -= FS_Round_f32( t * float32v( 0.5f ) ) * float32v( 2 );
-        return FS_Select_f32( t < float32v( 1 ), t, float32v( 2 ) - t );
-    }
-
-    template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
-        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
-        float32v pingPongStrength = this->GetSourceValue( mPingPongStrength, seed, pos... );
-        float32v lacunarity( mLacunarity );
-        float32v amp( mFractalBounding );
-        float32v noise = PingPong( (this->GetSourceValue( mSource, seed, pos... ) + float32v( 1 )) * pingPongStrength );
-
-        float32v sum = noise * amp;
-
-        for( int i = 1; i < mOctaves; i++ )
-        {
-            seed -= int32v( -1 );
-            amp *= FnUtils::Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
-            amp *= gain;
-
-            noise = PingPong( (this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) + float32v( 1 )) * pingPongStrength );
-            sum += noise * amp;
-        }
-
-        return sum;
-    }
-};
+#include "FastSIMD/InlInclude.h"
+
+#include "Fractal.h"
+
+template<typename FS, typename T>
+class FastSIMD::DispatchClass<FastNoise::Fractal<T>, FS> : public virtual FastNoise::Fractal<T>, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{
+    // No members of its own: it only joins Fractal<T> with the Generator dispatch class as a base for the concrete fractals.
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::FractalFBm, FS> : public virtual FastNoise::FractalFBm, public FastSIMD::DispatchClass<FastNoise::Fractal<>, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // GenT sums mOctaves samples of mSource, multiplying pos by lacunarity and amp by gain between octaves.
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
+        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
+        float32v lacunarity( mLacunarity );
+        float32v amp( mFractalBounding );
+        float32v noise = this->GetSourceValue( mSource, seed, pos... );
+
+        float32v sum = noise * amp;
+
+        for( int i = 1; i < mOctaves; i++ )
+        {
+            seed -= int32v( -1 ); // subtracting -1 adds 1 to the per-octave seed
+            amp *= FnUtils::Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength ); // previous octave's value, remapped by (noise + 1) / 2, scales this amplitude
+            amp *= gain;
+            // pos is scaled in place, so the scaling compounds across octaves.
+            noise = this->GetSourceValue( mSource, seed, (pos *= lacunarity)... );
+            sum += noise * amp;
+        }
+
+        return sum;
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::FractalRidged, FS> : public virtual FastNoise::FractalRidged, public FastSIMD::DispatchClass<FastNoise::Fractal<>, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // GenT sums 1 - 2 * |source| over mOctaves octaves, multiplying pos by lacunarity and amp by gain between octaves.
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT(int32v seed, P... pos) const
+    {
+        float32v gain = this->GetSourceValue( mGain, seed, pos... );
+        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
+        float32v lacunarity( mLacunarity );
+        float32v amp( mFractalBounding );
+        float32v noise = FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) );
+
+        float32v sum = (noise * float32v( -2 ) + float32v( 1 )) * amp;
+
+        for( int i = 1; i < mOctaves; i++ )
+        {
+            seed -= int32v( -1 ); // subtracting -1 adds 1 to the per-octave seed
+            amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - noise, weightedStrength ); // previous octave's 1 - |source| scales this amplitude
+            amp *= gain;
+            // pos is scaled in place, so the scaling compounds across octaves.
+            noise = FS_Abs_f32( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) );
+            sum += (noise * float32v( -2 ) + float32v( 1 )) * amp;
+        }
+
+        return sum;
+    }
+};
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::FractalPingPong, FS> : public virtual FastNoise::FractalPingPong, public FastSIMD::DispatchClass<FastNoise::Fractal<>, FS>
+{    FASTNOISE_IMPL_GEN_T;
+    // NOTE(review): if FS_Round_f32 rounds to nearest, t - 2 * round( t / 2 ) lies in [-1, 1] and `t < 1` is almost always true — confirm Round rather than Floor is intended.
+    static float32v PingPong( float32v t )
+    {
+        t -= FS_Round_f32( t * float32v( 0.5f ) ) * float32v( 2 );
+        return FS_Select_f32( t < float32v( 1 ), t, float32v( 2 ) - t );
+    }
+    // GenT sums PingPong( (source + 1) * pingPongStrength ) over mOctaves octaves, multiplying pos by lacunarity and amp by gain between octaves.
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
+        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
+        float32v pingPongStrength = this->GetSourceValue( mPingPongStrength, seed, pos... );
+        float32v lacunarity( mLacunarity );
+        float32v amp( mFractalBounding );
+        float32v noise = PingPong( (this->GetSourceValue( mSource, seed, pos... ) + float32v( 1 )) * pingPongStrength );
+
+        float32v sum = noise * amp;
+
+        for( int i = 1; i < mOctaves; i++ )
+        {
+            seed -= int32v( -1 ); // subtracting -1 adds 1 to the per-octave seed
+            amp *= FnUtils::Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength ); // previous octave's value, remapped by (noise + 1) / 2, scales this amplitude
+            amp *= gain;
+            // pos is scaled in place, so the scaling compounds across octaves.
+            noise = PingPong( (this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) + float32v( 1 )) * pingPongStrength );
+            sum += noise * amp;
+        }
+
+        return sum;
+    }
+};
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 80133408..eb01af40 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -95,7 +95,7 @@ namespace FastNoise
 
         virtual ~Generator() = default;
 
-        virtual FastSIMD::eLevel GetSIMDLevel() const = 0;
+        virtual FastSIMD::FeatureSet GetSIMDLevel() const = 0;
         virtual const Metadata& GetMetadata() const = 0;
 
         virtual OutputMinMax GenUniformGrid2D( float* out,
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 54342b66..c3bf5754 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -1,18 +1,17 @@
 #include <cassert>
 #include <cstring>
-#include "FastSIMD/InlInclude.h"
 
 #include "Generator.h"
 
-#ifdef FS_SIMD_CLASS
 #pragma warning( disable:4250 )
-#endif
 
-template<typename FS>
-class FS_T<FastNoise::Generator, FS> : public virtual FastNoise::Generator
-{
-    FASTSIMD_DECLARE_FS_TYPES;
+using float32v = FS::NativeRegister<float>;
+using int32v = FS::NativeRegister<std::int32_t>;
+using mask32v = FS::NativeRegister<FS::Mask<32>>;
 
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Generator, SIMD> : public virtual FastNoise::Generator
+{
 public:
     virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const = 0;
     virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const = 0;
@@ -23,12 +22,12 @@ public:
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const override { return GenT( seed, x, y, z ); }\
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const override { return GenT( seed, x, y, z, w ); }
 
-    FastSIMD::eLevel GetSIMDLevel() const final
+    FastSIMD::FeatureSet GetSIMDLevel() const final
     {
-        return FS::SIMD_Level;
+        return FASTSIMD_DEFAULT_FEATURE_SET;
     }
 
-    using VoidPtrStorageType = const FS_T<Generator, FS>*;
+    using VoidPtrStorageType = const DispatchClass<Generator, SIMD>*;
 
     void SetSourceSIMDPtr( const Generator* base, const void** simdPtr ) final
     {
@@ -44,7 +43,7 @@ public:
     }
 
     template<typename T, typename... POS>
-    FS_INLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::HybridSourceT<T>& memberVariable, int32v seed, POS... pos ) const
+    FS_FORCEINLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::HybridSourceT<T>& memberVariable, int32v seed, POS... pos ) const
     {
         if( memberVariable.simdGeneratorPtr )
         {
@@ -56,7 +55,7 @@ public:
     }
 
     template<typename T, typename... POS>
-    FS_INLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::GeneratorSourceT<T>& memberVariable, int32v seed, POS... pos ) const
+    FS_FORCEINLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::GeneratorSourceT<T>& memberVariable, int32v seed, POS... pos ) const
     {
         assert( memberVariable.simdGeneratorPtr );
         auto simdGen = reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr );
@@ -65,12 +64,12 @@ public:
     }
 
     template<typename T>
-    FS_INLINE const FS_T<T, FS>* GetSourceSIMD( const FastNoise::GeneratorSourceT<T>& memberVariable ) const
+    FS_FORCEINLINE const DispatchClass<T, SIMD>* GetSourceSIMD( const FastNoise::GeneratorSourceT<T>& memberVariable ) const
     {
         assert( memberVariable.simdGeneratorPtr );
         auto simdGen = reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr );
 
-        auto simdT = static_cast<const FS_T<T, FS>*>( simdGen );
+        auto simdT = static_cast<const FastSIMD::DispatchClass<T, SIMD>*>( simdGen );
         return simdT;
     }
 
@@ -90,31 +89,31 @@ public:
         size_t totalValues = xSize * ySize;
         size_t index = 0;
 
-        xIdx += int32v::FS_Incremented();
+        xIdx += FS::Incremented<int32v>();
 
         AxisReset<true>( xIdx, yIdx, xMax, xSizeV, xSize );
 
-        while( index < totalValues - FS_Size_32() )
+        while( index < totalValues - int32v::ElementCount )
         {
-            float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
-            float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
+            float32v xPos = FS::Convert<float>( xIdx ) * freqV;
+            float32v yPos = FS::Convert<float>( yIdx ) * freqV;
 
             float32v gen = Gen( int32v( seed ), xPos, yPos );
-            FS_Store_f32( &noiseOut[index], gen );
+            FS::Store( &noiseOut[index], gen );
 
 #if FASTNOISE_CALC_MIN_MAX
-            min = FS_Min_f32( min, gen );
-            max = FS_Max_f32( max, gen );
+            min = FS::Min( min, gen );
+            max = FS::Max( max, gen );
 #endif
 
-            index += FS_Size_32();
-            xIdx += int32v( FS_Size_32() );
+            index += int32v::ElementCount;
+            xIdx += int32v( int32v::ElementCount );
 
             AxisReset<false>( xIdx, yIdx, xMax, xSizeV, xSize );
         }
 
-        float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
-        float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
+        float32v xPos = FS::Convert<float>( xIdx ) * freqV;
+        float32v yPos = FS::Convert<float>( yIdx ) * freqV;
 
         float32v gen = Gen( int32v( seed ), xPos, yPos );
 
@@ -140,35 +139,35 @@ public:
         size_t totalValues = xSize * ySize * zSize;
         size_t index = 0;
 
-        xIdx += int32v::FS_Incremented();
+        xIdx += FS::Incremented<int32v>();
 
         AxisReset<true>( xIdx, yIdx, xMax, xSizeV, xSize );
         AxisReset<true>( yIdx, zIdx, yMax, ySizeV, xSize * ySize );
 
-        while( index < totalValues - FS_Size_32() )
+        while( index < totalValues - int32v::ElementCount )
         {
-            float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
-            float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
-            float32v zPos = FS_Converti32_f32( zIdx ) * freqV;
+            float32v xPos = FS::Convert<float>( xIdx ) * freqV;
+            float32v yPos = FS::Convert<float>( yIdx ) * freqV;
+            float32v zPos = FS::Convert<float>( zIdx ) * freqV;
 
             float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
-            FS_Store_f32( &noiseOut[index], gen );
+            FS::Store( &noiseOut[index], gen );
 
 #if FASTNOISE_CALC_MIN_MAX
-            min = FS_Min_f32( min, gen );
-            max = FS_Max_f32( max, gen );
+            min = FS::Min( min, gen );
+            max = FS::Max( max, gen );
 #endif
 
-            index += FS_Size_32();
-            xIdx += int32v( FS_Size_32() );
+            index += int32v::ElementCount;
+            xIdx += int32v( int32v::ElementCount );
             
             AxisReset<false>( xIdx, yIdx, xMax, xSizeV, xSize );
             AxisReset<false>( yIdx, zIdx, yMax, ySizeV, xSize * ySize );
         }
 
-        float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
-        float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
-        float32v zPos = FS_Converti32_f32( zIdx ) * freqV;
+        float32v xPos = FS::Convert<float>( xIdx ) * freqV;
+        float32v yPos = FS::Convert<float>( yIdx ) * freqV;
+        float32v zPos = FS::Convert<float>( zIdx ) * freqV;
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
 
@@ -197,39 +196,39 @@ public:
         size_t totalValues = xSize * ySize * zSize * wSize;
         size_t index = 0;
 
-        xIdx += int32v::FS_Incremented();
+        xIdx += FS::Incremented<int32v>();
 
         AxisReset<true>( xIdx, yIdx, xMax, xSizeV, xSize );
         AxisReset<true>( yIdx, zIdx, yMax, ySizeV, xSize * ySize );
         AxisReset<true>( zIdx, wIdx, zMax, zSizeV, xSize * ySize * zSize );
 
-        while( index < totalValues - FS_Size_32() )
+        while( index < totalValues - int32v::ElementCount )
         {
-            float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
-            float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
-            float32v zPos = FS_Converti32_f32( zIdx ) * freqV;
-            float32v wPos = FS_Converti32_f32( wIdx ) * freqV;
+            float32v xPos = FS::Convert<float>( xIdx ) * freqV;
+            float32v yPos = FS::Convert<float>( yIdx ) * freqV;
+            float32v zPos = FS::Convert<float>( zIdx ) * freqV;
+            float32v wPos = FS::Convert<float>( wIdx ) * freqV;
 
             float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
-            FS_Store_f32( &noiseOut[index], gen );
+            FS::Store( &noiseOut[index], gen );
 
 #if FASTNOISE_CALC_MIN_MAX
-            min = FS_Min_f32( min, gen );
-            max = FS_Max_f32( max, gen );
+            min = FS::Min( min, gen );
+            max = FS::Max( max, gen );
 #endif
 
-            index += FS_Size_32();
-            xIdx += int32v( FS_Size_32() );
+            index += int32v::ElementCount;
+            xIdx += int32v( int32v::ElementCount );
 
             AxisReset<false>( xIdx, yIdx, xMax, xSizeV, xSize );
             AxisReset<false>( yIdx, zIdx, yMax, ySizeV, xSize * ySize );
             AxisReset<false>( zIdx, wIdx, zMax, zSizeV, xSize * ySize * zSize );
         }
 
-        float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
-        float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
-        float32v zPos = FS_Converti32_f32( zIdx ) * freqV;
-        float32v wPos = FS_Converti32_f32( wIdx ) * freqV;
+        float32v xPos = FS::Convert<float>( xIdx ) * freqV;
+        float32v yPos = FS::Convert<float>( yIdx ) * freqV;
+        float32v zPos = FS::Convert<float>( zIdx ) * freqV;
+        float32v wPos = FS::Convert<float>( wIdx ) * freqV;
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
 
@@ -242,23 +241,23 @@ public:
         float32v max( -INFINITY );
 
         size_t index = 0;
-        while( index < count - FS_Size_32() )
+        while( index < count - int32v::ElementCount )
         {
-            float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
-            float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
+            float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
+            float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
 
             float32v gen = Gen( int32v( seed ), xPos, yPos );
-            FS_Store_f32( &noiseOut[index], gen );
+            FS::Store( &noiseOut[index], gen );
 
 #if FASTNOISE_CALC_MIN_MAX
-            min = FS_Min_f32( min, gen );
-            max = FS_Max_f32( max, gen );
+            min = FS::Min( min, gen );
+            max = FS::Max( max, gen );
 #endif
-            index += FS_Size_32();
+            index += int32v::ElementCount;
         }
 
-        float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
-        float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
+        float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
+        float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos );
 
@@ -271,25 +270,25 @@ public:
         float32v max( -INFINITY );
 
         size_t index = 0;
-        while( index < count - FS_Size_32() )
+        while( index < count - int32v::ElementCount )
         {
-            float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
-            float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
-            float32v zPos = float32v( zOffset ) + FS_Load_f32( &zPosArray[index] );
+            float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
+            float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
+            float32v zPos = float32v( zOffset ) + FS::Load<float32v>( &zPosArray[index] );
 
             float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
-            FS_Store_f32( &noiseOut[index], gen );
+            FS::Store( &noiseOut[index], gen );
 
 #if FASTNOISE_CALC_MIN_MAX
-            min = FS_Min_f32( min, gen );
-            max = FS_Max_f32( max, gen );
+            min = FS::Min( min, gen );
+            max = FS::Max( max, gen );
 #endif
-            index += FS_Size_32();
+            index += int32v::ElementCount;
         }
 
-        float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
-        float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
-        float32v zPos = float32v( zOffset ) + FS_Load_f32( &zPosArray[index] );
+        float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
+        float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
+        float32v zPos = float32v( zOffset ) + FS::Load<float32v>( &zPosArray[index] );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
 
@@ -302,27 +301,27 @@ public:
         float32v max( -INFINITY );
 
         size_t index = 0;
-        while( index < count - FS_Size_32() )
+        while( index < count - int32v::ElementCount )
         {
-            float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
-            float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
-            float32v zPos = float32v( zOffset ) + FS_Load_f32( &zPosArray[index] );
-            float32v wPos = float32v( wOffset ) + FS_Load_f32( &wPosArray[index] );
+            float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
+            float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
+            float32v zPos = float32v( zOffset ) + FS::Load<float32v>( &zPosArray[index] );
+            float32v wPos = float32v( wOffset ) + FS::Load<float32v>( &wPosArray[index] );
 
             float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
-            FS_Store_f32( &noiseOut[index], gen );
+            FS::Store( &noiseOut[index], gen );
 
 #if FASTNOISE_CALC_MIN_MAX
-            min = FS_Min_f32( min, gen );
-            max = FS_Max_f32( max, gen );
+            min = FS::Min( min, gen );
+            max = FS::Max( max, gen );
 #endif
-            index += FS_Size_32();
+            index += int32v::ElementCount;
         }
 
-        float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
-        float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
-        float32v zPos = float32v( zOffset ) + FS_Load_f32( &zPosArray[index] );
-        float32v wPos = float32v( wOffset ) + FS_Load_f32( &wPosArray[index] );
+        float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
+        float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
+        float32v zPos = float32v( zOffset ) + FS::Load<float32v>( &zPosArray[index] );
+        float32v wPos = float32v( wOffset ) + FS::Load<float32v>( &wPosArray[index] );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
 
@@ -331,17 +330,17 @@ public:
 
     float GenSingle2D( float x, float y, int seed ) const final
     {
-        return FS_Extract0_f32( Gen( int32v( seed ), float32v( x ), float32v( y ) ) );
+        return FS::Extract0( Gen( int32v( seed ), float32v( x ), float32v( y ) ) );
     }
 
     float GenSingle3D( float x, float y, float z, int seed ) const final
     {
-        return FS_Extract0_f32( Gen( int32v( seed ), float32v( x ), float32v( y ), float32v( z ) ) );
+        return FS::Extract0( Gen( int32v( seed ), float32v( x ), float32v( y ), float32v( z ) ) );
     }
 
     float GenSingle4D( float x, float y, float z, float w, int seed ) const final
     {
-        return FS_Extract0_f32( Gen( int32v( seed ), float32v( x ), float32v( y ), float32v( z ), float32v( w ) ) );
+        return FS::Extract0( Gen( int32v( seed ), float32v( x ), float32v( y ), float32v( z ), float32v( w ) ) );
     }
 
     FastNoise::OutputMinMax GenTileable2D( float* noiseOut, int xSize, int ySize, float frequency, int seed ) const final
@@ -367,41 +366,41 @@ public:
         float32v xMul = float32v( 1 / xSizePi );
         float32v yMul = float32v( 1 / ySizePi );
 
-        xIdx += int32v::FS_Incremented();
+        xIdx += FS::Incremented<int32v>();
 
         AxisReset<true>( xIdx, yIdx, xMax, xSizeV, xSize );
 
-        while( index < totalValues - FS_Size_32() )
+        while( index < totalValues - int32v::ElementCount )
         {
-            float32v xF = FS_Converti32_f32( xIdx ) * xMul;
-            float32v yF = FS_Converti32_f32( yIdx ) * yMul;
+            float32v xF = FS::Convert<float>( xIdx ) * xMul;
+            float32v yF = FS::Convert<float>( yIdx ) * yMul;
 
-            float32v xPos = FS_Cos_f32( xF ) * xFreq;
-            float32v yPos = FS_Cos_f32( yF ) * yFreq;
-            float32v zPos = FS_Sin_f32( xF ) * xFreq;
-            float32v wPos = FS_Sin_f32( yF ) * yFreq;
+            float32v xPos = FS::Cos( xF ) * xFreq;
+            float32v yPos = FS::Cos( yF ) * yFreq;
+            float32v zPos = FS::Sin( xF ) * xFreq;
+            float32v wPos = FS::Sin( yF ) * yFreq;
 
             float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
-            FS_Store_f32( &noiseOut[index], gen );
+            FS::Store( &noiseOut[index], gen );
 
 #if FASTNOISE_CALC_MIN_MAX
-            min = FS_Min_f32( min, gen );
-            max = FS_Max_f32( max, gen );
+            min = FS::Min( min, gen );
+            max = FS::Max( max, gen );
 #endif
 
-            index += FS_Size_32();
-            xIdx += int32v( FS_Size_32() );
+            index += int32v::ElementCount;
+            xIdx += int32v( int32v::ElementCount );
 
             AxisReset<false>( xIdx, yIdx, xMax, xSizeV, xSize );
         }
 
-        float32v xF = FS_Converti32_f32( xIdx ) * xMul;
-        float32v yF = FS_Converti32_f32( yIdx ) * yMul;
+        float32v xF = FS::Convert<float>( xIdx ) * xMul;
+        float32v yF = FS::Convert<float>( yIdx ) * yMul;
 
-        float32v xPos = FS_Cos_f32( xF ) * xFreq;
-        float32v yPos = FS_Cos_f32( yF ) * yFreq;
-        float32v zPos = FS_Sin_f32( xF ) * xFreq;
-        float32v wPos = FS_Sin_f32( yF ) * yFreq;
+        float32v xPos = FS::Cos( xF ) * xFreq;
+        float32v yPos = FS::Cos( yF ) * yFreq;
+        float32v zPos = FS::Sin( xF ) * xFreq;
+        float32v wPos = FS::Sin( yF ) * yFreq;
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
 
@@ -410,28 +409,28 @@ public:
 
 private:
     template<bool INITIAL>
-    static FS_INLINE void AxisReset( int32v& aIdx, int32v& bIdx, int32v aMax, int32v aSize, size_t aStep )
+    static FS_FORCEINLINE void AxisReset( int32v& aIdx, int32v& bIdx, int32v aMax, int32v aSize, size_t aStep )
     {
-        for( size_t resetLoop = INITIAL ? aStep : 0; resetLoop < FS_Size_32(); resetLoop += aStep )
+        for( size_t resetLoop = INITIAL ? aStep : 0; resetLoop < int32v::ElementCount; resetLoop += aStep )
         {
             mask32v aReset = aIdx > aMax;
-            bIdx = FS_MaskedIncrement_i32( bIdx, aReset );
-            aIdx = FS_MaskedSub_i32( aIdx, aSize, aReset );
+            bIdx = FS::MaskedIncrement( aReset, bIdx );
+            aIdx = FS::MaskedSub( aReset, aIdx, aSize );
         }
     }
 
-    static FS_INLINE FastNoise::OutputMinMax DoRemaining( float* noiseOut, size_t totalValues, size_t index, float32v min, float32v max, float32v finalGen )
+    static FS_FORCEINLINE FastNoise::OutputMinMax DoRemaining( float* noiseOut, size_t totalValues, size_t index, float32v min, float32v max, float32v finalGen )
     {
         FastNoise::OutputMinMax minMax;
         size_t remaining = totalValues - index;
 
-        if( remaining == FS_Size_32() )
+        if( remaining == int32v::ElementCount )
         {
-            FS_Store_f32( &noiseOut[index], finalGen );
+            FS::Store( &noiseOut[index], finalGen );
 
 #if FASTNOISE_CALC_MIN_MAX
-            min = FS_Min_f32( min, finalGen );
-            max = FS_Max_f32( max, finalGen );
+            min = FS::Min( min, finalGen );
+            max = FS::Max( max, finalGen );
 #endif
         }
         else
@@ -450,7 +449,7 @@ private:
 #if FASTNOISE_CALC_MIN_MAX
         float* minP = reinterpret_cast<float*>(&min);
         float* maxP = reinterpret_cast<float*>(&max);
-        for( size_t i = 0; i < FS_Size_32(); i++ )
+        for( size_t i = 0; i < int32v::ElementCount; i++ )
         {
             minMax << FastNoise::OutputMinMax{ minP[i], maxP[i] };
         }
diff --git a/include/FastNoise/Generators/Modifiers.h b/include/FastNoise/Generators/Modifiers.h
index 2657e11a..595bdab1 100644
--- a/include/FastNoise/Generators/Modifiers.h
+++ b/include/FastNoise/Generators/Modifiers.h
@@ -6,7 +6,6 @@ namespace FastNoise
     class DomainScale : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -21,7 +20,7 @@ namespace FastNoise
     template<>
     struct MetadataT<DomainScale> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -35,7 +34,6 @@ namespace FastNoise
     class DomainOffset : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -58,7 +56,7 @@ namespace FastNoise
     template<>
     struct MetadataT<DomainOffset> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -72,7 +70,6 @@ namespace FastNoise
     class DomainRotate : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -120,7 +117,7 @@ namespace FastNoise
     template<>
     struct MetadataT<DomainRotate> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -136,7 +133,6 @@ namespace FastNoise
     class SeedOffset : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -151,7 +147,7 @@ namespace FastNoise
     template<>
     struct MetadataT<SeedOffset> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -165,7 +161,6 @@ namespace FastNoise
     class Remap : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -186,7 +181,7 @@ namespace FastNoise
     template<>
     struct MetadataT<Remap> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -223,7 +218,6 @@ namespace FastNoise
     class ConvertRGBA8 : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -242,7 +236,7 @@ namespace FastNoise
     template<>
     struct MetadataT<ConvertRGBA8> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -267,7 +261,6 @@ namespace FastNoise
     class Terrace : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -286,7 +279,7 @@ namespace FastNoise
     template<>
     struct MetadataT<Terrace> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -301,7 +294,6 @@ namespace FastNoise
     class DomainAxisScale : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -321,7 +313,7 @@ namespace FastNoise
     template<>
     struct MetadataT<DomainAxisScale> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -335,7 +327,6 @@ namespace FastNoise
     class AddDimension : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -351,7 +342,7 @@ namespace FastNoise
     template<>
     struct MetadataT<AddDimension> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -365,7 +356,6 @@ namespace FastNoise
     class RemoveDimension : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -380,7 +370,7 @@ namespace FastNoise
     template<>
     struct MetadataT<RemoveDimension> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
@@ -394,7 +384,6 @@ namespace FastNoise
     class GeneratorCache : public virtual Generator
     {
     public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
@@ -407,7 +396,7 @@ namespace FastNoise
     template<>
     struct MetadataT<GeneratorCache> : MetadataT<Generator>
     {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
diff --git a/include/FastNoise/Generators/Modifiers.inl b/include/FastNoise/Generators/Modifiers.inl
index c5e0f331..30cec8cd 100644
--- a/include/FastNoise/Generators/Modifiers.inl
+++ b/include/FastNoise/Generators/Modifiers.inl
@@ -1,28 +1,24 @@
-#include "FastSIMD/InlInclude.h"
-
 #include "Modifiers.h"
 
-template<typename FS>
-class FS_T<FastNoise::DomainScale, FS> : public virtual FastNoise::DomainScale, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainScale, SIMD> : public virtual FastNoise::DomainScale, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
     
     template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         return this->GetSourceValue( mSource, seed, (pos * float32v( mScale ))... );
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::DomainOffset, FS> : public virtual FastNoise::DomainOffset, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainOffset, SIMD> : public virtual FastNoise::DomainOffset, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
     
     template<typename... P> 
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         return [this, seed]( std::remove_reference_t<P>... sourcePos, std::remove_reference_t<P>... offset )
         {
@@ -34,11 +30,9 @@ class FS_T<FastNoise::DomainOffset, FS> : public virtual FastNoise::DomainOffset
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::DomainRotate, FS> : public virtual FastNoise::DomainRotate, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> : public virtual FastNoise::DomainRotate, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
-
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
         if( mPitchSin == 0.0f && mRollSin == 0.0f )
@@ -66,27 +60,25 @@ class FS_T<FastNoise::DomainRotate, FS> : public virtual FastNoise::DomainRotate
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::SeedOffset, FS> : public virtual FastNoise::SeedOffset, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::SeedOffset, SIMD> : public virtual FastNoise::SeedOffset, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         return this->GetSourceValue( mSource, seed + int32v( mOffset ), pos... );
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::Remap, FS> : public virtual FastNoise::Remap, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Remap, SIMD> : public virtual FastNoise::Remap, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         float32v source = this->GetSourceValue( mSource, seed, pos... );
             
@@ -94,42 +86,40 @@ class FS_T<FastNoise::Remap, FS> : public virtual FastNoise::Remap, public FS_T<
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::ConvertRGBA8, FS> : public virtual FastNoise::ConvertRGBA8, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::ConvertRGBA8, SIMD> : public virtual FastNoise::ConvertRGBA8, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         float32v source = this->GetSourceValue( mSource, seed, pos... );
         
-        source = FS_Min_f32( source, float32v( mMax ));
-        source = FS_Max_f32( source, float32v( mMin ));
+        source = FS::Min( source, float32v( mMax ));
+        source = FS::Max( source, float32v( mMin ));
         source -= float32v( mMin );
 
         source *= float32v( 255.0f / (mMax - mMin) );
 
-        int32v byteVal = FS_Convertf32_i32( source );
+        int32v byteVal = FS::Convert<std::int32_t>( source );
 
         int32v output = int32v( 255 << 24 );
         output |= byteVal;
         output |= byteVal << 8;
         output |= byteVal << 16;
 
-        return FS_Casti32_f32( output );
+        return FS::Cast<float>( output );
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::Terrace, FS> : public virtual FastNoise::Terrace, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Terrace, SIMD> : public virtual FastNoise::Terrace, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         float32v source = this->GetSourceValue( mSource, seed, pos... );
 
@@ -145,7 +135,7 @@ class FS_T<FastNoise::Terrace, FS> : public virtual FastNoise::Terrace, public F
             diff = float32v( 0.5f ) - diff;
 
             diff *= float32v( mSmoothnessRecip );
-            diff = FS_Min_f32( diff, float32v( 0.5f ) );
+            diff = FS::Min( diff, float32v( 0.5f ) );
             diff = FS_Select_f32( diffSign, float32v( 0.5f ) - diff, diff - float32v( 0.5f ) );
 
             rounded += diff;
@@ -155,14 +145,13 @@ class FS_T<FastNoise::Terrace, FS> : public virtual FastNoise::Terrace, public F
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::DomainAxisScale, FS> : public virtual FastNoise::DomainAxisScale, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainAxisScale, SIMD> : public virtual FastNoise::DomainAxisScale, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         size_t idx = 0;
         ((pos *= float32v( mScale[idx++] )), ...);
@@ -171,14 +160,13 @@ class FS_T<FastNoise::DomainAxisScale, FS> : public virtual FastNoise::DomainAxi
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::AddDimension, FS> : public virtual FastNoise::AddDimension, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::AddDimension, SIMD> : public virtual FastNoise::AddDimension, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         if constexpr( sizeof...(P) == (size_t)FastNoise::Dim::Count )
         {
@@ -191,11 +179,9 @@ class FS_T<FastNoise::AddDimension, FS> : public virtual FastNoise::AddDimension
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::RemoveDimension, FS> : public virtual FastNoise::RemoveDimension, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::RemoveDimension, SIMD> : public virtual FastNoise::RemoveDimension, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
-
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
         return this->GetSourceValue( mSource, seed, x, y );
@@ -234,18 +220,17 @@ class FS_T<FastNoise::RemoveDimension, FS> : public virtual FastNoise::RemoveDim
     }
 };
 
-template<typename FS>
-class FS_T<FastNoise::GeneratorCache, FS> : public virtual FastNoise::GeneratorCache, public FS_T<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::GeneratorCache, SIMD> : public virtual FastNoise::GeneratorCache, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    FASTSIMD_DECLARE_FS_TYPES;
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
-    FS_INLINE float32v GenT( int32v seed, P... pos ) const
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         thread_local static const void* CachedGenerator = nullptr;
-        thread_local static float CachedValue[FS_Size_32()];
-        thread_local static float CachedPos[FS_Size_32()][sizeof...( P )];
+        thread_local static float CachedValue[int32v::ElementCount];
+        thread_local static float CachedPos[int32v::ElementCount][sizeof...( P )];
         // TLS is not always aligned (compiler bug), need to avoid using SIMD types
 
         float32v arrayPos[] = { pos... };
@@ -254,7 +239,7 @@ class FS_T<FastNoise::GeneratorCache, FS> : public virtual FastNoise::GeneratorC
 
         for( size_t i = 0; i < sizeof...( P ); i++ )
         {
-            isSame &= !FS_AnyMask_bool( arrayPos[i] != FS_Load_f32( &CachedPos[i] ) );
+            isSame &= !FS_AnyMask_bool( arrayPos[i] != FS::Load( &CachedPos[i] ) );
         }
 
         if( !isSame )
@@ -262,16 +247,16 @@ class FS_T<FastNoise::GeneratorCache, FS> : public virtual FastNoise::GeneratorC
             CachedGenerator = mSource.simdGeneratorPtr;
 
             float32v value = this->GetSourceValue( mSource, seed, pos... );
-            FS_Store_f32( &CachedValue, value );
+            FS::Store( &CachedValue, value );
 
             for( size_t i = 0; i < sizeof...(P); i++ )
             {
-                FS_Store_f32( &CachedPos[i], arrayPos[i] );
+                FS::Store( &CachedPos[i], arrayPos[i] );
             }
 
             return value;
         }
 
-        return FS_Load_f32( &CachedValue );
+        return FS::Load( &CachedValue );
     }
 };
diff --git a/include/FastNoise/Generators/Perlin.h b/include/FastNoise/Generators/Perlin.h
index 1865abd4..4f9db4f8 100644
--- a/include/FastNoise/Generators/Perlin.h
+++ b/include/FastNoise/Generators/Perlin.h
@@ -1,25 +1,23 @@
-#pragma once
-#include "Generator.h"
-
-namespace FastNoise
-{
-    class Perlin : public virtual Generator
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Perlin> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+namespace FastNoise
+{
+    class Perlin : public virtual Generator
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Perlin> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Coherent Noise" );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Perlin.inl b/include/FastNoise/Generators/Perlin.inl
index 7351be42..d5bf597e 100644
--- a/include/FastNoise/Generators/Perlin.inl
+++ b/include/FastNoise/Generators/Perlin.inl
@@ -1,109 +1,106 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Perlin.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FS_T<FastNoise::Perlin, FS> : public virtual FastNoise::Perlin, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-
-        float32v xf0 = xs = x - xs;
-        float32v yf0 = ys = y - ys;
-        float32v xf1 = xf0 - float32v( 1 );
-        float32v yf1 = yf0 - float32v( 1 );
-
-        xs = FnUtils::InterpQuintic( xs );
-        ys = FnUtils::InterpQuintic( ys );
-
-        return float32v( 0.579106986522674560546875f ) * FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0 ), xf0, yf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1 ), xf0, yf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-
-        float32v xf0 = xs = x - xs;
-        float32v yf0 = ys = y - ys;
-        float32v zf0 = zs = z - zs;
-        float32v xf1 = xf0 - float32v( 1 );
-        float32v yf1 = yf0 - float32v( 1 );
-        float32v zf1 = zf0 - float32v( 1 );
-
-        xs = FnUtils::InterpQuintic( xs );
-        ys = FnUtils::InterpQuintic( ys );
-        zs = FnUtils::InterpQuintic( zs );
-
-        return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
-            FnUtils::Lerp( 
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-        float32v ws = FS_Floor_f32( w );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-        int32v w1 = w0 + int32v( FnPrimes::W );
-
-        float32v xf0 = xs = x - xs;
-        float32v yf0 = ys = y - ys;
-        float32v zf0 = zs = z - zs;
-        float32v wf0 = ws = w - ws;
-        float32v xf1 = xf0 - float32v( 1 );
-        float32v yf1 = yf0 - float32v( 1 );
-        float32v zf1 = zf0 - float32v( 1 );
-        float32v wf1 = wf0 - float32v( 1 );
-
-        xs = FnUtils::InterpQuintic( xs );
-        ys = FnUtils::InterpQuintic( ys );
-        zs = FnUtils::InterpQuintic( zs );
-        ws = FnUtils::InterpQuintic( ws );
-
-        return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
-            FnUtils::Lerp(                                                                                                                                                     
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w0 ), xf0, yf0, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w0 ), xf1, yf0, zf1, wf0 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w0 ), xf0, yf1, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w0 ), xf1, yf1, zf1, wf0 ), xs ), ys ), zs ),
-            FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w1 ), xf0, yf0, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w1 ), xf1, yf0, zf0, wf1 ), xs ),
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
-            FnUtils::Lerp(                                                                                                                                                     
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws );
-    }
-};
+#include "FastSIMD/InlInclude.h"
+
+#include "Perlin.h"
+#include "Utils.inl"
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Perlin, FS> : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+
+        float32v xf0 = xs = x - xs;
+        float32v yf0 = ys = y - ys;
+        float32v xf1 = xf0 - float32v( 1 );
+        float32v yf1 = yf0 - float32v( 1 );
+
+        xs = FnUtils::InterpQuintic( xs );
+        ys = FnUtils::InterpQuintic( ys );
+
+        return float32v( 0.579106986522674560546875f ) * FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0 ), xf0, yf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1 ), xf0, yf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        float32v zs = FS_Floor_f32( z );
+
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v z1 = z0 + int32v( FnPrimes::Z );
+
+        float32v xf0 = xs = x - xs;
+        float32v yf0 = ys = y - ys;
+        float32v zf0 = zs = z - zs;
+        float32v xf1 = xf0 - float32v( 1 );
+        float32v yf1 = yf0 - float32v( 1 );
+        float32v zf1 = zf0 - float32v( 1 );
+
+        xs = FnUtils::InterpQuintic( xs );
+        ys = FnUtils::InterpQuintic( ys );
+        zs = FnUtils::InterpQuintic( zs );
+
+        return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
+            FnUtils::Lerp( 
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        float32v zs = FS_Floor_f32( z );
+        float32v ws = FS_Floor_f32( w );
+
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
+        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v z1 = z0 + int32v( FnPrimes::Z );
+        int32v w1 = w0 + int32v( FnPrimes::W );
+
+        float32v xf0 = xs = x - xs;
+        float32v yf0 = ys = y - ys;
+        float32v zf0 = zs = z - zs;
+        float32v wf0 = ws = w - ws;
+        float32v xf1 = xf0 - float32v( 1 );
+        float32v yf1 = yf0 - float32v( 1 );
+        float32v zf1 = zf0 - float32v( 1 );
+        float32v wf1 = wf0 - float32v( 1 );
+
+        xs = FnUtils::InterpQuintic( xs );
+        ys = FnUtils::InterpQuintic( ys );
+        zs = FnUtils::InterpQuintic( zs );
+        ws = FnUtils::InterpQuintic( ws );
+
+        return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
+            FnUtils::Lerp(                                                                                                                                                     
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w0 ), xf0, yf0, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w0 ), xf1, yf0, zf1, wf0 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w0 ), xf0, yf1, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w0 ), xf1, yf1, zf1, wf0 ), xs ), ys ), zs ),
+            FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w1 ), xf0, yf0, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w1 ), xf1, yf0, zf0, wf1 ), xs ),
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
+            FnUtils::Lerp(                                                                                                                                                     
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws );
+    }
+};
diff --git a/include/FastNoise/Generators/Simplex.h b/include/FastNoise/Generators/Simplex.h
index f0caa158..bc351857 100644
--- a/include/FastNoise/Generators/Simplex.h
+++ b/include/FastNoise/Generators/Simplex.h
@@ -1,45 +1,41 @@
-#pragma once
-#include "Generator.h"
-
-namespace FastNoise
-{
-    class Simplex : public virtual Generator
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Simplex> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
-        }
-    };
-#endif
-
-    class OpenSimplex2 : public virtual Generator
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<OpenSimplex2> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+namespace FastNoise
+{
+    class Simplex : public virtual Generator
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Simplex> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Coherent Noise" );
+        }
+    };
+#endif
+
+    class OpenSimplex2 : public virtual Generator
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<OpenSimplex2> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Coherent Noise" );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index 422b3c35..782ab4ef 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -1,373 +1,367 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Simplex.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FS_T<FastNoise::Simplex, FS> : public virtual FastNoise::Simplex, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        const float SQRT3 = 1.7320508075688772935274463415059f;
-        const float F2 = 0.5f * (SQRT3 - 1.0f);
-        const float G2 = (3.0f - SQRT3) / 6.0f;
-
-        float32v f = float32v( F2 ) * (x + y);
-        float32v x0 = FS_Floor_f32( x + f );
-        float32v y0 = FS_Floor_f32( y + f );
-
-        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
-
-        float32v g = float32v( G2 ) * (x0 + y0);
-        x0 = x - (x0 - g);
-        y0 = y - (y0 - g);
-
-        mask32v i1 = x0 > y0;
-        //mask32v j1 = ~i1; //NMasked funcs
-
-        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
-        float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
-
-        float32v x2 = x0 + float32v( G2 * 2 - 1 );
-        float32v y2 = y0 + float32v( G2 * 2 - 1 );
-
-        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, float32v( 0.5f ) ) );
-        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, float32v( 0.5f ) ) );
-        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, float32v( 0.5f ) ) );
-
-        t0 = FS_Max_f32( t0, float32v( 0 ) );
-        t1 = FS_Max_f32( t1, float32v( 0 ) );
-        t2 = FS_Max_f32( t2, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-
-        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
-        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
-        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
-
-        return float32v( 38.283687591552734375f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        const float F3 = 1.0f / 3.0f;
-        const float G3 = 1.0f / 2.0f;
-
-        float32v s = float32v( F3 ) * (x + y + z);
-        x += s;
-        y += s;
-        z += s;
-
-        float32v x0 = FS_Floor_f32( x );
-        float32v y0 = FS_Floor_f32( y );
-        float32v z0 = FS_Floor_f32( z );
-        float32v xi = x - x0;
-        float32v yi = y - y0;
-        float32v zi = z - z0;
-
-        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
-        int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
-
-        mask32v x_ge_y = xi >= yi;
-        mask32v y_ge_z = yi >= zi;
-        mask32v x_ge_z = xi >= zi;
-
-        float32v g = float32v( G3 ) * (xi + yi + zi);
-        x0 = xi - g;
-        y0 = yi - g;
-        z0 = zi - g;
-
-        mask32v i1 = x_ge_y & x_ge_z;
-        mask32v j1 = FS_BitwiseAndNot_m32( y_ge_z, x_ge_y );
-        mask32v k1 = FS_BitwiseAndNot_m32( ~x_ge_z, y_ge_z );
-
-        mask32v i2 = x_ge_y | x_ge_z;
-        mask32v j2 = ~x_ge_y | y_ge_z;
-        mask32v k2 = x_ge_z & y_ge_z; //NMasked
-
-        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G3 );
-        float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G3 );
-        float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G3 );
-        float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G3 * 2 );
-        float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G3 * 2 );
-        float32v z2 = FS_NMaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G3 * 2 );
-        float32v x3 = x0 + float32v( G3 * 3 - 1 );
-        float32v y3 = y0 + float32v( G3 * 3 - 1 );
-        float32v z3 = z0 + float32v( G3 * 3 - 1 );
-
-        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, float32v( 0.6f ) ) ) );
-        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, float32v( 0.6f ) ) ) );
-        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, float32v( 0.6f ) ) ) );
-        float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, float32v( 0.6f ) ) ) );
-
-        t0 = FS_Max_f32( t0, float32v( 0 ) );
-        t1 = FS_Max_f32( t1, float32v( 0 ) );
-        t2 = FS_Max_f32( t2, float32v( 0 ) );
-        t3 = FS_Max_f32( t3, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-        t3 *= t3; t3 *= t3;             
-
-        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k ), x0, y0, z0 );
-        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ), FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ) ), x1, y1, z1 );
-        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ), FS_NMaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ) ), x2, y2, z2 );
-        float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ) ), x3, y3, z3 );
-
-        return float32v( 32.69428253173828125f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, n3 * t3 ) ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        const float SQRT5 = 2.236067977499f;
-        const float F4 = (SQRT5 - 1.0f) / 4.0f;
-        const float G4 = (5.0f - SQRT5) / 20.0f;
-
-        float32v s = float32v( F4 ) * (x + y + z + w);
-        x += s;
-        y += s;
-        z += s;
-        w += s;
-
-        float32v x0 = FS_Floor_f32( x );
-        float32v y0 = FS_Floor_f32( y );
-        float32v z0 = FS_Floor_f32( z );
-        float32v w0 = FS_Floor_f32( w );
-        float32v xi = x - x0;
-        float32v yi = y - y0;
-        float32v zi = z - z0;
-        float32v wi = w - w0;
-
-        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
-        int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
-        int32v l = FS_Convertf32_i32( w0 ) * int32v( FnPrimes::W );
-
-        float32v g = float32v( G4 ) * (xi + yi + zi + wi);
-        x0 = xi - g;
-        y0 = yi - g;
-        z0 = zi - g;
-        w0 = wi - g;
-
-        int32v rankx( 0 );
-        int32v ranky( 0 );
-        int32v rankz( 0 );
-        int32v rankw( 0 );
-
-        mask32v x_ge_y = x0 >= y0;
-        rankx = FS_MaskedIncrement_i32( rankx, x_ge_y );
-        ranky = FS_MaskedIncrement_i32( ranky, ~x_ge_y );
-
-        mask32v x_ge_z = x0 >= z0;
-        rankx = FS_MaskedIncrement_i32( rankx, x_ge_z );
-        rankz = FS_MaskedIncrement_i32( rankz, ~x_ge_z );
-
-        mask32v x_ge_w = x0 >= w0;
-        rankx = FS_MaskedIncrement_i32( rankx, x_ge_w );
-        rankw = FS_MaskedIncrement_i32( rankw, ~x_ge_w );
-
-        mask32v y_ge_z = y0 >= z0;
-        ranky = FS_MaskedIncrement_i32( ranky, y_ge_z );
-        rankz = FS_MaskedIncrement_i32( rankz, ~y_ge_z );
-
-        mask32v y_ge_w = y0 >= w0;
-        ranky = FS_MaskedIncrement_i32( ranky, y_ge_w );
-        rankw = FS_MaskedIncrement_i32( rankw, ~y_ge_w );
-
-        mask32v z_ge_w = z0 >= w0;
-        rankz = FS_MaskedIncrement_i32( rankz, z_ge_w );
-        rankw = FS_MaskedIncrement_i32( rankw, ~z_ge_w );
-
-        mask32v i1 = rankx > int32v( 2 );
-        mask32v j1 = ranky > int32v( 2 );
-        mask32v k1 = rankz > int32v( 2 );
-        mask32v l1 = rankw > int32v( 2 );
-
-        mask32v i2 = rankx > int32v( 1 );
-        mask32v j2 = ranky > int32v( 1 );
-        mask32v k2 = rankz > int32v( 1 );
-        mask32v l2 = rankw > int32v( 1 );
-
-        mask32v i3 = rankx > int32v( 0 );
-        mask32v j3 = ranky > int32v( 0 );
-        mask32v k3 = rankz > int32v( 0 );
-        mask32v l3 = rankw > int32v( 0 );
-
-        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G4 );
-        float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G4 );
-        float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G4 );
-        float32v w1 = FS_MaskedSub_f32( w0, float32v( 1 ), l1 ) + float32v( G4 );
-        float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G4 * 2 );
-        float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G4 * 2 );
-        float32v z2 = FS_MaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G4 * 2 );
-        float32v w2 = FS_MaskedSub_f32( w0, float32v( 1 ), l2 ) + float32v( G4 * 2 );
-        float32v x3 = FS_MaskedSub_f32( x0, float32v( 1 ), i3 ) + float32v( G4 * 3 );
-        float32v y3 = FS_MaskedSub_f32( y0, float32v( 1 ), j3 ) + float32v( G4 * 3 );
-        float32v z3 = FS_MaskedSub_f32( z0, float32v( 1 ), k3 ) + float32v( G4 * 3 );
-        float32v w3 = FS_MaskedSub_f32( w0, float32v( 1 ), l3 ) + float32v( G4 * 3 );
-        float32v x4 = x0 + float32v( G4 * 4 - 1 );
-        float32v y4 = y0 + float32v( G4 * 4 - 1 );
-        float32v z4 = z0 + float32v( G4 * 4 - 1 );
-        float32v w4 = w0 + float32v( G4 * 4 - 1 );
-
-        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, FS_FNMulAdd_f32( w0, w0, float32v( 0.6f ) ) ) ) );
-        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, FS_FNMulAdd_f32( w1, w1, float32v( 0.6f ) ) ) ) );
-        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, FS_FNMulAdd_f32( w2, w2, float32v( 0.6f ) ) ) ) );
-        float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, FS_FNMulAdd_f32( w3, w3, float32v( 0.6f ) ) ) ) );
-        float32v t4 = FS_FNMulAdd_f32( x4, x4, FS_FNMulAdd_f32( y4, y4, FS_FNMulAdd_f32( z4, z4, FS_FNMulAdd_f32( w4, w4, float32v( 0.6f ) ) ) ) );
-
-        t0 = FS_Max_f32( t0, float32v( 0 ) );
-        t1 = FS_Max_f32( t1, float32v( 0 ) );
-        t2 = FS_Max_f32( t2, float32v( 0 ) );
-        t3 = FS_Max_f32( t3, float32v( 0 ) );
-        t4 = FS_Max_f32( t4, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-        t3 *= t3; t3 *= t3;
-        t4 *= t4; t4 *= t4;
-
-        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k, l ), x0, y0, z0, w0 );
-        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, 
-            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ),
-            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ),
-            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ),
-            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l1 ) ), x1, y1, z1, w1 );
-        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, 
-            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ),
-            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ),
-            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ),
-            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l2 ) ), x2, y2, z2, w2 );
-        float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
-            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i3 ),
-            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j3 ),
-            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k3 ),
-            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l3 ) ), x3, y3, z3, w3 );
-        float32v n4 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ), l + int32v( FnPrimes::W ) ), x4, y4, z4, w4 );
-
-        return float32v( 27.f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, FS_FMulAdd_f32( n3, t3, n4 * t4 ) ) ) );
-    }
-};
-
-template<typename FS>
-class FS_T<FastNoise::OpenSimplex2, FS> : public virtual FastNoise::OpenSimplex2, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        const float SQRT3 = 1.7320508075f;
-        const float F2 = 0.5f * (SQRT3 - 1.0f);
-        const float G2 = (3.0f - SQRT3) / 6.0f;
-
-        float32v f = float32v( F2 ) * (x + y);
-        float32v x0 = FS_Floor_f32( x + f );
-        float32v y0 = FS_Floor_f32( y + f );
-
-        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
-
-        float32v g = float32v( G2 ) * (x0 + y0);
-        x0 = x - (x0 - g);
-        y0 = y - (y0 - g);
-
-        mask32v i1 = x0 > y0;
-        //mask32v j1 = ~i1; //NMasked funcs
-
-        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
-        float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
-        float32v x2 = x0 + float32v( (G2 * 2) - 1 );
-        float32v y2 = y0 + float32v( (G2 * 2) - 1 );
-
-        float32v t0 = float32v( 0.5f ) - (x0 * x0) - (y0 * y0);
-        float32v t1 = float32v( 0.5f ) - (x1 * x1) - (y1 * y1);
-        float32v t2 = float32v( 0.5f ) - (x2 * x2) - (y2 * y2);
-
-        t0 = FS_Max_f32( t0, float32v( 0 ) );
-        t1 = FS_Max_f32( t1, float32v( 0 ) );
-        t2 = FS_Max_f32( t2, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-
-        float32v n0 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
-        float32v n1 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
-        float32v n2 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
-
-        return float32v( 49.918426513671875f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v f = float32v( 2.0f / 3.0f ) * (x + y + z);
-        float32v xr = f - x;
-        float32v yr = f - y;
-        float32v zr = f - z;
-
-        float32v val( 0 );
-        for( size_t i = 0; ; i++ )
-        {
-            float32v v0xr = FS_Round_f32( xr );
-            float32v v0yr = FS_Round_f32( yr );
-            float32v v0zr = FS_Round_f32( zr );
-            float32v d0xr = xr - v0xr;
-            float32v d0yr = yr - v0yr;
-            float32v d0zr = zr - v0zr;
-
-            float32v score0xr = FS_Abs_f32( d0xr );
-            float32v score0yr = FS_Abs_f32( d0yr );
-            float32v score0zr = FS_Abs_f32( d0zr );
-            mask32v dir0xr = FS_Max_f32( score0yr, score0zr ) <= score0xr;
-            mask32v dir0yr = FS_BitwiseAndNot_m32( FS_Max_f32( score0zr, score0xr ) <= score0yr, dir0xr );
-            mask32v dir0zr = ~(dir0xr | dir0yr);
-            float32v v1xr = FS_MaskedAdd_f32( v0xr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0xr ), dir0xr );
-            float32v v1yr = FS_MaskedAdd_f32( v0yr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0yr ), dir0yr );
-            float32v v1zr = FS_MaskedAdd_f32( v0zr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0zr ), dir0zr );
-            float32v d1xr = xr - v1xr;
-            float32v d1yr = yr - v1yr;
-            float32v d1zr = zr - v1zr;
-
-            int32v hv0xr = FS_Convertf32_i32( v0xr ) * int32v( FnPrimes::X );
-            int32v hv0yr = FS_Convertf32_i32( v0yr ) * int32v( FnPrimes::Y );
-            int32v hv0zr = FS_Convertf32_i32( v0zr ) * int32v( FnPrimes::Z );
-
-            int32v hv1xr = FS_Convertf32_i32( v1xr ) * int32v( FnPrimes::X );
-            int32v hv1yr = FS_Convertf32_i32( v1yr ) * int32v( FnPrimes::Y );
-            int32v hv1zr = FS_Convertf32_i32( v1zr ) * int32v( FnPrimes::Z );
-
-            float32v t0 = FS_FNMulAdd_f32( d0zr, d0zr, FS_FNMulAdd_f32( d0yr, d0yr, FS_FNMulAdd_f32( d0xr, d0xr, float32v( 0.6f ) ) ) );
-            float32v t1 = FS_FNMulAdd_f32( d1zr, d1zr, FS_FNMulAdd_f32( d1yr, d1yr, FS_FNMulAdd_f32( d1xr, d1xr, float32v( 0.6f ) ) ) );
-            t0 = FS_Max_f32( t0, float32v( 0 ) );
-            t1 = FS_Max_f32( t1, float32v( 0 ) );
-            t0 *= t0; t0 *= t0;
-            t1 *= t1; t1 *= t1;
-
-            float32v v0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv0xr, hv0yr, hv0zr ), d0xr, d0yr, d0zr );
-            float32v v1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv1xr, hv1yr, hv1zr ), d1xr, d1yr, d1zr );
-
-            val = FS_FMulAdd_f32( v0, t0, FS_FMulAdd_f32( v1, t1, val ) );
-
-            if( i == 1 )
-            {
-                break;
-            }
-
-            xr += float32v( 0.5f );
-            yr += float32v( 0.5f );
-            zr += float32v( 0.5f );
-            seed = ~seed;
-        }
-
-        return float32v( 32.69428253173828125f ) * val;
-    } 
-};
-
+#include "FastSIMD/InlInclude.h"
+
+#include "Simplex.h"
+#include "Utils.inl"
+
+template<typename FS> // FastNoise::Simplex specialised on FS; Gen is overloaded for 2, 3 and 4 input coordinates.
+class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    { // 2D: sums the contributions of three lattice corners around (x, y).
+        const float SQRT3 = 1.7320508075688772935274463415059f;
+        const float F2 = 0.5f * (SQRT3 - 1.0f); // skew factor, applied to (x + y) before flooring
+        const float G2 = (3.0f - SQRT3) / 6.0f; // unskew factor, applied to the floored cell coordinates
+        // Skew the input and floor it to find the containing cell.
+        float32v f = float32v( F2 ) * (x + y);
+        float32v x0 = FS_Floor_f32( x + f );
+        float32v y0 = FS_Floor_f32( y + f );
+        // Cell coordinates pre-multiplied by the per-axis primes consumed by HashPrimes.
+        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
+        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
+        // x0/y0 are reused: from here on they hold the offset from the unskewed cell origin to the input.
+        float32v g = float32v( G2 ) * (x0 + y0);
+        x0 = x - (x0 - g);
+        y0 = y - (y0 - g);
+
+        mask32v i1 = x0 > y0; // lanes where the x offset is the larger one
+        // No separate j1 = ~i1 mask: the FS_NMasked* calls below take i1 and presumably apply it inverted.
+        // Offset to the second corner: one step along x where i1 is set, otherwise along y (per the masked ops).
+        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
+        float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
+        // Offset to the last corner, which is one step along both axes.
+        float32v x2 = x0 + float32v( G2 * 2 - 1 );
+        float32v y2 = y0 + float32v( G2 * 2 - 1 );
+        // Falloff per corner: 0.5 minus squared distance (FS_FNMulAdd_f32( a, b, c ) presumably yields c - a * b).
+        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, float32v( 0.5f ) ) );
+        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, float32v( 0.5f ) ) );
+        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, float32v( 0.5f ) ) );
+        // Corners whose falloff went negative contribute nothing.
+        t0 = FS::Max( t0, float32v( 0 ) );
+        t1 = FS::Max( t1, float32v( 0 ) );
+        t2 = FS::Max( t2, float32v( 0 ) );
+        // Raise each falloff to the fourth power by squaring twice.
+        t0 *= t0; t0 *= t0;
+        t1 *= t1; t1 *= t1;
+        t2 *= t2; t2 *= t2;
+        // Gradient contribution of each corner, selected by hashing the seed with that corner's coordinates.
+        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
+        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
+        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
+        // Weighted sum of the corners, multiplied by a fixed scale (presumably normalising the output range).
+        return float32v( 38.283687591552734375f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    { // 3D: sums the contributions of four lattice corners around (x, y, z).
+        const float F3 = 1.0f / 3.0f;
+        const float G3 = 1.0f / 2.0f;
+        // NOTE(review): F3 then G3 compose to I - (2/3)*J (J = all-ones), a reflection rather than the identity; looks deliberate - confirm.
+        float32v s = float32v( F3 ) * (x + y + z);
+        x += s;
+        y += s;
+        z += s;
+        // Containing cell (floor) and the fractional position inside it, still in skewed space.
+        float32v x0 = FS_Floor_f32( x );
+        float32v y0 = FS_Floor_f32( y );
+        float32v z0 = FS_Floor_f32( z );
+        float32v xi = x - x0;
+        float32v yi = y - y0;
+        float32v zi = z - z0;
+        // Cell coordinates pre-multiplied by the per-axis primes consumed by HashPrimes.
+        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
+        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
+        int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
+        // Pairwise ordering of the fractional parts; decides which corners are visited.
+        mask32v x_ge_y = xi >= yi;
+        mask32v y_ge_z = yi >= zi;
+        mask32v x_ge_z = xi >= zi;
+        // x0/y0/z0 are reused: they now hold the offset from the first corner to the input.
+        float32v g = float32v( G3 ) * (xi + yi + zi);
+        x0 = xi - g;
+        y0 = yi - g;
+        z0 = zi - g;
+        // Second corner: step along the largest axis (FS_BitwiseAndNot_m32( a, b ) presumably yields a & ~b).
+        mask32v i1 = x_ge_y & x_ge_z;
+        mask32v j1 = FS_BitwiseAndNot_m32( y_ge_z, x_ge_y );
+        mask32v k1 = FS_BitwiseAndNot_m32( ~x_ge_z, y_ge_z );
+        // Third corner: step along every axis except the smallest one.
+        mask32v i2 = x_ge_y | x_ge_z;
+        mask32v j2 = ~x_ge_y | y_ge_z;
+        mask32v k2 = x_ge_z & y_ge_z; // set where z is the smallest; only passed to the FS_NMasked* variants below
+        // Offsets from each remaining corner to the input; the last corner is one step along all three axes.
+        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G3 );
+        float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G3 );
+        float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G3 );
+        float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G3 * 2 );
+        float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G3 * 2 );
+        float32v z2 = FS_NMaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G3 * 2 );
+        float32v x3 = x0 + float32v( G3 * 3 - 1 );
+        float32v y3 = y0 + float32v( G3 * 3 - 1 );
+        float32v z3 = z0 + float32v( G3 * 3 - 1 );
+        // Falloff per corner: 0.6 minus squared distance (FS_FNMulAdd_f32( a, b, c ) presumably yields c - a * b).
+        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, float32v( 0.6f ) ) ) );
+        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, float32v( 0.6f ) ) ) );
+        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, float32v( 0.6f ) ) ) );
+        float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, float32v( 0.6f ) ) ) );
+        // Corners whose falloff went negative contribute nothing.
+        t0 = FS::Max( t0, float32v( 0 ) );
+        t1 = FS::Max( t1, float32v( 0 ) );
+        t2 = FS::Max( t2, float32v( 0 ) );
+        t3 = FS::Max( t3, float32v( 0 ) );
+        // Raise each falloff to the fourth power by squaring twice.
+        t0 *= t0; t0 *= t0;
+        t1 *= t1; t1 *= t1;
+        t2 *= t2; t2 *= t2;
+        t3 *= t3; t3 *= t3;             
+        // Gradient contribution of each corner, selected by hashing the seed with that corner's coordinates.
+        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k ), x0, y0, z0 );
+        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ), FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ) ), x1, y1, z1 );
+        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ), FS_NMaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ) ), x2, y2, z2 );
+        float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ) ), x3, y3, z3 );
+        // Weighted sum of the corners, multiplied by a fixed scale (presumably normalising the output range).
+        return float32v( 32.69428253173828125f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, n3 * t3 ) ) );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    { // 4D: sums the contributions of five lattice corners around (x, y, z, w).
+        const float SQRT5 = 2.236067977499f;
+        const float F4 = (SQRT5 - 1.0f) / 4.0f; // skew factor
+        const float G4 = (5.0f - SQRT5) / 20.0f; // unskew factor
+        // Skew the input.
+        float32v s = float32v( F4 ) * (x + y + z + w);
+        x += s;
+        y += s;
+        z += s;
+        w += s;
+        // Containing cell (floor) and the fractional position inside it, still in skewed space.
+        float32v x0 = FS_Floor_f32( x );
+        float32v y0 = FS_Floor_f32( y );
+        float32v z0 = FS_Floor_f32( z );
+        float32v w0 = FS_Floor_f32( w );
+        float32v xi = x - x0;
+        float32v yi = y - y0;
+        float32v zi = z - z0;
+        float32v wi = w - w0;
+        // Cell coordinates pre-multiplied by the per-axis primes consumed by HashPrimes.
+        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
+        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
+        int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
+        int32v l = FS_Convertf32_i32( w0 ) * int32v( FnPrimes::W );
+        // x0..w0 are reused: they now hold the offset from the first corner to the input.
+        float32v g = float32v( G4 ) * (xi + yi + zi + wi);
+        x0 = xi - g;
+        y0 = yi - g;
+        z0 = zi - g;
+        w0 = wi - g;
+        // Rank of an axis = number of the six pairwise comparisons below that it wins (0..3).
+        int32v rankx( 0 );
+        int32v ranky( 0 );
+        int32v rankz( 0 );
+        int32v rankw( 0 );
+        // Each comparison bumps exactly one side (FS::MaskedIncrement presumably adds 1 where the mask is set).
+        mask32v x_ge_y = x0 >= y0;
+        rankx = FS::MaskedIncrement( rankx, x_ge_y );
+        ranky = FS::MaskedIncrement( ranky, ~x_ge_y );
+
+        mask32v x_ge_z = x0 >= z0;
+        rankx = FS::MaskedIncrement( rankx, x_ge_z );
+        rankz = FS::MaskedIncrement( rankz, ~x_ge_z );
+
+        mask32v x_ge_w = x0 >= w0;
+        rankx = FS::MaskedIncrement( rankx, x_ge_w );
+        rankw = FS::MaskedIncrement( rankw, ~x_ge_w );
+
+        mask32v y_ge_z = y0 >= z0;
+        ranky = FS::MaskedIncrement( ranky, y_ge_z );
+        rankz = FS::MaskedIncrement( rankz, ~y_ge_z );
+
+        mask32v y_ge_w = y0 >= w0;
+        ranky = FS::MaskedIncrement( ranky, y_ge_w );
+        rankw = FS::MaskedIncrement( rankw, ~y_ge_w );
+
+        mask32v z_ge_w = z0 >= w0;
+        rankz = FS::MaskedIncrement( rankz, z_ge_w );
+        rankw = FS::MaskedIncrement( rankw, ~z_ge_w );
+        // Second corner: step along axes whose rank is 3.
+        mask32v i1 = rankx > int32v( 2 );
+        mask32v j1 = ranky > int32v( 2 );
+        mask32v k1 = rankz > int32v( 2 );
+        mask32v l1 = rankw > int32v( 2 );
+        // Third corner: step along axes whose rank is at least 2.
+        mask32v i2 = rankx > int32v( 1 );
+        mask32v j2 = ranky > int32v( 1 );
+        mask32v k2 = rankz > int32v( 1 );
+        mask32v l2 = rankw > int32v( 1 );
+        // Fourth corner: step along axes whose rank is at least 1.
+        mask32v i3 = rankx > int32v( 0 );
+        mask32v j3 = ranky > int32v( 0 );
+        mask32v k3 = rankz > int32v( 0 );
+        mask32v l3 = rankw > int32v( 0 );
+        // Offsets from each remaining corner to the input; the fifth corner is one step along all four axes.
+        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G4 );
+        float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G4 );
+        float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G4 );
+        float32v w1 = FS_MaskedSub_f32( w0, float32v( 1 ), l1 ) + float32v( G4 );
+        float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G4 * 2 );
+        float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G4 * 2 );
+        float32v z2 = FS_MaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G4 * 2 );
+        float32v w2 = FS_MaskedSub_f32( w0, float32v( 1 ), l2 ) + float32v( G4 * 2 );
+        float32v x3 = FS_MaskedSub_f32( x0, float32v( 1 ), i3 ) + float32v( G4 * 3 );
+        float32v y3 = FS_MaskedSub_f32( y0, float32v( 1 ), j3 ) + float32v( G4 * 3 );
+        float32v z3 = FS_MaskedSub_f32( z0, float32v( 1 ), k3 ) + float32v( G4 * 3 );
+        float32v w3 = FS_MaskedSub_f32( w0, float32v( 1 ), l3 ) + float32v( G4 * 3 );
+        float32v x4 = x0 + float32v( G4 * 4 - 1 );
+        float32v y4 = y0 + float32v( G4 * 4 - 1 );
+        float32v z4 = z0 + float32v( G4 * 4 - 1 );
+        float32v w4 = w0 + float32v( G4 * 4 - 1 );
+        // Falloff per corner: 0.6 minus squared distance (FS_FNMulAdd_f32( a, b, c ) presumably yields c - a * b).
+        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, FS_FNMulAdd_f32( w0, w0, float32v( 0.6f ) ) ) ) );
+        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, FS_FNMulAdd_f32( w1, w1, float32v( 0.6f ) ) ) ) );
+        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, FS_FNMulAdd_f32( w2, w2, float32v( 0.6f ) ) ) ) );
+        float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, FS_FNMulAdd_f32( w3, w3, float32v( 0.6f ) ) ) ) );
+        float32v t4 = FS_FNMulAdd_f32( x4, x4, FS_FNMulAdd_f32( y4, y4, FS_FNMulAdd_f32( z4, z4, FS_FNMulAdd_f32( w4, w4, float32v( 0.6f ) ) ) ) );
+        // Corners whose falloff went negative contribute nothing.
+        t0 = FS::Max( t0, float32v( 0 ) );
+        t1 = FS::Max( t1, float32v( 0 ) );
+        t2 = FS::Max( t2, float32v( 0 ) );
+        t3 = FS::Max( t3, float32v( 0 ) );
+        t4 = FS::Max( t4, float32v( 0 ) );
+        // Raise each falloff to the fourth power by squaring twice.
+        t0 *= t0; t0 *= t0;
+        t1 *= t1; t1 *= t1;
+        t2 *= t2; t2 *= t2;
+        t3 *= t3; t3 *= t3;
+        t4 *= t4; t4 *= t4;
+        // Gradient contribution of each corner, selected by hashing the seed with that corner's coordinates.
+        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k, l ), x0, y0, z0, w0 );
+        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, 
+            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ),
+            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ),
+            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ),
+            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l1 ) ), x1, y1, z1, w1 );
+        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, 
+            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ),
+            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ),
+            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ),
+            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l2 ) ), x2, y2, z2, w2 );
+        float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
+            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i3 ),
+            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j3 ),
+            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k3 ),
+            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l3 ) ), x3, y3, z3, w3 );
+        float32v n4 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ), l + int32v( FnPrimes::W ) ), x4, y4, z4, w4 );
+        // Weighted sum of the corners, multiplied by a fixed scale (presumably normalising the output range).
+        return float32v( 27.f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, FS_FMulAdd_f32( n3, t3, n4 * t4 ) ) ) );
+    }
+};
+
+template<typename FS> // FastNoise::OpenSimplex2 specialised on FS; Gen is overloaded for 2 and 3 input coordinates.
+class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, FS> : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    { // 2D: sums the contributions of three lattice corners around (x, y).
+        const float SQRT3 = 1.7320508075f;
+        const float F2 = 0.5f * (SQRT3 - 1.0f); // skew factor, applied to (x + y) before flooring
+        const float G2 = (3.0f - SQRT3) / 6.0f; // unskew factor, applied to the floored cell coordinates
+        // Skew the input and floor it to find the containing cell.
+        float32v f = float32v( F2 ) * (x + y);
+        float32v x0 = FS_Floor_f32( x + f );
+        float32v y0 = FS_Floor_f32( y + f );
+        // Cell coordinates pre-multiplied by the per-axis primes consumed by HashPrimes.
+        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
+        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
+        // x0/y0 are reused: from here on they hold the offset from the unskewed cell origin to the input.
+        float32v g = float32v( G2 ) * (x0 + y0);
+        x0 = x - (x0 - g);
+        y0 = y - (y0 - g);
+
+        mask32v i1 = x0 > y0; // lanes where the x offset is the larger one
+        // No separate j1 = ~i1 mask: the FS_NMasked* calls below take i1 and presumably apply it inverted.
+        // Second corner: one step along x where i1 is set, otherwise along y; last corner: one step along both.
+        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
+        float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
+        float32v x2 = x0 + float32v( (G2 * 2) - 1 );
+        float32v y2 = y0 + float32v( (G2 * 2) - 1 );
+        // Falloff per corner: 0.5 minus squared distance.
+        float32v t0 = float32v( 0.5f ) - (x0 * x0) - (y0 * y0);
+        float32v t1 = float32v( 0.5f ) - (x1 * x1) - (y1 * y1);
+        float32v t2 = float32v( 0.5f ) - (x2 * x2) - (y2 * y2);
+        // Corners whose falloff went negative contribute nothing.
+        t0 = FS::Max( t0, float32v( 0 ) );
+        t1 = FS::Max( t1, float32v( 0 ) );
+        t2 = FS::Max( t2, float32v( 0 ) );
+        // Raise each falloff to the fourth power by squaring twice.
+        t0 *= t0; t0 *= t0;
+        t1 *= t1; t1 *= t1;
+        t2 *= t2; t2 *= t2;
+        // Gradient contribution of each corner, selected by hashing the seed with that corner's coordinates.
+        float32v n0 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
+        float32v n1 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
+        float32v n2 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
+        // Weighted sum of the corners, multiplied by a fixed scale (presumably normalising the output range).
+        return float32v( 49.918426513671875f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    { // 3D: two passes over a transformed input; each pass adds the contributions of two lattice points.
+        float32v f = float32v( 2.0f / 3.0f ) * (x + y + z);
+        float32v xr = f - x;
+        float32v yr = f - y;
+        float32v zr = f - z;
+        // The loop body runs exactly twice (i = 0, then i = 1, where it breaks before the next update).
+        float32v val( 0 );
+        for( size_t i = 0; ; i++ )
+        { // v0: input rounded per axis to a lattice point; d0: residual offset from that point to the input.
+            float32v v0xr = FS_Round_f32( xr );
+            float32v v0yr = FS_Round_f32( yr );
+            float32v v0zr = FS_Round_f32( zr );
+            float32v d0xr = xr - v0xr;
+            float32v d0yr = yr - v0yr;
+            float32v d0zr = zr - v0zr;
+            // Pick the axis with the largest absolute residual; ties favour x, then y (z is whatever remains).
+            float32v score0xr = FS_Abs_f32( d0xr );
+            float32v score0yr = FS_Abs_f32( d0yr );
+            float32v score0zr = FS_Abs_f32( d0zr );
+            mask32v dir0xr = FS::Max( score0yr, score0zr ) <= score0xr;
+            mask32v dir0yr = FS_BitwiseAndNot_m32( FS::Max( score0zr, score0xr ) <= score0yr, dir0xr ); // presumably a & ~b
+            mask32v dir0zr = ~(dir0xr | dir0yr);
+            float32v v1xr = FS_MaskedAdd_f32( v0xr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0xr ), dir0xr ); // 1.0f | ( -1.0f & d ) is +1 or -1 with d's sign, assuming bitwise &/| on float32v
+            float32v v1yr = FS_MaskedAdd_f32( v0yr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0yr ), dir0yr );
+            float32v v1zr = FS_MaskedAdd_f32( v0zr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0zr ), dir0zr );
+            float32v d1xr = xr - v1xr; // d1: residual offset from the second lattice point (v1) to the input
+            float32v d1yr = yr - v1yr;
+            float32v d1zr = zr - v1zr;
+            // Both lattice points pre-multiplied by the per-axis primes consumed by HashPrimes.
+            int32v hv0xr = FS_Convertf32_i32( v0xr ) * int32v( FnPrimes::X );
+            int32v hv0yr = FS_Convertf32_i32( v0yr ) * int32v( FnPrimes::Y );
+            int32v hv0zr = FS_Convertf32_i32( v0zr ) * int32v( FnPrimes::Z );
+
+            int32v hv1xr = FS_Convertf32_i32( v1xr ) * int32v( FnPrimes::X );
+            int32v hv1yr = FS_Convertf32_i32( v1yr ) * int32v( FnPrimes::Y );
+            int32v hv1zr = FS_Convertf32_i32( v1zr ) * int32v( FnPrimes::Z );
+            // Falloff: 0.6 minus squared distance (FNMulAdd presumably c - a * b), clamped at zero, then to the fourth power.
+            float32v t0 = FS_FNMulAdd_f32( d0zr, d0zr, FS_FNMulAdd_f32( d0yr, d0yr, FS_FNMulAdd_f32( d0xr, d0xr, float32v( 0.6f ) ) ) );
+            float32v t1 = FS_FNMulAdd_f32( d1zr, d1zr, FS_FNMulAdd_f32( d1yr, d1yr, FS_FNMulAdd_f32( d1xr, d1xr, float32v( 0.6f ) ) ) );
+            t0 = FS::Max( t0, float32v( 0 ) );
+            t1 = FS::Max( t1, float32v( 0 ) );
+            t0 *= t0; t0 *= t0;
+            t1 *= t1; t1 *= t1;
+            // Gradient contribution of each lattice point, selected by hashing the seed with its coordinates.
+            float32v v0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv0xr, hv0yr, hv0zr ), d0xr, d0yr, d0zr );
+            float32v v1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv1xr, hv1yr, hv1zr ), d1xr, d1yr, d1zr );
+            // Accumulate both weighted contributions on top of the running total.
+            val = FS_FMulAdd_f32( v0, t0, FS_FMulAdd_f32( v1, t1, val ) );
+
+            if( i == 1 )
+            {
+                break;
+            }
+            // Set up the second pass: shift every coordinate by 0.5 and bitwise-invert the seed.
+            xr += float32v( 0.5f );
+            yr += float32v( 0.5f );
+            zr += float32v( 0.5f );
+            seed = ~seed;
+        }
+
+        return float32v( 32.69428253173828125f ) * val;
+    } 
+};
+
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 65b71a02..ff23e6ce 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -1,5 +1,4 @@
 #pragma once
-#include "FastSIMD/InlInclude.h"
 #include <climits>
 
 namespace FastNoise
@@ -14,300 +13,292 @@ namespace FastNoise
         static constexpr int Lookup[] = { X,Y,Z,W };
     }
 
-    template<typename FS>
-    struct Utils
+    static constexpr float ROOT2 = 1.4142135623730950488f;
+    static constexpr float ROOT3 = 1.7320508075688772935f;
+
+    template<typename SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
     {
-        using float32v = typename FS::float32v;
-        using int32v = typename FS::int32v;
-        using mask32v = typename FS::mask32v;
+        int32v index = FS_Convertf32_i32( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
-        static constexpr float ROOT2 = 1.4142135623730950488f;
-        static constexpr float ROOT3 = 1.7320508075688772935f;
+        // Bit-4 = Choose X Y ordering
+        mask32v xy;
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
-        FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
+        if constexpr( SIMD == FastSIMD::FeatureSet::Scalar )
         {
-            int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
-
-            // Bit-4 = Choose X Y ordering
-            mask32v xy;
+            xy = int32_t( index & int32v( 1 << 2 ) ) != 0;
+        }
+        else
+        {
+            xy = index << 29;
 
-            if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
+            if constexpr( SIMD & FastSIMD::FeatureFlags::)
             {
-                xy = int32_t( index & int32v( 1 << 2 ) ) != 0;
+                xy >>= 31;
             }
-            else
-            {
-                xy = index << 29;
+        }
 
-                if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
-                {
-                    xy >>= 31;
-                }
-            }
+        float32v a = FS_Select_f32( xy, fY, fX );
+        float32v b = FS_Select_f32( xy, fX, fY );
 
-            float32v a = FS_Select_f32( xy, fY, fX );
-            float32v b = FS_Select_f32( xy, fX, fY );
+        // Bit-1 = b flip sign
+        b ^= FS_Casti32_f32( index << 31 );
 
-            // Bit-1 = b flip sign
-            b ^= FS_Casti32_f32( index << 31 );
+        // Bit-2 = Mul a by 2 or Root3
+        mask32v aMul2;
 
-            // Bit-2 = Mul a by 2 or Root3
-            mask32v aMul2;
+        if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
+        {
+            aMul2 = int32_t( index & int32v( 1 << 1 ) ) != 0;
+        }
+        else
+        {
+            aMul2 = (index << 30) >> 31;
+        }
 
-            if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
-            {
-                aMul2 = int32_t( index & int32v( 1 << 1 ) ) != 0;
-            }
-            else
-            {
-                aMul2 = (index << 30) >> 31;
-            }
+        a *= FS_Select_f32( aMul2, float32v( 2 ), float32v( ROOT3 ) );
+        // b zero value if a mul 2
+        b = FS_NMask_f32( b, aMul2 );
 
-            a *= FS_Select_f32( aMul2, float32v( 2 ), float32v( ROOT3 ) );
-            // b zero value if a mul 2
-            b = FS_NMask_f32( b, aMul2 );
+        // Bit-8 = Flip sign of a + b
+        return ( a + b ) ^ FS_Casti32_f32( (index >> 3) << 31 );
+    }
 
-            // Bit-8 = Flip sign of a + b
-            return ( a + b ) ^ FS_Casti32_f32( (index >> 3) << 31 );
-        }
+    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
+    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
+    {
+        int32v index = FS_Convertf32_i32( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
-        FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
-        {
-            int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+        float32v gX = _mm256_permutevar8x32_ps( float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
+        float32v gY = _mm256_permutevar8x32_ps( float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
 
-            float32v gX = _mm256_permutevar8x32_ps( float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
-            float32v gY = _mm256_permutevar8x32_ps( float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
+        // Bit-8 = Flip sign of a + b
+        return FS_FMulAdd_f32( gX, fX, fY * gY ) ^ FS_Casti32_f32( (index >> 3) << 31 );
+    }
 
-            // Bit-8 = Flip sign of a + b
-            return FS_FMulAdd_f32( gX, fX, fY * gY ) ^ FS_Casti32_f32( (index >> 3) << 31 );
-        }
+    template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
+    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
+    {
+        int32v index = FS_Convertf32_i32( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
-        template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
-        FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
-        {
-            int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+        float32v gX = _mm512_permutexvar_ps( index, float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
+        float32v gY = _mm512_permutexvar_ps( index, float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );
 
-            float32v gX = _mm512_permutexvar_ps( index, float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
-            float32v gY = _mm512_permutexvar_ps( index, float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );
+        return FS_FMulAdd_f32( gX, fX, fY * gY );
+    }
 
-            return FS_FMulAdd_f32( gX, fX, fY * gY );
-        }
 
+    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
+    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
+    {
+        // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
+        // ( 1, 1+R2 ) ( 1, -1-R2 ) ( -1, 1+R2 ) ( -1, -1-R2 )
+
+        int32v  bit1 = (hash << 31);
+        int32v  bit2 = (hash >> 1) << 31;
+        mask32v bit4;
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
-        FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
+        if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
         {
-            // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
-            // ( 1, 1+R2 ) ( 1, -1-R2 ) ( -1, 1+R2 ) ( -1, -1-R2 )
-
-            int32v  bit1 = (hash << 31);
-            int32v  bit2 = (hash >> 1) << 31;
-            mask32v bit4;
+            bit4 = int32_t( hash & int32v( 1 << 2 ) ) != 0;
+        }
+        else
+        {
+            bit4 = hash << 29;
 
-            if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
+            if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
             {
-                bit4 = int32_t( hash & int32v( 1 << 2 ) ) != 0;
+                bit4 >>= 31;
             }
-            else
-            {
-                bit4 = hash << 29;
+        }
 
-                if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
-                {
-                    bit4 >>= 31;
-                }
-            }
+        fX ^= FS_Casti32_f32( bit1 );
+        fY ^= FS_Casti32_f32( bit2 );
+        
+        float32v a = FS_Select_f32( bit4, fY, fX );
+        float32v b = FS_Select_f32( bit4, fX, fY );
+        
+        return FS_FMulAdd_f32( float32v( 1.0f + ROOT2 ), a, b );
+    }
 
-            fX ^= FS_Casti32_f32( bit1 );
-            fY ^= FS_Casti32_f32( bit2 );
-            
-            float32v a = FS_Select_f32( bit4, fY, fX );
-            float32v b = FS_Select_f32( bit4, fX, fY );
-            
-            return FS_FMulAdd_f32( float32v( 1.0f + ROOT2 ), a, b );
-        }
+    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
+    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
+    {
+        float32v gX = _mm256_permutevar8x32_ps( float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
+        float32v gY = _mm256_permutevar8x32_ps( float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
-        FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
-        {
-            float32v gX = _mm256_permutevar8x32_ps( float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
-            float32v gY = _mm256_permutevar8x32_ps( float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
+        return FS_FMulAdd_f32( gX, fX, fY * gY );
+    }
 
-            return FS_FMulAdd_f32( gX, fX, fY * gY );
-        }
+    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512> * = nullptr>
+     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
+    {
+        float32v gX = _mm512_permutexvar_ps( hash, float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
+        float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512> * = nullptr>
-         FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
-        {
-            float32v gX = _mm512_permutexvar_ps( hash, float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
-            float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );
+        return FS_FMulAdd_f32( gX, fX, fY * gY );
+    }
 
-            return FS_FMulAdd_f32( gX, fX, fY * gY );
-        }
+    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512 > * = nullptr >
+    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
+    {
+        int32v hasha13 = hash & int32v( 13 );
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512 > * = nullptr >
-        FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
-        {
-            int32v hasha13 = hash & int32v( 13 );
+        //if h < 8 then x, else y
+        float32v u = FS_Select_f32( hasha13 < int32v( 8 ), fX, fY );
 
-            //if h < 8 then x, else y
-            float32v u = FS_Select_f32( hasha13 < int32v( 8 ), fX, fY );
+        //if h < 4 then y else if h is 12 or 14 then x else z
+        float32v v = FS_Select_f32( hasha13 == int32v( 12 ), fX, fZ );
+        v = FS_Select_f32( hasha13 < int32v( 2 ), fY, v );
 
-            //if h < 4 then y else if h is 12 or 14 then x else z
-            float32v v = FS_Select_f32( hasha13 == int32v( 12 ), fX, fZ );
-            v = FS_Select_f32( hasha13 < int32v( 2 ), fY, v );
+        //if h1 then -u else u
+        //if h2 then -v else v
+        float32v h1 = FS_Casti32_f32( hash << 31 );
+        float32v h2 = FS_Casti32_f32( (hash & int32v( 2 )) << 30 );
+        //then add them
+        return ( u ^ h1 ) + ( v ^ h2 );
+    }
 
-            //if h1 then -u else u
-            //if h2 then -v else v
-            float32v h1 = FS_Casti32_f32( hash << 31 );
-            float32v h2 = FS_Casti32_f32( (hash & int32v( 2 )) << 30 );
-            //then add them
-            return ( u ^ h1 ) + ( v ^ h2 );
-        }
+    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
+    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
+    {
+        float32v gX = _mm512_permutexvar_ps( hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
+        float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
+        float32v gZ = _mm512_permutexvar_ps( hash, float32v( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
-        FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
-        {
-            float32v gX = _mm512_permutexvar_ps( hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
-            float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
-            float32v gZ = _mm512_permutexvar_ps( hash, float32v( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
+        return FS_FMulAdd_f32( gX, fX, FS_FMulAdd_f32( fY, gY, fZ * gZ ));
+    }
 
-            return FS_FMulAdd_f32( gX, fX, FS_FMulAdd_f32( fY, gY, fZ * gZ ));
-        }
+    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512>* = nullptr >
+    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
+    {
+        int32v p = hash & int32v( 3 << 3 );
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512>* = nullptr >
-        FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
+        float32v a = FS_Select_f32( p > int32v( 0 ), fX, fY );
+        float32v b;
+        if constexpr( FS::SIMD_Level <= FastSIMD::Level_SSE2 )
         {
-            int32v p = hash & int32v( 3 << 3 );
+            b = FS_Select_f32( p > int32v( 1 << 3 ), fY, fZ );        
+        }
+        else
+        {
+            b = FS_Select_f32( hash << 27, fY, fZ );
+        }
+        float32v c = FS_Select_f32( p > int32v( 2 << 3 ), fZ, fW );
 
-            float32v a = FS_Select_f32( p > int32v( 0 ), fX, fY );
-            float32v b;
-            if constexpr( FS::SIMD_Level <= FastSIMD::Level_SSE2 )
-            {
-                b = FS_Select_f32( p > int32v( 1 << 3 ), fY, fZ );        
-            }
-            else
-            {
-                b = FS_Select_f32( hash << 27, fY, fZ );
-            }
-            float32v c = FS_Select_f32( p > int32v( 2 << 3 ), fZ, fW );
+        float32v aSign = FS_Casti32_f32( hash << 31 );
+        float32v bSign = FS_Casti32_f32( (hash << 30) & int32v( 0x80000000 ) );
+        float32v cSign = FS_Casti32_f32( (hash << 29) & int32v( 0x80000000 ) );
 
-            float32v aSign = FS_Casti32_f32( hash << 31 );
-            float32v bSign = FS_Casti32_f32( (hash << 30) & int32v( 0x80000000 ) );
-            float32v cSign = FS_Casti32_f32( (hash << 29) & int32v( 0x80000000 ) );
+        return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
+    }
 
-            return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
-        }
+    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
+    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
+    {
+        float32v gX = _mm512_permutex2var_ps( float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
+        float32v gY = _mm512_permutex2var_ps( float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
+        float32v gZ = _mm512_permutex2var_ps( float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
+        float32v gW = _mm512_permutex2var_ps( float32v( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, float32v( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
 
-        template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
-        FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
-        {
-            float32v gX = _mm512_permutex2var_ps( float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
-            float32v gY = _mm512_permutex2var_ps( float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
-            float32v gZ = _mm512_permutex2var_ps( float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
-            float32v gW = _mm512_permutex2var_ps( float32v( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, float32v( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
+        return FS_FMulAdd_f32( gX, fX, FS_FMulAdd_f32( fY, gY, FS_FMulAdd_f32( fZ, gZ, fW * gW ) ));
+    }
 
-            return FS_FMulAdd_f32( gX, fX, FS_FMulAdd_f32( fY, gY, FS_FMulAdd_f32( fZ, gZ, fW * gW ) ));
-        }
+    template<typename SIMD = FS, typename... P>
+    FS_FORCEINLINE static int32v HashPrimes( int32v seed, P... primedPos )
+    {
+        int32v hash = seed;
+        hash ^= (primedPos ^ ...);
 
-        template<typename SIMD = FS, typename... P>
-        FS_INLINE static int32v HashPrimes( int32v seed, P... primedPos )
-        {
-            int32v hash = seed;
-            hash ^= (primedPos ^ ...);
+        hash *= int32v( 0x27d4eb2d );
+        return (hash >> 15) ^ hash;
+    }
 
-            hash *= int32v( 0x27d4eb2d );
-            return (hash >> 15) ^ hash;
-        }
+    template<typename SIMD = FS, typename... P>
+    FS_FORCEINLINE static int32v HashPrimesHB( int32v seed, P... primedPos )
+    {
+        int32v hash = seed;
+        hash ^= (primedPos ^ ...);
+        
+        hash *= int32v( 0x27d4eb2d );
+        return hash;
+    }  
+
+    template<typename SIMD = FS, typename... P>
+     FS_FORCEINLINE static float32v GetValueCoord( int32v seed, P... primedPos )
+    {
+        int32v hash = seed;
+        hash ^= (primedPos ^ ...);
+        
+        hash *= hash * int32v( 0x27d4eb2d );
+        return FS::Convert<float>( hash ) * float32v( 1.0f / (float)INT_MAX );
+    }
 
-        template<typename SIMD = FS, typename... P>
-        FS_INLINE static int32v HashPrimesHB( int32v seed, P... primedPos )
-        {
-            int32v hash = seed;
-            hash ^= (primedPos ^ ...);
-            
-            hash *= int32v( 0x27d4eb2d );
-            return hash;
-        }  
-
-        template<typename SIMD = FS, typename... P>
-         FS_INLINE static float32v GetValueCoord( int32v seed, P... primedPos )
-        {
-            int32v hash = seed;
-            hash ^= (primedPos ^ ...);
-            
-            hash *= hash * int32v( 0x27d4eb2d );
-            return FS_Converti32_f32( hash ) * float32v( 1.0f / (float)INT_MAX );
-        }
+    template<typename SIMD = FS>
+    FS_FORCEINLINE static float32v Lerp( float32v a, float32v b, float32v t )
+    {
+        return FS_FMulAdd_f32( t, b - a, a );
+    }
 
-        template<typename SIMD = FS>
-        FS_INLINE static float32v Lerp( float32v a, float32v b, float32v t )
-        {
-            return FS_FMulAdd_f32( t, b - a, a );
-        }
+    template<typename SIMD = FS>
+     FS_FORCEINLINE static float32v InterpHermite( float32v t )
+    {
+        return t * t * FS_FNMulAdd_f32( t, float32v( 2 ), float32v( 3 ));
+    }
 
-        template<typename SIMD = FS>
-         FS_INLINE static float32v InterpHermite( float32v t )
-        {
-            return t * t * FS_FNMulAdd_f32( t, float32v( 2 ), float32v( 3 ));
-        }
+    template<typename SIMD = FS>
+     FS_FORCEINLINE static float32v InterpQuintic( float32v t )
+    {
+        return t * t * t * FS_FMulAdd_f32( t, FS_FMulAdd_f32( t, float32v( 6 ), float32v( -15 )), float32v( 10 ) );
+    }
 
-        template<typename SIMD = FS>
-         FS_INLINE static float32v InterpQuintic( float32v t )
+    template<typename SIMD = FS, typename... P>
+    FS_FORCEINLINE static float32v CalcDistance( DistanceFunction distFunc, float32v dX, P... d )
+    {
+        switch( distFunc )
         {
-            return t * t * t * FS_FMulAdd_f32( t, FS_FMulAdd_f32( t, float32v( 6 ), float32v( -15 )), float32v( 10 ) );
-        }
+            default:
+            case DistanceFunction::Euclidean:
+            {
+                float32v distSqr = dX * dX;
+                ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);
 
-        template<typename SIMD = FS, typename... P>
-        FS_INLINE static float32v CalcDistance( DistanceFunction distFunc, float32v dX, P... d )
-        {
-            switch( distFunc )
+                return FS_InvSqrt_f32( distSqr ) * distSqr;
+            }
+
+            case DistanceFunction::EuclideanSquared:
             {
-                default:
-                case DistanceFunction::Euclidean:
-                {
-                    float32v distSqr = dX * dX;
-                    ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);
-
-                    return FS_InvSqrt_f32( distSqr ) * distSqr;
-                }
-
-                case DistanceFunction::EuclideanSquared:
-                {
-                    float32v distSqr = dX * dX;
-                    ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);
-
-                    return distSqr;
-                }
-
-                case DistanceFunction::Manhattan:
-                {
-                    float32v dist = FS_Abs_f32( dX );
-                    dist += (FS_Abs_f32( d ) + ...);
-
-                    return dist;
-                }
-
-                case DistanceFunction::Hybrid:
-                {
-                    float32v both = FS_FMulAdd_f32( dX, dX, FS_Abs_f32( dX ) );
-                    ((both += FS_FMulAdd_f32( d, d, FS_Abs_f32( d ) )), ...);
-
-                    return both;
-                }
-
-                case DistanceFunction::MaxAxis:
-                {
-                    float32v max = FS_Abs_f32( dX );
-                    ((max = FS_Max_f32( FS_Abs_f32(d), max )), ...);
-
-                    return max;
-                }
+                float32v distSqr = dX * dX;
+                ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);
+
+                return distSqr;
+            }
+
+            case DistanceFunction::Manhattan:
+            {
+                float32v dist = FS_Abs_f32( dX );
+                dist += (FS_Abs_f32( d ) + ...);
+
+                return dist;
+            }
+
+            case DistanceFunction::Hybrid:
+            {
+                float32v both = FS_FMulAdd_f32( dX, dX, FS_Abs_f32( dX ) );
+                ((both += FS_FMulAdd_f32( d, d, FS_Abs_f32( d ) )), ...);
+
+                return both;
+            }
+
+            case DistanceFunction::MaxAxis:
+            {
+                float32v max = FS_Abs_f32( dX );
+                ((max = FS::Max( FS_Abs_f32(d), max )), ...);
+
+                return max;
             }
         }
-    };
+    }    
 }
 
 using FnUtils = FastNoise::Utils<FS_SIMD_CLASS>;
diff --git a/include/FastNoise/Generators/Value.h b/include/FastNoise/Generators/Value.h
index cb6f0465..2cbce7ec 100644
--- a/include/FastNoise/Generators/Value.h
+++ b/include/FastNoise/Generators/Value.h
@@ -1,25 +1,23 @@
-#pragma once
-#include "Generator.h"
-
-namespace FastNoise
-{
-    class Value : public virtual Generator
-    {
-    public:
-        FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Value> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::eLevel ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+namespace FastNoise
+{
+    class Value : public virtual Generator
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Value> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Coherent Noise" );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Value.inl b/include/FastNoise/Generators/Value.inl
index 8c3565ee..6d432e73 100644
--- a/include/FastNoise/Generators/Value.inl
+++ b/include/FastNoise/Generators/Value.inl
@@ -1,88 +1,85 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Value.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FS_T<FastNoise::Value, FS> : public virtual FastNoise::Value, public FS_T<FastNoise::Generator, FS>
-{
-    FASTSIMD_DECLARE_FS_TYPES;
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-
-        return FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0 ), FnUtils::GetValueCoord( seed, x1, y0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1 ), FnUtils::GetValueCoord( seed, x1, y1 ), xs ), ys );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
-
-        return FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0 ), FnUtils::GetValueCoord( seed, x1, y0, z0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0 ), FnUtils::GetValueCoord( seed, x1, y1, z0 ), xs ), ys ),
-            FnUtils::Lerp(                                                                                
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1 ), FnUtils::GetValueCoord( seed, x1, y0, z1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1 ), FnUtils::GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-        float32v ws = FS_Floor_f32( w );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-        int32v w1 = w0 + int32v( FnPrimes::W );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
-        ws = FnUtils::InterpHermite( w - ws );
-
-        return FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w0 ), xs ), ys ),
-            FnUtils::Lerp( 
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w0 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w0 ), xs ), ys ), zs ),
-            FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w1 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w1 ), xs ), ys ),
-            FnUtils::Lerp( 
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w1 ), xs ), ys ), zs ), ws );
-    }
-};
+#include "FastSIMD/InlInclude.h"
+
+#include "Value.h"
+#include "Utils.inl"
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Value, FS> : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+
+        return FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0 ), FnUtils::GetValueCoord( seed, x1, y0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1 ), FnUtils::GetValueCoord( seed, x1, y1 ), xs ), ys );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        float32v zs = FS_Floor_f32( z );
+
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v z1 = z0 + int32v( FnPrimes::Z );
+
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+        zs = FnUtils::InterpHermite( z - zs );
+
+        return FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0 ), FnUtils::GetValueCoord( seed, x1, y0, z0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0 ), FnUtils::GetValueCoord( seed, x1, y1, z0 ), xs ), ys ),
+            FnUtils::Lerp(                                                                                
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1 ), FnUtils::GetValueCoord( seed, x1, y0, z1 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1 ), FnUtils::GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        float32v zs = FS_Floor_f32( z );
+        float32v ws = FS_Floor_f32( w );
+
+        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
+        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v z1 = z0 + int32v( FnPrimes::Z );
+        int32v w1 = w0 + int32v( FnPrimes::W );
+
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+        zs = FnUtils::InterpHermite( z - zs );
+        ws = FnUtils::InterpHermite( w - ws );
+
+        return FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w0 ), xs ), ys ),
+            FnUtils::Lerp( 
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w0 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w0 ), xs ), ys ), zs ),
+            FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w1 ), xs ),
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w1 ), xs ), ys ),
+            FnUtils::Lerp( 
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w1 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w1 ), xs ), ys ), zs ), ws );
+    }
+};
diff --git a/include/FastNoise/Metadata.h b/include/FastNoise/Metadata.h
index ee74b6c6..4adc8654 100644
--- a/include/FastNoise/Metadata.h
+++ b/include/FastNoise/Metadata.h
@@ -186,7 +186,7 @@ namespace FastNoise
         /// </example>
         /// <param name="maxSimdLevel">Max SIMD level, Null = Auto</param>
         /// <returns>SmartNode<T> is guaranteed not nullptr</returns>
-        virtual SmartNode<> CreateNode( FastSIMD::eLevel maxSimdLevel = FastSIMD::Level_Null ) const = 0;
+        virtual SmartNode<> CreateNode( FastSIMD::FeatureSet maxFeatureSet = FastSIMD::FeatureSet::Max ) const = 0;
 
     protected:
         Metadata()
diff --git a/include/FastNoise/SmartNode.h b/include/FastNoise/SmartNode.h
index 09156728..7773fc80 100644
--- a/include/FastNoise/SmartNode.h
+++ b/include/FastNoise/SmartNode.h
@@ -27,7 +27,7 @@ namespace FastNoise
         friend class SmartNode;
 
         template<typename T>
-        friend SmartNode<T> New( FastSIMD::eLevel );
+        friend SmartNode<T> New( FastSIMD::FeatureSet );
 
         static uint64_t GetReference( const void* ptr );
 
@@ -224,7 +224,7 @@ namespace FastNoise
 
     private:
         template<typename U>
-        friend SmartNode<U> New( FastSIMD::eLevel );
+        friend SmartNode<U> New( FastSIMD::FeatureSet );
 
         template<typename U>
         friend struct MetadataT;
@@ -233,10 +233,10 @@ namespace FastNoise
         friend class SmartNode;
 
         explicit SmartNode( T* ptr ) :
-            mReferenceId( SmartNodeManager::GetReference( ptr ) ),
+            mReferenceId( ptr ? SmartNodeManager::GetReference( ptr ) : SmartNodeManager::kInvalidReferenceId ),
             mPtr( ptr )
         {
-            SmartNodeManager::IncReference( mReferenceId );
+            TryInc( mReferenceId );            
         }
 
         void Release()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d7795c17..3811cce0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 0cac980919a2ca8aa19b1bb95e468784ef15c6fb
+    GIT_TAG 8acda378ad0c85d5a8c144a49b399fe9e4599684
     EXCLUDE_FROM_ALL YES
     OPTIONS
         "BUILD_SHARED_LIBS OFF"
@@ -65,6 +65,8 @@ set_target_properties(FastNoise PROPERTIES
 
 fastsimd_create_simd_library(FastSIMD_FastNoise "../include/FastNoise/FastNoise_BuildList.inl")
 
+target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
+
 target_link_libraries(FastNoise PUBLIC FastSIMD_FastNoise)
 
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
diff --git a/src/FastNoise/FastNoise_C.cpp b/src/FastNoise/FastNoise_C.cpp
index 3fbc249a..a67547b3 100644
--- a/src/FastNoise/FastNoise_C.cpp
+++ b/src/FastNoise/FastNoise_C.cpp
@@ -23,7 +23,7 @@ void StoreMinMax( float* floatArray2, FastNoise::OutputMinMax minMax )
 
 void* fnNewFromEncodedNodeTree( const char* encodedString, unsigned simdLevel )
 {
-    if( FastNoise::SmartNode<> node = FastNoise::NewFromEncodedNodeTree( encodedString, (FastSIMD::eLevel)simdLevel ) )
+    if( FastNoise::SmartNode<> node = FastNoise::NewFromEncodedNodeTree( encodedString, (FastSIMD::FeatureSet)simdLevel ) )
     {
         return new FastNoise::SmartNode<>( std::move( node ) );
     }
@@ -113,7 +113,7 @@ void* fnNewFromMetadata( int id, unsigned simdLevel )
 {
     if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
     {
-        return new FastNoise::SmartNode<>( metadata->CreateNode( (FastSIMD::eLevel)simdLevel ) );
+        return new FastNoise::SmartNode<>( metadata->CreateNode( (FastSIMD::FeatureSet)simdLevel ) );
     }
     return nullptr;
 }
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index bea7c23b..e7572e53 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -197,7 +197,7 @@ bool GetFromDataStream( const std::vector<uint8_t>& dataStream, size_t& idx, T&
     return true;
 }
 
-SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialisedNodeData, size_t& serialIdx, std::vector<SmartNode<>>& referenceNodes, FastSIMD::eLevel level = FastSIMD::Level_Null )
+SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialisedNodeData, size_t& serialIdx, std::vector<SmartNode<>>& referenceNodes, FastSIMD::FeatureSet level = FastSIMD::FeatureSet::Max )
 {
     uint16_t nodeId;
     if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
@@ -296,7 +296,7 @@ SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialised
     return generator;
 }
 
-SmartNode<> FastNoise::NewFromEncodedNodeTree( const char* serialisedBase64NodeData, FastSIMD::eLevel level )
+SmartNode<> FastNoise::NewFromEncodedNodeTree( const char* serialisedBase64NodeData, FastSIMD::FeatureSet level )
 {
     std::vector<uint8_t> dataStream = Base64::Decode( serialisedBase64NodeData );
     size_t startIdx = 0;
@@ -469,9 +469,9 @@ const FastNoise::Metadata& CLASS::GetMetadata() const\
 {\
     return FastNoise::Impl::GetMetadata<CLASS>();\
 }\
-SmartNode<> FastNoise::MetadataT<CLASS>::CreateNode( FastSIMD::eLevel l ) const\
+SmartNode<> FastNoise::MetadataT<CLASS>::CreateNode( FastSIMD::FeatureSet l ) const\
 {\
-    return SmartNode<>( FastSIMD::New<CLASS>( l FASTNOISE_GET_MEMORY_ALLOCATOR() ) );\
+    return SmartNode<>( FastSIMD::NewDispatchClass<CLASS>( l FASTNOISE_GET_MEMORY_ALLOCATOR() ) );\
 }
 
 #define FASTSIMD_INCLUDE_HEADER_ONLY
diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index 45484dab..605820aa 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -9,7 +9,7 @@
 static const size_t gPositionCount = 8192;
 static float gPositionFloats[gPositionCount]; 
 
-FastNoise::SmartNode<> BuildGenerator( benchmark::State& state, const FastNoise::Metadata* metadata, FastSIMD::eLevel level )
+FastNoise::SmartNode<> BuildGenerator( benchmark::State& state, const FastNoise::Metadata* metadata, FastSIMD::FeatureSet level )
 {    
     FastNoise::SmartNode<> generator = metadata->CreateNode( level );
 
@@ -101,7 +101,7 @@ void BenchFastNoiseGenerator4D( benchmark::State& state, const FastNoise::SmartN
 }
 
 template<typename T>
-void RegisterBenchmarks( FastSIMD::eLevel level, const char* groupName, const char* name, T generatorFunc )
+void RegisterBenchmarks( FastSIMD::FeatureSet level, const char* groupName, const char* name, T generatorFunc )
 {
     std::string benchName = "0D/";
 
@@ -145,7 +145,7 @@ int main( int argc, char** argv )
         gPositionFloats[idx] = (float)idx * 0.6f;
     }
     
-    for( FastSIMD::eLevel level = FastSIMD::CPUMaxSIMDLevel(); level != FastSIMD::Level_Null; level = (FastSIMD::eLevel)(level >> 1) )
+    for( FastSIMD::FeatureSet level = FastSIMD::CPUMaxSIMDLevel(); level != FastSIMD::Level_Null; level = (FastSIMD::FeatureSet)(level >> 1) )
     {
         if( !(level & FastSIMD::COMPILED_SIMD_LEVELS & FastNoise::SUPPORTED_SIMD_LEVELS) )
         {
diff --git a/tests/SIMDUnitTest.cpp b/tests/SIMDUnitTest.cpp
index 727fb4d3..9c59b55e 100644
--- a/tests/SIMDUnitTest.cpp
+++ b/tests/SIMDUnitTest.cpp
@@ -146,14 +146,14 @@ std::enable_if_t<!std::is_same<void, FS>::value> TestFunction_##NAME( void* base
     }                                                                                                      \
     else                                                                                                   \
     {                                                                                                      \
-        T result[FS_Size_32()];                                                    \
+        T result[int32v::ElementCount];                                                    \
         int failCount = 0;                                                                                    \
                                                                                                            \
-        for ( std::size_t i = 0; i < TestCount; i += FS_Size_32() )                \
+        for ( std::size_t i = 0; i < TestCount; i += int32v::ElementCount )                \
         {                                                                                                  \
             FUNC;                                                                                          \
                                                                                                            \
-            for ( std::size_t ir = 0; ir < FS_Size_32(); ir++ )                    \
+            for ( std::size_t ir = 0; ir < int32v::ElementCount; ir++ )                    \
             {                                                                                              \
                 if ( isBase )                                                                              \
                 {                                                                                          \
@@ -178,29 +178,29 @@ std::enable_if_t<!std::is_same<void, FS>::value> TestFunction_##NAME( void* base
 }                                                                                                          \
 SIMDUnitTest test_##NAME( TestFunction_##NAME<RETURN_TYPE, LEVEL> );
 
-SIMD_FUNCTION_TEST( LoadStore_f32, float, FS_Store_f32( &result, FS_Load_f32( &rndFloats0[i] ) ) )
+SIMD_FUNCTION_TEST( LoadStore_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) ) )
 
 SIMD_FUNCTION_TEST( LoadStore_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) ) )
 
 
-SIMD_FUNCTION_TEST( Casti32_f32, float, FS_Store_f32( &result, FS_Casti32_f32( FS_Load_i32( &rndInts0[i] ) ) ) )
+SIMD_FUNCTION_TEST( Casti32_f32, float, FS::Store( &result, FS_Casti32_f32( FS_Load_i32( &rndInts0[i] ) ) ) )
 
-SIMD_FUNCTION_TEST( Castf32_i32, int32_t, FS_Store_i32( &result, FS_Castf32_i32( FS_Load_f32( &rndFloats0[i] ) ) ) )
+SIMD_FUNCTION_TEST( Castf32_i32, int32_t, FS_Store_i32( &result, FS_Castf32_i32( FS::Load( &rndFloats0[i] ) ) ) )
 
-SIMD_FUNCTION_TEST( Converti32_f32, float, FS_Store_f32( &result, FS_Converti32_f32( FS_Load_i32( &rndInts0[i] ) ) ) )
+SIMD_FUNCTION_TEST( Converti32_f32, float, FS::Store( &result, FS::Convert<float>( FS_Load_i32( &rndInts0[i] ) ) ) )
 
-SIMD_FUNCTION_TEST( Convertf32_i32, int32_t, FS_Store_i32( &result, FS_Convertf32_i32( FS_Load_f32( &rndFloats0[i] ) ) ) )
+SIMD_FUNCTION_TEST( Convertf32_i32, int32_t, FS_Store_i32( &result, FS_Convertf32_i32( FS::Load( &rndFloats0[i] ) ) ) )
 
 
-SIMD_FUNCTION_TEST( Equal_f32, float, FS_Store_f32( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS_Load_f32( &rndFloats0[i] ) == FS_Load_f32( &rndFloats1[i] ) ) ) ) )
+SIMD_FUNCTION_TEST( Equal_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) == FS::Load( &rndFloats1[i] ) ) ) ) )
 
-SIMD_FUNCTION_TEST( GreaterThan_f32, float, FS_Store_f32( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS_Load_f32( &rndFloats0[i] ) > FS_Load_f32( &rndFloats1[i] ) ) ) ) )
+SIMD_FUNCTION_TEST( GreaterThan_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) > FS::Load( &rndFloats1[i] ) ) ) ) )
 
-SIMD_FUNCTION_TEST( LessThan_f32, float, FS_Store_f32( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS_Load_f32( &rndFloats0[i] ) < FS_Load_f32( &rndFloats1[i] ) ) ) ) )
+SIMD_FUNCTION_TEST( LessThan_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) < FS::Load( &rndFloats1[i] ) ) ) ) )
 
-SIMD_FUNCTION_TEST( GreaterEqualThan_f32, float, FS_Store_f32( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS_Load_f32( &rndFloats0[i] ) >= FS_Load_f32( &rndFloats1[i] ) ) ) ) )
+SIMD_FUNCTION_TEST( GreaterEqualThan_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) >= FS::Load( &rndFloats1[i] ) ) ) ) )
 
-SIMD_FUNCTION_TEST( LessEqualThan_f32, float, FS_Store_f32( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS_Load_f32( &rndFloats0[i] ) <= FS_Load_f32( &rndFloats1[i] ) ) ) ) )
+SIMD_FUNCTION_TEST( LessEqualThan_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) <= FS::Load( &rndFloats1[i] ) ) ) ) )
 
 SIMD_FUNCTION_TEST( Equal_i32, int32_t, FS_Store_i32( &result, FS_Mask_i32( typename FS::int32v( 1 ), ( FS_Load_i32( &rndInts0[i] ) == FS_Load_i32( &rndInts1[i] ) ) ) ) )
 
@@ -209,56 +209,56 @@ SIMD_FUNCTION_TEST( GreaterThan_i32, int32_t, FS_Store_i32( &result, FS_Mask_i32
 SIMD_FUNCTION_TEST( LessThan_i32, int32_t, FS_Store_i32( &result, FS_Mask_i32( typename FS::int32v( 1 ), ( FS_Load_i32( &rndInts0[i] ) < FS_Load_i32( &rndInts1[i] ) ) ) ) )
 
 
-SIMD_FUNCTION_TEST( Select_f32, float, FS_Store_f32( &result, FS_Select_f32( ( FS_Load_f32( &rndFloats0[i] ) > FS_Load_f32( &rndFloats1[i] ) ), FS_Load_f32( &rndFloats0[i] ), FS_Load_f32( &rndFloats1[i] ) ) ) )
+SIMD_FUNCTION_TEST( Select_f32, float, FS::Store( &result, FS_Select_f32( ( FS::Load( &rndFloats0[i] ) > FS::Load( &rndFloats1[i] ) ), FS::Load( &rndFloats0[i] ), FS::Load( &rndFloats1[i] ) ) ) )
 
 SIMD_FUNCTION_TEST( Select_i32, int32_t, FS_Store_i32( &result, FS_Select_i32( ( FS_Load_i32( &rndInts0[i] ) > FS_Load_i32( &rndInts1[i] ) ), FS_Load_i32( &rndInts0[i] ), FS_Load_i32( &rndInts1[i] ) ) ) )
 
 
-SIMD_FUNCTION_TEST( Min_f32, float, FS_Store_f32( &result, FS_Min_f32( FS_Load_f32( &rndFloats0[i] ), FS_Load_f32( &rndFloats1[i] ) ) ) )
+SIMD_FUNCTION_TEST( Min_f32, float, FS::Store( &result, FS::Min( FS::Load( &rndFloats0[i] ), FS::Load( &rndFloats1[i] ) ) ) )
 
-SIMD_FUNCTION_TEST( Max_f32, float, FS_Store_f32( &result, FS_Max_f32( FS_Load_f32( &rndFloats0[i] ), FS_Load_f32( &rndFloats1[i] ) ) ) )
+SIMD_FUNCTION_TEST( Max_f32, float, FS::Store( &result, FS::Max( FS::Load( &rndFloats0[i] ), FS::Load( &rndFloats1[i] ) ) ) )
 
 SIMD_FUNCTION_TEST( Min_i32, int32_t, FS_Store_i32( &result, FS_Min_i32( FS_Load_i32( &rndInts0[i] ), FS_Load_i32( &rndInts1[i] ) ) ) )
 
 SIMD_FUNCTION_TEST( Max_i32, int32_t, FS_Store_i32( &result, FS_Max_i32( FS_Load_i32( &rndInts0[i] ), FS_Load_i32( &rndInts1[i] ) ) ) )
 
 
-SIMD_FUNCTION_TEST( BitwiseAndNot_f32, float, FS_Store_f32( &result, FS_BitwiseAndNot_f32( FS_Load_f32( &rndFloats0[i] ), FS_Load_f32( &rndFloats1[i] ) ) ) )
+SIMD_FUNCTION_TEST( BitwiseAndNot_f32, float, FS::Store( &result, FS_BitwiseAndNot_f32( FS::Load( &rndFloats0[i] ), FS::Load( &rndFloats1[i] ) ) ) )
 
 SIMD_FUNCTION_TEST( BitwiseAndNot_i32, int32_t, FS_Store_i32( &result, FS_BitwiseAndNot_i32( FS_Load_i32( &rndInts0[i] ), FS_Load_i32( &rndInts1[i] ) ) ) )
 
 
-SIMD_FUNCTION_TEST( BitwiseShiftRightZX_f32, float, FS_Store_f32( &result, FS_BitwiseShiftRightZX_f32( FS_Load_f32( &rndFloats0[i] ), (rndInts1[i & NonVecMask] & 31) ) ) )
+SIMD_FUNCTION_TEST( BitwiseShiftRightZX_f32, float, FS::Store( &result, FS_BitwiseShiftRightZX_f32( FS::Load( &rndFloats0[i] ), (rndInts1[i & NonVecMask] & 31) ) ) )
 
 SIMD_FUNCTION_TEST( BitwiseShiftRightZX_i32, int32_t, FS_Store_i32( &result, FS_BitwiseShiftRightZX_i32( FS_Load_i32( &rndInts0[i] ), (rndInts1[i & NonVecMask] & 31) ) ) )
 
 
-SIMD_FUNCTION_TEST( Abs_f32, float, FS_Store_f32( &result, FS_Abs_f32( FS_Load_f32( &rndFloats0[i] ) ) ) )
+SIMD_FUNCTION_TEST( Abs_f32, float, FS::Store( &result, FS_Abs_f32( FS::Load( &rndFloats0[i] ) ) ) )
 
 SIMD_FUNCTION_TEST( Abs_i32, int32_t, FS_Store_i32( &result, FS_Abs_i32( FS_Load_i32( &rndInts0[i] ) ) ) )
 
-SIMD_FUNCTION_TEST( Sqrt_f32, float, FS_Store_f32( &result, FS_Sqrt_f32( FS_Load_f32( &rndFloats0[i] ) ) ) )
+SIMD_FUNCTION_TEST( Sqrt_f32, float, FS::Store( &result, FS_Sqrt_f32( FS::Load( &rndFloats0[i] ) ) ) )
 
-//SIMD_FUNCTION_TEST( InvSqrt_f32, float, FS_Store_f32( &result, FS_InvSqrt_f32( FS_Load_f32( &rndFloats0[i] ) ) ) )
+//SIMD_FUNCTION_TEST( InvSqrt_f32, float, FS::Store( &result, FS_InvSqrt_f32( FS::Load( &rndFloats0[i] ) ) ) )
 
 
 const float MAX_ROUNDING = (float)INT_MAX / 2.0f;
 
-SIMD_FUNCTION_TEST( Floor_f32, float, FS_Store_f32( &result, FS_Floor_f32( typename FS::float32v( MAX_ROUNDING / FLT_MAX ) * FS_Load_f32( &rndFloats0[i] ) ) ) )
+SIMD_FUNCTION_TEST( Floor_f32, float, FS::Store( &result, FS_Floor_f32( typename FS::float32v( MAX_ROUNDING / FLT_MAX ) * FS::Load( &rndFloats0[i] ) ) ) )
 
-SIMD_FUNCTION_TEST( Ceil_f32, float, FS_Store_f32( &result, FS_Ceil_f32( typename FS::float32v( MAX_ROUNDING / FLT_MAX ) * FS_Load_f32( &rndFloats0[i] ) ) ) )
+SIMD_FUNCTION_TEST( Ceil_f32, float, FS::Store( &result, FS_Ceil_f32( typename FS::float32v( MAX_ROUNDING / FLT_MAX ) * FS::Load( &rndFloats0[i] ) ) ) )
 
-//SIMD_FUNCTION_TEST( Round_f32, float, FS_Store_f32( &result, FS_Round_f32( FS_Min_f32( FS::float32v( MAX_ROUNDING ), FS_Max_f32( FS::float32v( -MAX_ROUNDING ), FS_Load_f32( &rndFloats0[i] ) ) ) ) ) )
+//SIMD_FUNCTION_TEST( Round_f32, float, FS::Store( &result, FS_Round_f32( FS::Min( FS::float32v( MAX_ROUNDING ), FS::Max( FS::float32v( -MAX_ROUNDING ), FS::Load( &rndFloats0[i] ) ) ) ) ) )
 
-SIMD_FUNCTION_TEST( Add_f32, float, FS_Store_f32( &result, FS_Load_f32( &rndFloats0[i] ) + FS_Load_f32( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Sub_f32, float, FS_Store_f32( &result, FS_Load_f32( &rndFloats0[i] ) - FS_Load_f32( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Mul_f32, float, FS_Store_f32( &result, FS_Load_f32( &rndFloats0[i] ) * FS_Load_f32( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Div_f32, float, FS_Store_f32( &result, FS_Load_f32( &rndFloats0[i] ) / FS_Load_f32( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( And_f32, float, FS_Store_f32( &result, FS_Load_f32( &rndFloats0[i] ) & FS_Load_f32( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Xor_f32, float, FS_Store_f32( &result, FS_Load_f32( &rndFloats0[i] ) ^ FS_Load_f32( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Or_f32, float, FS_Store_f32( &result, FS_Load_f32( &rndFloats0[i] ) | FS_Load_f32( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Not_f32, float, FS_Store_f32( &result, ~FS_Load_f32( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Negate_f32, float, FS_Store_f32( &result, -FS_Load_f32( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( Add_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) + FS::Load( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( Sub_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) - FS::Load( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( Mul_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) * FS::Load( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( Div_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) / FS::Load( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( And_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) & FS::Load( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( Xor_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) ^ FS::Load( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( Or_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) | FS::Load( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( Not_f32, float, FS::Store( &result, ~FS::Load( &rndFloats1[i] ) ) )
+SIMD_FUNCTION_TEST( Negate_f32, float, FS::Store( &result, -FS::Load( &rndFloats1[i] ) ) )
 
 SIMD_FUNCTION_TEST( Add_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) + FS_Load_i32( &rndInts1[i] ) ) )
 SIMD_FUNCTION_TEST( Sub_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) - FS_Load_i32( &rndInts1[i] ) ) )

From 1359d80b98ae79e94fdfa9488158392faa6d39a1 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Fri, 15 Jul 2022 19:26:48 +0100
Subject: [PATCH 005/139] Support FastSIMD object libraries

---
 CMakePresets.json                    |  6 ------
 NoiseTool/FastNoiseNodeEditor.cpp    |  2 +-
 NoiseTool/NoiseTexture.cpp           |  4 ++--
 include/FastNoise/FastNoise_Config.h |  2 +-
 include/FastNoise/FastNoise_Export.h |  8 ++++++--
 src/CMakeLists.txt                   | 23 +++++++++++------------
 6 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/CMakePresets.json b/CMakePresets.json
index 32aba147..5054a60f 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -8,8 +8,6 @@
       "binaryDir": "${sourceDir}/out/build/${presetName}",
       "installDir": "${sourceDir}/out/install/${presetName}",
       "cacheVariables": {
-        //"CMAKE_C_COMPILER": "clang-cl",
-        //"CMAKE_CXX_COMPILER": "clang-cl",
         "CPM_SOURCE_CACHE": {
           "value": "${sourceDir}/out/cpm-cache",
           "type": "PATH"
@@ -17,10 +15,6 @@
         "BUILD_SHARED_LIBS": {
           "value": "True",
           "type": "BOOL"
-        },
-        "CPM_Fast_SIMD_SOURCE": {
-          "value": "../../../../../FastSIMD",
-          "type": "PATH"
         }
       }
     },
diff --git a/NoiseTool/FastNoiseNodeEditor.cpp b/NoiseTool/FastNoiseNodeEditor.cpp
index 955af253..b51b0789 100644
--- a/NoiseTool/FastNoiseNodeEditor.cpp
+++ b/NoiseTool/FastNoiseNodeEditor.cpp
@@ -88,7 +88,7 @@ void FastNoiseNodeEditor::Node::GeneratePreview( bool nodeTreeChanged, bool benc
     if( generator )
     {
         auto genRGB = FastNoise::New<FastNoise::ConvertRGBA8>( editor.mMaxSIMDLevel );
-        //genRGB->SetSource( generator );
+        genRGB->SetSource( generator );
 
         FastNoise::SmartNode<FastNoise::ConvertRGBA8> l(nullptr);
         
diff --git a/NoiseTool/NoiseTexture.cpp b/NoiseTool/NoiseTexture.cpp
index 547aba69..b0d5174d 100644
--- a/NoiseTool/NoiseTexture.cpp
+++ b/NoiseTool/NoiseTexture.cpp
@@ -320,8 +320,8 @@ NoiseTexture::TextureData NoiseTexture::BuildTexture( const BuildData& buildData
     static thread_local std::vector<float> noiseData;
     noiseData.resize( (size_t)buildData.size.x() * buildData.size.y() );
 
-    auto gen = buildData.generator;// FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetSIMDLevel() );
-    //gen->SetSource( buildData.generator );
+    auto gen = FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetSIMDLevel() );
+    gen->SetSource( buildData.generator );
 
     FastNoise::OutputMinMax minMax;
 
diff --git a/include/FastNoise/FastNoise_Config.h b/include/FastNoise/FastNoise_Config.h
index 22a88f25..8639c06c 100644
--- a/include/FastNoise/FastNoise_Config.h
+++ b/include/FastNoise/FastNoise_Config.h
@@ -1,6 +1,6 @@
 #pragma once
-#include <FastSIMD/DispatchClass.h>
 #include "FastNoise_Export.h"
+#include <FastSIMD/DispatchClass.h>
 
 #define FASTNOISE_CALC_MIN_MAX true
 #define FASTNOISE_USE_SHARED_PTR false
diff --git a/include/FastNoise/FastNoise_Export.h b/include/FastNoise/FastNoise_Export.h
index 416cc91a..9d983f71 100644
--- a/include/FastNoise/FastNoise_Export.h
+++ b/include/FastNoise/FastNoise_Export.h
@@ -1,8 +1,8 @@
 #ifndef FASTNOISE_EXPORT_H
 #define FASTNOISE_EXPORT_H
 
-#if !defined( FASTNOISE_STATIC_LIB ) && ( defined( _WIN32 ) || defined( __CYGWIN__ ) )
-#ifdef FASTNOISE_EXPORT
+#if ( !defined( FASTNOISE_STATIC_LIB ) && !defined( FASTSIMD_STATIC_LIB ) ) && ( defined( _WIN32 ) || defined( __CYGWIN__ ) )
+#if defined( FASTNOISE_EXPORT ) || defined( FASTSIMD_EXPORT )
 #define FASTNOISE_API __declspec( dllexport )
 #else
 #define FASTNOISE_API __declspec( dllimport )
@@ -11,4 +11,8 @@
 #define FASTNOISE_API
 #endif
 
+#if defined( FASTNOISE_STATIC_LIB ) && !defined( FASTSIMD_STATIC_LIB )
+#define FASTSIMD_STATIC_LIB
+#endif
+
 #endif
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3811cce0..983afea5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,17 +3,16 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 8acda378ad0c85d5a8c144a49b399fe9e4599684
+    GIT_TAG 5ff01e0274dd7605d0b80d4957e3affb04c1178f
     EXCLUDE_FROM_ALL YES
-    OPTIONS
-        "BUILD_SHARED_LIBS OFF"
+    #OPTIONS
+    #    "BUILD_SHARED_LIBS OFF"
 )
 
 set(install_targets ${install_targets}
     FastNoise
-    FastSIMD    
+    FastSIMD
     FastSIMD_FastNoise  
-    FastSIMD_DispatchClass 
     PARENT_SCOPE)
 
 
@@ -52,22 +51,22 @@ target_include_directories(FastNoise PUBLIC
     $<BUILD_INTERFACE:${FastNoise2_SOURCE_DIR}/include>
     $<INSTALL_INTERFACE:include>
 )
-
-if(NOT BUILD_SHARED_LIBS)
-    target_compile_definitions(FastNoise PUBLIC FASTNOISE_STATIC_LIB)
-endif()
+   
+target_compile_definitions(FastNoise PRIVATE FASTNOISE_EXPORT)
 
 set_target_properties(FastNoise PROPERTIES
-    DEFINE_SYMBOL FASTNOISE_EXPORT
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
-
 fastsimd_create_simd_library(FastSIMD_FastNoise "../include/FastNoise/FastNoise_BuildList.inl")
 
 target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
 
-target_link_libraries(FastNoise PUBLIC FastSIMD_FastNoise)
+if(NOT BUILD_SHARED_LIBS)
+    target_compile_definitions(FastNoise PUBLIC FASTNOISE_STATIC_LIB)
+endif()
+
+target_link_libraries(FastNoise PUBLIC FastSIMD PRIVATE FastSIMD_FastNoise)
 
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
     target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast /wd4251)

From a423c54babc0bd7fcee27ae4262588c4c22ad04c Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sun, 17 Jul 2022 23:20:00 +0100
Subject: [PATCH 006/139] Rename SIMD level references

---
 NoiseTool/FastNoiseNodeEditor.cpp          | 24 +++++++++-------
 NoiseTool/FastNoiseNodeEditor.h            |  6 ++--
 NoiseTool/NoiseTexture.cpp                 |  2 +-
 NoiseTool/NoiseToolApp.cpp                 | 32 ++++++++++++----------
 NoiseTool/NoiseToolApp.h                   |  6 ++--
 include/FastNoise/Generators/Generator.h   |  4 +--
 include/FastNoise/Generators/Generator.inl |  4 +--
 src/FastNoise/FastNoise_C.cpp              |  2 +-
 8 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/NoiseTool/FastNoiseNodeEditor.cpp b/NoiseTool/FastNoiseNodeEditor.cpp
index b51b0789..b76d9c44 100644
--- a/NoiseTool/FastNoiseNodeEditor.cpp
+++ b/NoiseTool/FastNoiseNodeEditor.cpp
@@ -78,7 +78,7 @@ void FastNoiseNodeEditor::Node::GeneratePreview( bool nodeTreeChanged, bool benc
     static std::array<float, NoiseSize * NoiseSize> noiseData;
 
     serialised = FastNoise::Metadata::SerialiseNodeData( data.get(), true );
-    auto generator = FastNoise::NewFromEncodedNodeTree( serialised.c_str(), editor.mMaxSIMDLevel );
+    auto generator = FastNoise::NewFromEncodedNodeTree( serialised.c_str(), editor.mMaxFeatureSet );
 
     if( !benchmark && nodeTreeChanged )
     {
@@ -87,7 +87,7 @@ void FastNoiseNodeEditor::Node::GeneratePreview( bool nodeTreeChanged, bool benc
 
     if( generator )
     {
-        auto genRGB = FastNoise::New<FastNoise::ConvertRGBA8>( editor.mMaxSIMDLevel );
+        auto genRGB = FastNoise::New<FastNoise::ConvertRGBA8>( editor.mMaxFeatureSet );
         genRGB->SetSource( generator );
 
         FastNoise::SmartNode<FastNoise::ConvertRGBA8> l(nullptr);
@@ -551,8 +551,8 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
     const ImGuiViewport* viewport = ImGui::GetMainViewport();
     ImGui::DockSpaceOverViewport( viewport, ImGuiDockNodeFlags_PassthruCentralNode ); 
 
-    std::string simdTxt = "Current SIMD Level: ";
-    simdTxt += GetSIMDLevelName( mActualSIMDLevel );
+    std::string simdTxt = "Current Feature Set: ";
+    simdTxt += GetFeatureSetName( mActualFeatureSet );
     ImGui::TextUnformatted( simdTxt.c_str() );
 
     ImGui::DragInt( "Node Benchmark Count", &mNodeBenchmarkMax, 8, 8, 64 * 1024 );
@@ -753,7 +753,7 @@ void FastNoiseNodeEditor::UpdateSelected()
 
 void FastNoiseNodeEditor::SetSIMDLevel( FastSIMD::FeatureSet lvl )
 {
-    mMaxSIMDLevel = lvl;
+    mMaxFeatureSet = lvl;
 
     mOverheadNode.generateAverages.clear();
     DoNodeBenchmarks();
@@ -1162,11 +1162,11 @@ FastNoise::SmartNode<> FastNoiseNodeEditor::GenerateSelectedPreview()
 
     if( find != mNodes.end() )
     {
-        generator = FastNoise::NewFromEncodedNodeTree( find->second.serialised.c_str(), mMaxSIMDLevel );
+        generator = FastNoise::NewFromEncodedNodeTree( find->second.serialised.c_str(), mMaxFeatureSet );
 
         if( generator )
         {
-            mActualSIMDLevel = generator->GetSIMDLevel();
+            mActualFeatureSet = generator->GetLiveFeatureSet();
         }
     }
 
@@ -1248,7 +1248,7 @@ void FastNoiseNodeEditor::ChangeSelectedNode( FastNoise::NodeData* newId )
     }
 }
 
-const char* FastNoiseNodeEditor::GetSIMDLevelName( FastSIMD::FeatureSet lvl )
+const char* FastNoiseNodeEditor::GetFeatureSetName( FastSIMD::FeatureSet lvl )
 {
     switch( lvl )
     {
@@ -1262,8 +1262,12 @@ const char* FastNoiseNodeEditor::GetSIMDLevelName( FastSIMD::FeatureSet lvl )
     case FastSIMD::FeatureSet::SSE41:  return "SSE4.1";
     case FastSIMD::FeatureSet::SSE42:  return "SSE4.2";
     case FastSIMD::FeatureSet::AVX:    return "AVX";
-    case FastSIMD::FeatureSet::AVX2_FMA:   return "AVX2";
-    case FastSIMD::FeatureSet::AVX512_Baseline_FMA: return "AVX512";
+    case FastSIMD::FeatureSet::AVX2:   return "AVX2";
+    case FastSIMD::FeatureSet::AVX2_FMA:   return "AVX2_FMA";
+    case FastSIMD::FeatureSet::AVX512_Baseline: return "AVX512";
+    case FastSIMD::FeatureSet::AVX512_Baseline_FMA: return "AVX512_FMA";
     case FastSIMD::FeatureSet::NEON:   return "NEON";
+    case FastSIMD::FeatureSet::NEON_FMA:   return "NEON_FMA";
+    case FastSIMD::FeatureSet::Max:   return "AUTO";
     }
 }
diff --git a/NoiseTool/FastNoiseNodeEditor.h b/NoiseTool/FastNoiseNodeEditor.h
index 3943f262..63c7693e 100644
--- a/NoiseTool/FastNoiseNodeEditor.h
+++ b/NoiseTool/FastNoiseNodeEditor.h
@@ -25,7 +25,7 @@ namespace Magnum
         void Draw( const Matrix4& transformation, const Matrix4& projection, const Vector3& cameraPosition );
         void SetSIMDLevel( FastSIMD::FeatureSet lvl );
 
-        static const char* GetSIMDLevelName( FastSIMD::FeatureSet lvl );
+        static const char* GetFeatureSetName( FastSIMD::FeatureSet lvl );
 
     private:
         struct Node
@@ -138,7 +138,7 @@ namespace Magnum
         int mNodeSeed = 1337;
         NoiseTexture::GenType mNodeGenType = NoiseTexture::GenType_2D;
 
-        FastSIMD::FeatureSet mMaxSIMDLevel    = FastSIMD::FeatureSet::Max;
-        FastSIMD::FeatureSet mActualSIMDLevel = FastSIMD::FeatureSet::Null;
+        FastSIMD::FeatureSet mMaxFeatureSet    = FastSIMD::FeatureSet::Max;
+        FastSIMD::FeatureSet mActualFeatureSet = FastSIMD::FeatureSet::Null;
     };
 }
\ No newline at end of file
diff --git a/NoiseTool/NoiseTexture.cpp b/NoiseTool/NoiseTexture.cpp
index b0d5174d..4bdc37a4 100644
--- a/NoiseTool/NoiseTexture.cpp
+++ b/NoiseTool/NoiseTexture.cpp
@@ -320,7 +320,7 @@ NoiseTexture::TextureData NoiseTexture::BuildTexture( const BuildData& buildData
     static thread_local std::vector<float> noiseData;
     noiseData.resize( (size_t)buildData.size.x() * buildData.size.y() );
 
-    auto gen = FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetSIMDLevel() );
+    auto gen = FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetLiveFeatureSet() );
     gen->SetSource( buildData.generator );
 
     FastNoise::OutputMinMax minMax;
diff --git a/NoiseTool/NoiseToolApp.cpp b/NoiseTool/NoiseToolApp.cpp
index 78c424f7..3c04357a 100644
--- a/NoiseTool/NoiseToolApp.cpp
+++ b/NoiseTool/NoiseToolApp.cpp
@@ -62,19 +62,21 @@ NoiseToolApp::NoiseToolApp( const Arguments& arguments ) :
     GL::Renderer::setBlendEquation( GL::Renderer::BlendEquation::Add, GL::Renderer::BlendEquation::Add );
     GL::Renderer::setBlendFunction( GL::Renderer::BlendFunction::SourceAlpha, GL::Renderer::BlendFunction::OneMinusSourceAlpha );
 
-    Debug{} << "FastSIMD detected max CPU SIMD Level:" << FastNoiseNodeEditor::GetSIMDLevelName( FastSIMD::DetectCpuMaxFeatureSet() );
-
-    mLevelNames = { "Auto" };
-    mLevelEnums = { FastSIMD::FeatureSet::Null };
-
-    for( int i = 1; i > 0; i <<= 1 )
+    Debug{} << "FastSIMD detected max CPU supported feature set:" << FastNoiseNodeEditor::GetFeatureSetName( FastSIMD::DetectCpuMaxFeatureSet() );
+
+    mFeatureSetSelection = 
+    { 
+        FastSIMD::FeatureSet::Max,
+        FastSIMD::FeatureSet::Scalar,
+        FastSIMD::FeatureSet::SSE2,
+        FastSIMD::FeatureSet::SSE41,
+        FastSIMD::FeatureSet::AVX2_FMA,
+        FastSIMD::FeatureSet::AVX512_Baseline_FMA,
+    };
+
+    for( FastSIMD::FeatureSet featureSet : mFeatureSetSelection )
     {
-        FastSIMD::FeatureSet lvl = (FastSIMD::FeatureSet)i;
-        /*if( lvl & FastNoise::SUPPORTED_SIMD_LEVELS & FastSIMD::COMPILED_SIMD_LEVELS )
-        {
-            mLevelNames.emplace_back( FastNoiseNodeEditor::GetSIMDLevelName( lvl ) );
-            mLevelEnums.emplace_back( lvl );
-        }*/
+        mFeatureSetNames.push_back( FastNoiseNodeEditor::GetFeatureSetName( featureSet ) );
     }
 }
 
@@ -114,10 +116,10 @@ void NoiseToolApp::drawEvent()
         ImGui::Text( "Application average %.3f ms/frame (%.1f FPS)",
             1000.0 / Double( ImGui::GetIO().Framerate ), Double( ImGui::GetIO().Framerate ) );
 
-        if( ImGui::Combo( "Max SIMD Level", &mMaxSIMDLevel, mLevelNames.data(), (int)mLevelEnums.size() ) ||
-            ImGuiExtra::ScrollCombo( &mMaxSIMDLevel, (int)mLevelEnums.size() ) )
+        if( ImGui::Combo( "Max Feature Set", &mMaxFeatureSet, mFeatureSetNames.data(), (int)mFeatureSetSelection.size() ) ||
+            ImGuiExtra::ScrollCombo( &mMaxFeatureSet, (int)mFeatureSetSelection.size() ) )
         {   
-            FastSIMD::FeatureSet newLevel = mLevelEnums[mMaxSIMDLevel];
+            FastSIMD::FeatureSet newLevel = mFeatureSetSelection[mMaxFeatureSet];
             mNodeEditor.SetSIMDLevel( newLevel );
         }
     }
diff --git a/NoiseTool/NoiseToolApp.h b/NoiseTool/NoiseToolApp.h
index b18b6cb1..bae0f347 100644
--- a/NoiseTool/NoiseToolApp.h
+++ b/NoiseTool/NoiseToolApp.h
@@ -40,9 +40,9 @@ namespace Magnum
 
         Color3 mClearColor{ 0.122f };
         bool mBackFaceCulling = false;
-        int mMaxSIMDLevel = 0;
-        std::vector<const char*> mLevelNames;
-        std::vector<FastSIMD::FeatureSet> mLevelEnums;
+        int mMaxFeatureSet = 0;
+        std::vector<FastSIMD::FeatureSet> mFeatureSetSelection;
+        std::vector<const char*> mFeatureSetNames;
 
         ImGuiIntegration::Context mImGuiIntegrationContext;
         ImGuiContext* mImGuiContext;
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index eb01af40..b831bf85 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -95,7 +95,7 @@ namespace FastNoise
 
         virtual ~Generator() = default;
 
-        virtual FastSIMD::FeatureSet GetSIMDLevel() const = 0;
+        virtual FastSIMD::FeatureSet GetLiveFeatureSet() const = 0;
         virtual const Metadata& GetMetadata() const = 0;
 
         virtual OutputMinMax GenUniformGrid2D( float* out,
@@ -138,7 +138,7 @@ namespace FastNoise
         {
             static_assert( std::is_base_of<Generator, T>::value, "T must be child of FastNoise::Generator class" );
 
-            assert( !gen.get() || GetSIMDLevel() == gen->GetSIMDLevel() ); // Ensure that all SIMD levels match
+            assert( !gen.get() || GetLiveFeatureSet() == gen->GetLiveFeatureSet() ); // Ensure that all SIMD levels match
 
             SetSourceSIMDPtr( dynamic_cast<const Generator*>( gen.get() ), &memberVariable.simdGeneratorPtr );
             memberVariable.base = gen;
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index c3bf5754..a5a37675 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -15,14 +15,14 @@ class FastSIMD::DispatchClass<FastNoise::Generator, SIMD> : public virtual FastN
 public:
     virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const = 0;
     virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const = 0;
-    virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const { return Gen( seed, x, y, z ); };
+    virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const { return Gen( seed, x, y, z ); }
 
 #define FASTNOISE_IMPL_GEN_T\
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const override { return GenT( seed, x, y ); }\
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const override { return GenT( seed, x, y, z ); }\
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const override { return GenT( seed, x, y, z, w ); }
 
-    FastSIMD::FeatureSet GetSIMDLevel() const final
+    FastSIMD::FeatureSet GetLiveFeatureSet() const final
     {
         return FASTSIMD_DEFAULT_FEATURE_SET;
     }
diff --git a/src/FastNoise/FastNoise_C.cpp b/src/FastNoise/FastNoise_C.cpp
index a67547b3..3944c285 100644
--- a/src/FastNoise/FastNoise_C.cpp
+++ b/src/FastNoise/FastNoise_C.cpp
@@ -37,7 +37,7 @@ void fnDeleteNodeRef( void* node )
 
 unsigned fnGetSIMDLevel( const void* node )
 {
-    return (unsigned)ToGen( node )->GetSIMDLevel();
+    return (unsigned)ToGen( node )->GetLiveFeatureSet();
 }
 
 int fnGetMetadataID( const void* node )

From c5b862c82b7d1f93a8330d4a673bcdb455d2c55c Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Wed, 27 Jul 2022 16:42:59 +0100
Subject: [PATCH 007/139] More FastSIMD conversion of nodes

---
 include/FastNoise/FastNoise_BuildList.inl     |  26 +--
 .../FastNoise/Generators/BasicGenerators.h    |   4 +-
 .../FastNoise/Generators/BasicGenerators.inl  |  14 +-
 include/FastNoise/Generators/Blends.inl       |  16 +-
 include/FastNoise/Generators/Cellular.inl     | 168 ++++++++---------
 include/FastNoise/Generators/DomainWarp.inl   |  42 ++---
 include/FastNoise/Generators/Fractal.inl      |   8 +-
 include/FastNoise/Generators/Modifiers.inl    |  20 +-
 include/FastNoise/Generators/Perlin.inl       |  18 +-
 include/FastNoise/Generators/Simplex.inl      |  84 ++++-----
 include/FastNoise/Generators/Utils.inl        | 114 +++++-------
 include/FastNoise/Generators/Value.inl        | 171 +++++++++---------
 12 files changed, 334 insertions(+), 351 deletions(-)

diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index cb4a52d3..8fe88ff0 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -78,9 +78,9 @@ template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
 
 FASTNOISE_REGISTER_NODE( Constant );
 //FASTNOISE_REGISTER_NODE( White );
-//FASTNOISE_REGISTER_NODE( Checkerboard );
+FASTNOISE_REGISTER_NODE( Checkerboard );
 FASTNOISE_REGISTER_NODE( SineWave );
-//FASTNOISE_REGISTER_NODE( PositionOutput );
+FASTNOISE_REGISTER_NODE( PositionOutput );
 //FASTNOISE_REGISTER_NODE( DistanceToPoint );
 //                    
 //FASTNOISE_REGISTER_NODE( Value );
@@ -99,12 +99,12 @@ FASTNOISE_REGISTER_NODE( SineWave );
 //FASTNOISE_REGISTER_NODE( DomainWarpGradient );
 //FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );
 //FASTNOISE_REGISTER_NODE( DomainWarpFractalIndependant );
-//                    
-//FASTNOISE_REGISTER_NODE( DomainScale );
-//FASTNOISE_REGISTER_NODE( DomainOffset );
-//FASTNOISE_REGISTER_NODE( DomainRotate );
-//FASTNOISE_REGISTER_NODE( SeedOffset );
-//FASTNOISE_REGISTER_NODE( Remap );
+                    
+FASTNOISE_REGISTER_NODE( DomainScale );
+FASTNOISE_REGISTER_NODE( DomainOffset );
+FASTNOISE_REGISTER_NODE( DomainRotate );
+FASTNOISE_REGISTER_NODE( SeedOffset );
+FASTNOISE_REGISTER_NODE( Remap );
 FASTNOISE_REGISTER_NODE( ConvertRGBA8 );
 //                    
 //FASTNOISE_REGISTER_NODE( Add );
@@ -116,11 +116,11 @@ FASTNOISE_REGISTER_NODE( ConvertRGBA8 );
 //FASTNOISE_REGISTER_NODE( MinSmooth );
 //FASTNOISE_REGISTER_NODE( MaxSmooth );
 //FASTNOISE_REGISTER_NODE( Fade );
-//                    
-//FASTNOISE_REGISTER_NODE( Terrace );
+                    
+FASTNOISE_REGISTER_NODE( Terrace );
 //FASTNOISE_REGISTER_NODE( PowFloat );
 //FASTNOISE_REGISTER_NODE( PowInt );
-//FASTNOISE_REGISTER_NODE( DomainAxisScale );
-//FASTNOISE_REGISTER_NODE( AddDimension );
-//FASTNOISE_REGISTER_NODE( RemoveDimension );
+FASTNOISE_REGISTER_NODE( DomainAxisScale );
+FASTNOISE_REGISTER_NODE( AddDimension );
+FASTNOISE_REGISTER_NODE( RemoveDimension );
 //FASTNOISE_REGISTER_NODE( GeneratorCache );
diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index a30f6ea5..f9f4bb64 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -52,10 +52,10 @@ namespace FastNoise
     public:
         const Metadata& GetMetadata() const override;
 
-        void SetSize( float value ) { mSize = value; }
+        void SetSize( float value ) { mSizeInv = 1 / value; }
 
     protected:
-        float mSize = 1.0f;
+        float mSizeInv = 1.0f;
     };
 
 #ifdef FASTNOISE_METADATA
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index 3def3541..c906ffdf 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -22,9 +22,9 @@ class FastSIMD::DispatchClass<FastNoise::White, SIMD> : public virtual FastNoise
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         size_t idx = 0;
-        ((pos = SIMD_Casti32_f32( (SIMD_Castf32_i32( pos ) ^ (SIMD_Castf32_i32( pos ) >> 16)) * int32v( FnPrimes::Lookup[idx++] ) )), ...);
+        ((pos = FS::Cast<float>( (FS::Cast<int32_t>( pos ) ^ (FS::Cast<int32_t>( pos ) >> 16)) * int32v( FnPrimes::Lookup[idx++] ) )), ...);
 
-        return FnUtils::GetValueCoord( seed, SIMD_Castf32_i32( pos )... );
+        return FnUtils::GetValueCoord( seed, FS::Cast<int32_t>( pos )... );
     }
 };
 
@@ -36,11 +36,9 @@ class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> : public virtual Fa
     template<typename... P>
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        float32v multiplier = SIMD_Reciprocal_f32( float32v( mSize ) );
+        int32v value = (FS::Convert<int32_t>( pos * float32v( mSizeInv ) ) ^ ...);
 
-        int32v value = (SIMD_Convertf32_i32( pos * multiplier ) ^ ...);
-
-        return float32v( 1.0f ) ^ SIMD_Casti32_f32( value << 31 );
+        return float32v( 1.0f ) ^ FS::Cast<float>( value << 31 );
     }
 };
 
@@ -64,10 +62,10 @@ class FastSIMD::DispatchClass<FastNoise::PositionOutput, SIMD> : public virtual
     template<typename... P>
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        size_t ofSIMDetIdx = 0;
+        size_t offsetIdx = 0;
         size_t multiplierIdx = 0;
 
-        (((pos += float32v( mOfSIMDet[ofSIMDetIdx++] )) *= float32v( mMultiplier[multiplierIdx++] )), ...);
+        (((pos += float32v( mOffset[offsetIdx++] )) *= float32v( mMultiplier[multiplierIdx++] )), ...);
         return (pos + ...);
     }
 };
diff --git a/include/FastNoise/Generators/Blends.inl b/include/FastNoise/Generators/Blends.inl
index f01d0e9a..2ff5c6cd 100644
--- a/include/FastNoise/Generators/Blends.inl
+++ b/include/FastNoise/Generators/Blends.inl
@@ -107,13 +107,13 @@ class FastSIMD::DispatchClass<FastNoise::MinSmooth, FS> : public virtual FastNoi
     {
         float32v a = this->GetSourceValue( mLHS, seed, pos... );
         float32v b = this->GetSourceValue( mRHS, seed, pos... );
-        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
+        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
 
-        float32v h = FS::Max( smoothness - FS_Abs_f32( a - b ), float32v( 0.0f ) );
+        float32v h = FS::Max( smoothness - FS::Abs( a - b ), float32v( 0.0f ) );
 
         h *= FS_Reciprocal_f32( smoothness );
 
-        return FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
+        return FS::FNMulAdd( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
     }
 };
 
@@ -126,13 +126,13 @@ class FastSIMD::DispatchClass<FastNoise::MaxSmooth, FS> : public virtual FastNoi
     {
         float32v a = -this->GetSourceValue( mLHS, seed, pos... );
         float32v b = -this->GetSourceValue( mRHS, seed, pos... );
-        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
+        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
 
-        float32v h = FS::Max( smoothness - FS_Abs_f32( a - b ), float32v( 0.0f ) );
+        float32v h = FS::Max( smoothness - FS::Abs( a - b ), float32v( 0.0f ) );
 
         h *= FS_Reciprocal_f32( smoothness );
 
-        return -FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
+        return -FS::FNMulAdd( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
     }
 };
 
@@ -143,9 +143,9 @@ class FastSIMD::DispatchClass<FastNoise::Fade, FS> : public virtual FastNoise::F
     template<typename... P> 
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        float32v fade = FS_Abs_f32( this->GetSourceValue( mFade, seed, pos... ) );
+        float32v fade = FS::Abs( this->GetSourceValue( mFade, seed, pos... ) );
 
-        return FS_FMulAdd_f32( this->GetSourceValue( mA, seed, pos... ), float32v( 1 ) - fade, this->GetSourceValue( mB, seed, pos... ) * fade );
+        return FS::FMulAdd( this->GetSourceValue( mA, seed, pos... ), float32v( 1 ) - fade, this->GetSourceValue( mB, seed, pos... ) * fade );
     }
 };
 
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index 39b1af8e..1efc41c6 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -27,8 +27,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
         value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
 
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
 
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -46,9 +46,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                 float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
 
-                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
-                xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, yd * yd ) );
+                xd = FS::FMulAdd( xd, invMag, xcf );
+                yd = FS::FMulAdd( yd, invMag, ycf );
 
                 float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
                 float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
@@ -60,16 +60,16 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                     float32v localDistance = distance[i];
                     float32v localCellValue = value[i];
 
-                    distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
-                    value[i] = FS_Select_f32( closer, newCellValue, value[i] );
+                    distance[i] = FS::Select( closer, newDistance, distance[i] );
+                    value[i] = FS::Select( closer, newCellValue, value[i] );
 
                     if( i > mValueIndex )
                     {
                         break;
                     }
 
-                    newDistance = FS_Select_f32( closer, localDistance, newDistance );
-                    newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
+                    newDistance = FS::Select( closer, localDistance, newDistance );
+                    newCellValue = FS::Select( closer, localCellValue, newCellValue );
                 }
 
                 ycf += float32v( 1 );
@@ -91,9 +91,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
         value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
         
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
         
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -118,10 +118,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                     float32v yd = FS::Convert<float>( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v zd = FS::Convert<float>( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                 
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
-                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
+                    float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
+                    xd = FS::FMulAdd( xd, invMag, xcf );
+                    yd = FS::FMulAdd( yd, invMag, ycf );
+                    zd = FS::FMulAdd( zd, invMag, zcf );
                 
                     float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
                     float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
@@ -133,16 +133,16 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                         float32v localDistance = distance[i];
                         float32v localCellValue = value[i];
 
-                        distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
-                        value[i] = FS_Select_f32( closer, newCellValue, value[i] );
+                        distance[i] = FS::Select( closer, newDistance, distance[i] );
+                        value[i] = FS::Select( closer, newCellValue, value[i] );
 
                         if( i > mValueIndex )
                         {
                             break;
                         }
 
-                        newDistance = FS_Select_f32( closer, localDistance, newDistance );
-                        newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
+                        newDistance = FS::Select( closer, localDistance, newDistance );
+                        newCellValue = FS::Select( closer, localCellValue, newCellValue );
                     }
             
                     zcf += float32v( 1 );
@@ -167,10 +167,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
         value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
         
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
         
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -202,11 +202,11 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
 
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
-                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
-                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
+                        float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
+                        xd = FS::FMulAdd( xd, invMag, xcf );
+                        yd = FS::FMulAdd( yd, invMag, ycf );
+                        zd = FS::FMulAdd( zd, invMag, zcf );
+                        wd = FS::FMulAdd( wd, invMag, wcf );
 
                         float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
                         float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
@@ -218,16 +218,16 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                             float32v localDistance = distance[i];
                             float32v localCellValue = value[i];
 
-                            distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
-                            value[i] = FS_Select_f32( closer, newCellValue, value[i] );
+                            distance[i] = FS::Select( closer, newDistance, distance[i] );
+                            value[i] = FS::Select( closer, newCellValue, value[i] );
 
                             if( i > mValueIndex )
                             {
                                 break;
                             }
 
-                            newDistance = FS_Select_f32( closer, localDistance, newDistance );
-                            newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
+                            newDistance = FS::Select( closer, localDistance, newDistance );
+                            newCellValue = FS::Select( closer, localCellValue, newCellValue );
                         }
 
                         wcf += float32v( 1 );
@@ -256,8 +256,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
         std::array<float32v, kMaxDistanceCount> distance;
         distance.fill( float32v( INFINITY ) );
 
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
 
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -275,9 +275,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                 float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
 
-                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
-                xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, yd * yd ) );
+                xd = FS::FMulAdd( xd, invMag, xcf );
+                yd = FS::FMulAdd( yd, invMag, ycf );
 
                 float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
 
@@ -305,9 +305,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
         std::array<float32v, kMaxDistanceCount> distance;
         distance.fill( float32v( INFINITY ) );
 
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
 
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -332,10 +332,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                     float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
 
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
-                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
+                    float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
+                    xd = FS::FMulAdd( xd, invMag, xcf );
+                    yd = FS::FMulAdd( yd, invMag, ycf );
+                    zd = FS::FMulAdd( zd, invMag, zcf );
 
                     float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
 
@@ -366,10 +366,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
         std::array<float32v, kMaxDistanceCount> distance;
         distance.fill( float32v( INFINITY ) );
 
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
 
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -401,11 +401,11 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
 
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
-                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
-                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
+                        float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
+                        xd = FS::FMulAdd( xd, invMag, xcf );
+                        yd = FS::FMulAdd( yd, invMag, ycf );
+                        zd = FS::FMulAdd( zd, invMag, zcf );
+                        wd = FS::FMulAdd( wd, invMag, wcf );
 
                         float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
 
@@ -475,8 +475,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
         float32v distance( FLT_MAX );
         float32v cellX, cellY;
 
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
 
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -494,17 +494,17 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                 float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
 
-                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
-                xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                yd = FS_FMulAdd_f32( yd, invMag, ycf );
+                float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, yd * yd ) );
+                xd = FS::FMulAdd( xd, invMag, xcf );
+                yd = FS::FMulAdd( yd, invMag, ycf );
 
                 float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
 
                 mask32v closer = newDistance < distance;
                 distance = FS::Min( newDistance, distance );
 
-                cellX = FS_Select_f32( closer, xd + x, cellX );
-                cellY = FS_Select_f32( closer, yd + y, cellY );
+                cellX = FS::Select( closer, xd + x, cellX );
+                cellY = FS::Select( closer, yd + y, cellY );
 
                 ycf += float32v( 1 );
                 yc += int32v( FnPrimes::Y );
@@ -522,9 +522,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
         float32v distance( FLT_MAX );
         float32v cellX, cellY, cellZ;
 
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
 
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -549,19 +549,19 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                     float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
 
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
-                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                    zd = FS_FMulAdd_f32( zd, invMag, zcf );
+                    float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
+                    xd = FS::FMulAdd( xd, invMag, xcf );
+                    yd = FS::FMulAdd( yd, invMag, ycf );
+                    zd = FS::FMulAdd( zd, invMag, zcf );
 
                     float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
 
                     mask32v closer = newDistance < distance;
                     distance = FS::Min( newDistance, distance );
 
-                    cellX = FS_Select_f32( closer, xd + x, cellX );
-                    cellY = FS_Select_f32( closer, yd + y, cellY );
-                    cellZ = FS_Select_f32( closer, zd + z, cellZ );
+                    cellX = FS::Select( closer, xd + x, cellX );
+                    cellY = FS::Select( closer, yd + y, cellY );
+                    cellZ = FS::Select( closer, zd + z, cellZ );
 
                     zcf += float32v( 1 );
                     zc += int32v( FnPrimes::Z );
@@ -582,10 +582,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
         float32v distance( FLT_MAX );
         float32v cellX, cellY, cellZ, cellW;
 
-        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
-        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
-        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
-        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
 
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
@@ -617,21 +617,21 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
 
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
-                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
-                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
-                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
-                        wd = FS_FMulAdd_f32( wd, invMag, wcf );
+                        float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
+                        xd = FS::FMulAdd( xd, invMag, xcf );
+                        yd = FS::FMulAdd( yd, invMag, ycf );
+                        zd = FS::FMulAdd( zd, invMag, zcf );
+                        wd = FS::FMulAdd( wd, invMag, wcf );
 
                         float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
 
                         mask32v closer = newDistance < distance;
                         distance = FS::Min( newDistance, distance );
 
-                        cellX = FS_Select_f32( closer, xd + x, cellX );
-                        cellY = FS_Select_f32( closer, yd + y, cellY );
-                        cellZ = FS_Select_f32( closer, zd + z, cellZ );
-                        cellW = FS_Select_f32( closer, wd + w, cellW );
+                        cellX = FS::Select( closer, xd + x, cellX );
+                        cellY = FS::Select( closer, yd + y, cellY );
+                        cellZ = FS::Select( closer, zd + z, cellZ );
+                        cellW = FS::Select( closer, wd + w, cellW );
 
                         wcf += float32v( 1 );
                         wc += int32v( FnPrimes::W );
diff --git a/include/FastNoise/Generators/DomainWarp.inl b/include/FastNoise/Generators/DomainWarp.inl
index 887ef655..b0cdb3fa 100644
--- a/include/FastNoise/Generators/DomainWarp.inl
+++ b/include/FastNoise/Generators/DomainWarp.inl
@@ -33,8 +33,8 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
         float32v xs = FS_Floor_f32( x );
         float32v ys = FS_Floor_f32( y );
 
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
         int32v x1 = x0 + int32v( FnPrimes::X );
         int32v y1 = y0 + int32v( FnPrimes::Y );
 
@@ -58,10 +58,10 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
         float32v xWarp = (FnUtils::Lerp( FnUtils::Lerp( x00, x10, xs ), FnUtils::Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
         float32v yWarp = (FnUtils::Lerp( FnUtils::Lerp( y00, y10, xs ), FnUtils::Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
 
-        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut );
-        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
 
-        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, yWarp * yWarp );
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, yWarp * yWarp );
 
         return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
     }
@@ -72,9 +72,9 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
         float32v ys = FS_Floor_f32( y );
         float32v zs = FS_Floor_f32( z );
 
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
         int32v x1 = x0 + int32v( FnPrimes::X );
         int32v y1 = y0 + int32v( FnPrimes::Y );
         int32v z1 = z0 + int32v( FnPrimes::Z );
@@ -114,11 +114,11 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
         float32v yWarp = (FnUtils::Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
         float32v zWarp = (FnUtils::Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
 
-        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut );
-        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
-        zOut = FS_FMulAdd_f32( zWarp, warpAmp, zOut );
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
 
-        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, FS_FMulAdd_f32( yWarp, yWarp, zWarp * zWarp ) );
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, zWarp * zWarp ) );
 
         return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
     }
@@ -130,10 +130,10 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
         float32v zs = FS_Floor_f32( z );
         float32v ws = FS_Floor_f32( w );
 
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
+        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( FnPrimes::W );
         int32v x1 = x0 + int32v( FnPrimes::X );
         int32v y1 = y0 + int32v( FnPrimes::Y );
         int32v z1 = z0 + int32v( FnPrimes::Z );
@@ -187,12 +187,12 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
         float32v zWarp = (FnUtils::Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
         float32v wWarp = (FnUtils::Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
 
-        xOut = FS_FMulAdd_f32( xWarp, warpAmp, xOut );
-        yOut = FS_FMulAdd_f32( yWarp, warpAmp, yOut );
-        zOut = FS_FMulAdd_f32( zWarp, warpAmp, zOut );
-        wOut = FS_FMulAdd_f32( wWarp, warpAmp, wOut );
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
+        wOut = FS::FMulAdd( wWarp, warpAmp, wOut );
 
-        float32v warpLengthSq = FS_FMulAdd_f32( xWarp, xWarp, FS_FMulAdd_f32( yWarp, yWarp, FS_FMulAdd_f32( zWarp, zWarp, wWarp * wWarp ) ) );
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, FS::FMulAdd( zWarp, zWarp, wWarp * wWarp ) ) );
 
         return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
     }
diff --git a/include/FastNoise/Generators/Fractal.inl b/include/FastNoise/Generators/Fractal.inl
index 9f147746..8d996f1e 100644
--- a/include/FastNoise/Generators/Fractal.inl
+++ b/include/FastNoise/Generators/Fractal.inl
@@ -48,7 +48,7 @@ class FastSIMD::DispatchClass<FastNoise::FractalRidged, FS> : public virtual Fas
         float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
         float32v lacunarity( mLacunarity );
         float32v amp( mFractalBounding );
-        float32v noise = FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) );
+        float32v noise = FS::Abs( this->GetSourceValue( mSource, seed, pos... ) );
 
         float32v sum = (noise * float32v( -2 ) + float32v( 1 )) * amp;
 
@@ -58,7 +58,7 @@ class FastSIMD::DispatchClass<FastNoise::FractalRidged, FS> : public virtual Fas
             amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - noise, weightedStrength );
             amp *= gain;
 
-            noise = FS_Abs_f32( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) );
+            noise = FS::Abs( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) );
             sum += (noise * float32v( -2 ) + float32v( 1 )) * amp;
         }
 
@@ -72,8 +72,8 @@ class FastSIMD::DispatchClass<FastNoise::FractalPingPong, FS> : public virtual F
 
     static float32v PingPong( float32v t )
     {
-        t -= FS_Round_f32( t * float32v( 0.5f ) ) * float32v( 2 );
-        return FS_Select_f32( t < float32v( 1 ), t, float32v( 2 ) - t );
+        t -= FS::Round( t * float32v( 0.5f ) ) * float32v( 2 );
+        return FS::Select( t < float32v( 1 ), t, float32v( 2 ) - t );
     }
 
     template<typename... P>
diff --git a/include/FastNoise/Generators/Modifiers.inl b/include/FastNoise/Generators/Modifiers.inl
index 30cec8cd..ae947597 100644
--- a/include/FastNoise/Generators/Modifiers.inl
+++ b/include/FastNoise/Generators/Modifiers.inl
@@ -38,8 +38,8 @@ class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> : public virtual Fa
         if( mPitchSin == 0.0f && mRollSin == 0.0f )
         {
             return this->GetSourceValue( mSource, seed,
-                FS_FNMulAdd_f32( y, float32v( mYawSin ), x * float32v( mYawCos ) ),
-                FS_FMulAdd_f32( x, float32v( mYawSin ), y * float32v( mYawCos ) ) );
+                FS::FNMulAdd( y, float32v( mYawSin ), x * float32v( mYawCos ) ),
+                FS::FMulAdd( x, float32v( mYawSin ), y * float32v( mYawCos ) ) );
         }
 
         return Gen( seed, x, y, float32v( 0 ) );
@@ -48,9 +48,9 @@ class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> : public virtual Fa
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
     {
         return this->GetSourceValue( mSource, seed,
-            FS_FMulAdd_f32( x, float32v( mXa ), FS_FMulAdd_f32( y, float32v( mXb ), z * float32v( mXc ) ) ),
-            FS_FMulAdd_f32( x, float32v( mYa ), FS_FMulAdd_f32( y, float32v( mYb ), z * float32v( mYc ) ) ),
-            FS_FMulAdd_f32( x, float32v( mZa ), FS_FMulAdd_f32( y, float32v( mZb ), z * float32v( mZc ) ) ) );
+            FS::FMulAdd( x, float32v( mXa ), FS::FMulAdd( y, float32v( mXb ), z * float32v( mXc ) ) ),
+            FS::FMulAdd( x, float32v( mYa ), FS::FMulAdd( y, float32v( mYb ), z * float32v( mYc ) ) ),
+            FS::FMulAdd( x, float32v( mZa ), FS::FMulAdd( y, float32v( mZb ), z * float32v( mZc ) ) ) );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
@@ -124,19 +124,19 @@ class FastSIMD::DispatchClass<FastNoise::Terrace, SIMD> : public virtual FastNoi
         float32v source = this->GetSourceValue( mSource, seed, pos... );
 
         source *= float32v( mMultiplier );
-        float32v rounded = FS_Round_f32( source );
+        float32v rounded = FS::Round( source );
 
         if( mSmoothness != 0.0f )
         {
             float32v diff = rounded - source;
             mask32v diffSign = diff < float32v( 0 );
 
-            diff = FS_Abs_f32( diff );
+            diff = FS::Abs( diff );
             diff = float32v( 0.5f ) - diff;
 
             diff *= float32v( mSmoothnessRecip );
             diff = FS::Min( diff, float32v( 0.5f ) );
-            diff = FS_Select_f32( diffSign, float32v( 0.5f ) - diff, diff - float32v( 0.5f ) );
+            diff = FS::Select( diffSign, float32v( 0.5f ) - diff, diff - float32v( 0.5f ) );
 
             rounded += diff;
         }
@@ -239,7 +239,7 @@ class FastSIMD::DispatchClass<FastNoise::GeneratorCache, SIMD> : public virtual
 
         for( size_t i = 0; i < sizeof...( P ); i++ )
         {
-            isSame &= !FS_AnyMask_bool( arrayPos[i] != FS::Load( &CachedPos[i] ) );
+            isSame &= !FS_AnyMask_bool( arrayPos[i] != FS::Load<float32v>( &CachedPos[i] ) );
         }
 
         if( !isSame )
@@ -257,6 +257,6 @@ class FastSIMD::DispatchClass<FastNoise::GeneratorCache, SIMD> : public virtual
             return value;
         }
 
-        return FS::Load( &CachedValue );
+        return FS::Load<float32v>( &CachedValue[0] );
     }
 };
diff --git a/include/FastNoise/Generators/Perlin.inl b/include/FastNoise/Generators/Perlin.inl
index d5bf597e..7b59ef33 100644
--- a/include/FastNoise/Generators/Perlin.inl
+++ b/include/FastNoise/Generators/Perlin.inl
@@ -10,8 +10,8 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, FS> : public virtual FastNoise:
         float32v xs = FS_Floor_f32( x );
         float32v ys = FS_Floor_f32( y );
 
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
         int32v x1 = x0 + int32v( FnPrimes::X );
         int32v y1 = y0 + int32v( FnPrimes::Y );
 
@@ -34,9 +34,9 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, FS> : public virtual FastNoise:
         float32v ys = FS_Floor_f32( y );
         float32v zs = FS_Floor_f32( z );
 
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
         int32v x1 = x0 + int32v( FnPrimes::X );
         int32v y1 = y0 + int32v( FnPrimes::Y );
         int32v z1 = z0 + int32v( FnPrimes::Z );
@@ -67,10 +67,10 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, FS> : public virtual FastNoise:
         float32v zs = FS_Floor_f32( z );
         float32v ws = FS_Floor_f32( w );
 
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
+        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( FnPrimes::W );
         int32v x1 = x0 + int32v( FnPrimes::X );
         int32v y1 = y0 + int32v( FnPrimes::Y );
         int32v z1 = z0 + int32v( FnPrimes::Z );
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index 782ab4ef..592ab430 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -15,8 +15,8 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
         float32v x0 = FS_Floor_f32( x + f );
         float32v y0 = FS_Floor_f32( y + f );
 
-        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
+        int32v i = FS::Convert<int32_t>( x0 ) * int32v( FnPrimes::X );
+        int32v j = FS::Convert<int32_t>( y0 ) * int32v( FnPrimes::Y );
 
         float32v g = float32v( G2 ) * (x0 + y0);
         x0 = x - (x0 - g);
@@ -31,9 +31,9 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
         float32v x2 = x0 + float32v( G2 * 2 - 1 );
         float32v y2 = y0 + float32v( G2 * 2 - 1 );
 
-        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, float32v( 0.5f ) ) );
-        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, float32v( 0.5f ) ) );
-        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, float32v( 0.5f ) ) );
+        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, float32v( 0.5f ) ) );
+        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, float32v( 0.5f ) ) );
+        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, float32v( 0.5f ) ) );
 
         t0 = FS::Max( t0, float32v( 0 ) );
         t1 = FS::Max( t1, float32v( 0 ) );
@@ -47,7 +47,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
         float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
         float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
 
-        return float32v( 38.283687591552734375f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
+        return float32v( 38.283687591552734375f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
@@ -67,9 +67,9 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
         float32v yi = y - y0;
         float32v zi = z - z0;
 
-        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
-        int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
+        int32v i = FS::Convert<int32_t>( x0 ) * int32v( FnPrimes::X );
+        int32v j = FS::Convert<int32_t>( y0 ) * int32v( FnPrimes::Y );
+        int32v k = FS::Convert<int32_t>( z0 ) * int32v( FnPrimes::Z );
 
         mask32v x_ge_y = xi >= yi;
         mask32v y_ge_z = yi >= zi;
@@ -98,10 +98,10 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
         float32v y3 = y0 + float32v( G3 * 3 - 1 );
         float32v z3 = z0 + float32v( G3 * 3 - 1 );
 
-        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, float32v( 0.6f ) ) ) );
-        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, float32v( 0.6f ) ) ) );
-        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, float32v( 0.6f ) ) ) );
-        float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, float32v( 0.6f ) ) ) );
+        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, FS::FNMulAdd( z0, z0, float32v( 0.6f ) ) ) );
+        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, FS::FNMulAdd( z1, z1, float32v( 0.6f ) ) ) );
+        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, FS::FNMulAdd( z2, z2, float32v( 0.6f ) ) ) );
+        float32v t3 = FS::FNMulAdd( x3, x3, FS::FNMulAdd( y3, y3, FS::FNMulAdd( z3, z3, float32v( 0.6f ) ) ) );
 
         t0 = FS::Max( t0, float32v( 0 ) );
         t1 = FS::Max( t1, float32v( 0 ) );
@@ -118,7 +118,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
         float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ), FS_NMaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ) ), x2, y2, z2 );
         float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ) ), x3, y3, z3 );
 
-        return float32v( 32.69428253173828125f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, n3 * t3 ) ) );
+        return float32v( 32.69428253173828125f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, n3 * t3 ) ) );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
@@ -142,10 +142,10 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
         float32v zi = z - z0;
         float32v wi = w - w0;
 
-        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
-        int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
-        int32v l = FS_Convertf32_i32( w0 ) * int32v( FnPrimes::W );
+        int32v i = FS::Convert<int32_t>( x0 ) * int32v( FnPrimes::X );
+        int32v j = FS::Convert<int32_t>( y0 ) * int32v( FnPrimes::Y );
+        int32v k = FS::Convert<int32_t>( z0 ) * int32v( FnPrimes::Z );
+        int32v l = FS::Convert<int32_t>( w0 ) * int32v( FnPrimes::W );
 
         float32v g = float32v( G4 ) * (xi + yi + zi + wi);
         x0 = xi - g;
@@ -214,11 +214,11 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
         float32v z4 = z0 + float32v( G4 * 4 - 1 );
         float32v w4 = w0 + float32v( G4 * 4 - 1 );
 
-        float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, FS_FNMulAdd_f32( w0, w0, float32v( 0.6f ) ) ) ) );
-        float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, FS_FNMulAdd_f32( w1, w1, float32v( 0.6f ) ) ) ) );
-        float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, FS_FNMulAdd_f32( w2, w2, float32v( 0.6f ) ) ) ) );
-        float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, FS_FNMulAdd_f32( w3, w3, float32v( 0.6f ) ) ) ) );
-        float32v t4 = FS_FNMulAdd_f32( x4, x4, FS_FNMulAdd_f32( y4, y4, FS_FNMulAdd_f32( z4, z4, FS_FNMulAdd_f32( w4, w4, float32v( 0.6f ) ) ) ) );
+        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, FS::FNMulAdd( z0, z0, FS::FNMulAdd( w0, w0, float32v( 0.6f ) ) ) ) );
+        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, FS::FNMulAdd( z1, z1, FS::FNMulAdd( w1, w1, float32v( 0.6f ) ) ) ) );
+        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, FS::FNMulAdd( z2, z2, FS::FNMulAdd( w2, w2, float32v( 0.6f ) ) ) ) );
+        float32v t3 = FS::FNMulAdd( x3, x3, FS::FNMulAdd( y3, y3, FS::FNMulAdd( z3, z3, FS::FNMulAdd( w3, w3, float32v( 0.6f ) ) ) ) );
+        float32v t4 = FS::FNMulAdd( x4, x4, FS::FNMulAdd( y4, y4, FS::FNMulAdd( z4, z4, FS::FNMulAdd( w4, w4, float32v( 0.6f ) ) ) ) );
 
         t0 = FS::Max( t0, float32v( 0 ) );
         t1 = FS::Max( t1, float32v( 0 ) );
@@ -250,7 +250,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise
             FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l3 ) ), x3, y3, z3, w3 );
         float32v n4 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ), l + int32v( FnPrimes::W ) ), x4, y4, z4, w4 );
 
-        return float32v( 27.f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, FS_FMulAdd_f32( n3, t3, n4 * t4 ) ) ) );
+        return float32v( 27.f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, FS::FMulAdd( n3, t3, n4 * t4 ) ) ) );
     }
 };
 
@@ -266,8 +266,8 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, FS> : public virtual Fast
         float32v x0 = FS_Floor_f32( x + f );
         float32v y0 = FS_Floor_f32( y + f );
 
-        int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
+        int32v i = FS::Convert<int32_t>( x0 ) * int32v( FnPrimes::X );
+        int32v j = FS::Convert<int32_t>( y0 ) * int32v( FnPrimes::Y );
 
         float32v g = float32v( G2 ) * (x0 + y0);
         x0 = x - (x0 - g);
@@ -297,7 +297,7 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, FS> : public virtual Fast
         float32v n1 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
         float32v n2 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
 
-        return float32v( 49.918426513671875f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
+        return float32v( 49.918426513671875f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
@@ -310,16 +310,16 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, FS> : public virtual Fast
         float32v val( 0 );
         for( size_t i = 0; ; i++ )
         {
-            float32v v0xr = FS_Round_f32( xr );
-            float32v v0yr = FS_Round_f32( yr );
-            float32v v0zr = FS_Round_f32( zr );
+            float32v v0xr = FS::Round( xr );
+            float32v v0yr = FS::Round( yr );
+            float32v v0zr = FS::Round( zr );
             float32v d0xr = xr - v0xr;
             float32v d0yr = yr - v0yr;
             float32v d0zr = zr - v0zr;
 
-            float32v score0xr = FS_Abs_f32( d0xr );
-            float32v score0yr = FS_Abs_f32( d0yr );
-            float32v score0zr = FS_Abs_f32( d0zr );
+            float32v score0xr = FS::Abs( d0xr );
+            float32v score0yr = FS::Abs( d0yr );
+            float32v score0zr = FS::Abs( d0zr );
             mask32v dir0xr = FS::Max( score0yr, score0zr ) <= score0xr;
             mask32v dir0yr = FS_BitwiseAndNot_m32( FS::Max( score0zr, score0xr ) <= score0yr, dir0xr );
             mask32v dir0zr = ~(dir0xr | dir0yr);
@@ -330,16 +330,16 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, FS> : public virtual Fast
             float32v d1yr = yr - v1yr;
             float32v d1zr = zr - v1zr;
 
-            int32v hv0xr = FS_Convertf32_i32( v0xr ) * int32v( FnPrimes::X );
-            int32v hv0yr = FS_Convertf32_i32( v0yr ) * int32v( FnPrimes::Y );
-            int32v hv0zr = FS_Convertf32_i32( v0zr ) * int32v( FnPrimes::Z );
+            int32v hv0xr = FS::Convert<int32_t>( v0xr ) * int32v( FnPrimes::X );
+            int32v hv0yr = FS::Convert<int32_t>( v0yr ) * int32v( FnPrimes::Y );
+            int32v hv0zr = FS::Convert<int32_t>( v0zr ) * int32v( FnPrimes::Z );
 
-            int32v hv1xr = FS_Convertf32_i32( v1xr ) * int32v( FnPrimes::X );
-            int32v hv1yr = FS_Convertf32_i32( v1yr ) * int32v( FnPrimes::Y );
-            int32v hv1zr = FS_Convertf32_i32( v1zr ) * int32v( FnPrimes::Z );
+            int32v hv1xr = FS::Convert<int32_t>( v1xr ) * int32v( FnPrimes::X );
+            int32v hv1yr = FS::Convert<int32_t>( v1yr ) * int32v( FnPrimes::Y );
+            int32v hv1zr = FS::Convert<int32_t>( v1zr ) * int32v( FnPrimes::Z );
 
-            float32v t0 = FS_FNMulAdd_f32( d0zr, d0zr, FS_FNMulAdd_f32( d0yr, d0yr, FS_FNMulAdd_f32( d0xr, d0xr, float32v( 0.6f ) ) ) );
-            float32v t1 = FS_FNMulAdd_f32( d1zr, d1zr, FS_FNMulAdd_f32( d1yr, d1yr, FS_FNMulAdd_f32( d1xr, d1xr, float32v( 0.6f ) ) ) );
+            float32v t0 = FS::FNMulAdd( d0zr, d0zr, FS::FNMulAdd( d0yr, d0yr, FS::FNMulAdd( d0xr, d0xr, float32v( 0.6f ) ) ) );
+            float32v t1 = FS::FNMulAdd( d1zr, d1zr, FS::FNMulAdd( d1yr, d1yr, FS::FNMulAdd( d1xr, d1xr, float32v( 0.6f ) ) ) );
             t0 = FS::Max( t0, float32v( 0 ) );
             t1 = FS::Max( t1, float32v( 0 ) );
             t0 *= t0; t0 *= t0;
@@ -348,7 +348,7 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, FS> : public virtual Fast
             float32v v0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv0xr, hv0yr, hv0zr ), d0xr, d0yr, d0zr );
             float32v v1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv1xr, hv1yr, hv1zr ), d1xr, d1yr, d1zr );
 
-            val = FS_FMulAdd_f32( v0, t0, FS_FMulAdd_f32( v1, t1, val ) );
+            val = FS::FMulAdd( v0, t0, FS::FMulAdd( v1, t1, val ) );
 
             if( i == 1 )
             {
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index ff23e6ce..0b05187e 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -16,75 +16,59 @@ namespace FastNoise
     static constexpr float ROOT2 = 1.4142135623730950488f;
     static constexpr float ROOT3 = 1.7320508075688772935f;
 
-    template<typename SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
     FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
     {
-        int32v index = FS_Convertf32_i32( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+        int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
         // Bit-4 = Choose X Y ordering
         mask32v xy;
 
-        if constexpr( SIMD == FastSIMD::FeatureSet::Scalar )
-        {
-            xy = int32_t( index & int32v( 1 << 2 ) ) != 0;
-        }
-        else
-        {
-            xy = index << 29;
+        xy = index << 29;
 
-            if constexpr( SIMD & FastSIMD::FeatureFlags::)
-            {
-                xy >>= 31;
-            }
-        }
+        if constexpr( !(SIMD & FastSIMD::FeatureFlag::SSE41) )
+        {
+            xy >>= 31;
+        }        
 
-        float32v a = FS_Select_f32( xy, fY, fX );
-        float32v b = FS_Select_f32( xy, fX, fY );
+        float32v a = FS::Select( xy, fY, fX );
+        float32v b = FS::Select( xy, fX, fY );
 
         // Bit-1 = b flip sign
-        b ^= FS_Casti32_f32( index << 31 );
+        b ^= FS::Cast<float>( index << 31 );
 
         // Bit-2 = Mul a by 2 or Root3
-        mask32v aMul2;
-
-        if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
-        {
-            aMul2 = int32_t( index & int32v( 1 << 1 ) ) != 0;
-        }
-        else
-        {
-            aMul2 = (index << 30) >> 31;
-        }
+        mask32v aMul2 = (index << 30) >> 31;        
 
-        a *= FS_Select_f32( aMul2, float32v( 2 ), float32v( ROOT3 ) );
+        a *= FS::Select( aMul2, float32v( 2 ), float32v( ROOT3 ) );
         // b zero value if a mul 2
         b = FS_NMask_f32( b, aMul2 );
 
         // Bit-8 = Flip sign of a + b
-        return ( a + b ) ^ FS_Casti32_f32( (index >> 3) << 31 );
+        return ( a + b ) ^ FS::Cast<float>( (index >> 3) << 31 );
     }
 
     template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
     FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
     {
-        int32v index = FS_Convertf32_i32( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+        int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
         float32v gX = _mm256_permutevar8x32_ps( float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
         float32v gY = _mm256_permutevar8x32_ps( float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
 
         // Bit-8 = Flip sign of a + b
-        return FS_FMulAdd_f32( gX, fX, fY * gY ) ^ FS_Casti32_f32( (index >> 3) << 31 );
+        return FS::FMulAdd( gX, fX, fY * gY ) ^ FS::Cast<float>( (index >> 3) << 31 );
     }
 
     template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
     FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
     {
-        int32v index = FS_Convertf32_i32( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+        int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
         float32v gX = _mm512_permutexvar_ps( index, float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
         float32v gY = _mm512_permutexvar_ps( index, float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );
 
-        return FS_FMulAdd_f32( gX, fX, fY * gY );
+        return FS::FMulAdd( gX, fX, fY * gY );
     }
 
 
@@ -112,13 +96,13 @@ namespace FastNoise
             }
         }
 
-        fX ^= FS_Casti32_f32( bit1 );
-        fY ^= FS_Casti32_f32( bit2 );
+        fX ^= FS::Cast<float>( bit1 );
+        fY ^= FS::Cast<float>( bit2 );
         
-        float32v a = FS_Select_f32( bit4, fY, fX );
-        float32v b = FS_Select_f32( bit4, fX, fY );
+        float32v a = FS::Select( bit4, fY, fX );
+        float32v b = FS::Select( bit4, fX, fY );
         
-        return FS_FMulAdd_f32( float32v( 1.0f + ROOT2 ), a, b );
+        return FS::FMulAdd( float32v( 1.0f + ROOT2 ), a, b );
     }
 
     template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
@@ -127,7 +111,7 @@ namespace FastNoise
         float32v gX = _mm256_permutevar8x32_ps( float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
         float32v gY = _mm256_permutevar8x32_ps( float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
 
-        return FS_FMulAdd_f32( gX, fX, fY * gY );
+        return FS::FMulAdd( gX, fX, fY * gY );
     }
 
     template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512> * = nullptr>
@@ -136,7 +120,7 @@ namespace FastNoise
         float32v gX = _mm512_permutexvar_ps( hash, float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
         float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );
 
-        return FS_FMulAdd_f32( gX, fX, fY * gY );
+        return FS::FMulAdd( gX, fX, fY * gY );
     }
 
     template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512 > * = nullptr >
@@ -145,16 +129,16 @@ namespace FastNoise
         int32v hasha13 = hash & int32v( 13 );
 
         //if h < 8 then x, else y
-        float32v u = FS_Select_f32( hasha13 < int32v( 8 ), fX, fY );
+        float32v u = FS::Select( hasha13 < int32v( 8 ), fX, fY );
 
         //if h < 4 then y else if h is 12 or 14 then x else z
-        float32v v = FS_Select_f32( hasha13 == int32v( 12 ), fX, fZ );
-        v = FS_Select_f32( hasha13 < int32v( 2 ), fY, v );
+        float32v v = FS::Select( hasha13 == int32v( 12 ), fX, fZ );
+        v = FS::Select( hasha13 < int32v( 2 ), fY, v );
 
         //if h1 then -u else u
         //if h2 then -v else v
-        float32v h1 = FS_Casti32_f32( hash << 31 );
-        float32v h2 = FS_Casti32_f32( (hash & int32v( 2 )) << 30 );
+        float32v h1 = FS::Cast<float>( hash << 31 );
+        float32v h2 = FS::Cast<float>( (hash & int32v( 2 )) << 30 );
         //then add them
         return ( u ^ h1 ) + ( v ^ h2 );
     }
@@ -166,7 +150,7 @@ namespace FastNoise
         float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
         float32v gZ = _mm512_permutexvar_ps( hash, float32v( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
 
-        return FS_FMulAdd_f32( gX, fX, FS_FMulAdd_f32( fY, gY, fZ * gZ ));
+        return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, fZ * gZ ));
     }
 
     template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512>* = nullptr >
@@ -174,21 +158,21 @@ namespace FastNoise
     {
         int32v p = hash & int32v( 3 << 3 );
 
-        float32v a = FS_Select_f32( p > int32v( 0 ), fX, fY );
+        float32v a = FS::Select( p > int32v( 0 ), fX, fY );
         float32v b;
         if constexpr( FS::SIMD_Level <= FastSIMD::Level_SSE2 )
         {
-            b = FS_Select_f32( p > int32v( 1 << 3 ), fY, fZ );        
+            b = FS::Select( p > int32v( 1 << 3 ), fY, fZ );        
         }
         else
         {
-            b = FS_Select_f32( hash << 27, fY, fZ );
+            b = FS::Select( hash << 27, fY, fZ );
         }
-        float32v c = FS_Select_f32( p > int32v( 2 << 3 ), fZ, fW );
+        float32v c = FS::Select( p > int32v( 2 << 3 ), fZ, fW );
 
-        float32v aSign = FS_Casti32_f32( hash << 31 );
-        float32v bSign = FS_Casti32_f32( (hash << 30) & int32v( 0x80000000 ) );
-        float32v cSign = FS_Casti32_f32( (hash << 29) & int32v( 0x80000000 ) );
+        float32v aSign = FS::Cast<float>( hash << 31 );
+        float32v bSign = FS::Cast<float>( (hash << 30) & int32v( 0x80000000 ) );
+        float32v cSign = FS::Cast<float>( (hash << 29) & int32v( 0x80000000 ) );
 
         return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
     }
@@ -201,7 +185,7 @@ namespace FastNoise
         float32v gZ = _mm512_permutex2var_ps( float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
         float32v gW = _mm512_permutex2var_ps( float32v( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, float32v( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
 
-        return FS_FMulAdd_f32( gX, fX, FS_FMulAdd_f32( fY, gY, FS_FMulAdd_f32( fZ, gZ, fW * gW ) ));
+        return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, FS::FMulAdd( fZ, gZ, fW * gW ) ));
     }
 
     template<typename SIMD = FS, typename... P>
@@ -237,19 +221,19 @@ namespace FastNoise
     template<typename SIMD = FS>
     FS_FORCEINLINE static float32v Lerp( float32v a, float32v b, float32v t )
     {
-        return FS_FMulAdd_f32( t, b - a, a );
+        return FS::FMulAdd( t, b - a, a );
     }
 
     template<typename SIMD = FS>
      FS_FORCEINLINE static float32v InterpHermite( float32v t )
     {
-        return t * t * FS_FNMulAdd_f32( t, float32v( 2 ), float32v( 3 ));
+        return t * t * FS::FNMulAdd( t, float32v( 2 ), float32v( 3 ));
     }
 
     template<typename SIMD = FS>
      FS_FORCEINLINE static float32v InterpQuintic( float32v t )
     {
-        return t * t * t * FS_FMulAdd_f32( t, FS_FMulAdd_f32( t, float32v( 6 ), float32v( -15 )), float32v( 10 ) );
+        return t * t * t * FS::FMulAdd( t, FS::FMulAdd( t, float32v( 6 ), float32v( -15 )), float32v( 10 ) );
     }
 
     template<typename SIMD = FS, typename... P>
@@ -261,7 +245,7 @@ namespace FastNoise
             case DistanceFunction::Euclidean:
             {
                 float32v distSqr = dX * dX;
-                ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);
+                ((distSqr = FS::FMulAdd( d, d, distSqr )), ...);
 
                 return FS_InvSqrt_f32( distSqr ) * distSqr;
             }
@@ -269,31 +253,31 @@ namespace FastNoise
             case DistanceFunction::EuclideanSquared:
             {
                 float32v distSqr = dX * dX;
-                ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);
+                ((distSqr = FS::FMulAdd( d, d, distSqr )), ...);
 
                 return distSqr;
             }
 
             case DistanceFunction::Manhattan:
             {
-                float32v dist = FS_Abs_f32( dX );
-                dist += (FS_Abs_f32( d ) + ...);
+                float32v dist = FS::Abs( dX );
+                dist += (FS::Abs( d ) + ...);
 
                 return dist;
             }
 
             case DistanceFunction::Hybrid:
             {
-                float32v both = FS_FMulAdd_f32( dX, dX, FS_Abs_f32( dX ) );
-                ((both += FS_FMulAdd_f32( d, d, FS_Abs_f32( d ) )), ...);
+                float32v both = FS::FMulAdd( dX, dX, FS::Abs( dX ) );
+                ((both += FS::FMulAdd( d, d, FS::Abs( d ) )), ...);
 
                 return both;
             }
 
             case DistanceFunction::MaxAxis:
             {
-                float32v max = FS_Abs_f32( dX );
-                ((max = FS::Max( FS_Abs_f32(d), max )), ...);
+                float32v max = FS::Abs( dX );
+                ((max = FS::Max( FS::Abs(d), max )), ...);
 
                 return max;
             }
diff --git a/include/FastNoise/Generators/Value.inl b/include/FastNoise/Generators/Value.inl
index 6d432e73..9ae6b539 100644
--- a/include/FastNoise/Generators/Value.inl
+++ b/include/FastNoise/Generators/Value.inl
@@ -1,85 +1,86 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Value.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Value, FS> : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-
-        return FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0 ), FnUtils::GetValueCoord( seed, x1, y0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1 ), FnUtils::GetValueCoord( seed, x1, y1 ), xs ), ys );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
-
-        return FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0 ), FnUtils::GetValueCoord( seed, x1, y0, z0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0 ), FnUtils::GetValueCoord( seed, x1, y1, z0 ), xs ), ys ),
-            FnUtils::Lerp(                                                                                
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1 ), FnUtils::GetValueCoord( seed, x1, y0, z1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1 ), FnUtils::GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-        float32v ws = FS_Floor_f32( w );
-
-        int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-        int32v w1 = w0 + int32v( FnPrimes::W );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
-        ws = FnUtils::InterpHermite( w - ws );
-
-        return FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w0 ), xs ), ys ),
-            FnUtils::Lerp( 
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w0 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w0 ), xs ), ys ), zs ),
-            FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w1 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w1 ), xs ), ys ),
-            FnUtils::Lerp( 
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w1 ), xs ), ys ), zs ), ws );
-    }
-};
+#include "FastSIMD/InlInclude.h"
+
+#include "Value.h"
+#include "Utils.inl"
+
+template<typename FS>
+class FastSIMD::DispatchClass<FastNoise::Value, FS> : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+{
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+
+        return FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0 ), FnUtils::GetValueCoord( seed, x1, y0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1 ), FnUtils::GetValueCoord( seed, x1, y1 ), xs ), ys );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        float32v zs = FS_Floor_f32( z );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v z1 = z0 + int32v( FnPrimes::Z );
+
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+        zs = FnUtils::InterpHermite( z - zs );
+
+        return FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0 ), FnUtils::GetValueCoord( seed, x1, y0, z0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0 ), FnUtils::GetValueCoord( seed, x1, y1, z0 ), xs ), ys ),
+            FnUtils::Lerp(                                                                                
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1 ), FnUtils::GetValueCoord( seed, x1, y0, z1 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1 ), FnUtils::GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    {
+        float32v xs = FS_Floor_f32( x );
+        float32v ys = FS_Floor_f32( y );
+        float32v zs = FS_Floor_f32( z );
+        float32v ws = FS_Floor_f32( w );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
+        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( FnPrimes::W );
+        int32v x1 = x0 + int32v( FnPrimes::X );
+        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v z1 = z0 + int32v( FnPrimes::Z );
+        int32v w1 = w0 + int32v( FnPrimes::W );
+
+        xs = FnUtils::InterpHermite( x - xs );
+        ys = FnUtils::InterpHermite( y - ys );
+        zs = FnUtils::InterpHermite( z - zs );
+        ws = FnUtils::InterpHermite( w - ws );
+
+        return FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w0 ), xs ),
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w0 ), xs ), ys ),
+            FnUtils::Lerp( 
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w0 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w0 ), xs ), ys ), zs ),
+            FnUtils::Lerp( FnUtils::Lerp(
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w1 ), xs ),
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w1 ), xs ), ys ),
+            FnUtils::Lerp( 
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w1 ), xs ),    
+            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w1 ), xs ), ys ), zs ), ws );
+    }
+};

From 2bd80709dd27da24b5edad9e25c7293f6229ecab Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sat, 27 Aug 2022 13:37:31 +0100
Subject: [PATCH 008/139] Convert more nodes to nuFastSIMD

---
 include/FastNoise/FastNoise_BuildList.inl     |  38 +--
 .../FastNoise/Generators/BasicGenerators.inl  |   8 +-
 include/FastNoise/Generators/Blends.inl       | 311 +++++++++---------
 include/FastNoise/Generators/Cellular.inl     |   2 +-
 include/FastNoise/Generators/Generator.inl    |   2 +
 include/FastNoise/Generators/Utils.inl        | 210 ++++++------
 6 files changed, 282 insertions(+), 289 deletions(-)

diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index 8fe88ff0..6e08e965 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -64,12 +64,12 @@ template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
 #else
 #include "Generators/Modifiers.inl"
 #endif
-//
-//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-//#include "Generators/Blends.h"
-//#else
-//#include "Generators/Blends.inl"
-//#endif
+
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include "Generators/Blends.h"
+#else
+#include "Generators/Blends.inl"
+#endif
 
 // Nodes
 // Order is important!
@@ -77,11 +77,11 @@ template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
 // inserting will break existing encoded node trees
 
 FASTNOISE_REGISTER_NODE( Constant );
-//FASTNOISE_REGISTER_NODE( White );
+FASTNOISE_REGISTER_NODE( White );
 FASTNOISE_REGISTER_NODE( Checkerboard );
 FASTNOISE_REGISTER_NODE( SineWave );
 FASTNOISE_REGISTER_NODE( PositionOutput );
-//FASTNOISE_REGISTER_NODE( DistanceToPoint );
+FASTNOISE_REGISTER_NODE( DistanceToPoint );
 //                    
 //FASTNOISE_REGISTER_NODE( Value );
 //FASTNOISE_REGISTER_NODE( Perlin );
@@ -106,20 +106,20 @@ FASTNOISE_REGISTER_NODE( DomainRotate );
 FASTNOISE_REGISTER_NODE( SeedOffset );
 FASTNOISE_REGISTER_NODE( Remap );
 FASTNOISE_REGISTER_NODE( ConvertRGBA8 );
-//                    
-//FASTNOISE_REGISTER_NODE( Add );
-//FASTNOISE_REGISTER_NODE( Subtract );
-//FASTNOISE_REGISTER_NODE( Multiply );
-//FASTNOISE_REGISTER_NODE( Divide );
-//FASTNOISE_REGISTER_NODE( Min );
-//FASTNOISE_REGISTER_NODE( Max );
-//FASTNOISE_REGISTER_NODE( MinSmooth );
-//FASTNOISE_REGISTER_NODE( MaxSmooth );
-//FASTNOISE_REGISTER_NODE( Fade );
+                    
+FASTNOISE_REGISTER_NODE( Add );
+FASTNOISE_REGISTER_NODE( Subtract );
+FASTNOISE_REGISTER_NODE( Multiply );
+FASTNOISE_REGISTER_NODE( Divide );
+FASTNOISE_REGISTER_NODE( Min );
+FASTNOISE_REGISTER_NODE( Max );
+FASTNOISE_REGISTER_NODE( MinSmooth );
+FASTNOISE_REGISTER_NODE( MaxSmooth );
+FASTNOISE_REGISTER_NODE( Fade );
                     
 FASTNOISE_REGISTER_NODE( Terrace );
 //FASTNOISE_REGISTER_NODE( PowFloat );
-//FASTNOISE_REGISTER_NODE( PowInt );
+FASTNOISE_REGISTER_NODE( PowInt );
 FASTNOISE_REGISTER_NODE( DomainAxisScale );
 FASTNOISE_REGISTER_NODE( AddDimension );
 FASTNOISE_REGISTER_NODE( RemoveDimension );
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index c906ffdf..769b7e35 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -1,5 +1,5 @@
 #include "BasicGenerators.h"
-//#include "Utils.inl"
+#include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::Constant, SIMD> : public virtual FastNoise::Constant, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
@@ -22,9 +22,9 @@ class FastSIMD::DispatchClass<FastNoise::White, SIMD> : public virtual FastNoise
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         size_t idx = 0;
-        ((pos = FS::Cast<float>( (FS::Cast<int32_t>( pos ) ^ (FS::Cast<int32_t>( pos ) >> 16)) * int32v( FnPrimes::Lookup[idx++] ) )), ...);
+        ((pos = FS::Cast<float>( (FS::Cast<int32_t>( pos ) ^ (FS::Cast<int32_t>( pos ) >> 16)) * int32v( Primes::Lookup[idx++] ) )), ...);
 
-        return FnUtils::GetValueCoord( seed, FS::Cast<int32_t>( pos )... );
+        return GetValueCoord( seed, FS::Cast<int32_t>( pos )... );
     }
 };
 
@@ -81,6 +81,6 @@ class FastSIMD::DispatchClass<FastNoise::DistanceToPoint, SIMD> : public virtual
         size_t pointIdx = 0;
 
         ((pos -= float32v( mPoint[pointIdx++] )), ...);
-        return FnUtils::CalcDistance( mDistanceFunction, pos... );
+        return CalcDistance( mDistanceFunction, pos... );
     }
 };
diff --git a/include/FastNoise/Generators/Blends.inl b/include/FastNoise/Generators/Blends.inl
index 2ff5c6cd..58f0fc07 100644
--- a/include/FastNoise/Generators/Blends.inl
+++ b/include/FastNoise/Generators/Blends.inl
@@ -1,151 +1,160 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Blends.h"
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Add, FS> : public virtual FastNoise::Add, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return this->GetSourceValue( mLHS, seed, pos... ) + this->GetSourceValue( mRHS, seed, pos... );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Subtract, FS> : public virtual FastNoise::Subtract, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return this->GetSourceValue( mLHS, seed, pos... ) - this->GetSourceValue( mRHS, seed, pos... );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Multiply, FS> : public virtual FastNoise::Multiply, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return this->GetSourceValue( mLHS, seed, pos... ) * this->GetSourceValue( mRHS, seed, pos... );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Divide, FS> : public virtual FastNoise::Divide, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return this->GetSourceValue( mLHS, seed, pos... ) / this->GetSourceValue( mRHS, seed, pos... );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::PowFloat, FS> : public virtual FastNoise::PowFloat, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return FS_Pow_f32( this->GetSourceValue( mValue, seed, pos... ), this->GetSourceValue( mPow, seed, pos... ) );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::PowInt, FS> : public virtual FastNoise::PowInt, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v value = this->GetSourceValue( mValue, seed, pos... );
-        float32v pow = value * value;
-
-        for( int i = 2; i < mPow; i++ )
-        {
-            pow *= value;
-        }
-
-        return pow;
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Min, FS> : public virtual FastNoise::Min, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return FS::Min( this->GetSourceValue( mLHS, seed, pos... ), this->GetSourceValue( mRHS, seed, pos... ) );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Max, FS> : public virtual FastNoise::Max, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return FS::Max( this->GetSourceValue( mLHS, seed, pos... ), this->GetSourceValue( mRHS, seed, pos... ) );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::MinSmooth, FS> : public virtual FastNoise::MinSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v a = this->GetSourceValue( mLHS, seed, pos... );
-        float32v b = this->GetSourceValue( mRHS, seed, pos... );
-        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
-
-        float32v h = FS::Max( smoothness - FS::Abs( a - b ), float32v( 0.0f ) );
-
-        h *= FS_Reciprocal_f32( smoothness );
-
-        return FS::FNMulAdd( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::MaxSmooth, FS> : public virtual FastNoise::MaxSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v a = -this->GetSourceValue( mLHS, seed, pos... );
-        float32v b = -this->GetSourceValue( mRHS, seed, pos... );
-        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
-
-        float32v h = FS::Max( smoothness - FS::Abs( a - b ), float32v( 0.0f ) );
-
-        h *= FS_Reciprocal_f32( smoothness );
-
-        return -FS::FNMulAdd( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Fade, FS> : public virtual FastNoise::Fade, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-    
-    template<typename... P> 
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v fade = FS::Abs( this->GetSourceValue( mFade, seed, pos... ) );
-
-        return FS::FMulAdd( this->GetSourceValue( mA, seed, pos... ), float32v( 1 ) - fade, this->GetSourceValue( mB, seed, pos... ) * fade );
-    }
-};
-
+#include "Blends.h"
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Add, SIMD> : public virtual FastNoise::Add, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return this->GetSourceValue( mLHS, seed, pos... ) + this->GetSourceValue( mRHS, seed, pos... );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Subtract, SIMD> : public virtual FastNoise::Subtract, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return this->GetSourceValue( mLHS, seed, pos... ) - this->GetSourceValue( mRHS, seed, pos... );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Multiply, SIMD> : public virtual FastNoise::Multiply, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return this->GetSourceValue( mLHS, seed, pos... ) * this->GetSourceValue( mRHS, seed, pos... );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Divide, SIMD> : public virtual FastNoise::Divide, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return this->GetSourceValue( mLHS, seed, pos... ) / this->GetSourceValue( mRHS, seed, pos... );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::PowFloat, SIMD> : public virtual FastNoise::PowFloat, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return FS_Pow_f32( this->GetSourceValue( mValue, seed, pos... ), this->GetSourceValue( mPow, seed, pos... ) );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::PowInt, SIMD> : public virtual FastNoise::PowInt, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v value = this->GetSourceValue( mValue, seed, pos... );
+        float32v pow = value * value;
+
+        for( int i = 2; i < mPow; i++ )
+        {
+            pow *= value;
+        }
+
+        return pow;
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Min, SIMD> : public virtual FastNoise::Min, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return FS::Min( this->GetSourceValue( mLHS, seed, pos... ), this->GetSourceValue( mRHS, seed, pos... ) );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Max, SIMD> : public virtual FastNoise::Max, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return FS::Max( this->GetSourceValue( mLHS, seed, pos... ), this->GetSourceValue( mRHS, seed, pos... ) );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::MinSmooth, SIMD> : public virtual FastNoise::MinSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v a = this->GetSourceValue( mLHS, seed, pos... );
+        float32v b = this->GetSourceValue( mRHS, seed, pos... );
+        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
+
+        float32v h = FS::Max( smoothness - FS::Abs( a - b ), float32v( 0.0f ) );
+
+        h *= FS::Reciprocal( smoothness );
+
+        return FS::FNMulAdd( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::MaxSmooth, SIMD> : public virtual FastNoise::MaxSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v a = -this->GetSourceValue( mLHS, seed, pos... );
+        float32v b = -this->GetSourceValue( mRHS, seed, pos... );
+        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
+
+        float32v h = FS::Max( smoothness - FS::Abs( a - b ), float32v( 0.0f ) );
+
+        h *= FS::Reciprocal( smoothness );
+
+        return -FS::FNMulAdd( float32v( 1.0f / 6.0f ), h * h * h * smoothness, FS::Min( a, b ) );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Fade, SIMD> : public virtual FastNoise::Fade, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    
+    template<typename... P> 
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v fade = FS::Abs( this->GetSourceValue( mFade, seed, pos... ) );
+
+        return FS::FMulAdd( this->GetSourceValue( mA, seed, pos... ), float32v( 1 ) - fade, this->GetSourceValue( mB, seed, pos... ) * fade );
+    }
+};
+
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index 1efc41c6..f597a9c5 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -461,7 +461,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
         }
         case ReturnType::Index0Div1:
         {
-            return distance[mDistanceIndex0] * FS_Reciprocal_f32( distance[mDistanceIndex1] );
+            return distance[mDistanceIndex0] * FS::Reciprocal( distance[mDistanceIndex1] );
         }
         }
     }
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index a5a37675..fd7f8f30 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -5,6 +5,8 @@
 
 #pragma warning( disable:4250 )
 
+using namespace FastNoise;
+
 using float32v = FS::NativeRegister<float>;
 using int32v = FS::NativeRegister<std::int32_t>;
 using mask32v = FS::NativeRegister<FS::Mask<32>>;
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 0b05187e..36e2549a 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -16,85 +16,93 @@ namespace FastNoise
     static constexpr float ROOT2 = 1.4142135623730950488f;
     static constexpr float ROOT3 = 1.7320508075688772935f;
 
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
-    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
-    {
-        int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+    //template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    //FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
+    //{
+    //    int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
-        // Bit-4 = Choose X Y ordering
-        mask32v xy;
+    //    // Bit-4 = Choose X Y ordering
+    //    mask32v xy;
 
-        xy = index << 29;
+    //    xy = index << 29;
 
-        if constexpr( !(SIMD & FastSIMD::FeatureFlag::SSE41) )
-        {
-            xy >>= 31;
-        }        
+    //    if constexpr( !(SIMD & FastSIMD::FeatureFlag::SSE41) )
+    //    {
+    //        xy >>= 31;
+    //    }        
 
-        float32v a = FS::Select( xy, fY, fX );
-        float32v b = FS::Select( xy, fX, fY );
+    //    float32v a = FS::Select( xy, fY, fX );
+    //    float32v b = FS::Select( xy, fX, fY );
 
-        // Bit-1 = b flip sign
-        b ^= FS::Cast<float>( index << 31 );
+    //    // Bit-1 = b flip sign
+    //    b ^= FS::Cast<float>( index << 31 );
 
-        // Bit-2 = Mul a by 2 or Root3
-        mask32v aMul2 = (index << 30) >> 31;        
+    //    // Bit-2 = Mul a by 2 or Root3
+    //    mask32v aMul2 = (index << 30) >> 31;        
 
-        a *= FS::Select( aMul2, float32v( 2 ), float32v( ROOT3 ) );
-        // b zero value if a mul 2
-        b = FS_NMask_f32( b, aMul2 );
+    //    a *= FS::Select( aMul2, float32v( 2 ), float32v( ROOT3 ) );
+    //    // b zero value if a mul 2
+    //    b = FS_NMask_f32( b, aMul2 );
 
-        // Bit-8 = Flip sign of a + b
-        return ( a + b ) ^ FS::Cast<float>( (index >> 3) << 31 );
-    }
+    //    // Bit-8 = Flip sign of a + b
+    //    return ( a + b ) ^ FS::Cast<float>( (index >> 3) << 31 );
+    //}
 
-    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
-    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
-    {
-        int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+    //template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
+    //FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
+    //{
+    //    int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
-        float32v gX = _mm256_permutevar8x32_ps( float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
-        float32v gY = _mm256_permutevar8x32_ps( float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
+    //    float32v gX = _mm256_permutevar8x32_ps( float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
+    //    float32v gY = _mm256_permutevar8x32_ps( float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
 
-        // Bit-8 = Flip sign of a + b
-        return FS::FMulAdd( gX, fX, fY * gY ) ^ FS::Cast<float>( (index >> 3) << 31 );
-    }
+    //    // Bit-8 = Flip sign of a + b
+    //    return FS::FMulAdd( gX, fX, fY * gY ) ^ FS::Cast<float>( (index >> 3) << 31 );
+    //}
 
-    template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
-    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
-    {
-        int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+    //template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
+    //FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
+    //{
+    //    int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
-        float32v gX = _mm512_permutexvar_ps( index, float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
-        float32v gY = _mm512_permutexvar_ps( index, float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );
+    //    float32v gX = _mm512_permutexvar_ps( index, float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
+    //    float32v gY = _mm512_permutexvar_ps( index, float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );
 
-        return FS::FMulAdd( gX, fX, fY * gY );
-    }
+    //    return FS::FMulAdd( gX, fX, fY * gY );
+    //}
 
 
-    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
+    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
     {
         // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
         // ( 1, 1+R2 ) ( 1, -1-R2 ) ( -1, 1+R2 ) ( -1, -1-R2 )
 
+        /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            float32v gX = _mm512_permutexvar_ps( hash, float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
+            float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );
+
+            return FS::FMulAdd( gX, fX, fY * gY );
+        }
+        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        {
+            float32v gX = _mm256_permutevar8x32_ps( float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
+            float32v gY = _mm256_permutevar8x32_ps( float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
+
+            return FS::FMulAdd( gX, fX, fY * gY );
+        }*/
+
         int32v  bit1 = (hash << 31);
         int32v  bit2 = (hash >> 1) << 31;
         mask32v bit4;
+        
+        bit4 = hash << 29;
 
-        if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
-        {
-            bit4 = int32_t( hash & int32v( 1 << 2 ) ) != 0;
-        }
-        else
+        if constexpr( !( SIMD & FastSIMD::FeatureFlag::SSE41 ) )
         {
-            bit4 = hash << 29;
-
-            if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
-            {
-                bit4 >>= 31;
-            }
-        }
+            bit4 >>= 31;
+        }        
 
         fX ^= FS::Cast<float>( bit1 );
         fY ^= FS::Cast<float>( bit2 );
@@ -104,28 +112,19 @@ namespace FastNoise
         
         return FS::FMulAdd( float32v( 1.0f + ROOT2 ), a, b );
     }
+    
+    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
+    {        
+        /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            float32v gX = _mm512_permutexvar_ps( hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
+            float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
+            float32v gZ = _mm512_permutexvar_ps( hash, float32v( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
 
-    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
-    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
-    {
-        float32v gX = _mm256_permutevar8x32_ps( float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
-        float32v gY = _mm256_permutevar8x32_ps( float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
-
-        return FS::FMulAdd( gX, fX, fY * gY );
-    }
-
-    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512> * = nullptr>
-     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
-    {
-        float32v gX = _mm512_permutexvar_ps( hash, float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
-        float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );
-
-        return FS::FMulAdd( gX, fX, fY * gY );
-    }
+            return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, fZ * gZ ));
+        }*/
 
-    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512 > * = nullptr >
-    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
-    {
         int32v hasha13 = hash & int32v( 13 );
 
         //if h < 8 then x, else y
@@ -142,31 +141,31 @@ namespace FastNoise
         //then add them
         return ( u ^ h1 ) + ( v ^ h2 );
     }
-
-    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
-    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
+    
+    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
     {
-        float32v gX = _mm512_permutexvar_ps( hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
-        float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
-        float32v gZ = _mm512_permutexvar_ps( hash, float32v( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
+        /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            float32v gX = _mm512_permutex2var_ps( float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
+            float32v gY = _mm512_permutex2var_ps( float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
+            float32v gZ = _mm512_permutex2var_ps( float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
+            float32v gW = _mm512_permutex2var_ps( float32v( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, float32v( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
 
-        return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, fZ * gZ ));
-    }
+            return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, FS::FMulAdd( fZ, gZ, fW * gW ) ));
+        }*/
 
-    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512>* = nullptr >
-    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
-    {
         int32v p = hash & int32v( 3 << 3 );
 
         float32v a = FS::Select( p > int32v( 0 ), fX, fY );
         float32v b;
-        if constexpr( FS::SIMD_Level <= FastSIMD::Level_SSE2 )
+        if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
         {
-            b = FS::Select( p > int32v( 1 << 3 ), fY, fZ );        
+            b = FS::Select( hash << 27, fY, fZ );
         }
         else
         {
-            b = FS::Select( hash << 27, fY, fZ );
+            b = FS::Select( p > int32v( 1 << 3 ), fY, fZ );        
         }
         float32v c = FS::Select( p > int32v( 2 << 3 ), fZ, fW );
 
@@ -177,18 +176,7 @@ namespace FastNoise
         return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
     }
 
-    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
-    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
-    {
-        float32v gX = _mm512_permutex2var_ps( float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
-        float32v gY = _mm512_permutex2var_ps( float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
-        float32v gZ = _mm512_permutex2var_ps( float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
-        float32v gW = _mm512_permutex2var_ps( float32v( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, float32v( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
-
-        return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, FS::FMulAdd( fZ, gZ, fW * gW ) ));
-    }
-
-    template<typename SIMD = FS, typename... P>
+    template<typename... P>
     FS_FORCEINLINE static int32v HashPrimes( int32v seed, P... primedPos )
     {
         int32v hash = seed;
@@ -198,7 +186,7 @@ namespace FastNoise
         return (hash >> 15) ^ hash;
     }
 
-    template<typename SIMD = FS, typename... P>
+    template<typename... P>
     FS_FORCEINLINE static int32v HashPrimesHB( int32v seed, P... primedPos )
     {
         int32v hash = seed;
@@ -208,7 +196,7 @@ namespace FastNoise
         return hash;
     }  
 
-    template<typename SIMD = FS, typename... P>
+    template<typename... P>
      FS_FORCEINLINE static float32v GetValueCoord( int32v seed, P... primedPos )
     {
         int32v hash = seed;
@@ -217,26 +205,23 @@ namespace FastNoise
         hash *= hash * int32v( 0x27d4eb2d );
         return FS::Convert<float>( hash ) * float32v( 1.0f / (float)INT_MAX );
     }
-
-    template<typename SIMD = FS>
+     
     FS_FORCEINLINE static float32v Lerp( float32v a, float32v b, float32v t )
     {
         return FS::FMulAdd( t, b - a, a );
     }
-
-    template<typename SIMD = FS>
-     FS_FORCEINLINE static float32v InterpHermite( float32v t )
+    
+    FS_FORCEINLINE static float32v InterpHermite( float32v t )
     {
         return t * t * FS::FNMulAdd( t, float32v( 2 ), float32v( 3 ));
     }
-
-    template<typename SIMD = FS>
+     
      FS_FORCEINLINE static float32v InterpQuintic( float32v t )
     {
         return t * t * t * FS::FMulAdd( t, FS::FMulAdd( t, float32v( 6 ), float32v( -15 )), float32v( 10 ) );
     }
 
-    template<typename SIMD = FS, typename... P>
+    template<typename... P>
     FS_FORCEINLINE static float32v CalcDistance( DistanceFunction distFunc, float32v dX, P... d )
     {
         switch( distFunc )
@@ -247,7 +232,7 @@ namespace FastNoise
                 float32v distSqr = dX * dX;
                 ((distSqr = FS::FMulAdd( d, d, distSqr )), ...);
 
-                return FS_InvSqrt_f32( distSqr ) * distSqr;
+                return FS::InvSqrt( distSqr ) * distSqr;
             }
 
             case DistanceFunction::EuclideanSquared:
@@ -283,7 +268,4 @@ namespace FastNoise
             }
         }
     }    
-}
-
-using FnUtils = FastNoise::Utils<FS_SIMD_CLASS>;
-namespace FnPrimes = FastNoise::Primes;
\ No newline at end of file
+}
\ No newline at end of file

From fcd6a0e2d91bf3a3d316dff38fcfb17c5b60aa2e Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sat, 3 Sep 2022 22:57:00 +0100
Subject: [PATCH 009/139] Coherent gradient nodes working

---
 include/FastNoise/FastNoise_BuildList.inl     |  52 +-
 include/FastNoise/Generators/Blends.inl       |   2 +-
 include/FastNoise/Generators/Cellular.inl     | 146 ++--
 include/FastNoise/Generators/DomainWarp.inl   | 128 +--
 .../Generators/DomainWarpFractal.inl          |   4 +-
 include/FastNoise/Generators/Fractal.inl      |   6 +-
 include/FastNoise/Generators/Perlin.h         |  47 +-
 include/FastNoise/Generators/Perlin.inl       | 124 ++-
 include/FastNoise/Generators/Simplex.inl      | 734 +++++++++---------
 include/FastNoise/Generators/Utils.inl        |  70 +-
 include/FastNoise/Generators/Value.inl        | 120 ++-
 11 files changed, 715 insertions(+), 718 deletions(-)

diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index 6e08e965..6388d246 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -16,25 +16,25 @@ template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
 #else
 #include "Generators/BasicGenerators.inl"
 #endif
-//
-//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-//#include "Generators/Value.h"
-//#else
-//#include "Generators/Value.inl"
-//#endif
-//
-//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-//#include "Generators/Perlin.h"
-//#else
-//#include "Generators/Perlin.inl"
-//#endif
-//
-//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-//#include "Generators/Simplex.h"
-//#else
-//#include "Generators/Simplex.inl"
-//#endif
-//
+
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include "Generators/Value.h"
+#else
+#include "Generators/Value.inl"
+#endif
+
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include "Generators/Perlin.h"
+#else
+#include "Generators/Perlin.inl"
+#endif
+
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include "Generators/Simplex.h"
+#else
+#include "Generators/Simplex.inl"
+#endif
+
 //#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
 //#include "Generators/Cellular.h"
 //#else
@@ -82,12 +82,12 @@ FASTNOISE_REGISTER_NODE( Checkerboard );
 FASTNOISE_REGISTER_NODE( SineWave );
 FASTNOISE_REGISTER_NODE( PositionOutput );
 FASTNOISE_REGISTER_NODE( DistanceToPoint );
-//                    
-//FASTNOISE_REGISTER_NODE( Value );
-//FASTNOISE_REGISTER_NODE( Perlin );
-//FASTNOISE_REGISTER_NODE( Simplex );
-//FASTNOISE_REGISTER_NODE( OpenSimplex2 );
-//                    
+
+FASTNOISE_REGISTER_NODE( Value );
+FASTNOISE_REGISTER_NODE( Perlin );
+FASTNOISE_REGISTER_NODE( Simplex );
+FASTNOISE_REGISTER_NODE( OpenSimplex2 );
+               
 //FASTNOISE_REGISTER_NODE( CellularValue );
 //FASTNOISE_REGISTER_NODE( CellularDistance );
 //FASTNOISE_REGISTER_NODE( CellularLookup );
@@ -118,7 +118,7 @@ FASTNOISE_REGISTER_NODE( MaxSmooth );
 FASTNOISE_REGISTER_NODE( Fade );
                     
 FASTNOISE_REGISTER_NODE( Terrace );
-//FASTNOISE_REGISTER_NODE( PowFloat );
+FASTNOISE_REGISTER_NODE( PowFloat );
 FASTNOISE_REGISTER_NODE( PowInt );
 FASTNOISE_REGISTER_NODE( DomainAxisScale );
 FASTNOISE_REGISTER_NODE( AddDimension );
diff --git a/include/FastNoise/Generators/Blends.inl b/include/FastNoise/Generators/Blends.inl
index 58f0fc07..1c81d6ae 100644
--- a/include/FastNoise/Generators/Blends.inl
+++ b/include/FastNoise/Generators/Blends.inl
@@ -56,7 +56,7 @@ class FastSIMD::DispatchClass<FastNoise::PowFloat, SIMD> : public virtual FastNo
     template<typename... P> 
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        return FS_Pow_f32( this->GetSourceValue( mValue, seed, pos... ), this->GetSourceValue( mPow, seed, pos... ) );
+        return Pow( this->GetSourceValue( mValue, seed, pos... ), this->GetSourceValue( mPow, seed, pos... ) );
     }
 };
 
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index f597a9c5..37dd6edc 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -1,5 +1,3 @@
-#include "FastSIMD/InlInclude.h"
-
 #include <cfloat>
 #include <array>
 
@@ -33,8 +31,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
 
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
 
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -42,7 +40,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
             int32v yc = ycBase;
             for( int yi = 0; yi < 3; yi++ )
             {
-                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
+                int32v hash = HashPrimesHB( seed, xc, yc );
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                 float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
 
@@ -51,7 +49,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
                 float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
 
                 for( int i = 0; ; i++ )
                 {
@@ -73,10 +71,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                 }
 
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
 
         return value[mValueIndex];
@@ -99,9 +97,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
         float32v zcfBase = FS::Convert<float>( zcBase ) - z;
     
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
     
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -113,7 +111,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                 int32v zc = zcBase;
                 for( int zi = 0; zi < 3; zi++ )
                 {
-                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
+                    int32v hash = HashPrimesHB( seed, xc, yc, zc );
                     float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v yd = FS::Convert<float>( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v zd = FS::Convert<float>( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
@@ -124,7 +122,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                     zd = FS::FMulAdd( zd, invMag, zcf );
                 
                     float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
                 
                     for( int i = 0; ; i++ )
                     {
@@ -146,13 +144,13 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                     }
             
                     zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
+                    zc += int32v( Primes::Z );
                 }
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
     
         return value[mValueIndex];
@@ -177,10 +175,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
         float32v zcfBase = FS::Convert<float>( zcBase ) - z;
         float32v wcfBase = FS::Convert<float>( wcBase ) - w;
     
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-        wcBase *= int32v( FnPrimes::W );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+        wcBase *= int32v( Primes::W );
     
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -196,7 +194,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                     int32v wc = wcBase;
                     for( int wi = 0; wi < 3; wi++ )
                     {
-                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
+                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc );
                         float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
@@ -209,7 +207,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
                         float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
 
                         for( int i = 0; ; i++ )
                         {
@@ -231,16 +229,16 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual Fas
                         }
 
                         wcf += float32v( 1 );
-                        wc += int32v( FnPrimes::W );
+                        wc += int32v( Primes::W );
                     }
                     zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
+                    zc += int32v( Primes::Z );
                 }
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
     
         return value[mValueIndex];
@@ -262,8 +260,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
 
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
 
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -271,7 +269,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
             int32v yc = ycBase;
             for ( int yi = 0; yi < 3; yi++ )
             {
-                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
+                int32v hash = HashPrimesHB( seed, xc, yc );
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                 float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
 
@@ -279,7 +277,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                 xd = FS::FMulAdd( xd, invMag, xcf );
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
-                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
 
                 for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                 {
@@ -289,10 +287,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                 distance[0] = FS::Min( distance[0], newDistance );
 
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
 
         return GetReturn( distance );
@@ -313,9 +311,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
         float32v zcfBase = FS::Convert<float>( zcBase ) - z;
 
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
 
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -327,7 +325,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                 int32v zc = zcBase;
                 for( int zi = 0; zi < 3; zi++ )
                 {
-                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
+                    int32v hash = HashPrimesHB( seed, xc, yc, zc );
                     float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
@@ -337,7 +335,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                     yd = FS::FMulAdd( yd, invMag, ycf );
                     zd = FS::FMulAdd( zd, invMag, zcf );
 
-                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
 
                     for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                     {
@@ -347,13 +345,13 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                     distance[0] = FS::Min( distance[0], newDistance );
 
                     zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
+                    zc += int32v( Primes::Z );
                 }
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
 
         return GetReturn( distance );
@@ -376,10 +374,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
         float32v zcfBase = FS::Convert<float>( zcBase ) - z;
         float32v wcfBase = FS::Convert<float>( wcBase ) - w;
 
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-        wcBase *= int32v( FnPrimes::W );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+        wcBase *= int32v( Primes::W );
 
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -395,7 +393,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                     int32v wc = wcBase;
                     for( int wi = 0; wi < 3; wi++ )
                     {
-                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
+                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc );
                         float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
@@ -407,7 +405,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                         zd = FS::FMulAdd( zd, invMag, zcf );
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
-                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
 
                         for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                         {
@@ -417,16 +415,16 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual
                         distance[0] = FS::Min( distance[0], newDistance );
 
                         wcf += float32v( 1 );
-                        wc += int32v( FnPrimes::W );
+                        wc += int32v( Primes::W );
                     }
                     zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
+                    zc += int32v( Primes::Z );
                 }
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
 
         return GetReturn( distance );
@@ -481,8 +479,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
         float32v xcf = FS::Convert<float>( xc ) - x;
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
 
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
 
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -490,7 +488,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
             int32v yc = ycBase;
             for( int yi = 0; yi < 3; yi++ )
             {
-                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
+                int32v hash = HashPrimesHB( seed, xc, yc );
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                 float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
 
@@ -498,7 +496,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                 xd = FS::FMulAdd( xd, invMag, xcf );
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
-                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
 
                 mask32v closer = newDistance < distance;
                 distance = FS::Min( newDistance, distance );
@@ -507,10 +505,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                 cellY = FS::Select( closer, yd + y, cellY );
 
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
 
         return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ) );
@@ -530,9 +528,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
         float32v ycfBase = FS::Convert<float>( ycBase ) - y;
         float32v zcfBase = FS::Convert<float>( zcBase ) - z;
 
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
 
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -544,7 +542,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                 int32v zc = zcBase;
                 for( int zi = 0; zi < 3; zi++ )
                 {
-                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
+                    int32v hash = HashPrimesHB( seed, xc, yc, zc );
                     float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                     float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
@@ -554,7 +552,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                     yd = FS::FMulAdd( yd, invMag, ycf );
                     zd = FS::FMulAdd( zd, invMag, zcf );
 
-                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
 
                     mask32v closer = newDistance < distance;
                     distance = FS::Min( newDistance, distance );
@@ -564,13 +562,13 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                     cellZ = FS::Select( closer, zd + z, cellZ );
 
                     zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
+                    zc += int32v( Primes::Z );
                 }
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
 
         return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ) );
@@ -592,10 +590,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
         float32v zcfBase = FS::Convert<float>( zcBase ) - z;
         float32v wcfBase = FS::Convert<float>( wcBase ) - w;
 
-        xc *= int32v( FnPrimes::X );
-        ycBase *= int32v( FnPrimes::Y );
-        zcBase *= int32v( FnPrimes::Z );
-        wcBase *= int32v( FnPrimes::W );
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+        wcBase *= int32v( Primes::W );
 
         for( int xi = 0; xi < 3; xi++ )
         {
@@ -611,7 +609,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                     int32v wc = wcBase;
                     for( int wi = 0; wi < 3; wi++ )
                     {
-                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
+                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc );
                         float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
@@ -623,7 +621,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                         zd = FS::FMulAdd( zd, invMag, zcf );
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
-                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
 
                         mask32v closer = newDistance < distance;
                         distance = FS::Min( newDistance, distance );
@@ -634,16 +632,16 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual Fa
                         cellW = FS::Select( closer, wd + w, cellW );
 
                         wcf += float32v( 1 );
-                        wc += int32v( FnPrimes::W );
+                        wc += int32v( Primes::W );
                     }
                     zcf += float32v( 1 );
-                    zc += int32v( FnPrimes::Z );
+                    zc += int32v( Primes::Z );
                 }
                 ycf += float32v( 1 );
-                yc += int32v( FnPrimes::Y );
+                yc += int32v( Primes::Y );
             }
             xcf += float32v( 1 );
-            xc += int32v( FnPrimes::X );
+            xc += int32v( Primes::X );
         }
 
         return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ), cellW * float32v( mLookupFreq ) );
diff --git a/include/FastNoise/Generators/DomainWarp.inl b/include/FastNoise/Generators/DomainWarp.inl
index b0cdb3fa..5bbe984b 100644
--- a/include/FastNoise/Generators/DomainWarp.inl
+++ b/include/FastNoise/Generators/DomainWarp.inl
@@ -30,19 +30,19 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
 {public:
     float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
 
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
 
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
+        xs = InterpHermite( x - xs );
+        ys = InterpHermite( y - ys );
 
     #define GRADIENT_COORD( _x, _y )\
-        int32v hash##_x##_y = FnUtils::HashPrimesHB(seed, x##_x, y##_y );\
+        int32v hash##_x##_y = HashPrimesHB(seed, x##_x, y##_y );\
         float32v x##_x##_y = FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) );\
         float32v y##_x##_y = FS::Convert<float>( (hash##_x##_y >> 16) & int32v( 0xffff ) );
 
@@ -55,8 +55,8 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
 
         float32v normalise = float32v( 1.0f / (0xffff / 2.0f) );
 
-        float32v xWarp = (FnUtils::Lerp( FnUtils::Lerp( x00, x10, xs ), FnUtils::Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
-        float32v yWarp = (FnUtils::Lerp( FnUtils::Lerp( y00, y10, xs ), FnUtils::Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+        float32v xWarp = (Lerp( Lerp( x00, x10, xs ), Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+        float32v yWarp = (Lerp( Lerp( y00, y10, xs ), Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
@@ -68,23 +68,23 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
             
     float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
 
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
 
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
+        xs = InterpHermite( x - xs );
+        ys = InterpHermite( y - ys );
+        zs = InterpHermite( z - zs );
 
     #define GRADIENT_COORD( _x, _y, _z )\
-        int32v hash##_x##_y##_z = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z );\
+        int32v hash##_x##_y##_z = HashPrimesHB( seed, x##_x, y##_y, z##_z );\
         float32v x##_x##_y##_z = FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) );\
         float32v y##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
         float32v z##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
@@ -100,19 +100,19 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
 
     #undef GRADIENT_COORD
 
-        float32v x0z = FnUtils::Lerp( FnUtils::Lerp( x000, x100, xs ), FnUtils::Lerp( x010, x110, xs ), ys );
-        float32v y0z = FnUtils::Lerp( FnUtils::Lerp( y000, y100, xs ), FnUtils::Lerp( y010, y110, xs ), ys );
-        float32v z0z = FnUtils::Lerp( FnUtils::Lerp( z000, z100, xs ), FnUtils::Lerp( z010, z110, xs ), ys );
+        float32v x0z = Lerp( Lerp( x000, x100, xs ), Lerp( x010, x110, xs ), ys );
+        float32v y0z = Lerp( Lerp( y000, y100, xs ), Lerp( y010, y110, xs ), ys );
+        float32v z0z = Lerp( Lerp( z000, z100, xs ), Lerp( z010, z110, xs ), ys );
                    
-        float32v x1z = FnUtils::Lerp( FnUtils::Lerp( x001, x101, xs ), FnUtils::Lerp( x011, x111, xs ), ys );
-        float32v y1z = FnUtils::Lerp( FnUtils::Lerp( y001, y101, xs ), FnUtils::Lerp( y011, y111, xs ), ys );
-        float32v z1z = FnUtils::Lerp( FnUtils::Lerp( z001, z101, xs ), FnUtils::Lerp( z011, z111, xs ), ys );
+        float32v x1z = Lerp( Lerp( x001, x101, xs ), Lerp( x011, x111, xs ), ys );
+        float32v y1z = Lerp( Lerp( y001, y101, xs ), Lerp( y011, y111, xs ), ys );
+        float32v z1z = Lerp( Lerp( z001, z101, xs ), Lerp( z011, z111, xs ), ys );
 
         float32v normalise = float32v( 1.0f / (0x3ff / 2.0f) );
 
-        float32v xWarp = (FnUtils::Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v yWarp = (FnUtils::Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v zWarp = (FnUtils::Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        float32v xWarp = (Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        float32v yWarp = (Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        float32v zWarp = (Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
@@ -125,27 +125,27 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
             
     float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-        float32v ws = FS_Floor_f32( w );
-
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( FnPrimes::W );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-        int32v w1 = w0 + int32v( FnPrimes::W );
-
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
-        ws = FnUtils::InterpHermite( w - ws );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
+        float32v ws = FS::Floor( w );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( Primes::W );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
+        int32v w1 = w0 + int32v( Primes::W );
+
+        xs = InterpHermite( x - xs );
+        ys = InterpHermite( y - ys );
+        zs = InterpHermite( z - zs );
+        ws = InterpHermite( w - ws );
 
     #define GRADIENT_COORD( _x, _y, _z, _w )\
-        int32v hash##_x##_y##_z##_w = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
+        int32v hash##_x##_y##_z##_w = HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
         float32v x##_x##_y##_z##_w = FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) );\
         float32v y##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
         float32v z##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
@@ -170,22 +170,22 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtua
 
     #undef GRADIENT_COORD
 
-        float32v x0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0000, x1000, xs ), FnUtils::Lerp( x0100, x1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0010, x1010, xs ), FnUtils::Lerp( x0110, x1110, xs ), ys ), zs );
-        float32v y0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0000, y1000, xs ), FnUtils::Lerp( y0100, y1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0010, y1010, xs ), FnUtils::Lerp( y0110, y1110, xs ), ys ), zs );
-        float32v z0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0000, z1000, xs ), FnUtils::Lerp( z0100, z1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0010, z1010, xs ), FnUtils::Lerp( z0110, z1110, xs ), ys ), zs );
-        float32v w0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0000, w1000, xs ), FnUtils::Lerp( w0100, w1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0010, w1010, xs ), FnUtils::Lerp( w0110, w1110, xs ), ys ), zs );
+        float32v x0w = Lerp( Lerp( Lerp( x0000, x1000, xs ), Lerp( x0100, x1100, xs ), ys ), Lerp( Lerp( x0010, x1010, xs ), Lerp( x0110, x1110, xs ), ys ), zs );
+        float32v y0w = Lerp( Lerp( Lerp( y0000, y1000, xs ), Lerp( y0100, y1100, xs ), ys ), Lerp( Lerp( y0010, y1010, xs ), Lerp( y0110, y1110, xs ), ys ), zs );
+        float32v z0w = Lerp( Lerp( Lerp( z0000, z1000, xs ), Lerp( z0100, z1100, xs ), ys ), Lerp( Lerp( z0010, z1010, xs ), Lerp( z0110, z1110, xs ), ys ), zs );
+        float32v w0w = Lerp( Lerp( Lerp( w0000, w1000, xs ), Lerp( w0100, w1100, xs ), ys ), Lerp( Lerp( w0010, w1010, xs ), Lerp( w0110, w1110, xs ), ys ), zs );
 
-        float32v x1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0001, x1001, xs ), FnUtils::Lerp( x0101, x1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0011, x1011, xs ), FnUtils::Lerp( x0111, x1111, xs ), ys ), zs );
-        float32v y1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0001, y1001, xs ), FnUtils::Lerp( y0101, y1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0011, y1011, xs ), FnUtils::Lerp( y0111, y1111, xs ), ys ), zs );
-        float32v z1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0001, z1001, xs ), FnUtils::Lerp( z0101, z1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0011, z1011, xs ), FnUtils::Lerp( z0111, z1111, xs ), ys ), zs );
-        float32v w1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0001, w1001, xs ), FnUtils::Lerp( w0101, w1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0011, w1011, xs ), FnUtils::Lerp( w0111, w1111, xs ), ys ), zs );                        
+        float32v x1w = Lerp( Lerp( Lerp( x0001, x1001, xs ), Lerp( x0101, x1101, xs ), ys ), Lerp( Lerp( x0011, x1011, xs ), Lerp( x0111, x1111, xs ), ys ), zs );
+        float32v y1w = Lerp( Lerp( Lerp( y0001, y1001, xs ), Lerp( y0101, y1101, xs ), ys ), Lerp( Lerp( y0011, y1011, xs ), Lerp( y0111, y1111, xs ), ys ), zs );
+        float32v z1w = Lerp( Lerp( Lerp( z0001, z1001, xs ), Lerp( z0101, z1101, xs ), ys ), Lerp( Lerp( z0011, z1011, xs ), Lerp( z0111, z1111, xs ), ys ), zs );
+        float32v w1w = Lerp( Lerp( Lerp( w0001, w1001, xs ), Lerp( w0101, w1101, xs ), ys ), Lerp( Lerp( w0011, w1011, xs ), Lerp( w0111, w1111, xs ), ys ), zs );                        
 
         float32v normalise = float32v( 1.0f / (0xff / 2.0f) );
 
-        float32v xWarp = (FnUtils::Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v yWarp = (FnUtils::Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v zWarp = (FnUtils::Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v wWarp = (FnUtils::Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v xWarp = (Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v yWarp = (Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v zWarp = (Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v wWarp = (Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
diff --git a/include/FastNoise/Generators/DomainWarpFractal.inl b/include/FastNoise/Generators/DomainWarpFractal.inl
index 6bcd5503..f6cc8f26 100644
--- a/include/FastNoise/Generators/DomainWarpFractal.inl
+++ b/include/FastNoise/Generators/DomainWarpFractal.inl
@@ -25,7 +25,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalProgressive, FS> : pub
         {
             seedInc -= int32v( -1 );
             freq *= lacunarity;
-            amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
+            amp *= Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
             amp *= gain;
             strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
         }
@@ -59,7 +59,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalIndependant, FS> : pub
             {
                 seedInc -= int32v( -1 );
                 freq *= lacunarity;
-                amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
+                amp *= Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
                 amp *= gain;
                 strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
             }
diff --git a/include/FastNoise/Generators/Fractal.inl b/include/FastNoise/Generators/Fractal.inl
index 8d996f1e..4a1b7d49 100644
--- a/include/FastNoise/Generators/Fractal.inl
+++ b/include/FastNoise/Generators/Fractal.inl
@@ -26,7 +26,7 @@ class FastSIMD::DispatchClass<FastNoise::FractalFBm, FS> : public virtual FastNo
         for( int i = 1; i < mOctaves; i++ )
         {
             seed -= int32v( -1 );
-            amp *= FnUtils::Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
+            amp *= Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
             amp *= gain;
 
             noise = this->GetSourceValue( mSource, seed, (pos *= lacunarity)... );
@@ -55,7 +55,7 @@ class FastSIMD::DispatchClass<FastNoise::FractalRidged, FS> : public virtual Fas
         for( int i = 1; i < mOctaves; i++ )
         {
             seed -= int32v( -1 );
-            amp *= FnUtils::Lerp( float32v( 1 ), float32v( 1 ) - noise, weightedStrength );
+            amp *= Lerp( float32v( 1 ), float32v( 1 ) - noise, weightedStrength );
             amp *= gain;
 
             noise = FS::Abs( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) );
@@ -91,7 +91,7 @@ class FastSIMD::DispatchClass<FastNoise::FractalPingPong, FS> : public virtual F
         for( int i = 1; i < mOctaves; i++ )
         {
             seed -= int32v( -1 );
-            amp *= FnUtils::Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
+            amp *= Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
             amp *= gain;
 
             noise = PingPong( (this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) + float32v( 1 )) * pingPongStrength );
diff --git a/include/FastNoise/Generators/Perlin.h b/include/FastNoise/Generators/Perlin.h
index 4f9db4f8..267c8d00 100644
--- a/include/FastNoise/Generators/Perlin.h
+++ b/include/FastNoise/Generators/Perlin.h
@@ -1,23 +1,24 @@
-#pragma once
-#include "Generator.h"
-
-namespace FastNoise
-{
-    class Perlin : public virtual Generator
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Perlin> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+namespace FastNoise
+{
+    class Perlin : public virtual Generator
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Perlin> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Coherent Noise" );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Perlin.inl b/include/FastNoise/Generators/Perlin.inl
index 7b59ef33..fced7329 100644
--- a/include/FastNoise/Generators/Perlin.inl
+++ b/include/FastNoise/Generators/Perlin.inl
@@ -1,45 +1,43 @@
-#include "FastSIMD/InlInclude.h"
-
 #include "Perlin.h"
 #include "Utils.inl"
 
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Perlin, FS> : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
 
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
 
         float32v xf0 = xs = x - xs;
         float32v yf0 = ys = y - ys;
         float32v xf1 = xf0 - float32v( 1 );
         float32v yf1 = yf0 - float32v( 1 );
 
-        xs = FnUtils::InterpQuintic( xs );
-        ys = FnUtils::InterpQuintic( ys );
+        xs = InterpQuintic( xs );
+        ys = InterpQuintic( ys );
 
-        return float32v( 0.579106986522674560546875f ) * FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0 ), xf0, yf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1 ), xf0, yf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys );
+        return float32v( 0.579106986522674560546875f ) * Lerp(
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y0 ), xf0, yf0 ), GetGradientDot( HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1 ), xf0, yf1 ), GetGradientDot( HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
 
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
 
         float32v xf0 = xs = x - xs;
         float32v yf0 = ys = y - ys;
@@ -48,33 +46,33 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, FS> : public virtual FastNoise:
         float32v yf1 = yf0 - float32v( 1 );
         float32v zf1 = zf0 - float32v( 1 );
 
-        xs = FnUtils::InterpQuintic( xs );
-        ys = FnUtils::InterpQuintic( ys );
-        zs = FnUtils::InterpQuintic( zs );
+        xs = InterpQuintic( xs );
+        ys = InterpQuintic( ys );
+        zs = InterpQuintic( zs );
 
-        return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
-            FnUtils::Lerp( 
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs );
+        return float32v( 0.964921414852142333984375f ) * Lerp( Lerp(
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), GetGradientDot( HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), GetGradientDot( HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
+            Lerp( 
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), GetGradientDot( HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),    
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-        float32v ws = FS_Floor_f32( w );
-
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( FnPrimes::W );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-        int32v w1 = w0 + int32v( FnPrimes::W );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
+        float32v ws = FS::Floor( w );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( Primes::W );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
+        int32v w1 = w0 + int32v( Primes::W );
 
         float32v xf0 = xs = x - xs;
         float32v yf0 = ys = y - ys;
@@ -85,22 +83,22 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, FS> : public virtual FastNoise:
         float32v zf1 = zf0 - float32v( 1 );
         float32v wf1 = wf0 - float32v( 1 );
 
-        xs = FnUtils::InterpQuintic( xs );
-        ys = FnUtils::InterpQuintic( ys );
-        zs = FnUtils::InterpQuintic( zs );
-        ws = FnUtils::InterpQuintic( ws );
-
-        return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
-            FnUtils::Lerp(                                                                                                                                                     
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w0 ), xf0, yf0, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w0 ), xf1, yf0, zf1, wf0 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w0 ), xf0, yf1, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w0 ), xf1, yf1, zf1, wf0 ), xs ), ys ), zs ),
-            FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w1 ), xf0, yf0, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w1 ), xf1, yf0, zf0, wf1 ), xs ),
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
-            FnUtils::Lerp(                                                                                                                                                     
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws );
+        xs = InterpQuintic( xs );
+        ys = InterpQuintic( ys );
+        zs = InterpQuintic( zs );
+        ws = InterpQuintic( ws );
+
+        return float32v( 0.964921414852142333984375f ) * Lerp( Lerp( Lerp(
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), GetGradientDot( HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), GetGradientDot( HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
+            Lerp(                                                                                                                                                     
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z1, w0 ), xf0, yf0, zf1, wf0 ), GetGradientDot( HashPrimes( seed, x1, y0, z1, w0 ), xf1, yf0, zf1, wf0 ), xs ),    
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1, w0 ), xf0, yf1, zf1, wf0 ), GetGradientDot( HashPrimes( seed, x1, y1, z1, w0 ), xf1, yf1, zf1, wf0 ), xs ), ys ), zs ),
+            Lerp( Lerp(
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z0, w1 ), xf0, yf0, zf0, wf1 ), GetGradientDot( HashPrimes( seed, x1, y0, z0, w1 ), xf1, yf0, zf0, wf1 ), xs ),
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
+            Lerp(                                                                                                                                                     
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), GetGradientDot( HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),    
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws );
     }
 };
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index 592ab430..a87d0d43 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -1,367 +1,367 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Simplex.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Simplex, FS> : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        const float SQRT3 = 1.7320508075688772935274463415059f;
-        const float F2 = 0.5f * (SQRT3 - 1.0f);
-        const float G2 = (3.0f - SQRT3) / 6.0f;
-
-        float32v f = float32v( F2 ) * (x + y);
-        float32v x0 = FS_Floor_f32( x + f );
-        float32v y0 = FS_Floor_f32( y + f );
-
-        int32v i = FS::Convert<int32_t>( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS::Convert<int32_t>( y0 ) * int32v( FnPrimes::Y );
-
-        float32v g = float32v( G2 ) * (x0 + y0);
-        x0 = x - (x0 - g);
-        y0 = y - (y0 - g);
-
-        mask32v i1 = x0 > y0;
-        //mask32v j1 = ~i1; //NMasked funcs
-
-        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
-        float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
-
-        float32v x2 = x0 + float32v( G2 * 2 - 1 );
-        float32v y2 = y0 + float32v( G2 * 2 - 1 );
-
-        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, float32v( 0.5f ) ) );
-        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, float32v( 0.5f ) ) );
-        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, float32v( 0.5f ) ) );
-
-        t0 = FS::Max( t0, float32v( 0 ) );
-        t1 = FS::Max( t1, float32v( 0 ) );
-        t2 = FS::Max( t2, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-
-        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
-        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
-        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
-
-        return float32v( 38.283687591552734375f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        const float F3 = 1.0f / 3.0f;
-        const float G3 = 1.0f / 2.0f;
-
-        float32v s = float32v( F3 ) * (x + y + z);
-        x += s;
-        y += s;
-        z += s;
-
-        float32v x0 = FS_Floor_f32( x );
-        float32v y0 = FS_Floor_f32( y );
-        float32v z0 = FS_Floor_f32( z );
-        float32v xi = x - x0;
-        float32v yi = y - y0;
-        float32v zi = z - z0;
-
-        int32v i = FS::Convert<int32_t>( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS::Convert<int32_t>( y0 ) * int32v( FnPrimes::Y );
-        int32v k = FS::Convert<int32_t>( z0 ) * int32v( FnPrimes::Z );
-
-        mask32v x_ge_y = xi >= yi;
-        mask32v y_ge_z = yi >= zi;
-        mask32v x_ge_z = xi >= zi;
-
-        float32v g = float32v( G3 ) * (xi + yi + zi);
-        x0 = xi - g;
-        y0 = yi - g;
-        z0 = zi - g;
-
-        mask32v i1 = x_ge_y & x_ge_z;
-        mask32v j1 = FS_BitwiseAndNot_m32( y_ge_z, x_ge_y );
-        mask32v k1 = FS_BitwiseAndNot_m32( ~x_ge_z, y_ge_z );
-
-        mask32v i2 = x_ge_y | x_ge_z;
-        mask32v j2 = ~x_ge_y | y_ge_z;
-        mask32v k2 = x_ge_z & y_ge_z; //NMasked
-
-        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G3 );
-        float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G3 );
-        float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G3 );
-        float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G3 * 2 );
-        float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G3 * 2 );
-        float32v z2 = FS_NMaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G3 * 2 );
-        float32v x3 = x0 + float32v( G3 * 3 - 1 );
-        float32v y3 = y0 + float32v( G3 * 3 - 1 );
-        float32v z3 = z0 + float32v( G3 * 3 - 1 );
-
-        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, FS::FNMulAdd( z0, z0, float32v( 0.6f ) ) ) );
-        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, FS::FNMulAdd( z1, z1, float32v( 0.6f ) ) ) );
-        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, FS::FNMulAdd( z2, z2, float32v( 0.6f ) ) ) );
-        float32v t3 = FS::FNMulAdd( x3, x3, FS::FNMulAdd( y3, y3, FS::FNMulAdd( z3, z3, float32v( 0.6f ) ) ) );
-
-        t0 = FS::Max( t0, float32v( 0 ) );
-        t1 = FS::Max( t1, float32v( 0 ) );
-        t2 = FS::Max( t2, float32v( 0 ) );
-        t3 = FS::Max( t3, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-        t3 *= t3; t3 *= t3;             
-
-        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k ), x0, y0, z0 );
-        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ), FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ) ), x1, y1, z1 );
-        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ), FS_NMaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ) ), x2, y2, z2 );
-        float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ) ), x3, y3, z3 );
-
-        return float32v( 32.69428253173828125f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, n3 * t3 ) ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        const float SQRT5 = 2.236067977499f;
-        const float F4 = (SQRT5 - 1.0f) / 4.0f;
-        const float G4 = (5.0f - SQRT5) / 20.0f;
-
-        float32v s = float32v( F4 ) * (x + y + z + w);
-        x += s;
-        y += s;
-        z += s;
-        w += s;
-
-        float32v x0 = FS_Floor_f32( x );
-        float32v y0 = FS_Floor_f32( y );
-        float32v z0 = FS_Floor_f32( z );
-        float32v w0 = FS_Floor_f32( w );
-        float32v xi = x - x0;
-        float32v yi = y - y0;
-        float32v zi = z - z0;
-        float32v wi = w - w0;
-
-        int32v i = FS::Convert<int32_t>( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS::Convert<int32_t>( y0 ) * int32v( FnPrimes::Y );
-        int32v k = FS::Convert<int32_t>( z0 ) * int32v( FnPrimes::Z );
-        int32v l = FS::Convert<int32_t>( w0 ) * int32v( FnPrimes::W );
-
-        float32v g = float32v( G4 ) * (xi + yi + zi + wi);
-        x0 = xi - g;
-        y0 = yi - g;
-        z0 = zi - g;
-        w0 = wi - g;
-
-        int32v rankx( 0 );
-        int32v ranky( 0 );
-        int32v rankz( 0 );
-        int32v rankw( 0 );
-
-        mask32v x_ge_y = x0 >= y0;
-        rankx = FS::MaskedIncrement( rankx, x_ge_y );
-        ranky = FS::MaskedIncrement( ranky, ~x_ge_y );
-
-        mask32v x_ge_z = x0 >= z0;
-        rankx = FS::MaskedIncrement( rankx, x_ge_z );
-        rankz = FS::MaskedIncrement( rankz, ~x_ge_z );
-
-        mask32v x_ge_w = x0 >= w0;
-        rankx = FS::MaskedIncrement( rankx, x_ge_w );
-        rankw = FS::MaskedIncrement( rankw, ~x_ge_w );
-
-        mask32v y_ge_z = y0 >= z0;
-        ranky = FS::MaskedIncrement( ranky, y_ge_z );
-        rankz = FS::MaskedIncrement( rankz, ~y_ge_z );
-
-        mask32v y_ge_w = y0 >= w0;
-        ranky = FS::MaskedIncrement( ranky, y_ge_w );
-        rankw = FS::MaskedIncrement( rankw, ~y_ge_w );
-
-        mask32v z_ge_w = z0 >= w0;
-        rankz = FS::MaskedIncrement( rankz, z_ge_w );
-        rankw = FS::MaskedIncrement( rankw, ~z_ge_w );
-
-        mask32v i1 = rankx > int32v( 2 );
-        mask32v j1 = ranky > int32v( 2 );
-        mask32v k1 = rankz > int32v( 2 );
-        mask32v l1 = rankw > int32v( 2 );
-
-        mask32v i2 = rankx > int32v( 1 );
-        mask32v j2 = ranky > int32v( 1 );
-        mask32v k2 = rankz > int32v( 1 );
-        mask32v l2 = rankw > int32v( 1 );
-
-        mask32v i3 = rankx > int32v( 0 );
-        mask32v j3 = ranky > int32v( 0 );
-        mask32v k3 = rankz > int32v( 0 );
-        mask32v l3 = rankw > int32v( 0 );
-
-        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G4 );
-        float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G4 );
-        float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G4 );
-        float32v w1 = FS_MaskedSub_f32( w0, float32v( 1 ), l1 ) + float32v( G4 );
-        float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G4 * 2 );
-        float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G4 * 2 );
-        float32v z2 = FS_MaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G4 * 2 );
-        float32v w2 = FS_MaskedSub_f32( w0, float32v( 1 ), l2 ) + float32v( G4 * 2 );
-        float32v x3 = FS_MaskedSub_f32( x0, float32v( 1 ), i3 ) + float32v( G4 * 3 );
-        float32v y3 = FS_MaskedSub_f32( y0, float32v( 1 ), j3 ) + float32v( G4 * 3 );
-        float32v z3 = FS_MaskedSub_f32( z0, float32v( 1 ), k3 ) + float32v( G4 * 3 );
-        float32v w3 = FS_MaskedSub_f32( w0, float32v( 1 ), l3 ) + float32v( G4 * 3 );
-        float32v x4 = x0 + float32v( G4 * 4 - 1 );
-        float32v y4 = y0 + float32v( G4 * 4 - 1 );
-        float32v z4 = z0 + float32v( G4 * 4 - 1 );
-        float32v w4 = w0 + float32v( G4 * 4 - 1 );
-
-        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, FS::FNMulAdd( z0, z0, FS::FNMulAdd( w0, w0, float32v( 0.6f ) ) ) ) );
-        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, FS::FNMulAdd( z1, z1, FS::FNMulAdd( w1, w1, float32v( 0.6f ) ) ) ) );
-        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, FS::FNMulAdd( z2, z2, FS::FNMulAdd( w2, w2, float32v( 0.6f ) ) ) ) );
-        float32v t3 = FS::FNMulAdd( x3, x3, FS::FNMulAdd( y3, y3, FS::FNMulAdd( z3, z3, FS::FNMulAdd( w3, w3, float32v( 0.6f ) ) ) ) );
-        float32v t4 = FS::FNMulAdd( x4, x4, FS::FNMulAdd( y4, y4, FS::FNMulAdd( z4, z4, FS::FNMulAdd( w4, w4, float32v( 0.6f ) ) ) ) );
-
-        t0 = FS::Max( t0, float32v( 0 ) );
-        t1 = FS::Max( t1, float32v( 0 ) );
-        t2 = FS::Max( t2, float32v( 0 ) );
-        t3 = FS::Max( t3, float32v( 0 ) );
-        t4 = FS::Max( t4, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-        t3 *= t3; t3 *= t3;
-        t4 *= t4; t4 *= t4;
-
-        float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k, l ), x0, y0, z0, w0 );
-        float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, 
-            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ),
-            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ),
-            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ),
-            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l1 ) ), x1, y1, z1, w1 );
-        float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, 
-            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ),
-            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ),
-            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ),
-            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l2 ) ), x2, y2, z2, w2 );
-        float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
-            FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i3 ),
-            FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j3 ),
-            FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k3 ),
-            FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l3 ) ), x3, y3, z3, w3 );
-        float32v n4 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ), l + int32v( FnPrimes::W ) ), x4, y4, z4, w4 );
-
-        return float32v( 27.f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, FS::FMulAdd( n3, t3, n4 * t4 ) ) ) );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, FS> : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        const float SQRT3 = 1.7320508075f;
-        const float F2 = 0.5f * (SQRT3 - 1.0f);
-        const float G2 = (3.0f - SQRT3) / 6.0f;
-
-        float32v f = float32v( F2 ) * (x + y);
-        float32v x0 = FS_Floor_f32( x + f );
-        float32v y0 = FS_Floor_f32( y + f );
-
-        int32v i = FS::Convert<int32_t>( x0 ) * int32v( FnPrimes::X );
-        int32v j = FS::Convert<int32_t>( y0 ) * int32v( FnPrimes::Y );
-
-        float32v g = float32v( G2 ) * (x0 + y0);
-        x0 = x - (x0 - g);
-        y0 = y - (y0 - g);
-
-        mask32v i1 = x0 > y0;
-        //mask32v j1 = ~i1; //NMasked funcs
-
-        float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
-        float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
-        float32v x2 = x0 + float32v( (G2 * 2) - 1 );
-        float32v y2 = y0 + float32v( (G2 * 2) - 1 );
-
-        float32v t0 = float32v( 0.5f ) - (x0 * x0) - (y0 * y0);
-        float32v t1 = float32v( 0.5f ) - (x1 * x1) - (y1 * y1);
-        float32v t2 = float32v( 0.5f ) - (x2 * x2) - (y2 * y2);
-
-        t0 = FS::Max( t0, float32v( 0 ) );
-        t1 = FS::Max( t1, float32v( 0 ) );
-        t2 = FS::Max( t2, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-
-        float32v n0 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
-        float32v n1 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
-        float32v n2 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
-
-        return float32v( 49.918426513671875f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v f = float32v( 2.0f / 3.0f ) * (x + y + z);
-        float32v xr = f - x;
-        float32v yr = f - y;
-        float32v zr = f - z;
-
-        float32v val( 0 );
-        for( size_t i = 0; ; i++ )
-        {
-            float32v v0xr = FS::Round( xr );
-            float32v v0yr = FS::Round( yr );
-            float32v v0zr = FS::Round( zr );
-            float32v d0xr = xr - v0xr;
-            float32v d0yr = yr - v0yr;
-            float32v d0zr = zr - v0zr;
-
-            float32v score0xr = FS::Abs( d0xr );
-            float32v score0yr = FS::Abs( d0yr );
-            float32v score0zr = FS::Abs( d0zr );
-            mask32v dir0xr = FS::Max( score0yr, score0zr ) <= score0xr;
-            mask32v dir0yr = FS_BitwiseAndNot_m32( FS::Max( score0zr, score0xr ) <= score0yr, dir0xr );
-            mask32v dir0zr = ~(dir0xr | dir0yr);
-            float32v v1xr = FS_MaskedAdd_f32( v0xr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0xr ), dir0xr );
-            float32v v1yr = FS_MaskedAdd_f32( v0yr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0yr ), dir0yr );
-            float32v v1zr = FS_MaskedAdd_f32( v0zr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0zr ), dir0zr );
-            float32v d1xr = xr - v1xr;
-            float32v d1yr = yr - v1yr;
-            float32v d1zr = zr - v1zr;
-
-            int32v hv0xr = FS::Convert<int32_t>( v0xr ) * int32v( FnPrimes::X );
-            int32v hv0yr = FS::Convert<int32_t>( v0yr ) * int32v( FnPrimes::Y );
-            int32v hv0zr = FS::Convert<int32_t>( v0zr ) * int32v( FnPrimes::Z );
-
-            int32v hv1xr = FS::Convert<int32_t>( v1xr ) * int32v( FnPrimes::X );
-            int32v hv1yr = FS::Convert<int32_t>( v1yr ) * int32v( FnPrimes::Y );
-            int32v hv1zr = FS::Convert<int32_t>( v1zr ) * int32v( FnPrimes::Z );
-
-            float32v t0 = FS::FNMulAdd( d0zr, d0zr, FS::FNMulAdd( d0yr, d0yr, FS::FNMulAdd( d0xr, d0xr, float32v( 0.6f ) ) ) );
-            float32v t1 = FS::FNMulAdd( d1zr, d1zr, FS::FNMulAdd( d1yr, d1yr, FS::FNMulAdd( d1xr, d1xr, float32v( 0.6f ) ) ) );
-            t0 = FS::Max( t0, float32v( 0 ) );
-            t1 = FS::Max( t1, float32v( 0 ) );
-            t0 *= t0; t0 *= t0;
-            t1 *= t1; t1 *= t1;
-
-            float32v v0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv0xr, hv0yr, hv0zr ), d0xr, d0yr, d0zr );
-            float32v v1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv1xr, hv1yr, hv1zr ), d1xr, d1yr, d1zr );
-
-            val = FS::FMulAdd( v0, t0, FS::FMulAdd( v1, t1, val ) );
-
-            if( i == 1 )
-            {
-                break;
-            }
-
-            xr += float32v( 0.5f );
-            yr += float32v( 0.5f );
-            zr += float32v( 0.5f );
-            seed = ~seed;
-        }
-
-        return float32v( 32.69428253173828125f ) * val;
-    } 
-};
-
+#include "Simplex.h"
+#include "Utils.inl"
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{ // Simplex generator for one SIMD feature set; supplies the 2D, 3D and 4D Gen overloads below
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    { // 2D: scaled sum of three corner terms, each a falloff raised to the 4th power times a gradient dot
+        const float SQRT3 = 1.7320508075688772935274463415059f;
+        const float F2 = 0.5f * (SQRT3 - 1.0f); // multiplies (x + y) before flooring
+        const float G2 = (3.0f - SQRT3) / 6.0f; // multiplies the floored sum when forming the corner offsets
+
+        float32v f = float32v( F2 ) * (x + y);
+        float32v x0 = FS::Floor( x + f );
+        float32v y0 = FS::Floor( y + f );
+
+        int32v i = FS::Convert<int32_t>( x0 ) * int32v( Primes::X ); // floored coords scaled by the per-axis primes, as handed to HashPrimes
+        int32v j = FS::Convert<int32_t>( y0 ) * int32v( Primes::Y );
+
+        float32v g = float32v( G2 ) * (x0 + y0);
+        x0 = x - (x0 - g); // x0/y0 are reused: from here on they are the input's offset from the first corner
+        y0 = y - (y0 - g);
+
+        mask32v i1 = x0 > y0; // chooses the second corner: the X step is applied where this is set
+        //mask32v j1 = ~i1; // not materialised; the InvMasked* calls are handed i1 instead (presumably acting where i1 is clear)
+
+        float32v x1 = FS::MaskedSub( i1, x0, float32v( 1.f ) ) + float32v( G2 );
+        float32v y1 = FS::InvMaskedSub( i1, y0, float32v( 1.f ) ) + float32v( G2 );
+
+        float32v x2 = x0 + float32v( G2 * 2 - 1 ); // third corner always uses the +1 step on both axes (see n2 below)
+        float32v y2 = y0 + float32v( G2 * 2 - 1 );
+
+        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, float32v( 0.5f ) ) ); // presumably 0.5 - y*y - x*x (FNMulAdd taken to be c - a*b; confirm against FastSIMD)
+        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, float32v( 0.5f ) ) );
+        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, float32v( 0.5f ) ) );
+
+        t0 = FS::Max( t0, float32v( 0 ) ); // negative falloff is clamped to zero, so that corner's product below is zero
+        t1 = FS::Max( t1, float32v( 0 ) );
+        t2 = FS::Max( t2, float32v( 0 ) );
+
+        t0 *= t0; t0 *= t0; // two squarings: t becomes t^4
+        t1 *= t1; t1 *= t1;
+        t2 *= t2; t2 *= t2;
+
+        float32v n0 = GetGradientDot( HashPrimes( seed, i, j ), x0, y0 ); // gradient term per corner, keyed by seed and that corner's prime-scaled coords
+        float32v n1 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::InvMaskedAdd( i1, j, int32v( Primes::Y ) ) ), x1, y1 );
+        float32v n2 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ) ), x2, y2 );
+
+        return float32v( 38.283687591552734375f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) ); // constant scales the summed terms (presumably an output-range normalisation; not derivable here)
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    { // 3D: same scheme with four corners; the two middle corners follow from the ordering of xi, yi, zi
+        const float F3 = 1.0f / 3.0f;
+        const float G3 = 1.0f / 2.0f;
+
+        float32v s = float32v( F3 ) * (x + y + z);
+        x += s; // parameters are by-value copies, so the shared offset is added in place
+        y += s;
+        z += s;
+
+        float32v x0 = FS::Floor( x );
+        float32v y0 = FS::Floor( y );
+        float32v z0 = FS::Floor( z );
+        float32v xi = x - x0; // fractional part after the offset
+        float32v yi = y - y0;
+        float32v zi = z - z0;
+
+        int32v i = FS::Convert<int32_t>( x0 ) * int32v( Primes::X ); // floored coords scaled by the per-axis primes, as handed to HashPrimes
+        int32v j = FS::Convert<int32_t>( y0 ) * int32v( Primes::Y );
+        int32v k = FS::Convert<int32_t>( z0 ) * int32v( Primes::Z );
+
+        mask32v x_ge_y = xi >= yi; // pairwise orderings of the fractional parts
+        mask32v y_ge_z = yi >= zi;
+        mask32v x_ge_z = xi >= zi;
+
+        float32v g = float32v( G3 ) * (xi + yi + zi);
+        x0 = xi - g; // x0/y0/z0 are reused as the offset from the first corner
+        y0 = yi - g;
+        z0 = zi - g;
+
+        mask32v i1 = x_ge_y & x_ge_z; // x is the largest
+        mask32v j1 = FS::BitwiseAndNot( y_ge_z, x_ge_y ); // assuming BitwiseAndNot( a, b ) is a & ~b: y is the largest
+        mask32v k1 = FS::BitwiseAndNot( ~x_ge_z, y_ge_z ); // under the same assumption: z is the largest
+
+        mask32v i2 = x_ge_y | x_ge_z; // x is not the strict smallest
+        mask32v j2 = ~x_ge_y | y_ge_z; // y is not the strict smallest
+        mask32v k2 = x_ge_z & y_ge_z; // set where z is the smallest; consumed through the InvMasked* calls below
+
+        float32v x1 = FS::MaskedSub( i1, x0, float32v( 1 ) ) + float32v( G3 );
+        float32v y1 = FS::MaskedSub( j1, y0, float32v( 1 ) ) + float32v( G3 );
+        float32v z1 = FS::MaskedSub( k1, z0, float32v( 1 ) ) + float32v( G3 );
+        float32v x2 = FS::MaskedSub( i2, x0, float32v( 1 ) ) + float32v( G3 * 2 );
+        float32v y2 = FS::MaskedSub( j2, y0, float32v( 1 ) ) + float32v( G3 * 2 );
+        float32v z2 = FS::InvMaskedSub( k2, z0, float32v( 1 ) ) + float32v( G3 * 2 );
+        float32v x3 = x0 + float32v( G3 * 3 - 1 ); // last corner always uses the +1 step on every axis (see n3 below)
+        float32v y3 = y0 + float32v( G3 * 3 - 1 );
+        float32v z3 = z0 + float32v( G3 * 3 - 1 );
+
+        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, FS::FNMulAdd( z0, z0, float32v( 0.6f ) ) ) ); // presumably 0.6 minus the squared offsets (FNMulAdd taken to be c - a*b)
+        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, FS::FNMulAdd( z1, z1, float32v( 0.6f ) ) ) );
+        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, FS::FNMulAdd( z2, z2, float32v( 0.6f ) ) ) );
+        float32v t3 = FS::FNMulAdd( x3, x3, FS::FNMulAdd( y3, y3, FS::FNMulAdd( z3, z3, float32v( 0.6f ) ) ) );
+
+        t0 = FS::Max( t0, float32v( 0 ) ); // clamp negative falloff to zero
+        t1 = FS::Max( t1, float32v( 0 ) );
+        t2 = FS::Max( t2, float32v( 0 ) );
+        t3 = FS::Max( t3, float32v( 0 ) );
+
+        t0 *= t0; t0 *= t0; // two squarings: t becomes t^4
+        t1 *= t1; t1 *= t1;
+        t2 *= t2; t2 *= t2;
+        t3 *= t3; t3 *= t3;             
+
+        float32v n0 = GetGradientDot( HashPrimes( seed, i, j, k ), x0, y0, z0 ); // gradient term per corner; masks decide which axes receive the prime step
+        float32v n1 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::MaskedAdd( j1, j, int32v( Primes::Y ) ), FS::MaskedAdd( k1, k, int32v( Primes::Z ) ) ), x1, y1, z1 );
+        float32v n2 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i2, i, int32v( Primes::X ) ), FS::MaskedAdd( j2, j, int32v( Primes::Y ) ), FS::InvMaskedAdd( k2, k, int32v( Primes::Z ) ) ), x2, y2, z2 );
+        float32v n3 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ), k + int32v( Primes::Z ) ), x3, y3, z3 );
+
+        return float32v( 32.69428253173828125f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, n3 * t3 ) ) ); // constant scales the summed terms (presumably an output-range normalisation)
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    { // 4D: five corners; the three middle corners come from ranking x0..w0 against each other
+        const float SQRT5 = 2.236067977499f;
+        const float F4 = (SQRT5 - 1.0f) / 4.0f; // multiplies (x + y + z + w) before flooring
+        const float G4 = (5.0f - SQRT5) / 20.0f; // multiplies the fractional sum when forming the corner offsets
+
+        float32v s = float32v( F4 ) * (x + y + z + w);
+        x += s; // parameters are by-value copies, so the shared offset is added in place
+        y += s;
+        z += s;
+        w += s;
+
+        float32v x0 = FS::Floor( x );
+        float32v y0 = FS::Floor( y );
+        float32v z0 = FS::Floor( z );
+        float32v w0 = FS::Floor( w );
+        float32v xi = x - x0; // fractional part after the offset
+        float32v yi = y - y0;
+        float32v zi = z - z0;
+        float32v wi = w - w0;
+
+        int32v i = FS::Convert<int32_t>( x0 ) * int32v( Primes::X ); // floored coords scaled by the per-axis primes, as handed to HashPrimes
+        int32v j = FS::Convert<int32_t>( y0 ) * int32v( Primes::Y );
+        int32v k = FS::Convert<int32_t>( z0 ) * int32v( Primes::Z );
+        int32v l = FS::Convert<int32_t>( w0 ) * int32v( Primes::W );
+
+        float32v g = float32v( G4 ) * (xi + yi + zi + wi);
+        x0 = xi - g; // x0..w0 are reused as the offset from the first corner
+        y0 = yi - g;
+        z0 = zi - g;
+        w0 = wi - g;
+
+        int32v rankx( 0 ); // rank = number of pairwise comparisons this component wins; each takes part in three, so 0..3
+        int32v ranky( 0 );
+        int32v rankz( 0 );
+        int32v rankw( 0 );
+
+        mask32v x_ge_y = x0 >= y0;
+        rankx = FS::MaskedIncrement( x_ge_y, rankx ); // each comparison bumps exactly one of the two ranks (presumably +1 where the mask is set)
+        ranky = FS::MaskedIncrement( ~x_ge_y, ranky );
+
+        mask32v x_ge_z = x0 >= z0;
+        rankx = FS::MaskedIncrement( x_ge_z, rankx );
+        rankz = FS::MaskedIncrement( ~x_ge_z, rankz );
+
+        mask32v x_ge_w = x0 >= w0;
+        rankx = FS::MaskedIncrement( x_ge_w, rankx );
+        rankw = FS::MaskedIncrement( ~x_ge_w, rankw );
+
+        mask32v y_ge_z = y0 >= z0;
+        ranky = FS::MaskedIncrement( y_ge_z, ranky );
+        rankz = FS::MaskedIncrement( ~y_ge_z, rankz );
+
+        mask32v y_ge_w = y0 >= w0;
+        ranky = FS::MaskedIncrement( y_ge_w, ranky );
+        rankw = FS::MaskedIncrement( ~y_ge_w, rankw );
+
+        mask32v z_ge_w = z0 >= w0;
+        rankz = FS::MaskedIncrement( z_ge_w, rankz );
+        rankw = FS::MaskedIncrement( ~z_ge_w, rankw );
+
+        mask32v i1 = rankx > int32v( 2 ); // second corner: only an axis with rank 3 is stepped
+        mask32v j1 = ranky > int32v( 2 );
+        mask32v k1 = rankz > int32v( 2 );
+        mask32v l1 = rankw > int32v( 2 );
+
+        mask32v i2 = rankx > int32v( 1 ); // third corner: axes with rank 2 or 3 are stepped
+        mask32v j2 = ranky > int32v( 1 );
+        mask32v k2 = rankz > int32v( 1 );
+        mask32v l2 = rankw > int32v( 1 );
+
+        mask32v i3 = rankx > int32v( 0 ); // fourth corner: every axis except one with rank 0 is stepped
+        mask32v j3 = ranky > int32v( 0 );
+        mask32v k3 = rankz > int32v( 0 );
+        mask32v l3 = rankw > int32v( 0 );
+
+        float32v x1 = FS::MaskedSub( i1, x0, float32v( 1 ) ) + float32v( G4 );
+        float32v y1 = FS::MaskedSub( j1, y0, float32v( 1 ) ) + float32v( G4 );
+        float32v z1 = FS::MaskedSub( k1, z0, float32v( 1 ) ) + float32v( G4 );
+        float32v w1 = FS::MaskedSub( l1, w0, float32v( 1 ) ) + float32v( G4 );
+        float32v x2 = FS::MaskedSub( i2, x0, float32v( 1 ) ) + float32v( G4 * 2 );
+        float32v y2 = FS::MaskedSub( j2, y0, float32v( 1 ) ) + float32v( G4 * 2 );
+        float32v z2 = FS::MaskedSub( k2, z0, float32v( 1 ) ) + float32v( G4 * 2 );
+        float32v w2 = FS::MaskedSub( l2, w0, float32v( 1 ) ) + float32v( G4 * 2 );
+        float32v x3 = FS::MaskedSub( i3, x0, float32v( 1 ) ) + float32v( G4 * 3 );
+        float32v y3 = FS::MaskedSub( j3, y0, float32v( 1 ) ) + float32v( G4 * 3 );
+        float32v z3 = FS::MaskedSub( k3, z0, float32v( 1 ) ) + float32v( G4 * 3 );
+        float32v w3 = FS::MaskedSub( l3, w0, float32v( 1 ) ) + float32v( G4 * 3 );
+        float32v x4 = x0 + float32v( G4 * 4 - 1 ); // last corner always uses the +1 step on every axis (see n4 below)
+        float32v y4 = y0 + float32v( G4 * 4 - 1 );
+        float32v z4 = z0 + float32v( G4 * 4 - 1 );
+        float32v w4 = w0 + float32v( G4 * 4 - 1 );
+
+        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, FS::FNMulAdd( z0, z0, FS::FNMulAdd( w0, w0, float32v( 0.6f ) ) ) ) ); // presumably 0.6 minus the squared offsets (FNMulAdd taken to be c - a*b)
+        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, FS::FNMulAdd( z1, z1, FS::FNMulAdd( w1, w1, float32v( 0.6f ) ) ) ) );
+        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, FS::FNMulAdd( z2, z2, FS::FNMulAdd( w2, w2, float32v( 0.6f ) ) ) ) );
+        float32v t3 = FS::FNMulAdd( x3, x3, FS::FNMulAdd( y3, y3, FS::FNMulAdd( z3, z3, FS::FNMulAdd( w3, w3, float32v( 0.6f ) ) ) ) );
+        float32v t4 = FS::FNMulAdd( x4, x4, FS::FNMulAdd( y4, y4, FS::FNMulAdd( z4, z4, FS::FNMulAdd( w4, w4, float32v( 0.6f ) ) ) ) );
+
+        t0 = FS::Max( t0, float32v( 0 ) ); // clamp negative falloff to zero
+        t1 = FS::Max( t1, float32v( 0 ) );
+        t2 = FS::Max( t2, float32v( 0 ) );
+        t3 = FS::Max( t3, float32v( 0 ) );
+        t4 = FS::Max( t4, float32v( 0 ) );
+
+        t0 *= t0; t0 *= t0; // two squarings: t becomes t^4
+        t1 *= t1; t1 *= t1;
+        t2 *= t2; t2 *= t2;
+        t3 *= t3; t3 *= t3;
+        t4 *= t4; t4 *= t4;
+
+        float32v n0 = GetGradientDot( HashPrimes( seed, i, j, k, l ), x0, y0, z0, w0 ); // gradient term per corner; masks decide which axes receive the prime step
+        float32v n1 = GetGradientDot( HashPrimes( seed, 
+            FS::MaskedAdd( i1, i, int32v( Primes::X ) ),
+            FS::MaskedAdd( j1, j, int32v( Primes::Y ) ),
+            FS::MaskedAdd( k1, k, int32v( Primes::Z ) ),
+            FS::MaskedAdd( l1, l, int32v( Primes::W ) ) ), x1, y1, z1, w1 );
+        float32v n2 = GetGradientDot( HashPrimes( seed, 
+            FS::MaskedAdd( i2, i, int32v( Primes::X ) ),
+            FS::MaskedAdd( j2, j, int32v( Primes::Y ) ),
+            FS::MaskedAdd( k2, k, int32v( Primes::Z ) ),
+            FS::MaskedAdd( l2, l, int32v( Primes::W ) ) ), x2, y2, z2, w2 );
+        float32v n3 = GetGradientDot( HashPrimes( seed,
+            FS::MaskedAdd( i3, i, int32v( Primes::X ) ),
+            FS::MaskedAdd( j3, j, int32v( Primes::Y ) ),
+            FS::MaskedAdd( k3, k, int32v( Primes::Z ) ),
+            FS::MaskedAdd( l3, l, int32v( Primes::W ) ) ), x3, y3, z3, w3 );
+        float32v n4 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ), k + int32v( Primes::Z ), l + int32v( Primes::W ) ), x4, y4, z4, w4 );
+
+        return float32v( 27.f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, FS::FMulAdd( n3, t3, n4 * t4 ) ) ) ); // constant scales the summed terms (presumably an output-range normalisation)
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{ // OpenSimplex2 generator for one SIMD feature set; supplies the 2D and 3D Gen overloads below
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    { // 2D: scaled sum of three corner terms, each a falloff raised to the 4th power times a GetGradientDotFancy result
+        const float SQRT3 = 1.7320508075f;
+        const float F2 = 0.5f * (SQRT3 - 1.0f); // multiplies (x + y) before flooring
+        const float G2 = (3.0f - SQRT3) / 6.0f; // multiplies the floored sum when forming the corner offsets
+
+        float32v f = float32v( F2 ) * (x + y);
+        float32v x0 = FS::Floor( x + f );
+        float32v y0 = FS::Floor( y + f );
+
+        int32v i = FS::Convert<int32_t>( x0 ) * int32v( Primes::X ); // floored coords scaled by the per-axis primes, as handed to HashPrimes
+        int32v j = FS::Convert<int32_t>( y0 ) * int32v( Primes::Y );
+
+        float32v g = float32v( G2 ) * (x0 + y0);
+        x0 = x - (x0 - g); // x0/y0 are reused: from here on they are the input's offset from the first corner
+        y0 = y - (y0 - g);
+
+        mask32v i1 = x0 > y0; // chooses the second corner: the X step is applied where this is set
+        //mask32v j1 = ~i1; // not materialised; the InvMasked* calls are handed i1 instead (presumably acting where i1 is clear)
+
+        float32v x1 = FS::MaskedSub( i1, x0, float32v( 1.f ) ) + float32v( G2 );
+        float32v y1 = FS::InvMaskedSub( i1, y0, float32v( 1.f ) ) + float32v( G2 );
+        float32v x2 = x0 + float32v( (G2 * 2) - 1 ); // third corner always uses the +1 step on both axes (see n2 below)
+        float32v y2 = y0 + float32v( (G2 * 2) - 1 );
+
+        float32v t0 = float32v( 0.5f ) - (x0 * x0) - (y0 * y0); // falloff 0.5 - x*x - y*y, written with separate multiplies and subtractions
+        float32v t1 = float32v( 0.5f ) - (x1 * x1) - (y1 * y1);
+        float32v t2 = float32v( 0.5f ) - (x2 * x2) - (y2 * y2);
+
+        t0 = FS::Max( t0, float32v( 0 ) ); // negative falloff is clamped to zero, so that corner's product below is zero
+        t1 = FS::Max( t1, float32v( 0 ) );
+        t2 = FS::Max( t2, float32v( 0 ) );
+
+        t0 *= t0; t0 *= t0; // two squarings: t becomes t^4
+        t1 *= t1; t1 *= t1;
+        t2 *= t2; t2 *= t2;
+
+        float32v n0 = GetGradientDotFancy( HashPrimes( seed, i, j ), x0, y0 ); // gradient term per corner, keyed by seed and that corner's prime-scaled coords
+        float32v n1 = GetGradientDotFancy( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::InvMaskedAdd( i1, j, int32v( Primes::Y ) ) ), x1, y1 );
+        float32v n2 = GetGradientDotFancy( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ) ), x2, y2 );
+
+        return float32v( 49.918426513671875f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) ); // constant scales the summed terms (presumably an output-range normalisation; not derivable here)
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    { // 3D: the loop body runs twice; the second pass uses coords shifted by 0.5 and the bitwise-inverted seed
+        float32v f = float32v( 2.0f / 3.0f ) * (x + y + z);
+        float32v xr = f - x; // working coordinates used for the rest of the function
+        float32v yr = f - y;
+        float32v zr = f - z;
+
+        float32v val( 0 ); // running sum across both passes
+        for( size_t i = 0; ; i++ ) // no loop condition: leaves through the break once i == 1
+        {
+            float32v v0xr = FS::Round( xr ); // first point of this pass: the rounded coordinate
+            float32v v0yr = FS::Round( yr );
+            float32v v0zr = FS::Round( zr );
+            float32v d0xr = xr - v0xr; // offset from that point
+            float32v d0yr = yr - v0yr;
+            float32v d0zr = zr - v0zr;
+
+            float32v score0xr = FS::Abs( d0xr );
+            float32v score0yr = FS::Abs( d0yr );
+            float32v score0zr = FS::Abs( d0zr );
+            mask32v dir0xr = FS::Max( score0yr, score0zr ) <= score0xr; // X has the largest |offset|
+            mask32v dir0yr = FS::BitwiseAndNot( FS::Max( score0zr, score0xr ) <= score0yr, dir0xr ); // assuming BitwiseAndNot( a, b ) is a & ~b: Y is largest and X was not picked
+            mask32v dir0zr = ~(dir0xr | dir0yr); // neither of the above, so exactly one dir0* mask is set per lane
+            float32v v1xr = FS::MaskedAdd( dir0xr, v0xr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0xr ) ); // second point: moved along the chosen axis by 1.0f carrying d0's sign bit (assuming | and & act on the raw float bits)
+            float32v v1yr = FS::MaskedAdd( dir0yr, v0yr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0yr ) );
+            float32v v1zr = FS::MaskedAdd( dir0zr, v0zr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0zr ) );
+            float32v d1xr = xr - v1xr; // offset from the second point
+            float32v d1yr = yr - v1yr;
+            float32v d1zr = zr - v1zr;
+
+            int32v hv0xr = FS::Convert<int32_t>( v0xr ) * int32v( Primes::X ); // point coords scaled by the per-axis primes, as handed to HashPrimes
+            int32v hv0yr = FS::Convert<int32_t>( v0yr ) * int32v( Primes::Y );
+            int32v hv0zr = FS::Convert<int32_t>( v0zr ) * int32v( Primes::Z );
+
+            int32v hv1xr = FS::Convert<int32_t>( v1xr ) * int32v( Primes::X );
+            int32v hv1yr = FS::Convert<int32_t>( v1yr ) * int32v( Primes::Y );
+            int32v hv1zr = FS::Convert<int32_t>( v1zr ) * int32v( Primes::Z );
+
+            float32v t0 = FS::FNMulAdd( d0zr, d0zr, FS::FNMulAdd( d0yr, d0yr, FS::FNMulAdd( d0xr, d0xr, float32v( 0.6f ) ) ) ); // presumably 0.6 minus the squared offsets (FNMulAdd taken to be c - a*b)
+            float32v t1 = FS::FNMulAdd( d1zr, d1zr, FS::FNMulAdd( d1yr, d1yr, FS::FNMulAdd( d1xr, d1xr, float32v( 0.6f ) ) ) );
+            t0 = FS::Max( t0, float32v( 0 ) ); // clamp negative falloff to zero
+            t1 = FS::Max( t1, float32v( 0 ) );
+            t0 *= t0; t0 *= t0; // two squarings: t becomes t^4
+            t1 *= t1; t1 *= t1;
+
+            float32v v0 = GetGradientDot( HashPrimes( seed, hv0xr, hv0yr, hv0zr ), d0xr, d0yr, d0zr );
+            float32v v1 = GetGradientDot( HashPrimes( seed, hv1xr, hv1yr, hv1zr ), d1xr, d1yr, d1zr );
+
+            val = FS::FMulAdd( v0, t0, FS::FMulAdd( v1, t1, val ) ); // accumulate both points of this pass
+
+            if( i == 1 )
+            {
+                break; // second pass finished
+            }
+
+            xr += float32v( 0.5f ); // set up the second pass: shift every axis by 0.5 ...
+            yr += float32v( 0.5f );
+            zr += float32v( 0.5f );
+            seed = ~seed; // ... and hash it with the bitwise-inverted seed
+        }
+
+        return float32v( 32.69428253173828125f ) * val; // constant scales the accumulated sum (presumably an output-range normalisation)
+    } 
+};
+
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 36e2549a..8910610b 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -16,37 +16,39 @@ namespace FastNoise
     static constexpr float ROOT2 = 1.4142135623730950488f;
     static constexpr float ROOT3 = 1.7320508075688772935f;
 
-    //template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
-    //FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
-    //{
-    //    int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
-
-    //    // Bit-4 = Choose X Y ordering
-    //    mask32v xy;
-
-    //    xy = index << 29;
+    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY ) // returns fX/fY combined with a gradient chosen by the low four bits of an index derived from hash
+    {
+        int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) ); // low 22 bits of hash, scaled by 4/3 in float and converted back
 
-    //    if constexpr( !(SIMD & FastSIMD::FeatureFlag::SSE41) )
-    //    {
-    //        xy >>= 31;
-    //    }        
+        // Bit-3 = Choose X Y ordering; the mask is set when the bit is SET in both branches so every feature set agrees
+        mask32v bit3;
+
+        if constexpr( (SIMD & FastSIMD::FeatureFlag::SSE41) && !(SIMD & FastSIMD::FeatureFlag::AVX512_F) )
+        {
+            bit3 = FS::Cast<FS::Mask<32>>( index << 29 ); // bit moved to the lane's top bit and used as the mask directly (presumably Select keys off the top bit on these targets - confirm in FastSIMD)
+        }
+        else
+        {
+            bit3 = ( index & int32v( 1 << 2 ) ) == int32v( 1 << 2 ); // full-lane mask with the same polarity as the cast branch above
+        }
 
-    //    float32v a = FS::Select( xy, fY, fX );
-    //    float32v b = FS::Select( xy, fX, fY );
+        float32v a = FS::Select( bit3, fY, fX );
+        float32v b = FS::Select( bit3, fX, fY );
 
-    //    // Bit-1 = b flip sign
-    //    b ^= FS::Cast<float>( index << 31 );
+        // Bit-1 = b flip sign
+        b ^= FS::Cast<float>( index << 31 );
 
-    //    // Bit-2 = Mul a by 2 or Root3
-    //    mask32v aMul2 = (index << 30) >> 31;        
+        // Bit-2 = Mul a by 2 or Root3 (note: the bit2 mask is set where the index bit is CLEAR)
+        mask32v bit2 = ( index & int32v( 2 ) ) == int32v( 0 );
 
-    //    a *= FS::Select( aMul2, float32v( 2 ), float32v( ROOT3 ) );
-    //    // b zero value if a mul 2
-    //    b = FS_NMask_f32( b, aMul2 );
+        a *= FS::Select( bit2, float32v( ROOT3 ), float32v( 2 ) ); // mask set -> Root3, mask clear -> 2
+        // b only joins the sum on the Root3 lanes; it is left out where a was multiplied by 2, giving (2, 0) or (Root3, +-1)
+        float32v c = FS::MaskedAdd( bit2, a, b );
 
-    //    // Bit-8 = Flip sign of a + b
-    //    return ( a + b ) ^ FS::Cast<float>( (index >> 3) << 31 );
-    //}
+        // Bit-4 = Flip sign of a + b
+        return c ^ FS::Cast<float>( (index >> 3) << 31 );
+    }
 
     //template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
     //FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
@@ -93,22 +95,22 @@ namespace FastNoise
             return FS::FMulAdd( gX, fX, fY * gY );
         }*/
 
-        int32v  bit1 = (hash << 31);
-        int32v  bit2 = (hash >> 1) << 31;
-        mask32v bit4;
-        
-        bit4 = hash << 29;
+        int32v bit1 = hash << 31;
+        int32v bit2 = (hash >> 1) << 31;
+        int32v bit4 = hash << 29;
 
         if constexpr( !( SIMD & FastSIMD::FeatureFlag::SSE41 ) )
         {
             bit4 >>= 31;
-        }        
+        }
+
+        auto bit4Mask = FS::Cast<FS::Mask<32, false>>( bit4 );
 
         fX ^= FS::Cast<float>( bit1 );
         fY ^= FS::Cast<float>( bit2 );
         
-        float32v a = FS::Select( bit4, fY, fX );
-        float32v b = FS::Select( bit4, fX, fY );
+        float32v a = FS::Select( bit4Mask, fY, fX );
+        float32v b = FS::Select( bit4Mask, fX, fY );
         
         return FS::FMulAdd( float32v( 1.0f + ROOT2 ), a, b );
     }
@@ -161,7 +163,7 @@ namespace FastNoise
         float32v b;
         if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
         {
-            b = FS::Select( hash << 27, fY, fZ );
+            b = FS::Select( FS::Cast<FS::Mask<32>>( hash << 27 ), fY, fZ );
         }
         else
         {
diff --git a/include/FastNoise/Generators/Value.inl b/include/FastNoise/Generators/Value.inl
index 9ae6b539..3d304827 100644
--- a/include/FastNoise/Generators/Value.inl
+++ b/include/FastNoise/Generators/Value.inl
@@ -1,86 +1,84 @@
-#include "FastSIMD/InlInclude.h"
-
 #include "Value.h"
 #include "Utils.inl"
 
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Value, FS> : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Value, SIMD> : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
 
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
 
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
+        xs = InterpHermite( x - xs );
+        ys = InterpHermite( y - ys );
 
-        return FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0 ), FnUtils::GetValueCoord( seed, x1, y0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1 ), FnUtils::GetValueCoord( seed, x1, y1 ), xs ), ys );
+        return Lerp(
+            Lerp( GetValueCoord( seed, x0, y0 ), GetValueCoord( seed, x1, y0 ), xs ),
+            Lerp( GetValueCoord( seed, x0, y1 ), GetValueCoord( seed, x1, y1 ), xs ), ys );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
 
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
 
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
+        xs = InterpHermite( x - xs );
+        ys = InterpHermite( y - ys );
+        zs = InterpHermite( z - zs );
 
-        return FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0 ), FnUtils::GetValueCoord( seed, x1, y0, z0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0 ), FnUtils::GetValueCoord( seed, x1, y1, z0 ), xs ), ys ),
-            FnUtils::Lerp(                                                                                
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1 ), FnUtils::GetValueCoord( seed, x1, y0, z1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1 ), FnUtils::GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs );
+        return Lerp( Lerp(
+            Lerp( GetValueCoord( seed, x0, y0, z0 ), GetValueCoord( seed, x1, y0, z0 ), xs ),
+            Lerp( GetValueCoord( seed, x0, y1, z0 ), GetValueCoord( seed, x1, y1, z0 ), xs ), ys ),
+            Lerp(                                                                                
+            Lerp( GetValueCoord( seed, x0, y0, z1 ), GetValueCoord( seed, x1, y0, z1 ), xs ),    
+            Lerp( GetValueCoord( seed, x0, y1, z1 ), GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
     {
-        float32v xs = FS_Floor_f32( x );
-        float32v ys = FS_Floor_f32( y );
-        float32v zs = FS_Floor_f32( z );
-        float32v ws = FS_Floor_f32( w );
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
+        float32v ws = FS::Floor( w );
 
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( FnPrimes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( FnPrimes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( FnPrimes::Z );
-        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( FnPrimes::W );
-        int32v x1 = x0 + int32v( FnPrimes::X );
-        int32v y1 = y0 + int32v( FnPrimes::Y );
-        int32v z1 = z0 + int32v( FnPrimes::Z );
-        int32v w1 = w0 + int32v( FnPrimes::W );
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( Primes::W );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
+        int32v w1 = w0 + int32v( Primes::W );
 
-        xs = FnUtils::InterpHermite( x - xs );
-        ys = FnUtils::InterpHermite( y - ys );
-        zs = FnUtils::InterpHermite( z - zs );
-        ws = FnUtils::InterpHermite( w - ws );
+        xs = InterpHermite( x - xs );
+        ys = InterpHermite( y - ys );
+        zs = InterpHermite( z - zs );
+        ws = InterpHermite( w - ws );
 
-        return FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w0 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w0 ), xs ), ys ),
-            FnUtils::Lerp( 
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w0 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w0 ), xs ), ys ), zs ),
-            FnUtils::Lerp( FnUtils::Lerp(
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w1 ), xs ),
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w1 ), xs ), ys ),
-            FnUtils::Lerp( 
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w1 ), xs ),    
-            FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w1 ), xs ), ys ), zs ), ws );
+        return Lerp( Lerp( Lerp(
+            Lerp( GetValueCoord( seed, x0, y0, z0, w0 ), GetValueCoord( seed, x1, y0, z0, w0 ), xs ),
+            Lerp( GetValueCoord( seed, x0, y1, z0, w0 ), GetValueCoord( seed, x1, y1, z0, w0 ), xs ), ys ),
+            Lerp( 
+            Lerp( GetValueCoord( seed, x0, y0, z1, w0 ), GetValueCoord( seed, x1, y0, z1, w0 ), xs ),    
+            Lerp( GetValueCoord( seed, x0, y1, z1, w0 ), GetValueCoord( seed, x1, y1, z1, w0 ), xs ), ys ), zs ),
+            Lerp( Lerp(
+            Lerp( GetValueCoord( seed, x0, y0, z0, w1 ), GetValueCoord( seed, x1, y0, z0, w1 ), xs ),
+            Lerp( GetValueCoord( seed, x0, y1, z0, w1 ), GetValueCoord( seed, x1, y1, z0, w1 ), xs ), ys ),
+            Lerp( 
+            Lerp( GetValueCoord( seed, x0, y0, z1, w1 ), GetValueCoord( seed, x1, y0, z1, w1 ), xs ),    
+            Lerp( GetValueCoord( seed, x0, y1, z1, w1 ), GetValueCoord( seed, x1, y1, z1, w1 ), xs ), ys ), zs ), ws );
     }
 };

From 134c6f0d2bb5fcc75b9694ad1057c4410a9d49ea Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sun, 4 Sep 2022 19:48:16 +0100
Subject: [PATCH 010/139] All nodes now functional

---
 include/FastNoise/FastNoise_BuildList.inl     |   72 +-
 include/FastNoise/Generators/Cellular.h       |  249 ++--
 include/FastNoise/Generators/Cellular.inl     | 1301 +++++++++--------
 include/FastNoise/Generators/DomainWarp.inl   |  400 ++---
 .../Generators/DomainWarpFractal.inl          |  142 +-
 include/FastNoise/Generators/Fractal.inl      |  207 +--
 include/FastNoise/Generators/Modifiers.inl    |   18 +-
 src/CMakeLists.txt                            |    2 +-
 8 files changed, 1201 insertions(+), 1190 deletions(-)

diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index 6388d246..d74e4c66 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -35,30 +35,30 @@ template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
 #include "Generators/Simplex.inl"
 #endif
 
-//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-//#include "Generators/Cellular.h"
-//#else
-//#include "Generators/Cellular.inl"
-//#endif
-//
-//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-//#include "Generators/Fractal.h"
-//#else
-//#include "Generators/Fractal.inl"
-//#endif
-//
-//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-//#include "Generators/DomainWarp.h"
-//#else
-//#include "Generators/DomainWarp.inl"
-//#endif
-//
-//#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-//#include "Generators/DomainWarpFractal.h"
-//#else
-//#include "Generators/DomainWarpFractal.inl"
-//#endif
-//
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include "Generators/Cellular.h"
+#else
+#include "Generators/Cellular.inl"
+#endif
+
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include "Generators/Fractal.h"
+#else
+#include "Generators/Fractal.inl"
+#endif
+
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include "Generators/DomainWarp.h"
+#else
+#include "Generators/DomainWarp.inl"
+#endif
+
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include "Generators/DomainWarpFractal.h"
+#else
+#include "Generators/DomainWarpFractal.inl"
+#endif
+
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
 #include "Generators/Modifiers.h"
 #else
@@ -88,17 +88,17 @@ FASTNOISE_REGISTER_NODE( Perlin );
 FASTNOISE_REGISTER_NODE( Simplex );
 FASTNOISE_REGISTER_NODE( OpenSimplex2 );
                
-//FASTNOISE_REGISTER_NODE( CellularValue );
-//FASTNOISE_REGISTER_NODE( CellularDistance );
-//FASTNOISE_REGISTER_NODE( CellularLookup );
-//                    
-//FASTNOISE_REGISTER_NODE( FractalFBm );
-//FASTNOISE_REGISTER_NODE( FractalPingPong );
-//FASTNOISE_REGISTER_NODE( FractalRidged );
-//                    
-//FASTNOISE_REGISTER_NODE( DomainWarpGradient );
-//FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );
-//FASTNOISE_REGISTER_NODE( DomainWarpFractalIndependant );
+FASTNOISE_REGISTER_NODE( CellularValue );
+FASTNOISE_REGISTER_NODE( CellularDistance );
+FASTNOISE_REGISTER_NODE( CellularLookup );
+                    
+FASTNOISE_REGISTER_NODE( FractalFBm );
+FASTNOISE_REGISTER_NODE( FractalPingPong );
+FASTNOISE_REGISTER_NODE( FractalRidged );
+                    
+FASTNOISE_REGISTER_NODE( DomainWarpGradient );
+FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );
+FASTNOISE_REGISTER_NODE( DomainWarpFractalIndependant );
                     
 FASTNOISE_REGISTER_NODE( DomainScale );
 FASTNOISE_REGISTER_NODE( DomainOffset );
@@ -123,4 +123,4 @@ FASTNOISE_REGISTER_NODE( PowInt );
 FASTNOISE_REGISTER_NODE( DomainAxisScale );
 FASTNOISE_REGISTER_NODE( AddDimension );
 FASTNOISE_REGISTER_NODE( RemoveDimension );
-//FASTNOISE_REGISTER_NODE( GeneratorCache );
+FASTNOISE_REGISTER_NODE( GeneratorCache );
diff --git a/include/FastNoise/Generators/Cellular.h b/include/FastNoise/Generators/Cellular.h
index f4e28082..39fa4fac 100644
--- a/include/FastNoise/Generators/Cellular.h
+++ b/include/FastNoise/Generators/Cellular.h
@@ -1,123 +1,126 @@
-#pragma once
-#include "Generator.h"
-
-#include <algorithm>
-
-namespace FastNoise
-{
-    class Cellular : public virtual Generator
-    {
-    public:
-        void SetJitterModifier( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mJitterModifier, gen ); }
-        void SetJitterModifier( float value ) { mJitterModifier = value; }
-        void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
-
-    protected:
-        HybridSource mJitterModifier = 1.0f;
-        DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Cellular> : MetadataT<Generator>
-    {
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
-            this->AddHybridSource( "Jitter Modifier", 1.0f, &Cellular::SetJitterModifier, &Cellular::SetJitterModifier );
-            this->AddVariableEnum( "Distance Function", DistanceFunction::EuclideanSquared, &Cellular::SetDistanceFunction, kDistanceFunction_Strings );
-        }
-    };
-#endif
-
-    class CellularValue : public virtual Cellular
-    {
-    public:        const Metadata& GetMetadata() const override;
-
-        static const int kMaxDistanceCount = 4;
-
-        void SetValueIndex( int value ) { mValueIndex = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
-
-    protected:
-        int mValueIndex = 0;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<CellularValue> : MetadataT<Cellular>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            this->AddVariable( "Value Index", 0, &CellularValue::SetValueIndex, 0, CellularValue::kMaxDistanceCount - 1 );
-        }
-    };
-#endif
-
-    class CellularDistance : public virtual Cellular
-    {
-    public:        const Metadata& GetMetadata() const override;
-
-        enum class ReturnType
-        {
-            Index0,
-            Index0Add1,
-            Index0Sub1,
-            Index0Mul1,
-            Index0Div1
-        };
-
-        static const int kMaxDistanceCount = 4;
-
-        void SetDistanceIndex0( int value ) { mDistanceIndex0 = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
-        void SetDistanceIndex1( int value ) { mDistanceIndex1 = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
-        void SetReturnType( ReturnType value ) { mReturnType = value; }
-
-    protected:
-        ReturnType mReturnType = ReturnType::Index0;
-        int mDistanceIndex0 = 0;
-        int mDistanceIndex1 = 1;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<CellularDistance> : MetadataT<Cellular>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            this->AddVariable( "Distance Index 0", 0, &CellularDistance::SetDistanceIndex0, 0, CellularDistance::kMaxDistanceCount - 1 );
-            this->AddVariable( "Distance Index 1", 1, &CellularDistance::SetDistanceIndex1, 0, CellularDistance::kMaxDistanceCount - 1 );
-            this->AddVariableEnum( "Return Type", CellularDistance::ReturnType::Index0, &CellularDistance::SetReturnType, "Index0", "Index0Add1", "Index0Sub1", "Index0Mul1", "Index0Div1" );
-        }
-    };
-#endif
-
-    class CellularLookup : public virtual Cellular
-    {
-    public:        const Metadata& GetMetadata() const override;
-
-        void SetLookup( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLookup, gen ); }
-        void SetLookupFrequency( float freq ) { mLookupFreq = freq; }
-
-    protected:
-        GeneratorSource mLookup;
-        float mLookupFreq = 0.1f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<CellularLookup> : MetadataT<Cellular>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            this->AddGeneratorSource( "Lookup", &CellularLookup::SetLookup );
-            this->AddVariable( "Lookup Frequency", 0.1f, &CellularLookup::SetLookupFrequency );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+#include <algorithm>
+
+namespace FastNoise
+{
+    class Cellular : public virtual Generator
+    {
+    public:
+        void SetJitterModifier( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mJitterModifier, gen ); }
+        void SetJitterModifier( float value ) { mJitterModifier = value; }
+        void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
+
+    protected:
+        HybridSource mJitterModifier = 1.0f;
+        DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Cellular> : MetadataT<Generator>
+    {
+        MetadataT()
+        {
+            groups.push_back( "Coherent Noise" );
+            this->AddHybridSource( "Jitter Modifier", 1.0f, &Cellular::SetJitterModifier, &Cellular::SetJitterModifier );
+            this->AddVariableEnum( "Distance Function", DistanceFunction::EuclideanSquared, &Cellular::SetDistanceFunction, kDistanceFunction_Strings );
+        }
+    };
+#endif
+
+    class CellularValue : public virtual Cellular
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        static const int kMaxDistanceCount = 4;
+
+        void SetValueIndex( int value ) { mValueIndex = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
+
+    protected:
+        int mValueIndex = 0;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<CellularValue> : MetadataT<Cellular>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            this->AddVariable( "Value Index", 0, &CellularValue::SetValueIndex, 0, CellularValue::kMaxDistanceCount - 1 );
+        }
+    };
+#endif
+
+    class CellularDistance : public virtual Cellular
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        enum class ReturnType
+        {
+            Index0,
+            Index0Add1,
+            Index0Sub1,
+            Index0Mul1,
+            Index0Div1
+        };
+
+        static const int kMaxDistanceCount = 4;
+
+        void SetDistanceIndex0( int value ) { mDistanceIndex0 = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
+        void SetDistanceIndex1( int value ) { mDistanceIndex1 = std::min( std::max( value, 0 ), kMaxDistanceCount - 1 ); }
+        void SetReturnType( ReturnType value ) { mReturnType = value; }
+
+    protected:
+        ReturnType mReturnType = ReturnType::Index0;
+        int mDistanceIndex0 = 0;
+        int mDistanceIndex1 = 1;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<CellularDistance> : MetadataT<Cellular>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            this->AddVariable( "Distance Index 0", 0, &CellularDistance::SetDistanceIndex0, 0, CellularDistance::kMaxDistanceCount - 1 );
+            this->AddVariable( "Distance Index 1", 1, &CellularDistance::SetDistanceIndex1, 0, CellularDistance::kMaxDistanceCount - 1 );
+            this->AddVariableEnum( "Return Type", CellularDistance::ReturnType::Index0, &CellularDistance::SetReturnType, "Index0", "Index0Add1", "Index0Sub1", "Index0Mul1", "Index0Div1" );
+        }
+    };
+#endif
+
+    class CellularLookup : public virtual Cellular
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        void SetLookup( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLookup, gen ); }
+        void SetLookupFrequency( float freq ) { mLookupFreq = freq; }
+
+    protected:
+        GeneratorSource mLookup;
+        float mLookupFreq = 0.1f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<CellularLookup> : MetadataT<Cellular>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            this->AddGeneratorSource( "Lookup", &CellularLookup::SetLookup );
+            this->AddVariable( "Lookup Frequency", 0.1f, &CellularLookup::SetLookupFrequency );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index 37dd6edc..31793036 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -1,649 +1,652 @@
-#include <cfloat>
-#include <array>
-
-#include "Cellular.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::Cellular, FS> : public virtual FastNoise::Cellular, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{
-protected:
-    const float kJitter2D = 0.437016f;
-    const float kJitter3D = 0.396144f;
-    const float kJitter4D = 0.366025f;
-    const float kJitterIdx23 = 0.190983f;
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::CellularValue, FS> : public virtual FastNoise::CellularValue, public FastSIMD::DispatchClass<FastNoise::Cellular, FS>
-{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
-        std::array<float32v, kMaxDistanceCount> value;
-        std::array<float32v, kMaxDistanceCount> distance;
-        
-        value.fill( float32v( INFINITY ) );
-        distance.fill( float32v( INFINITY ) );
-
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                int32v hash = HashPrimesHB( seed, xc, yc );
-                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-
-                float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, yd * yd ) );
-                xd = FS::FMulAdd( xd, invMag, xcf );
-                yd = FS::FMulAdd( yd, invMag, ycf );
-
-                float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
-
-                for( int i = 0; ; i++ )
-                {
-                    mask32v closer = newDistance < distance[i];
-
-                    float32v localDistance = distance[i];
-                    float32v localCellValue = value[i];
-
-                    distance[i] = FS::Select( closer, newDistance, distance[i] );
-                    value[i] = FS::Select( closer, newCellValue, value[i] );
-
-                    if( i > mValueIndex )
-                    {
-                        break;
-                    }
-
-                    newDistance = FS::Select( closer, localDistance, newDistance );
-                    newCellValue = FS::Select( closer, localCellValue, newCellValue );
-                }
-
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-
-        return value[mValueIndex];
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
-        std::array<float32v, kMaxDistanceCount> value;
-        std::array<float32v, kMaxDistanceCount> distance;
-        
-        value.fill( float32v( INFINITY ) );
-        distance.fill( float32v( INFINITY ) );
-        
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
-        
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
-    
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-        zcBase *= int32v( Primes::Z );
-    
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    int32v hash = HashPrimesHB( seed, xc, yc, zc );
-                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS::Convert<float>( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS::Convert<float>( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
-                    xd = FS::FMulAdd( xd, invMag, xcf );
-                    yd = FS::FMulAdd( yd, invMag, ycf );
-                    zd = FS::FMulAdd( zd, invMag, zcf );
-                
-                    float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
-                
-                    for( int i = 0; ; i++ )
-                    {
-                        mask32v closer = newDistance < distance[i];
-
-                        float32v localDistance = distance[i];
-                        float32v localCellValue = value[i];
-
-                        distance[i] = FS::Select( closer, newDistance, distance[i] );
-                        value[i] = FS::Select( closer, newCellValue, value[i] );
-
-                        if( i > mValueIndex )
-                        {
-                            break;
-                        }
-
-                        newDistance = FS::Select( closer, localDistance, newDistance );
-                        newCellValue = FS::Select( closer, localCellValue, newCellValue );
-                    }
-            
-                    zcf += float32v( 1 );
-                    zc += int32v( Primes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-    
-        return value[mValueIndex];
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z , float32v w ) const final
-    {
-        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
-        std::array<float32v, kMaxDistanceCount> value;
-        std::array<float32v, kMaxDistanceCount> distance;
-        
-        value.fill( float32v( INFINITY ) );
-        distance.fill( float32v( INFINITY ) );
-        
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
-        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
-        
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
-        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
-    
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-        zcBase *= int32v( Primes::Z );
-        wcBase *= int32v( Primes::W );
-    
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    float32v wcf = wcfBase;
-                    int32v wc = wcBase;
-                    for( int wi = 0; wi < 3; wi++ )
-                    {
-                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc );
-                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
-                        xd = FS::FMulAdd( xd, invMag, xcf );
-                        yd = FS::FMulAdd( yd, invMag, ycf );
-                        zd = FS::FMulAdd( zd, invMag, zcf );
-                        wd = FS::FMulAdd( wd, invMag, wcf );
-
-                        float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
-
-                        for( int i = 0; ; i++ )
-                        {
-                            mask32v closer = newDistance < distance[i];
-
-                            float32v localDistance = distance[i];
-                            float32v localCellValue = value[i];
-
-                            distance[i] = FS::Select( closer, newDistance, distance[i] );
-                            value[i] = FS::Select( closer, newCellValue, value[i] );
-
-                            if( i > mValueIndex )
-                            {
-                                break;
-                            }
-
-                            newDistance = FS::Select( closer, localDistance, newDistance );
-                            newCellValue = FS::Select( closer, localCellValue, newCellValue );
-                        }
-
-                        wcf += float32v( 1 );
-                        wc += int32v( Primes::W );
-                    }
-                    zcf += float32v( 1 );
-                    zc += int32v( Primes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-    
-        return value[mValueIndex];
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::CellularDistance, FS> : public virtual FastNoise::CellularDistance, public FastSIMD::DispatchClass<FastNoise::Cellular, FS>
-{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
-
-        std::array<float32v, kMaxDistanceCount> distance;
-        distance.fill( float32v( INFINITY ) );
-
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for ( int yi = 0; yi < 3; yi++ )
-            {
-                int32v hash = HashPrimesHB( seed, xc, yc );
-                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-
-                float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, yd * yd ) );
-                xd = FS::FMulAdd( xd, invMag, xcf );
-                yd = FS::FMulAdd( yd, invMag, ycf );
-
-                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
-
-                for( int i = kMaxDistanceCount - 1; i > 0; i-- )
-                {
-                    distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] );
-                }
-
-                distance[0] = FS::Min( distance[0], newDistance );
-
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-
-        return GetReturn( distance );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
-
-        std::array<float32v, kMaxDistanceCount> distance;
-        distance.fill( float32v( INFINITY ) );
-
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
-
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
-
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-        zcBase *= int32v( Primes::Z );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    int32v hash = HashPrimesHB( seed, xc, yc, zc );
-                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
-                    xd = FS::FMulAdd( xd, invMag, xcf );
-                    yd = FS::FMulAdd( yd, invMag, ycf );
-                    zd = FS::FMulAdd( zd, invMag, zcf );
-
-                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
-
-                    for( int i = kMaxDistanceCount - 1; i > 0; i-- )
-                    {
-                        distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] );
-                    }
-
-                    distance[0] = FS::Min( distance[0], newDistance );
-
-                    zcf += float32v( 1 );
-                    zc += int32v( Primes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-
-        return GetReturn( distance );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
-
-        std::array<float32v, kMaxDistanceCount> distance;
-        distance.fill( float32v( INFINITY ) );
-
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
-        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
-
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
-        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
-
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-        zcBase *= int32v( Primes::Z );
-        wcBase *= int32v( Primes::W );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    float32v wcf = wcfBase;
-                    int32v wc = wcBase;
-                    for( int wi = 0; wi < 3; wi++ )
-                    {
-                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc );
-                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
-                        xd = FS::FMulAdd( xd, invMag, xcf );
-                        yd = FS::FMulAdd( yd, invMag, ycf );
-                        zd = FS::FMulAdd( zd, invMag, zcf );
-                        wd = FS::FMulAdd( wd, invMag, wcf );
-
-                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
-
-                        for( int i = kMaxDistanceCount - 1; i > 0; i-- )
-                        {
-                            distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] );
-                        }
-
-                        distance[0] = FS::Min( distance[0], newDistance );
-
-                        wcf += float32v( 1 );
-                        wc += int32v( Primes::W );
-                    }
-                    zcf += float32v( 1 );
-                    zc += int32v( Primes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-
-        return GetReturn( distance );
-    }
-
-    FS_FORCEINLINE float32v GetReturn( std::array<float32v, kMaxDistanceCount>& distance ) const
-    {
-        if( mDistanceFunction == FastNoise::DistanceFunction::Euclidean )
-        {
-            distance[mDistanceIndex0] *= FS_InvSqrt_f32( distance[mDistanceIndex0] );
-            distance[mDistanceIndex1] *= FS_InvSqrt_f32( distance[mDistanceIndex1] );
-        }
-
-        switch( mReturnType )
-        {
-        default:
-        case ReturnType::Index0:
-        {
-            return distance[mDistanceIndex0];
-        }
-        case ReturnType::Index0Add1:
-        {
-            return distance[mDistanceIndex0] + distance[mDistanceIndex1];
-        }
-        case ReturnType::Index0Sub1:
-        {
-            return distance[mDistanceIndex0] - distance[mDistanceIndex1];
-        }
-        case ReturnType::Index0Mul1:
-        {
-            return distance[mDistanceIndex0] * distance[mDistanceIndex1];
-        }
-        case ReturnType::Index0Div1:
-        {
-            return distance[mDistanceIndex0] * FS::Reciprocal( distance[mDistanceIndex1] );
-        }
-        }
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::CellularLookup, FS> : public virtual FastNoise::CellularLookup, public FastSIMD::DispatchClass<FastNoise::Cellular, FS>
-{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
-        float32v distance( FLT_MAX );
-        float32v cellX, cellY;
-
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                int32v hash = HashPrimesHB( seed, xc, yc );
-                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-
-                float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, yd * yd ) );
-                xd = FS::FMulAdd( xd, invMag, xcf );
-                yd = FS::FMulAdd( yd, invMag, ycf );
-
-                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
-
-                mask32v closer = newDistance < distance;
-                distance = FS::Min( newDistance, distance );
-
-                cellX = FS::Select( closer, xd + x, cellX );
-                cellY = FS::Select( closer, yd + y, cellY );
-
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
-        float32v distance( FLT_MAX );
-        float32v cellX, cellY, cellZ;
-
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
-
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
-
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-        zcBase *= int32v( Primes::Z );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    int32v hash = HashPrimesHB( seed, xc, yc, zc );
-                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-
-                    float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
-                    xd = FS::FMulAdd( xd, invMag, xcf );
-                    yd = FS::FMulAdd( yd, invMag, ycf );
-                    zd = FS::FMulAdd( zd, invMag, zcf );
-
-                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
-
-                    mask32v closer = newDistance < distance;
-                    distance = FS::Min( newDistance, distance );
-
-                    cellX = FS::Select( closer, xd + x, cellX );
-                    cellY = FS::Select( closer, yd + y, cellY );
-                    cellZ = FS::Select( closer, zd + z, cellZ );
-
-                    zcf += float32v( 1 );
-                    zc += int32v( Primes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ) );
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
-        float32v distance( FLT_MAX );
-        float32v cellX, cellY, cellZ, cellW;
-
-        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
-        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
-        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
-        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
-
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
-        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
-
-        xc *= int32v( Primes::X );
-        ycBase *= int32v( Primes::Y );
-        zcBase *= int32v( Primes::Z );
-        wcBase *= int32v( Primes::W );
-
-        for( int xi = 0; xi < 3; xi++ )
-        {
-            float32v ycf = ycfBase;
-            int32v yc = ycBase;
-            for( int yi = 0; yi < 3; yi++ )
-            {
-                float32v zcf = zcfBase;
-                int32v zc = zcBase;
-                for( int zi = 0; zi < 3; zi++ )
-                {
-                    float32v wcf = wcfBase;
-                    int32v wc = wcBase;
-                    for( int wi = 0; wi < 3; wi++ )
-                    {
-                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc );
-                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-
-                        float32v invMag = jitter * FS_InvSqrt_f32( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
-                        xd = FS::FMulAdd( xd, invMag, xcf );
-                        yd = FS::FMulAdd( yd, invMag, ycf );
-                        zd = FS::FMulAdd( zd, invMag, zcf );
-                        wd = FS::FMulAdd( wd, invMag, wcf );
-
-                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
-
-                        mask32v closer = newDistance < distance;
-                        distance = FS::Min( newDistance, distance );
-
-                        cellX = FS::Select( closer, xd + x, cellX );
-                        cellY = FS::Select( closer, yd + y, cellY );
-                        cellZ = FS::Select( closer, zd + z, cellZ );
-                        cellW = FS::Select( closer, wd + w, cellW );
-
-                        wcf += float32v( 1 );
-                        wc += int32v( Primes::W );
-                    }
-                    zcf += float32v( 1 );
-                    zc += int32v( Primes::Z );
-                }
-                ycf += float32v( 1 );
-                yc += int32v( Primes::Y );
-            }
-            xcf += float32v( 1 );
-            xc += int32v( Primes::X );
-        }
-
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ), cellW * float32v( mLookupFreq ) );
-    }
-};
+#include <cfloat>
+#include <array>
+
+#include "Cellular.h"
+#include "Utils.inl"
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Cellular, SIMD> : public virtual FastNoise::Cellular, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{ // Common base of the cellular dispatch classes below; it only carries the jitter constants they read through this->.
+protected:
+    const float kJitter2D = 0.437016f; // scaled by the mJitterModifier source value in the 2D Gen overloads
+    const float kJitter3D = 0.396144f; // same role in the 3D Gen overloads
+    const float kJitter4D = 0.366025f; // same role in the 4D Gen overloads
+    const float kJitterIdx23 = 0.190983f; // NOTE(review): no use visible in this part of the file - purpose unconfirmed
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> : public virtual FastNoise::CellularValue, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
+{ // Cellular value noise: each Gen overload scans the 3-wide cell neighbourhood and returns value[mValueIndex] (index 0 = nearest point).
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    {
+        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
+        std::array<float32v, kMaxDistanceCount> value;
+        std::array<float32v, kMaxDistanceCount> distance;
+        // distance[i] / value[i] hold, per lane, the i-th smallest distance seen so far and that point's cell value.
+        value.fill( float32v( INFINITY ) );
+        distance.fill( float32v( INFINITY ) );
+
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x;
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase;
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                int32v hash = HashPrimesHB( seed, xc, yc );
+                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+
+                float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, yd * yd ) );
+                xd = FS::FMulAdd( xd, invMag, xcf );
+                yd = FS::FMulAdd( yd, invMag, ycf );
+
+                float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
+                // Insertion step: where the candidate is closer it takes slot i and the displaced entry is carried to slot i + 1.
+                for( int i = 0; ; i++ )
+                {
+                    mask32v closer = newDistance < distance[i];
+
+                    float32v localDistance = distance[i];
+                    float32v localCellValue = value[i];
+
+                    distance[i] = FS::Select( closer, newDistance, distance[i] );
+                    value[i] = FS::Select( closer, newCellValue, value[i] );
+                    // Slots above mValueIndex never feed the result; stopping here also keeps i in range when mValueIndex is the last slot.
+                    if( i >= mValueIndex )
+                    {
+                        break;
+                    }
+
+                    newDistance = FS::Select( closer, localDistance, newDistance );
+                    newCellValue = FS::Select( closer, localCellValue, newCellValue );
+                }
+
+                ycf += float32v( 1 );
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+
+        return value[mValueIndex];
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    {
+        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
+        std::array<float32v, kMaxDistanceCount> value;
+        std::array<float32v, kMaxDistanceCount> distance;
+        // distance[i] / value[i] hold, per lane, the i-th smallest distance seen so far and that point's cell value.
+        value.fill( float32v( INFINITY ) );
+        distance.fill( float32v( INFINITY ) );
+        
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+        
+        float32v xcf = FS::Convert<float>( xc ) - x;
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+    
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+    
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase;
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase;
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    int32v hash = HashPrimesHB( seed, xc, yc, zc );
+                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v yd = FS::Convert<float>( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v zd = FS::Convert<float>( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                
+                    float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
+                    xd = FS::FMulAdd( xd, invMag, xcf );
+                    yd = FS::FMulAdd( yd, invMag, ycf );
+                    zd = FS::FMulAdd( zd, invMag, zcf );
+                
+                    float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
+                    // Insertion step: where the candidate is closer it takes slot i and the displaced entry is carried to slot i + 1.
+                    for( int i = 0; ; i++ )
+                    {
+                        mask32v closer = newDistance < distance[i];
+
+                        float32v localDistance = distance[i];
+                        float32v localCellValue = value[i];
+
+                        distance[i] = FS::Select( closer, newDistance, distance[i] );
+                        value[i] = FS::Select( closer, newCellValue, value[i] );
+                        // Slots above mValueIndex never feed the result; stopping here also keeps i in range when mValueIndex is the last slot.
+                        if( i >= mValueIndex )
+                        {
+                            break;
+                        }
+
+                        newDistance = FS::Select( closer, localDistance, newDistance );
+                        newCellValue = FS::Select( closer, localCellValue, newCellValue );
+                    }
+            
+                    zcf += float32v( 1 );
+                    zc += int32v( Primes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+    
+        return value[mValueIndex];
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z , float32v w ) const final
+    {
+        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
+        std::array<float32v, kMaxDistanceCount> value;
+        std::array<float32v, kMaxDistanceCount> distance;
+        // distance[i] / value[i] hold, per lane, the i-th smallest distance seen so far and that point's cell value.
+        value.fill( float32v( INFINITY ) );
+        distance.fill( float32v( INFINITY ) );
+        
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
+        
+        float32v xcf = FS::Convert<float>( xc ) - x;
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
+    
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+        wcBase *= int32v( Primes::W );
+    
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase;
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase;
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    float32v wcf = wcfBase;
+                    int32v wc = wcBase;
+                    for( int wi = 0; wi < 3; wi++ )
+                    {
+                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc );
+                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+
+                        float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
+                        xd = FS::FMulAdd( xd, invMag, xcf );
+                        yd = FS::FMulAdd( yd, invMag, ycf );
+                        zd = FS::FMulAdd( zd, invMag, zcf );
+                        wd = FS::FMulAdd( wd, invMag, wcf );
+
+                        float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+                        // Insertion step: where the candidate is closer it takes slot i and the displaced entry is carried to slot i + 1.
+                        for( int i = 0; ; i++ )
+                        {
+                            mask32v closer = newDistance < distance[i];
+
+                            float32v localDistance = distance[i];
+                            float32v localCellValue = value[i];
+
+                            distance[i] = FS::Select( closer, newDistance, distance[i] );
+                            value[i] = FS::Select( closer, newCellValue, value[i] );
+                            // Slots above mValueIndex never feed the result; stopping here also keeps i in range when mValueIndex is the last slot.
+                            if( i >= mValueIndex )
+                            {
+                                break;
+                            }
+
+                            newDistance = FS::Select( closer, localDistance, newDistance );
+                            newCellValue = FS::Select( closer, localCellValue, newCellValue );
+                        }
+
+                        wcf += float32v( 1 );
+                        wc += int32v( Primes::W );
+                    }
+                    zcf += float32v( 1 );
+                    zc += int32v( Primes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+    
+        return value[mValueIndex];
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> : public virtual FastNoise::CellularDistance, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
+{
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    {
+        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
+
+        std::array<float32v, kMaxDistanceCount> distance;
+        distance.fill( float32v( INFINITY ) );
+
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x;
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+
+        xc *= int32v( Primes::X );
+        ycBase *= int32v( Primes::Y );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase;
+            int32v yc = ycBase;
+            for ( int yi = 0; yi < 3; yi++ )
+            {
+                int32v hash = HashPrimesHB( seed, xc, yc );
+                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+
+                float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, yd * yd ) );
+                xd = FS::FMulAdd( xd, invMag, xcf );
+                yd = FS::FMulAdd( yd, invMag, ycf );
+
+                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
+
+                for( int i = kMaxDistanceCount - 1; i > 0; i-- )
+                {
+                    distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] );
+                }
+
+                distance[0] = FS::Min( distance[0], newDistance );
+
+                ycf += float32v( 1 );
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+
+        return GetReturn( distance );
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final // 3D overload: scans the 3x3x3 cells around (x, y, z) and returns GetReturn( distance )
+    {
+        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z ); // jitter scale: kJitter3D times the mJitterModifier value at this position
+
+        std::array<float32v, kMaxDistanceCount> distance; // the kMaxDistanceCount smallest distances seen so far, kept in ascending order
+        distance.fill( float32v( INFINITY ) ); // start at +inf so any computed distance can displace a slot
+
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 ); // first cell of the 3-wide window on each axis: converted coordinate minus one
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // offset from the sample position to that first cell, per axis
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+
+        xc *= int32v( Primes::X ); // pre-multiply by the axis prime so moving to the next cell is a single add below
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // restart the y walk for every x step
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase; // restart the z walk for every y step
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    int32v hash = HashPrimesHB( seed, xc, yc, zc ); // one hash per cell; its bits are split into three 10-bit fields below
+                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // bits 0-9, centred on zero (half-integer, so never exactly 0)
+                    float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // bits 10-19
+                    float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // bits 20-29
+
+                    float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) ); // jitter over the length of (xd, yd, zd), assuming FMulAdd( a, b, c ) is a * b + c
+                    xd = FS::FMulAdd( xd, invMag, xcf ); // hash direction scaled to jitter length, plus the cell offset: feature point relative to the sample
+                    yd = FS::FMulAdd( yd, invMag, ycf );
+                    zd = FS::FMulAdd( zd, invMag, zcf );
+
+                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
+
+                    for( int i = kMaxDistanceCount - 1; i > 0; i-- ) // walk from the last slot down so distance[i - 1] is still its pre-update value
+                    {
+                        distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] ); // branch-free sorted insert: slot keeps itself, takes newDistance, or inherits the slot below
+                    }
+
+                    distance[0] = FS::Min( distance[0], newDistance ); // slot 0 is simply the running minimum
+
+                    zcf += float32v( 1 ); // next cell along z: offset grows by one, hashed coordinate by one prime
+                    zc += int32v( Primes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+
+        return GetReturn( distance ); // combine the selected slots according to mReturnType
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final // 4D overload: scans the 3x3x3x3 cells around (x, y, z, w) and returns GetReturn( distance )
+    {
+        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w ); // jitter scale: kJitter4D times the mJitterModifier value at this position
+
+        std::array<float32v, kMaxDistanceCount> distance; // the kMaxDistanceCount smallest distances seen so far, kept in ascending order
+        distance.fill( float32v( INFINITY ) ); // start at +inf so any computed distance can displace a slot
+
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 ); // first cell of the 3-wide window on each axis: converted coordinate minus one
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // offset from the sample position to that first cell, per axis
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
+
+        xc *= int32v( Primes::X ); // pre-multiply by the axis prime so moving to the next cell is a single add below
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+        wcBase *= int32v( Primes::W );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // each inner axis restarts from its base whenever an outer axis advances
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase;
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    float32v wcf = wcfBase;
+                    int32v wc = wcBase;
+                    for( int wi = 0; wi < 3; wi++ )
+                    {
+                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc ); // one hash per cell; its bits are split into four 8-bit fields below
+                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // bits 0-7, centred on zero (half-integer, so never exactly 0)
+                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // bits 8-15
+                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // bits 16-23
+                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // bits 24-31; the mask also drops whatever the shift brings in from the top
+
+                        float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) ); // jitter over the length of (xd, yd, zd, wd), assuming FMulAdd( a, b, c ) is a * b + c
+                        xd = FS::FMulAdd( xd, invMag, xcf ); // hash direction scaled to jitter length, plus the cell offset: feature point relative to the sample
+                        yd = FS::FMulAdd( yd, invMag, ycf );
+                        zd = FS::FMulAdd( zd, invMag, zcf );
+                        wd = FS::FMulAdd( wd, invMag, wcf );
+
+                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+
+                        for( int i = kMaxDistanceCount - 1; i > 0; i-- ) // walk from the last slot down so distance[i - 1] is still its pre-update value
+                        {
+                            distance[i] = FS::Max( FS::Min( distance[i], newDistance ), distance[i - 1] ); // branch-free sorted insert: slot keeps itself, takes newDistance, or inherits the slot below
+                        }
+
+                        distance[0] = FS::Min( distance[0], newDistance ); // slot 0 is simply the running minimum
+
+                        wcf += float32v( 1 ); // next cell along w: offset grows by one, hashed coordinate by one prime
+                        wc += int32v( Primes::W );
+                    }
+                    zcf += float32v( 1 );
+                    zc += int32v( Primes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+
+        return GetReturn( distance ); // combine the selected slots according to mReturnType
+    }
+
+    FS_FORCEINLINE float32v GetReturn( std::array<float32v, kMaxDistanceCount>& distance ) const // turns the sorted distance slots into the node output; modifies the two selected slots of `distance` in place
+    {
+        if( mDistanceFunction == FastNoise::DistanceFunction::Euclidean ) // only this mode is post-processed: d * InvSqrt( d ), i.e. sqrt( d ) if InvSqrt is a reciprocal square root
+        {
+            distance[mDistanceIndex0] *= FS::InvSqrt( distance[mDistanceIndex0] );
+            if( mDistanceIndex1 != mDistanceIndex0 ) distance[mDistanceIndex1] *= FS::InvSqrt( distance[mDistanceIndex1] ); // when both indices name the same slot it has already been converted; doing it again would apply the conversion twice
+        }
+
+        switch( mReturnType ) // every case combines the slot at mDistanceIndex0 with the slot at mDistanceIndex1
+        {
+        default: // unrecognised values behave like Index0
+        case ReturnType::Index0:
+        {
+            return distance[mDistanceIndex0];
+        }
+        case ReturnType::Index0Add1:
+        {
+            return distance[mDistanceIndex0] + distance[mDistanceIndex1];
+        }
+        case ReturnType::Index0Sub1:
+        {
+            return distance[mDistanceIndex0] - distance[mDistanceIndex1];
+        }
+        case ReturnType::Index0Mul1:
+        {
+            return distance[mDistanceIndex0] * distance[mDistanceIndex1];
+        }
+        case ReturnType::Index0Div1:
+        {
+            return distance[mDistanceIndex0] * FS::Reciprocal( distance[mDistanceIndex1] ); // division written as multiplication by FS::Reciprocal
+        }
+        }
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> : public virtual FastNoise::CellularLookup, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD> // cellular variant that finds the closest feature point and returns the mLookup source sampled at that point
+{
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final // 2D overload: scans the 3x3 cells around (x, y)
+    {
+        float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y ); // jitter scale: kJitter2D times the mJitterModifier value at this position
+        float32v distance( FLT_MAX ); // smallest distance found so far
+        float32v cellX, cellY; // position of the closest feature point so far. NOTE(review): default-constructed and first read by FS::Select below - confirm that is benign for float32v
+
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 ); // first cell of the 3-wide window on each axis: converted coordinate minus one
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // offset from the sample position to that first cell, per axis
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+
+        xc *= int32v( Primes::X ); // pre-multiply by the axis prime so moving to the next cell is a single add below
+        ycBase *= int32v( Primes::Y );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // restart the y walk for every x step
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                int32v hash = HashPrimesHB( seed, xc, yc ); // one hash per cell; split into two 16-bit fields below
+                float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f ); // bits 0-15, centred on zero (half-integer, so never exactly 0)
+                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f ); // bits 16-31; the mask also drops whatever the shift brings in from the top
+
+                float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, yd * yd ) ); // jitter over the length of (xd, yd), assuming FMulAdd( a, b, c ) is a * b + c
+                xd = FS::FMulAdd( xd, invMag, xcf ); // hash direction scaled to jitter length, plus the cell offset: feature point relative to the sample
+                yd = FS::FMulAdd( yd, invMag, ycf );
+
+                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
+
+                mask32v closer = newDistance < distance; // lanes where this cell's point beats the best so far (strict, so ties keep the earlier cell)
+                distance = FS::Min( newDistance, distance );
+
+                cellX = FS::Select( closer, xd + x, cellX ); // xd is relative to the sample, so xd + x is the feature point in input coordinates
+                cellY = FS::Select( closer, yd + y, cellY );
+
+                ycf += float32v( 1 ); // next cell along y: offset grows by one, hashed coordinate by one prime
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ) ); // sample mLookup at the closest point scaled by mLookupFreq; seed - (-1) is seed + 1
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final // 3D overload: scans the 3x3x3 cells around (x, y, z)
+    {
+        float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z ); // jitter scale: kJitter3D times the mJitterModifier value at this position
+        float32v distance( FLT_MAX ); // smallest distance found so far
+        float32v cellX, cellY, cellZ; // closest feature point so far. NOTE(review): default-constructed and first read by FS::Select below - confirm that is benign for float32v
+
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 ); // first cell of the 3-wide window on each axis: converted coordinate minus one
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // offset from the sample position to that first cell, per axis
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+
+        xc *= int32v( Primes::X ); // pre-multiply by the axis prime so moving to the next cell is a single add below
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // each inner axis restarts from its base whenever an outer axis advances
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase;
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    int32v hash = HashPrimesHB( seed, xc, yc, zc ); // one hash per cell; split into three 10-bit fields below
+                    float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // bits 0-9, centred on zero
+                    float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // bits 10-19
+                    float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f ); // bits 20-29
+
+                    float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) ); // jitter over the length of (xd, yd, zd)
+                    xd = FS::FMulAdd( xd, invMag, xcf ); // feature point relative to the sample
+                    yd = FS::FMulAdd( yd, invMag, ycf );
+                    zd = FS::FMulAdd( zd, invMag, zcf );
+
+                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
+
+                    mask32v closer = newDistance < distance; // lanes where this cell's point beats the best so far
+                    distance = FS::Min( newDistance, distance );
+
+                    cellX = FS::Select( closer, xd + x, cellX ); // adding the sample position back gives the feature point in input coordinates
+                    cellY = FS::Select( closer, yd + y, cellY );
+                    cellZ = FS::Select( closer, zd + z, cellZ );
+
+                    zcf += float32v( 1 ); // next cell along z
+                    zc += int32v( Primes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ) ); // sample mLookup at the closest point scaled by mLookupFreq; seed - (-1) is seed + 1
+    }
+
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final // 4D overload: scans the 3x3x3x3 cells around (x, y, z, w)
+    {
+        float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w ); // jitter scale: kJitter4D times the mJitterModifier value at this position
+        float32v distance( FLT_MAX ); // smallest distance found so far
+        float32v cellX, cellY, cellZ, cellW; // closest feature point so far. NOTE(review): default-constructed and first read by FS::Select below - confirm that is benign for float32v
+
+        int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 ); // first cell of the 3-wide window on each axis: converted coordinate minus one
+        int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
+        int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
+        int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
+
+        float32v xcf = FS::Convert<float>( xc ) - x; // offset from the sample position to that first cell, per axis
+        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
+
+        xc *= int32v( Primes::X ); // pre-multiply by the axis prime so moving to the next cell is a single add below
+        ycBase *= int32v( Primes::Y );
+        zcBase *= int32v( Primes::Z );
+        wcBase *= int32v( Primes::W );
+
+        for( int xi = 0; xi < 3; xi++ )
+        {
+            float32v ycf = ycfBase; // each inner axis restarts from its base whenever an outer axis advances
+            int32v yc = ycBase;
+            for( int yi = 0; yi < 3; yi++ )
+            {
+                float32v zcf = zcfBase;
+                int32v zc = zcBase;
+                for( int zi = 0; zi < 3; zi++ )
+                {
+                    float32v wcf = wcfBase;
+                    int32v wc = wcBase;
+                    for( int wi = 0; wi < 3; wi++ )
+                    {
+                        int32v hash = HashPrimesHB( seed, xc, yc, zc, wc ); // one hash per cell; split into four 8-bit fields below
+                        float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // bits 0-7, centred on zero
+                        float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // bits 8-15
+                        float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // bits 16-23
+                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f ); // bits 24-31
+
+                        float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) ); // jitter over the length of (xd, yd, zd, wd)
+                        xd = FS::FMulAdd( xd, invMag, xcf ); // feature point relative to the sample
+                        yd = FS::FMulAdd( yd, invMag, ycf );
+                        zd = FS::FMulAdd( zd, invMag, zcf );
+                        wd = FS::FMulAdd( wd, invMag, wcf );
+
+                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+
+                        mask32v closer = newDistance < distance; // lanes where this cell's point beats the best so far
+                        distance = FS::Min( newDistance, distance );
+
+                        cellX = FS::Select( closer, xd + x, cellX ); // adding the sample position back gives the feature point in input coordinates
+                        cellY = FS::Select( closer, yd + y, cellY );
+                        cellZ = FS::Select( closer, zd + z, cellZ );
+                        cellW = FS::Select( closer, wd + w, cellW );
+
+                        wcf += float32v( 1 ); // next cell along w
+                        wc += int32v( Primes::W );
+                    }
+                    zcf += float32v( 1 );
+                    zc += int32v( Primes::Z );
+                }
+                ycf += float32v( 1 );
+                yc += int32v( Primes::Y );
+            }
+            xcf += float32v( 1 );
+            xc += int32v( Primes::X );
+        }
+
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ), cellW * float32v( mLookupFreq ) ); // sample mLookup at the closest point scaled by mLookupFreq; seed - (-1) is seed + 1
+    }
+};
diff --git a/include/FastNoise/Generators/DomainWarp.inl b/include/FastNoise/Generators/DomainWarp.inl
index 5bbe984b..d442330d 100644
--- a/include/FastNoise/Generators/DomainWarp.inl
+++ b/include/FastNoise/Generators/DomainWarp.inl
@@ -1,200 +1,200 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "DomainWarp.h"
-#include "Utils.inl"
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::DomainWarp, FS> : public virtual FastNoise::DomainWarp, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        Warp( seed, this->GetSourceValue( mWarpAmplitude, seed, pos... ), (pos * float32v( mWarpFrequency ))..., pos... );
-
-        return this->GetSourceValue( mSource, seed, pos...);
-    }
-
-public:
-    float GetWarpFrequency() const { return mWarpFrequency; }
-    const FastNoise::HybridSource& GetWarpAmplitude() const { return mWarpAmplitude; }
-    const FastNoise::GeneratorSource& GetWarpSource() const { return mSource; }
-
-    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const = 0;
-    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const = 0;
-    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const = 0;
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, FS> : public virtual FastNoise::DomainWarpGradient, public FastSIMD::DispatchClass<FastNoise::DomainWarp, FS>
-{public:
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
-    {
-        float32v xs = FS::Floor( x );
-        float32v ys = FS::Floor( y );
-
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
-        int32v x1 = x0 + int32v( Primes::X );
-        int32v y1 = y0 + int32v( Primes::Y );
-
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
-
-    #define GRADIENT_COORD( _x, _y )\
-        int32v hash##_x##_y = HashPrimesHB(seed, x##_x, y##_y );\
-        float32v x##_x##_y = FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) );\
-        float32v y##_x##_y = FS::Convert<float>( (hash##_x##_y >> 16) & int32v( 0xffff ) );
-
-        GRADIENT_COORD( 0, 0 );
-        GRADIENT_COORD( 1, 0 );
-        GRADIENT_COORD( 0, 1 );
-        GRADIENT_COORD( 1, 1 );
-
-    #undef GRADIENT_COORD
-
-        float32v normalise = float32v( 1.0f / (0xffff / 2.0f) );
-
-        float32v xWarp = (Lerp( Lerp( x00, x10, xs ), Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( Lerp( y00, y10, xs ), Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
-
-        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
-        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
-
-        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, yWarp * yWarp );
-
-        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
-    }
-            
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
-    {
-        float32v xs = FS::Floor( x );
-        float32v ys = FS::Floor( y );
-        float32v zs = FS::Floor( z );
-
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
-        int32v x1 = x0 + int32v( Primes::X );
-        int32v y1 = y0 + int32v( Primes::Y );
-        int32v z1 = z0 + int32v( Primes::Z );
-
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
-        zs = InterpHermite( z - zs );
-
-    #define GRADIENT_COORD( _x, _y, _z )\
-        int32v hash##_x##_y##_z = HashPrimesHB( seed, x##_x, y##_y, z##_z );\
-        float32v x##_x##_y##_z = FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) );\
-        float32v y##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
-        float32v z##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
-
-        GRADIENT_COORD( 0, 0, 0 );
-        GRADIENT_COORD( 1, 0, 0 );
-        GRADIENT_COORD( 0, 1, 0 );
-        GRADIENT_COORD( 1, 1, 0 );
-        GRADIENT_COORD( 0, 0, 1 );
-        GRADIENT_COORD( 1, 0, 1 );
-        GRADIENT_COORD( 0, 1, 1 );
-        GRADIENT_COORD( 1, 1, 1 );
-
-    #undef GRADIENT_COORD
-
-        float32v x0z = Lerp( Lerp( x000, x100, xs ), Lerp( x010, x110, xs ), ys );
-        float32v y0z = Lerp( Lerp( y000, y100, xs ), Lerp( y010, y110, xs ), ys );
-        float32v z0z = Lerp( Lerp( z000, z100, xs ), Lerp( z010, z110, xs ), ys );
-                   
-        float32v x1z = Lerp( Lerp( x001, x101, xs ), Lerp( x011, x111, xs ), ys );
-        float32v y1z = Lerp( Lerp( y001, y101, xs ), Lerp( y011, y111, xs ), ys );
-        float32v z1z = Lerp( Lerp( z001, z101, xs ), Lerp( z011, z111, xs ), ys );
-
-        float32v normalise = float32v( 1.0f / (0x3ff / 2.0f) );
-
-        float32v xWarp = (Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v zWarp = (Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-
-        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
-        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
-        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
-
-        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, zWarp * zWarp ) );
-
-        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
-    }
-            
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
-    {
-        float32v xs = FS::Floor( x );
-        float32v ys = FS::Floor( y );
-        float32v zs = FS::Floor( z );
-        float32v ws = FS::Floor( w );
-
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
-        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( Primes::W );
-        int32v x1 = x0 + int32v( Primes::X );
-        int32v y1 = y0 + int32v( Primes::Y );
-        int32v z1 = z0 + int32v( Primes::Z );
-        int32v w1 = w0 + int32v( Primes::W );
-
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
-        zs = InterpHermite( z - zs );
-        ws = InterpHermite( w - ws );
-
-    #define GRADIENT_COORD( _x, _y, _z, _w )\
-        int32v hash##_x##_y##_z##_w = HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
-        float32v x##_x##_y##_z##_w = FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) );\
-        float32v y##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
-        float32v z##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
-        float32v w##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
-
-        GRADIENT_COORD( 0, 0, 0, 0 );
-        GRADIENT_COORD( 1, 0, 0, 0 );
-        GRADIENT_COORD( 0, 1, 0, 0 );
-        GRADIENT_COORD( 1, 1, 0, 0 );
-        GRADIENT_COORD( 0, 0, 1, 0 );
-        GRADIENT_COORD( 1, 0, 1, 0 );
-        GRADIENT_COORD( 0, 1, 1, 0 );
-        GRADIENT_COORD( 1, 1, 1, 0 );
-        GRADIENT_COORD( 0, 0, 0, 1 );
-        GRADIENT_COORD( 1, 0, 0, 1 );
-        GRADIENT_COORD( 0, 1, 0, 1 );
-        GRADIENT_COORD( 1, 1, 0, 1 );
-        GRADIENT_COORD( 0, 0, 1, 1 );
-        GRADIENT_COORD( 1, 0, 1, 1 );
-        GRADIENT_COORD( 0, 1, 1, 1 );
-        GRADIENT_COORD( 1, 1, 1, 1 );
-
-    #undef GRADIENT_COORD
-
-        float32v x0w = Lerp( Lerp( Lerp( x0000, x1000, xs ), Lerp( x0100, x1100, xs ), ys ), Lerp( Lerp( x0010, x1010, xs ), Lerp( x0110, x1110, xs ), ys ), zs );
-        float32v y0w = Lerp( Lerp( Lerp( y0000, y1000, xs ), Lerp( y0100, y1100, xs ), ys ), Lerp( Lerp( y0010, y1010, xs ), Lerp( y0110, y1110, xs ), ys ), zs );
-        float32v z0w = Lerp( Lerp( Lerp( z0000, z1000, xs ), Lerp( z0100, z1100, xs ), ys ), Lerp( Lerp( z0010, z1010, xs ), Lerp( z0110, z1110, xs ), ys ), zs );
-        float32v w0w = Lerp( Lerp( Lerp( w0000, w1000, xs ), Lerp( w0100, w1100, xs ), ys ), Lerp( Lerp( w0010, w1010, xs ), Lerp( w0110, w1110, xs ), ys ), zs );
-
-        float32v x1w = Lerp( Lerp( Lerp( x0001, x1001, xs ), Lerp( x0101, x1101, xs ), ys ), Lerp( Lerp( x0011, x1011, xs ), Lerp( x0111, x1111, xs ), ys ), zs );
-        float32v y1w = Lerp( Lerp( Lerp( y0001, y1001, xs ), Lerp( y0101, y1101, xs ), ys ), Lerp( Lerp( y0011, y1011, xs ), Lerp( y0111, y1111, xs ), ys ), zs );
-        float32v z1w = Lerp( Lerp( Lerp( z0001, z1001, xs ), Lerp( z0101, z1101, xs ), ys ), Lerp( Lerp( z0011, z1011, xs ), Lerp( z0111, z1111, xs ), ys ), zs );
-        float32v w1w = Lerp( Lerp( Lerp( w0001, w1001, xs ), Lerp( w0101, w1101, xs ), ys ), Lerp( Lerp( w0011, w1011, xs ), Lerp( w0111, w1111, xs ), ys ), zs );                        
-
-        float32v normalise = float32v( 1.0f / (0xff / 2.0f) );
-
-        float32v xWarp = (Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v zWarp = (Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v wWarp = (Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-
-        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
-        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
-        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
-        wOut = FS::FMulAdd( wWarp, warpAmp, wOut );
-
-        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, FS::FMulAdd( zWarp, zWarp, wWarp * wWarp ) ) );
-
-        return warpLengthSq * FS_InvSqrt_f32( warpLengthSq );
-    }
-};
-
+#include "DomainWarp.h"
+#include "Utils.inl"
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD> : public virtual FastNoise::DomainWarp, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD> // shared base for domain warp nodes: displaces the position via Warp(), then samples mSource there
+{
+    FASTNOISE_IMPL_GEN_T; // macro defined elsewhere; presumably supplies the Gen overloads that forward to GenT - TODO confirm
+
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const // dimension-generic body: pos is one float32v per axis, taken by value so it can be displaced locally
+    {
+        Warp( seed, this->GetSourceValue( mWarpAmplitude, seed, pos... ), (pos * float32v( mWarpFrequency ))..., pos... ); // frequency-scaled copies are the inputs; pos... bind to Warp's reference out-parameters and are displaced in place; the returned value is not used here
+
+        return this->GetSourceValue( mSource, seed, pos...); // sample the source at the displaced position
+    }
+
+public:
+    float GetWarpFrequency() const { return mWarpFrequency; } // read-only accessors for the warp settings
+    const FastNoise::HybridSource& GetWarpAmplitude() const { return mWarpAmplitude; }
+    const FastNoise::GeneratorSource& GetWarpSource() const { return mSource; }
+
+    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const = 0; // implemented by concrete warps, one overload per dimension; the *Out references are in/out
+    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const = 0;
+    virtual float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const = 0;
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, SIMD> : public virtual FastNoise::DomainWarpGradient, public FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD> // warp whose displacement is interpolated from hashed vectors at the surrounding lattice corners
+{
+public:
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final // 2D: adds warpAmp * displacement to xOut/yOut and returns the displacement length before the warpAmp scale
+    {
+        float32v xs = FS::Floor( x ); // lattice cell containing the point
+        float32v ys = FS::Floor( y );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X ); // hashed coordinates of the lower corner ...
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v x1 = x0 + int32v( Primes::X ); // ... and of the upper corner, one prime further along each axis
+        int32v y1 = y0 + int32v( Primes::Y );
+
+        xs = InterpHermite( x - xs ); // xs/ys are reused as interpolation weights: fractional part passed through InterpHermite
+        ys = InterpHermite( y - ys );
+
+    #define GRADIENT_COORD( _x, _y )\
+        int32v hash##_x##_y = HashPrimesHB(seed, x##_x, y##_y );\
+        float32v x##_x##_y = FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) );\
+        float32v y##_x##_y = FS::Convert<float>( (hash##_x##_y >> 16) & int32v( 0xffff ) );
+
+        GRADIENT_COORD( 0, 0 ); // declares hash00, x00, y00: the hash of corner (x0, y0) split into two 16-bit values
+        GRADIENT_COORD( 1, 0 );
+        GRADIENT_COORD( 0, 1 );
+        GRADIENT_COORD( 1, 1 );
+
+    #undef GRADIENT_COORD
+
+        float32v normalise = float32v( 1.0f / (0xffff / 2.0f) ); // scales a centred 16-bit value to about [-1, 1]
+
+        float32v xWarp = (Lerp( Lerp( x00, x10, xs ), Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise; // bilinear blend of the four corner values, then centre and scale
+        float32v yWarp = (Lerp( Lerp( y00, y10, xs ), Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut ); // accumulate onto the caller's value: xOut += xWarp * warpAmp (assuming FMulAdd( a, b, c ) is a * b + c)
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, yWarp * yWarp );
+
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ); // l2 * InvSqrt( l2 ): the displacement length if InvSqrt is a reciprocal square root
+    }
+            
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final // 3D: same scheme with eight corners and 10-bit components
+    {
+        float32v xs = FS::Floor( x ); // lattice cell containing the point
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X ); // hashed coordinates of the lower corner ...
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v x1 = x0 + int32v( Primes::X ); // ... and of the upper corner
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
+
+        xs = InterpHermite( x - xs ); // reused as interpolation weights
+        ys = InterpHermite( y - ys );
+        zs = InterpHermite( z - zs );
+
+    #define GRADIENT_COORD( _x, _y, _z )\
+        int32v hash##_x##_y##_z = HashPrimesHB( seed, x##_x, y##_y, z##_z );\
+        float32v x##_x##_y##_z = FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) );\
+        float32v y##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
+        float32v z##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
+
+        GRADIENT_COORD( 0, 0, 0 ); // declares hash000, x000, y000, z000: the hash of corner (x0, y0, z0) split into three 10-bit values
+        GRADIENT_COORD( 1, 0, 0 );
+        GRADIENT_COORD( 0, 1, 0 );
+        GRADIENT_COORD( 1, 1, 0 );
+        GRADIENT_COORD( 0, 0, 1 );
+        GRADIENT_COORD( 1, 0, 1 );
+        GRADIENT_COORD( 0, 1, 1 );
+        GRADIENT_COORD( 1, 1, 1 );
+
+    #undef GRADIENT_COORD
+
+        float32v x0z = Lerp( Lerp( x000, x100, xs ), Lerp( x010, x110, xs ), ys ); // bilinear blend over x and y on the z0 face
+        float32v y0z = Lerp( Lerp( y000, y100, xs ), Lerp( y010, y110, xs ), ys );
+        float32v z0z = Lerp( Lerp( z000, z100, xs ), Lerp( z010, z110, xs ), ys );
+                   
+        float32v x1z = Lerp( Lerp( x001, x101, xs ), Lerp( x011, x111, xs ), ys ); // the same blend on the z1 face
+        float32v y1z = Lerp( Lerp( y001, y101, xs ), Lerp( y011, y111, xs ), ys );
+        float32v z1z = Lerp( Lerp( z001, z101, xs ), Lerp( z011, z111, xs ), ys );
+
+        float32v normalise = float32v( 1.0f / (0x3ff / 2.0f) ); // scales a centred 10-bit value to about [-1, 1]
+
+        float32v xWarp = (Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise; // final blend along z, then centre and scale
+        float32v yWarp = (Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        float32v zWarp = (Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut ); // accumulate onto the caller's values
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
+
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, zWarp * zWarp ) );
+
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ); // displacement length before the warpAmp scale
+    }
+            
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final // 4D: same scheme with sixteen corners and 8-bit components
+    {
+        float32v xs = FS::Floor( x ); // lattice cell containing the point
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
+        float32v ws = FS::Floor( w );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X ); // hashed coordinates of the lower corner ...
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( Primes::W );
+        int32v x1 = x0 + int32v( Primes::X ); // ... and of the upper corner
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
+        int32v w1 = w0 + int32v( Primes::W );
+
+        xs = InterpHermite( x - xs ); // reused as interpolation weights
+        ys = InterpHermite( y - ys );
+        zs = InterpHermite( z - zs );
+        ws = InterpHermite( w - ws );
+
+    #define GRADIENT_COORD( _x, _y, _z, _w )\
+        int32v hash##_x##_y##_z##_w = HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
+        float32v x##_x##_y##_z##_w = FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) );\
+        float32v y##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
+        float32v z##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
+        float32v w##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
+
+        GRADIENT_COORD( 0, 0, 0, 0 ); // declares hash0000, x0000, y0000, z0000, w0000: the hash of corner (x0, y0, z0, w0) split into four 8-bit values
+        GRADIENT_COORD( 1, 0, 0, 0 );
+        GRADIENT_COORD( 0, 1, 0, 0 );
+        GRADIENT_COORD( 1, 1, 0, 0 );
+        GRADIENT_COORD( 0, 0, 1, 0 );
+        GRADIENT_COORD( 1, 0, 1, 0 );
+        GRADIENT_COORD( 0, 1, 1, 0 );
+        GRADIENT_COORD( 1, 1, 1, 0 );
+        GRADIENT_COORD( 0, 0, 0, 1 );
+        GRADIENT_COORD( 1, 0, 0, 1 );
+        GRADIENT_COORD( 0, 1, 0, 1 );
+        GRADIENT_COORD( 1, 1, 0, 1 );
+        GRADIENT_COORD( 0, 0, 1, 1 );
+        GRADIENT_COORD( 1, 0, 1, 1 );
+        GRADIENT_COORD( 0, 1, 1, 1 );
+        GRADIENT_COORD( 1, 1, 1, 1 );
+
+    #undef GRADIENT_COORD
+
+        float32v x0w = Lerp( Lerp( Lerp( x0000, x1000, xs ), Lerp( x0100, x1100, xs ), ys ), Lerp( Lerp( x0010, x1010, xs ), Lerp( x0110, x1110, xs ), ys ), zs ); // trilinear blend over x, y, z of the eight w0 corners
+        float32v y0w = Lerp( Lerp( Lerp( y0000, y1000, xs ), Lerp( y0100, y1100, xs ), ys ), Lerp( Lerp( y0010, y1010, xs ), Lerp( y0110, y1110, xs ), ys ), zs );
+        float32v z0w = Lerp( Lerp( Lerp( z0000, z1000, xs ), Lerp( z0100, z1100, xs ), ys ), Lerp( Lerp( z0010, z1010, xs ), Lerp( z0110, z1110, xs ), ys ), zs );
+        float32v w0w = Lerp( Lerp( Lerp( w0000, w1000, xs ), Lerp( w0100, w1100, xs ), ys ), Lerp( Lerp( w0010, w1010, xs ), Lerp( w0110, w1110, xs ), ys ), zs );
+
+        float32v x1w = Lerp( Lerp( Lerp( x0001, x1001, xs ), Lerp( x0101, x1101, xs ), ys ), Lerp( Lerp( x0011, x1011, xs ), Lerp( x0111, x1111, xs ), ys ), zs ); // the same blend over the eight w1 corners
+        float32v y1w = Lerp( Lerp( Lerp( y0001, y1001, xs ), Lerp( y0101, y1101, xs ), ys ), Lerp( Lerp( y0011, y1011, xs ), Lerp( y0111, y1111, xs ), ys ), zs );
+        float32v z1w = Lerp( Lerp( Lerp( z0001, z1001, xs ), Lerp( z0101, z1101, xs ), ys ), Lerp( Lerp( z0011, z1011, xs ), Lerp( z0111, z1111, xs ), ys ), zs );
+        float32v w1w = Lerp( Lerp( Lerp( w0001, w1001, xs ), Lerp( w0101, w1101, xs ), ys ), Lerp( Lerp( w0011, w1011, xs ), Lerp( w0111, w1111, xs ), ys ), zs );                        
+
+        float32v normalise = float32v( 1.0f / (0xff / 2.0f) ); // scales a centred 8-bit value to about [-1, 1]
+
+        float32v xWarp = (Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise; // final blend along w, then centre and scale
+        float32v yWarp = (Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v zWarp = (Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v wWarp = (Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut ); // accumulate onto the caller's values
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
+        wOut = FS::FMulAdd( wWarp, warpAmp, wOut );
+
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, FS::FMulAdd( zWarp, zWarp, wWarp * wWarp ) ) );
+
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ); // displacement length before the warpAmp scale
+    }
+};
+
diff --git a/include/FastNoise/Generators/DomainWarpFractal.inl b/include/FastNoise/Generators/DomainWarpFractal.inl
index f6cc8f26..5765527b 100644
--- a/include/FastNoise/Generators/DomainWarpFractal.inl
+++ b/include/FastNoise/Generators/DomainWarpFractal.inl
@@ -1,71 +1,71 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "DomainWarpFractal.h"
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalProgressive, FS> : public virtual FastNoise::DomainWarpFractalProgressive, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
-{    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        auto* warp = this->GetSourceSIMD( mSource );
-
-        float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, pos... );
-        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
-        float32v freq = float32v( warp->GetWarpFrequency() );
-        int32v seedInc = seed;
-
-        float32v gain = this->GetSourceValue( mGain, seed, pos... );
-        float32v lacunarity( mLacunarity );
-
-        float32v strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
-
-        for (int i = 1; i < mOctaves; i++)
-        {
-            seedInc -= int32v( -1 );
-            freq *= lacunarity;
-            amp *= Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
-            amp *= gain;
-            strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
-        }
-
-        return this->GetSourceValue( warp->GetWarpSource(), seed, pos... );
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalIndependant, FS> : public virtual FastNoise::DomainWarpFractalIndependant, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
-{    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        return [this, seed] ( std::remove_reference_t<P>... noisePos, std::remove_reference_t<P>... warpPos )
-        {
-            auto* warp = this->GetSourceSIMD( mSource );
-
-            float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, noisePos... );
-            float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, noisePos... );
-            float32v freq = float32v( warp->GetWarpFrequency() );
-            int32v seedInc = seed;
-
-            float32v gain = this->GetSourceValue( mGain, seed, noisePos... );
-            float32v lacunarity( mLacunarity );
-        
-            float32v strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
-    
-            for( int i = 1; i < mOctaves; i++ )
-            {
-                seedInc -= int32v( -1 );
-                freq *= lacunarity;
-                amp *= Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
-                amp *= gain;
-                strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
-            }
-    
-            return this->GetSourceValue( warp->GetWarpSource(), seed, warpPos... );
-
-        } ( pos..., pos... );
-    }
-};
+#include "DomainWarpFractal.h"
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalProgressive, SIMD> : public virtual FastNoise::DomainWarpFractalProgressive, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    // GenT: runs mOctaves Warp() passes over pos..., then returns the warp's own source sampled with pos...
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        auto* warp = this->GetSourceSIMD( mSource );
+
+        float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, pos... );
+        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
+        float32v freq = float32v( warp->GetWarpFrequency() );
+        int32v seedInc = seed; // per-octave seed; `seed` itself is kept for the source lookups
+
+        float32v gain = this->GetSourceValue( mGain, seed, pos... );
+        float32v lacunarity( mLacunarity );
+        // First octave. pos... is passed twice: scaled by freq, and unscaled (presumably the position Warp offsets in place - confirm against DomainWarp).
+        float32v strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
+        // Remaining octaves: freq is multiplied by lacunarity; amp by gain and by a blend driven by the previous octave's strength.
+        for (int i = 1; i < mOctaves; i++)
+        {
+            seedInc -= int32v( -1 ); // same as seedInc += 1
+            freq *= lacunarity;
+            amp *= Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
+            amp *= gain;
+            strength = warp->Warp( seedInc, amp, (pos * freq)..., pos... );
+        }
+
+        return this->GetSourceValue( warp->GetWarpSource(), seed, pos... );
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalIndependant, SIMD> : public virtual FastNoise::DomainWarpFractalIndependant, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    // GenT: calls the lambda below with pos... twice, giving it two separate parameter packs: noisePos... and warpPos...
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        return [this, seed] ( std::remove_reference_t<P>... noisePos, std::remove_reference_t<P>... warpPos )
+        {
+            auto* warp = this->GetSourceSIMD( mSource );
+
+            float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, noisePos... );
+            float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, noisePos... );
+            float32v freq = float32v( warp->GetWarpFrequency() );
+            int32v seedInc = seed; // per-octave seed; `seed` itself is kept for the source lookups
+
+            float32v gain = this->GetSourceValue( mGain, seed, noisePos... );
+            float32v lacunarity( mLacunarity );
+            // First octave: noise is sampled at noisePos * freq while warpPos... is passed separately (presumably the position Warp offsets - confirm against DomainWarp).
+            float32v strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
+            // Remaining octaves reuse the same noisePos..., with freq multiplied by lacunarity each pass.
+            for( int i = 1; i < mOctaves; i++ )
+            {
+                seedInc -= int32v( -1 ); // same as seedInc += 1
+                freq *= lacunarity;
+                amp *= Lerp( float32v( 1 ), float32v( 1 ) - strength, weightedStrength );
+                amp *= gain;
+                strength = warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
+            }
+            // The source is sampled with warpPos...; noisePos... only feeds Warp and the amplitude/weight/gain lookups.
+            return this->GetSourceValue( warp->GetWarpSource(), seed, warpPos... );
+
+        } ( pos..., pos... );
+    }
+};
diff --git a/include/FastNoise/Generators/Fractal.inl b/include/FastNoise/Generators/Fractal.inl
index 4a1b7d49..b16ae23c 100644
--- a/include/FastNoise/Generators/Fractal.inl
+++ b/include/FastNoise/Generators/Fractal.inl
@@ -1,103 +1,104 @@
-#include "FastSIMD/InlInclude.h"
-
-#include "Fractal.h"
-
-template<typename FS, typename T>
-class FastSIMD::DispatchClass<FastNoise::Fractal<T>, FS> : public virtual FastNoise::Fractal<T>, public FastSIMD::DispatchClass<FastNoise::Generator, FS>
-{
-
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::FractalFBm, FS> : public virtual FastNoise::FractalFBm, public FastSIMD::DispatchClass<FastNoise::Fractal<>, FS>
-{    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
-        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
-        float32v lacunarity( mLacunarity );
-        float32v amp( mFractalBounding );
-        float32v noise = this->GetSourceValue( mSource, seed, pos... );
-
-        float32v sum = noise * amp;
-
-        for( int i = 1; i < mOctaves; i++ )
-        {
-            seed -= int32v( -1 );
-            amp *= Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
-            amp *= gain;
-
-            noise = this->GetSourceValue( mSource, seed, (pos *= lacunarity)... );
-            sum += noise * amp;
-        }
-
-        return sum;
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::FractalRidged, FS> : public virtual FastNoise::FractalRidged, public FastSIMD::DispatchClass<FastNoise::Fractal<>, FS>
-{    FASTNOISE_IMPL_GEN_T;
-
-    template<typename... P>
-    FS_FORCEINLINE float32v GenT(int32v seed, P... pos) const
-    {
-        float32v gain = this->GetSourceValue( mGain, seed, pos... );
-        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
-        float32v lacunarity( mLacunarity );
-        float32v amp( mFractalBounding );
-        float32v noise = FS::Abs( this->GetSourceValue( mSource, seed, pos... ) );
-
-        float32v sum = (noise * float32v( -2 ) + float32v( 1 )) * amp;
-
-        for( int i = 1; i < mOctaves; i++ )
-        {
-            seed -= int32v( -1 );
-            amp *= Lerp( float32v( 1 ), float32v( 1 ) - noise, weightedStrength );
-            amp *= gain;
-
-            noise = FS::Abs( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) );
-            sum += (noise * float32v( -2 ) + float32v( 1 )) * amp;
-        }
-
-        return sum;
-    }
-};
-
-template<typename FS>
-class FastSIMD::DispatchClass<FastNoise::FractalPingPong, FS> : public virtual FastNoise::FractalPingPong, public FastSIMD::DispatchClass<FastNoise::Fractal<>, FS>
-{    FASTNOISE_IMPL_GEN_T;
-
-    static float32v PingPong( float32v t )
-    {
-        t -= FS::Round( t * float32v( 0.5f ) ) * float32v( 2 );
-        return FS::Select( t < float32v( 1 ), t, float32v( 2 ) - t );
-    }
-
-    template<typename... P>
-    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
-    {
-        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
-        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
-        float32v pingPongStrength = this->GetSourceValue( mPingPongStrength, seed, pos... );
-        float32v lacunarity( mLacunarity );
-        float32v amp( mFractalBounding );
-        float32v noise = PingPong( (this->GetSourceValue( mSource, seed, pos... ) + float32v( 1 )) * pingPongStrength );
-
-        float32v sum = noise * amp;
-
-        for( int i = 1; i < mOctaves; i++ )
-        {
-            seed -= int32v( -1 );
-            amp *= Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
-            amp *= gain;
-
-            noise = PingPong( (this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) + float32v( 1 )) * pingPongStrength );
-            sum += noise * amp;
-        }
-
-        return sum;
-    }
-};
+#include "Fractal.h"
+
+template<FastSIMD::FeatureSet SIMD, typename T>
+class FastSIMD::DispatchClass<FastNoise::Fractal<T>, SIMD> : public virtual FastNoise::Fractal<T>, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    // No members of its own: it only joins Fractal<T> with the Generator dispatch class for this feature set.
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::FractalFBm, SIMD> : public virtual FastNoise::FractalFBm, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    // GenT: returns the amplitude-weighted sum of mOctaves samples of mSource.
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
+        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
+        float32v lacunarity( mLacunarity );
+        float32v amp( mFractalBounding ); // amplitude applied to the first octave
+        float32v noise = this->GetSourceValue( mSource, seed, pos... );
+
+        float32v sum = noise * amp;
+        // Later octaves: seed advances by one, pos... is multiplied by lacunarity, amp by gain and a noise-dependent blend.
+        for( int i = 1; i < mOctaves; i++ )
+        {
+            seed -= int32v( -1 ); // same as seed += 1
+            amp *= Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength ); // (noise + 1) * 0.5 is in 0..1 when noise is in -1..1
+            amp *= gain;
+
+            noise = this->GetSourceValue( mSource, seed, (pos *= lacunarity)... );
+            sum += noise * amp;
+        }
+
+        return sum;
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::FractalRidged, SIMD> : public virtual FastNoise::FractalRidged, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    // GenT: sums mOctaves octaves of mSource; each sample is folded with Abs() and contributes (1 - 2 * |sample|) * amp.
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT(int32v seed, P... pos) const
+    {
+        float32v gain = this->GetSourceValue( mGain, seed, pos... );
+        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
+        float32v lacunarity( mLacunarity );
+        float32v amp( mFractalBounding ); // amplitude applied to the first octave
+        float32v noise = FS::Abs( this->GetSourceValue( mSource, seed, pos... ) );
+        // noise * -2 + 1 reaches its maximum (1) where the source sample is exactly zero.
+        float32v sum = (noise * float32v( -2 ) + float32v( 1 )) * amp;
+        // Later octaves: seed advances by one, pos... is multiplied by lacunarity, amp by gain and a blend towards (1 - noise).
+        for( int i = 1; i < mOctaves; i++ )
+        {
+            seed -= int32v( -1 ); // same as seed += 1
+            amp *= Lerp( float32v( 1 ), float32v( 1 ) - noise, weightedStrength );
+            amp *= gain;
+
+            noise = FS::Abs( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) );
+            sum += (noise * float32v( -2 ) + float32v( 1 )) * amp;
+        }
+
+        return sum;
+    }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::FractalPingPong, SIMD> : public virtual FastNoise::FractalPingPong, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+    // PingPong: subtracts 2 * Round( t / 2 ) from t (an even number if FS::Round rounds to nearest - confirm), then maps any result not below 1 to 2 - t.
+    static float32v PingPong( float32v t )
+    {
+        t -= FS::Round( t * float32v( 0.5f ) ) * float32v( 2 );
+        return FS::Select( t < float32v( 1 ), t, float32v( 2 ) - t );
+    }
+    // GenT: sums mOctaves octaves; every sample is shifted by +1, scaled by pingPongStrength and passed through PingPong().
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
+    {
+        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
+        float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
+        float32v pingPongStrength = this->GetSourceValue( mPingPongStrength, seed, pos... );
+        float32v lacunarity( mLacunarity );
+        float32v amp( mFractalBounding ); // amplitude applied to the first octave
+        float32v noise = PingPong( (this->GetSourceValue( mSource, seed, pos... ) + float32v( 1 )) * pingPongStrength );
+
+        float32v sum = noise * amp;
+        // Later octaves: seed advances by one, pos... is multiplied by lacunarity, amp by gain and a noise-dependent blend.
+        for( int i = 1; i < mOctaves; i++ )
+        {
+            seed -= int32v( -1 ); // same as seed += 1
+            amp *= Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
+            amp *= gain;
+
+            noise = PingPong( (this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) + float32v( 1 )) * pingPongStrength );
+            sum += noise * amp;
+        }
+
+        return sum;
+    }
+};
diff --git a/include/FastNoise/Generators/Modifiers.inl b/include/FastNoise/Generators/Modifiers.inl
index ae947597..2cd0a7d6 100644
--- a/include/FastNoise/Generators/Modifiers.inl
+++ b/include/FastNoise/Generators/Modifiers.inl
@@ -229,17 +229,19 @@ class FastSIMD::DispatchClass<FastNoise::GeneratorCache, SIMD> : public virtual
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         thread_local static const void* CachedGenerator = nullptr;
+        thread_local static std::int32_t CachedSeed[int32v::ElementCount];
+        thread_local static float CachedPos[sizeof...(P)][int32v::ElementCount];
         thread_local static float CachedValue[int32v::ElementCount];
-        thread_local static float CachedPos[int32v::ElementCount][sizeof...( P )];
         // TLS is not always aligned (compiler bug), need to avoid using SIMD types
-
-        float32v arrayPos[] = { pos... };
+        
+        const float32v arrayPos[] = { pos... };
 
         bool isSame = (CachedGenerator == mSource.simdGeneratorPtr);
+        isSame &= !FS::AnyMask( seed != FS::Load<int32v>( CachedSeed ) );
 
         for( size_t i = 0; i < sizeof...( P ); i++ )
         {
-            isSame &= !FS_AnyMask_bool( arrayPos[i] != FS::Load<float32v>( &CachedPos[i] ) );
+            isSame &= !FS::AnyMask( arrayPos[i] != FS::Load<float32v>( CachedPos[i] ) );
         }
 
         if( !isSame )
@@ -247,16 +249,18 @@ class FastSIMD::DispatchClass<FastNoise::GeneratorCache, SIMD> : public virtual
             CachedGenerator = mSource.simdGeneratorPtr;
 
             float32v value = this->GetSourceValue( mSource, seed, pos... );
-            FS::Store( &CachedValue, value );
+
+            FS::Store( CachedValue, value );
+            FS::Store( CachedSeed, seed );
 
             for( size_t i = 0; i < sizeof...(P); i++ )
             {
-                FS::Store( &CachedPos[i], arrayPos[i] );
+                FS::Store( CachedPos[i], arrayPos[i] );
             }
 
             return value;
         }
 
-        return FS::Load<float32v>( &CachedValue[0] );
+        return FS::Load<float32v>( CachedValue );
     }
 };
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 983afea5..fe66b3ae 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 5ff01e0274dd7605d0b80d4957e3affb04c1178f
+    GIT_TAG 6f016d253dd32dc8c128df375c225017ba2558c1
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"

From 671452a13f130b98a2dbdbd9e33864ee2ecca807 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sun, 4 Sep 2022 20:18:40 +0100
Subject: [PATCH 011/139] Temp fix for benchmarking

---
 tests/FastNoiseBenchmark.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index 605820aa..389d7a68 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -144,14 +144,11 @@ int main( int argc, char** argv )
     {
         gPositionFloats[idx] = (float)idx * 0.6f;
     }
+
+    FastSIMD::FeatureSet Levels[] = { FastSIMD::FeatureSet::SSE2, FastSIMD::FeatureSet::SSE41 };
     
-    for( FastSIMD::FeatureSet level = FastSIMD::CPUMaxSIMDLevel(); level != FastSIMD::Level_Null; level = (FastSIMD::FeatureSet)(level >> 1) )
+    for( auto level : Levels )
     {
-        if( !(level & FastSIMD::COMPILED_SIMD_LEVELS & FastNoise::SUPPORTED_SIMD_LEVELS) )
-        {
-            continue;
-        }
-
         for( const FastNoise::Metadata* metadata : FastNoise::Metadata::GetAll() )
         {
             const char* groupName = "Misc";

From 7d6807d9d5ec3c3a6a375920d746eb952116b063 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Mon, 5 Sep 2022 00:36:38 +0100
Subject: [PATCH 012/139] Fix benchmark naming

---
 tests/FastNoiseBenchmark.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index 389d7a68..85156213 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -112,14 +112,15 @@ void RegisterBenchmarks( FastSIMD::FeatureSet level, const char* groupName, cons
     {
         benchName += enumName.data() + find + 1;
     }
-    else
+    else if( *enumName.data() != 0 )
     {
         benchName += enumName;
     }
-#else
-    benchName += std::to_string( (int)level );
+    else
 #endif
-
+    {
+        benchName += std::to_string( (int)level );
+    }
 
     benchName += '/';
     benchName += groupName;

From 6d69f45ac133e8722b1a63b0b95995b1bb248571 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Tue, 6 Sep 2022 00:49:13 +0100
Subject: [PATCH 013/139] Latest FastSIMD, GCC fixes

---
 src/CMakeLists.txt              | 2 +-
 tests/FastNoiseCpp11Include.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fe66b3ae..5451bfe6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 6f016d253dd32dc8c128df375c225017ba2558c1
+    GIT_TAG ff74d6350f9e674ef81d28118af3cd3eeb5f27bd
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"
diff --git a/tests/FastNoiseCpp11Include.cpp b/tests/FastNoiseCpp11Include.cpp
index aa01c9a7..497e52ed 100644
--- a/tests/FastNoiseCpp11Include.cpp
+++ b/tests/FastNoiseCpp11Include.cpp
@@ -7,7 +7,7 @@ int main()
 {
     auto node = FastNoise::New<FastNoise::FractalFBm>();
 
-    std::cout << node->GetSIMDLevel() << std::endl;
+    std::cout << (unsigned)node->GetLiveFeatureSet() << std::endl;
 
     node->SetSource( FastNoise::New<FastNoise::Simplex>() );
     node->SetGain( FastNoise::New<FastNoise::Value>() );

From 879b4795b9a1f0efa21a8acc504a0eadb7b07a1b Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Tue, 6 Sep 2022 21:35:13 +0100
Subject: [PATCH 014/139] Small optimisation to GetGradient

---
 include/FastNoise/Generators/Utils.inl | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 8910610b..823bf871 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -130,7 +130,16 @@ namespace FastNoise
         int32v hasha13 = hash & int32v( 13 );
 
         //if h < 8 then x, else y
-        float32v u = FS::Select( hasha13 < int32v( 8 ), fX, fY );
+        mask32v greater7; // true when h >= 8, i.e. when bit 3 of hasha13 is set
+        if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+        {
+            greater7 = FS::Cast<FS::Mask<32>>( hasha13 << 28 ); // moves bit 3 into the sign bit; the mask is NOT all-ones, so this presumably relies on a sign-bit select (SSE4.1 blendv) - confirm
+        }
+        else
+        {
+            greater7 = hasha13 > int32v( 7 );
+        }
+        float32v u = FS::Select( greater7, fY, fX ); // both branches now agree: x when h < 8, y otherwise (the shifted mask was previously used with inverted meaning)
 
         //if h < 4 then y else if h is 12 or 14 then x else z
         float32v v = FS::Select( hasha13 == int32v( 12 ), fX, fZ );
@@ -139,7 +148,7 @@ namespace FastNoise
         //if h1 then -u else u
         //if h2 then -v else v
         float32v h1 = FS::Cast<float>( hash << 31 );
-        float32v h2 = FS::Cast<float>( (hash & int32v( 2 )) << 30 );
+        float32v h2 = FS::Cast<float>( (hash >> 1) << 31 );
         //then add them
         return ( u ^ h1 ) + ( v ^ h2 );
     }
@@ -172,8 +181,8 @@ namespace FastNoise
         float32v c = FS::Select( p > int32v( 2 << 3 ), fZ, fW );
 
         float32v aSign = FS::Cast<float>( hash << 31 );
-        float32v bSign = FS::Cast<float>( (hash << 30) & int32v( 0x80000000 ) );
-        float32v cSign = FS::Cast<float>( (hash << 29) & int32v( 0x80000000 ) );
+        float32v bSign = FS::Cast<float>( (hash >> 1) << 31 );
+        float32v cSign = FS::Cast<float>( (hash >> 2) << 31 );
 
         return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
     }

From da0fc5c0a97e0c511dd42b130036969b1ae951ec Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Fri, 9 Sep 2022 21:37:22 +0100
Subject: [PATCH 015/139] Support new FastSIMD cmake library function

---
 .gitignore         | 1 +
 src/CMakeLists.txt | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index f959da93..a8cde1a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,3 +35,4 @@
 /build
 /enc_temp_folder
 /cpm-cache
+/CMakeUserPresets.json
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5451bfe6..ec870136 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG ff74d6350f9e674ef81d28118af3cd3eeb5f27bd
+    GIT_TAG 5d95d8b2e7b2531c73b9afcdf7eb1ea88eb22493
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"
@@ -58,7 +58,7 @@ set_target_properties(FastNoise PROPERTIES
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
-fastsimd_create_simd_library(FastSIMD_FastNoise "../include/FastNoise/FastNoise_BuildList.inl")
+fastsimd_create_simd_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl")
 
 target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
 

From 74a24f04f970befc0c781ede7ec51bc913f48d2f Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sun, 30 Oct 2022 23:28:50 +0000
Subject: [PATCH 016/139] Make reference id direct pointer to atomic ref count

---
 include/FastNoise/SmartNode.h |   2 +-
 src/FastNoise/SmartNode.cpp   | 100 ++++++++++++++++++----------------
 2 files changed, 53 insertions(+), 49 deletions(-)

diff --git a/include/FastNoise/SmartNode.h b/include/FastNoise/SmartNode.h
index 09156728..d966f0c8 100644
--- a/include/FastNoise/SmartNode.h
+++ b/include/FastNoise/SmartNode.h
@@ -13,7 +13,7 @@ namespace FastNoise
     class FASTNOISE_API SmartNodeManager
     {
     public:
-        static constexpr uint64_t kInvalidReferenceId = (uint64_t)-1;
+        static constexpr uint64_t kInvalidReferenceId = (uint64_t)0;
 
         SmartNodeManager() = delete;
 
diff --git a/src/FastNoise/SmartNode.cpp b/src/FastNoise/SmartNode.cpp
index 9f39ba17..1b649193 100644
--- a/src/FastNoise/SmartNode.cpp
+++ b/src/FastNoise/SmartNode.cpp
@@ -14,15 +14,7 @@
 
 namespace FastNoise
 {
-    union SmartNodeReference
-    {
-        uint64_t u64;
-        struct
-        {
-            uint32_t pool;
-            uint32_t id;
-        } u32;
-    };
+    using SmartNodeReference = std::atomic<uint32_t>*;
     
     struct SmartNodeManagerPool
     {
@@ -31,6 +23,7 @@ namespace FastNoise
         struct SlotHeader
         {
             std::atomic<uint32_t> references;
+            uint32_t size;
         };
 
         struct Slot
@@ -66,15 +59,20 @@ namespace FastNoise
             delete[] pool;
         }
 
-        auto GetUsedSlotItr( const void* ptr ) const
+        bool Contains( const void* ptr ) const
         {
-            if( ptr > pool && ptr < pool + poolSize )
+            return ptr >= pool && ptr < pool + poolSize;
+        }
+
+        auto GetUsedSlotItr( const void* ref ) const
+        {
+            if( ref >= pool && ref < pool + poolSize )
             {
                 for( auto itr = usedSlots.begin(); itr != usedSlots.end(); ++itr )
                 {
                     const uint8_t* start = pool + itr->pos;
 
-                    if( start < ptr && start + itr->size > ptr )
+                    if( start <= ref && start + itr->size > ref )
                     {
                         return itr;
                     }
@@ -92,9 +90,9 @@ namespace FastNoise
             } );
         }
         
-        bool ValidatePtr( uint32_t pos, const void* ptr ) const
+        bool ValidatePtr( SmartNodeReference pos, const void* ptr ) const
         {            
-            if( pos >= poolSize )
+            if( *pos == 0 )
             {
                 assert( 0 );
                 return false;
@@ -110,7 +108,7 @@ namespace FastNoise
             }
 
             // Check pos is correct
-            if( slot->pos != pos )
+            if( pool + slot->pos != (uint8_t*)pos )
             {
                 assert( 0 );
                 return false;
@@ -127,16 +125,16 @@ namespace FastNoise
             return slot->references;
         }
 
-        uint32_t GetReferenceId( const void* ptr ) const
+        SmartNodeReference GetReferenceId( const void* ptr ) const
         {
             auto slot = GetUsedSlotItr( ptr );
 
             if( slot == usedSlots.end() )
             {
-                return UINT32_MAX;
+                return nullptr;
             }
 
-            return slot->pos;
+            return &reinterpret_cast<SlotHeader*>( pool + slot->pos )->references ;
         }
 
         void* TryAlloc( size_t size, size_t align )
@@ -170,7 +168,7 @@ namespace FastNoise
 
                     assert( freeSlots[idx].size >= slotSize );
                     
-                    new( startSlot ) SlotHeader { 0u };
+                    new( startSlot ) SlotHeader { { 0u }, slotSize };
                     usedSlots.emplace_back( Slot{ freeSlots[idx].pos, slotSize } );
 
                     // Check if remaining free slot is empty
@@ -192,10 +190,10 @@ namespace FastNoise
             return nullptr;
         }
 
-        void DeAlloc( uint32_t pos )
+        void DeAlloc( SmartNodeReference ref )
         {
-            SlotHeader* slotHeader = (SlotHeader*)( pool + pos );
-            auto slot = GetUsedSlotItr( pos );
+            SlotHeader* slotHeader = (SlotHeader*)( (uint8_t*)ref - offsetof( SlotHeader, references ) ); // ref points at the `references` member: step back in bytes to the enclosing header
+            auto slot = GetUsedSlotItr( ref );
 
             assert( slot != usedSlots.end() );            
             assert( slotHeader->references == 0 );
@@ -204,6 +202,7 @@ namespace FastNoise
             // Merge free slots as necessary
             Slot* expandedBefore = nullptr;
             uint32_t idx = 0;
+            uint32_t pos = (uint32_t)((uint8_t*)ref - pool);
 
             for( ; idx < freeSlots.size(); idx++ )
             {
@@ -262,42 +261,35 @@ namespace FastNoise
         {
             std::lock_guard lock( mMutex );
 
-            if( ref.u32.pool >= mPools.size() )
+            for(const auto & pool : mPools)
             {
-                assert( 0 );
-                return false;
+                if( pool.Contains( ptr ) )
+                {
+                    return pool.ValidatePtr( ref, ptr );
+                }
             }
-
-            return std::next( mPools.begin(), ref.u32.pool )->ValidatePtr( ref.u32.id, ptr );
-        }
-
-        std::atomic<uint32_t>& GetReferenceCount( SmartNodeReference ref ) const
-        {
-            std::lock_guard lock( mMutex );
-
-            return std::next( mPools.begin(), ref.u32.pool )->GetReferenceCount( ref.u32.id );
+            
+            return false;
         }
 
         SmartNodeReference GetReference( const void* ptr )
         {
             std::lock_guard lock( mMutex );
 
-            SmartNodeReference ref = { 0 };
+            SmartNodeReference ref = nullptr;
 
             for( auto& poolItr : mPools )
             {
-                ref.u32.id = poolItr.GetReferenceId( ptr );
-                if( ref.u32.id != UINT32_MAX )
+                ref = poolItr.GetReferenceId( ptr );
+                if( ref )
                 {
                     return ref;
                 }
-
-                ref.u32.pool++;
             }
 
             // Could not find ptr in pools, probably not allocated using this class
             assert( 0 );
-            return { SmartNodeManager::kInvalidReferenceId };
+            return nullptr;
         }
 
         void* Alloc( size_t size, size_t align ) 
@@ -318,7 +310,17 @@ namespace FastNoise
         {
             std::lock_guard lock( mMutex );
 
-            std::next( mPools.begin(), ref.u32.pool )->DeAlloc( ref.u32.id );
+            
+            for( auto& poolItr : mPools )
+            {
+                if( poolItr.Contains( ref ) )
+                {
+                    poolItr.DeAlloc( ref );
+                    return;
+                }
+            }
+
+            assert( 0 );
         }
         
     private:
@@ -355,25 +357,25 @@ namespace FastNoise
     {
         assert( ptr );
 
-        return gMemoryAllocator.GetReference( ptr ).u64;
+        return (uint64_t)gMemoryAllocator.GetReference( ptr );
     }
 
     void SmartNodeManager::IncReference( uint64_t id )
     {
         assert( id != kInvalidReferenceId );
 
-        std::atomic<uint32_t>& refCount = gMemoryAllocator.GetReferenceCount( { id } );
+        std::atomic<uint32_t>& refCount = *(SmartNodeReference)id;
 
         ++refCount;
     }
 
     void SmartNodeManager::DecReference( uint64_t id, void* ptr, void ( *destructorFunc )( void* ) )
     {
-        assert( gMemoryAllocator.ValidatePtr( { id }, ptr ) );
+        SmartNodeReference refCount = (SmartNodeReference)id;
 
-        std::atomic<uint32_t>& refCount = gMemoryAllocator.GetReferenceCount( { id } );    
+        assert( gMemoryAllocator.ValidatePtr( refCount, ptr ) );   
 
-        uint32_t previousRefCount = refCount.fetch_sub( 1 );
+        uint32_t previousRefCount = refCount->fetch_sub( 1 );
 
         assert( previousRefCount );
 
@@ -381,15 +383,17 @@ namespace FastNoise
         {
             destructorFunc( ptr );
 
-            gMemoryAllocator.Dealloc( { id } );
+            gMemoryAllocator.Dealloc( refCount );
         }
     }
 
     uint32_t SmartNodeManager::ReferenceCount( uint64_t id )
     {
         assert( id != kInvalidReferenceId );
+
+        SmartNodeReference refCount = (SmartNodeReference)id;
         
-        return gMemoryAllocator.GetReferenceCount( { id } );
+        return *refCount;
     }
 
     void* SmartNodeManager::Allocate( size_t size, size_t align )

From 1856157489ace21c7a1534ecf51f505580f57ec5 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 5 Mar 2023 22:54:59 +0000
Subject: [PATCH 017/139] Update to latest FastSIMD

---
 include/FastNoise/Generators/Generator.inl |    8 +-
 src/CMakeLists.txt                         |    4 +-
 tests/CMakeLists.txt                       |   19 -
 tests/FastNoiseBenchmark.cpp               |    2 -
 tests/SIMDUnitTest.cpp                     |  286 -----
 tests/magic_enum.h                         | 1103 --------------------
 6 files changed, 6 insertions(+), 1416 deletions(-)
 delete mode 100644 tests/SIMDUnitTest.cpp
 delete mode 100644 tests/magic_enum.h

diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index fd7f8f30..5c78df26 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -91,7 +91,7 @@ public:
         size_t totalValues = xSize * ySize;
         size_t index = 0;
 
-        xIdx += FS::Incremented<int32v>();
+        xIdx += FS::LoadIncremented<int32v>();
 
         AxisReset<true>( xIdx, yIdx, xMax, xSizeV, xSize );
 
@@ -141,7 +141,7 @@ public:
         size_t totalValues = xSize * ySize * zSize;
         size_t index = 0;
 
-        xIdx += FS::Incremented<int32v>();
+        xIdx += FS::LoadIncremented<int32v>();
 
         AxisReset<true>( xIdx, yIdx, xMax, xSizeV, xSize );
         AxisReset<true>( yIdx, zIdx, yMax, ySizeV, xSize * ySize );
@@ -198,7 +198,7 @@ public:
         size_t totalValues = xSize * ySize * zSize * wSize;
         size_t index = 0;
 
-        xIdx += FS::Incremented<int32v>();
+        xIdx += FS::LoadIncremented<int32v>();
 
         AxisReset<true>( xIdx, yIdx, xMax, xSizeV, xSize );
         AxisReset<true>( yIdx, zIdx, yMax, ySizeV, xSize * ySize );
@@ -368,7 +368,7 @@ public:
         float32v xMul = float32v( 1 / xSizePi );
         float32v yMul = float32v( 1 / ySizePi );
 
-        xIdx += FS::Incremented<int32v>();
+        xIdx += FS::LoadIncremented<int32v>();
 
         AxisReset<true>( xIdx, yIdx, xMax, xSizeV, xSize );
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ec870136..6a21b0d4 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 5d95d8b2e7b2531c73b9afcdf7eb1ea88eb22493
+    GIT_TAG 26b3593be46cf0bdf6426821435997839b80845f
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"
@@ -58,7 +58,7 @@ set_target_properties(FastNoise PROPERTIES
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
-fastsimd_create_simd_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl")
+fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl")
 
 target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 95130ab0..56c75bf7 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -40,22 +40,3 @@ set_target_properties(FastNoiseCpp11Test PROPERTIES CXX_STANDARD 11)
 target_link_libraries(FastNoiseCpp11Test
     FastNoise
 )
-
-add_executable(FastSIMDTest
-    "SIMDUnitTest.cpp"
-)
-
-if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
-    set_source_files_properties("SIMDUnitTest.cpp" PROPERTIES COMPILE_FLAGS "/arch:AVX512")
-
-elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
-    set_source_files_properties("SIMDUnitTest.cpp" PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512dq -mavx2 -mfma -msse4.2")
-endif()
-
-target_link_libraries(FastSIMDTest
-    FastNoise
-)
- 
-add_dependencies(FastSIMDTest 
-    FastNoise
-)
\ No newline at end of file
diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index 85156213..bb330030 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -4,8 +4,6 @@
 
 #include "../NoiseTool/DemoNodeTrees.inl"
 
-#include "magic_enum.h"
-
 static const size_t gPositionCount = 8192;
 static float gPositionFloats[gPositionCount]; 
 
diff --git a/tests/SIMDUnitTest.cpp b/tests/SIMDUnitTest.cpp
deleted file mode 100644
index 9c59b55e..00000000
--- a/tests/SIMDUnitTest.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-#include <cfloat>
-#include <climits>
-#include <random>
-#include <iostream>
-#include <cmath>
-
-#include "FastSIMD/FunctionList.h"
-#include "../src/FastSIMD/Internal/Scalar.h"
-
-#if FASTSIMD_x86
-#include "../src/FastSIMD/Internal/SSE.h"
-#include "../src/FastSIMD/Internal/AVX.h"
-#include "../src/FastSIMD/Internal/AVX512.h"
-#endif
-
-#if FASTSIMD_ARM
-#include "../src/FastSIMD/Internal/NEON.h"
-#endif
-
-#include <vector>
-#include <functional>
-#include <type_traits>
-
-template<typename... T>
-struct SIMDClassContainer
-{
-    using Top = void;
-
-    template<typename L>
-    using GetNext = void;
-};
-
-template<typename HEAD, typename... TAIL>
-struct SIMDClassContainer<HEAD, TAIL...>
-{
-    using Top = HEAD;
-
-    template<typename L>
-    using GetNext = std::conditional_t<std::is_same_v<L, HEAD>, typename SIMDClassContainer<TAIL...>::Top, typename SIMDClassContainer<TAIL...>::template GetNext<L>>;
-};
-
-typedef SIMDClassContainer<
-    FastSIMD::Scalar,
-    FastSIMD::SSE2,
-    FastSIMD::SSE41,
-    FastSIMD::AVX2,
-    FastSIMD::AVX512
->
-SIMDClassList;
-
-class SIMDUnitTest
-{
-public:
-
-    static void RunAll();
-
-    SIMDUnitTest( std::function<void( void* )> func )
-    {
-        tests.emplace_back( func );
-    }
-
-private:
-    inline static std::vector<std::function<void( void* )> > tests;
-
-};
-
-const std::size_t TestCount = 1073741824 / 16;
-const std::size_t NonVecMask = ~15;
-
-int  * rndInts0;
-int  * rndInts1;
-float* rndFloats0;
-float* rndFloats1;
-
-float GenNormalFloat( std::mt19937& gen )
-{
-    union
-    {
-        float f;
-        int32_t i;
-    } u;
-
-    do
-    {
-        u.i = gen();
-
-    } while ( !std::isnormal( u.f ) );
-
-    return u.f;
-}
-
-void SIMDUnitTest::RunAll()
-{
-    rndInts0 = new int[TestCount];
-    rndInts1 = new int[TestCount];
-    rndFloats0 = new float[TestCount];
-    rndFloats1 = new float[TestCount];
-
-    std::random_device rd;  //Will be used to obtain a seed for the random number engine
-    std::mt19937 gen( rd() ); //Standard mersenne_twister_engine seeded with rd()
-
-    for ( std::size_t i = 0; i < TestCount; i++ )
-    {
-        rndInts0[i] = gen();
-        rndInts1[i] = gen();
-        rndFloats0[i] = GenNormalFloat( gen );
-        rndFloats1[i] = GenNormalFloat( gen );
-    }
-
-    for ( const auto& test : tests )
-    {
-        test( nullptr );
-    }
-
-    delete[] rndInts0;
-    delete[] rndInts1;
-    delete[] rndFloats0;
-    delete[] rndFloats1;
-}
-
-#define SIMD_FUNCTION_TEST( NAME, RETURN_TYPE, FUNC ) SIMD_FUNCTION_TEST_BASE( NAME, RETURN_TYPE, SIMDClassList::Top, FUNC )
-
-#define SIMD_FUNCTION_TEST_BASE( NAME, RETURN_TYPE, LEVEL, FUNC )                                          \
-template<typename T, typename FS>                                                                          \
-std::enable_if_t<std::is_same<void, FS>::value> TestFunction_##NAME( void* baseData = nullptr )      \
-{                                                                                                          \
-    std::cout << "\n";                                                                                     \
-    delete[] (T*)baseData;                                                                                     \
-}                                                                                                          \
-                                                                                                           \
-template<typename T, typename FS>                                                                          \
-std::enable_if_t<!std::is_same<void, FS>::value> TestFunction_##NAME( void* baseData = nullptr )     \
-{                                                                                                          \
-    bool isBase = baseData == nullptr;                                                                     \
-                                                                                                           \
-    if ( isBase )                                                                                          \
-    {                                                                                                      \
-        std::cout << #NAME " - Base: " << FS::SIMD_Level;                                                  \
-        baseData = new T[TestCount];                                                                       \
-    }                                                                                                      \
-    else { std::cout << " Testing: " << FS::SIMD_Level; }                                                  \
-                                                                                                           \
-    if ( FS::SIMD_Level > FastSIMD::CPUMaxSIMDLevel() )                                                    \
-    {                                                                                                      \
-        std::cout << " CPU N//A: " << FS::SIMD_Level;                                                      \
-    }                                                                                                      \
-    else                                                                                                   \
-    {                                                                                                      \
-        T result[int32v::ElementCount];                                                    \
-        int failCount = 0;                                                                                    \
-                                                                                                           \
-        for ( std::size_t i = 0; i < TestCount; i += int32v::ElementCount )                \
-        {                                                                                                  \
-            FUNC;                                                                                          \
-                                                                                                           \
-            for ( std::size_t ir = 0; ir < int32v::ElementCount; ir++ )                    \
-            {                                                                                              \
-                if ( isBase )                                                                              \
-                {                                                                                          \
-                    ((T*)baseData)[i + ir] = result[ir];                                                   \
-                }                                                                                          \
-                else if ( result[ir] != ((T*)baseData)[i + ir] &&                                          \
-                    (result[ir] == result[ir] ||                                                           \
-                    ((T*)baseData)[i + ir] == ((T*)baseData)[i + ir]) )                                    \
-                {                                                                                           \
-                    failCount++;                                                                                           \
-                    std::cout << "\n" << FS::SIMD_Level << " Failed: expected: " << ((T*)baseData)[i + ir];                         \
-                    std::cout << " actual: " << result[ir] << " index: " << i+ir;                          \
-                    if(std::is_integral_v<T>) std::cout << " ints: " << rndInts0[i + ir] << " : " << rndInts1[i + ir];               \
-                    else std::cout << " floats: " << rndFloats0[i + ir] << " : " << rndFloats1[i + ir] << "\n"; \
-                }                                                                                          \
-            }                                                                                              \
-            if( failCount >= 32 ) break;                                                                    \
-        }                                                                                                  \
-    }                                                                                                      \
-                                                                                                           \
-    TestFunction_##NAME<T, SIMDClassList::GetNext<FS>>( baseData );                \
-}                                                                                                          \
-SIMDUnitTest test_##NAME( TestFunction_##NAME<RETURN_TYPE, LEVEL> );
-
-SIMD_FUNCTION_TEST( LoadStore_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) ) )
-
-SIMD_FUNCTION_TEST( LoadStore_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) ) )
-
-
-SIMD_FUNCTION_TEST( Casti32_f32, float, FS::Store( &result, FS_Casti32_f32( FS_Load_i32( &rndInts0[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Castf32_i32, int32_t, FS_Store_i32( &result, FS_Castf32_i32( FS::Load( &rndFloats0[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Converti32_f32, float, FS::Store( &result, FS::Convert<float>( FS_Load_i32( &rndInts0[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Convertf32_i32, int32_t, FS_Store_i32( &result, FS_Convertf32_i32( FS::Load( &rndFloats0[i] ) ) ) )
-
-
-SIMD_FUNCTION_TEST( Equal_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) == FS::Load( &rndFloats1[i] ) ) ) ) )
-
-SIMD_FUNCTION_TEST( GreaterThan_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) > FS::Load( &rndFloats1[i] ) ) ) ) )
-
-SIMD_FUNCTION_TEST( LessThan_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) < FS::Load( &rndFloats1[i] ) ) ) ) )
-
-SIMD_FUNCTION_TEST( GreaterEqualThan_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) >= FS::Load( &rndFloats1[i] ) ) ) ) )
-
-SIMD_FUNCTION_TEST( LessEqualThan_f32, float, FS::Store( &result, FS_Mask_f32( typename FS::float32v( 1 ), ( FS::Load( &rndFloats0[i] ) <= FS::Load( &rndFloats1[i] ) ) ) ) )
-
-SIMD_FUNCTION_TEST( Equal_i32, int32_t, FS_Store_i32( &result, FS_Mask_i32( typename FS::int32v( 1 ), ( FS_Load_i32( &rndInts0[i] ) == FS_Load_i32( &rndInts1[i] ) ) ) ) )
-
-SIMD_FUNCTION_TEST( GreaterThan_i32, int32_t, FS_Store_i32( &result, FS_Mask_i32( typename FS::int32v( 1 ), ( FS_Load_i32( &rndInts0[i] ) > FS_Load_i32( &rndInts1[i] ) ) ) ) )
-
-SIMD_FUNCTION_TEST( LessThan_i32, int32_t, FS_Store_i32( &result, FS_Mask_i32( typename FS::int32v( 1 ), ( FS_Load_i32( &rndInts0[i] ) < FS_Load_i32( &rndInts1[i] ) ) ) ) )
-
-
-SIMD_FUNCTION_TEST( Select_f32, float, FS::Store( &result, FS_Select_f32( ( FS::Load( &rndFloats0[i] ) > FS::Load( &rndFloats1[i] ) ), FS::Load( &rndFloats0[i] ), FS::Load( &rndFloats1[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Select_i32, int32_t, FS_Store_i32( &result, FS_Select_i32( ( FS_Load_i32( &rndInts0[i] ) > FS_Load_i32( &rndInts1[i] ) ), FS_Load_i32( &rndInts0[i] ), FS_Load_i32( &rndInts1[i] ) ) ) )
-
-
-SIMD_FUNCTION_TEST( Min_f32, float, FS::Store( &result, FS::Min( FS::Load( &rndFloats0[i] ), FS::Load( &rndFloats1[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Max_f32, float, FS::Store( &result, FS::Max( FS::Load( &rndFloats0[i] ), FS::Load( &rndFloats1[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Min_i32, int32_t, FS_Store_i32( &result, FS_Min_i32( FS_Load_i32( &rndInts0[i] ), FS_Load_i32( &rndInts1[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Max_i32, int32_t, FS_Store_i32( &result, FS_Max_i32( FS_Load_i32( &rndInts0[i] ), FS_Load_i32( &rndInts1[i] ) ) ) )
-
-
-SIMD_FUNCTION_TEST( BitwiseAndNot_f32, float, FS::Store( &result, FS_BitwiseAndNot_f32( FS::Load( &rndFloats0[i] ), FS::Load( &rndFloats1[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( BitwiseAndNot_i32, int32_t, FS_Store_i32( &result, FS_BitwiseAndNot_i32( FS_Load_i32( &rndInts0[i] ), FS_Load_i32( &rndInts1[i] ) ) ) )
-
-
-SIMD_FUNCTION_TEST( BitwiseShiftRightZX_f32, float, FS::Store( &result, FS_BitwiseShiftRightZX_f32( FS::Load( &rndFloats0[i] ), (rndInts1[i & NonVecMask] & 31) ) ) )
-
-SIMD_FUNCTION_TEST( BitwiseShiftRightZX_i32, int32_t, FS_Store_i32( &result, FS_BitwiseShiftRightZX_i32( FS_Load_i32( &rndInts0[i] ), (rndInts1[i & NonVecMask] & 31) ) ) )
-
-
-SIMD_FUNCTION_TEST( Abs_f32, float, FS::Store( &result, FS_Abs_f32( FS::Load( &rndFloats0[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Abs_i32, int32_t, FS_Store_i32( &result, FS_Abs_i32( FS_Load_i32( &rndInts0[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Sqrt_f32, float, FS::Store( &result, FS_Sqrt_f32( FS::Load( &rndFloats0[i] ) ) ) )
-
-//SIMD_FUNCTION_TEST( InvSqrt_f32, float, FS::Store( &result, FS_InvSqrt_f32( FS::Load( &rndFloats0[i] ) ) ) )
-
-
-const float MAX_ROUNDING = (float)INT_MAX / 2.0f;
-
-SIMD_FUNCTION_TEST( Floor_f32, float, FS::Store( &result, FS_Floor_f32( typename FS::float32v( MAX_ROUNDING / FLT_MAX ) * FS::Load( &rndFloats0[i] ) ) ) )
-
-SIMD_FUNCTION_TEST( Ceil_f32, float, FS::Store( &result, FS_Ceil_f32( typename FS::float32v( MAX_ROUNDING / FLT_MAX ) * FS::Load( &rndFloats0[i] ) ) ) )
-
-//SIMD_FUNCTION_TEST( Round_f32, float, FS::Store( &result, FS_Round_f32( FS::Min( FS::float32v( MAX_ROUNDING ), FS::Max( FS::float32v( -MAX_ROUNDING ), FS::Load( &rndFloats0[i] ) ) ) ) ) )
-
-SIMD_FUNCTION_TEST( Add_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) + FS::Load( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Sub_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) - FS::Load( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Mul_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) * FS::Load( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Div_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) / FS::Load( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( And_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) & FS::Load( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Xor_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) ^ FS::Load( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Or_f32, float, FS::Store( &result, FS::Load( &rndFloats0[i] ) | FS::Load( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Not_f32, float, FS::Store( &result, ~FS::Load( &rndFloats1[i] ) ) )
-SIMD_FUNCTION_TEST( Negate_f32, float, FS::Store( &result, -FS::Load( &rndFloats1[i] ) ) )
-
-SIMD_FUNCTION_TEST( Add_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) + FS_Load_i32( &rndInts1[i] ) ) )
-SIMD_FUNCTION_TEST( Sub_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) - FS_Load_i32( &rndInts1[i] ) ) )
-SIMD_FUNCTION_TEST( Mul_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) * FS_Load_i32( &rndInts1[i] ) ) )
-SIMD_FUNCTION_TEST( And_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) & FS_Load_i32( &rndInts1[i] ) ) )
-SIMD_FUNCTION_TEST( Xor_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) ^ FS_Load_i32( &rndInts1[i] ) ) )
-SIMD_FUNCTION_TEST( Or_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) | FS_Load_i32( &rndInts1[i] ) ) )
-SIMD_FUNCTION_TEST( Not_i32, int32_t, FS_Store_i32( &result, ~FS_Load_i32( &rndInts1[i] ) ) )
-SIMD_FUNCTION_TEST( Negate_i32, int32_t, FS_Store_i32( &result, -FS_Load_i32( &rndInts1[i] ) ) )
-
-SIMD_FUNCTION_TEST( ShiftL_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) << (rndInts1[i & NonVecMask] & 31) ) )
-SIMD_FUNCTION_TEST( ShiftR_i32, int32_t, FS_Store_i32( &result, FS_Load_i32( &rndInts0[i] ) >> (rndInts1[i & NonVecMask] & 31) ) )
-
-
-int main( int argc, char** argv )
-{
-    std::cout << std::fixed;
-
-    SIMDUnitTest::RunAll();
-
-    std::cout << "Tests Complete!\n";
-
-    getchar();
-    return 0;
-}
diff --git a/tests/magic_enum.h b/tests/magic_enum.h
deleted file mode 100644
index 4f227472..00000000
--- a/tests/magic_enum.h
+++ /dev/null
@@ -1,1103 +0,0 @@
-//  __  __             _        ______                          _____
-// |  \/  |           (_)      |  ____|                        / ____|_     _
-// | \  / | __ _  __ _ _  ___  | |__   _ __  _   _ _ __ ___   | |   _| |_ _| |_
-// | |\/| |/ _` |/ _` | |/ __| |  __| | '_ \| | | | '_ ` _ \  | |  |_   _|_   _|
-// | |  | | (_| | (_| | | (__  | |____| | | | |_| | | | | | | | |____|_|   |_|
-// |_|  |_|\__,_|\__, |_|\___| |______|_| |_|\__,_|_| |_| |_|  \_____|
-//                __/ | https://github.com/Neargye/magic_enum
-//               |___/  version 0.6.6
-//
-// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2019 - 2020 Daniil Goncharov <neargye@gmail.com>.
-//
-// Permission is hereby  granted, free of charge, to any  person obtaining a copy
-// of this software and associated  documentation files (the "Software"), to deal
-// in the Software  without restriction, including without  limitation the rights
-// to  use, copy,  modify, merge,  publish, distribute,  sublicense, and/or  sell
-// copies  of  the Software,  and  to  permit persons  to  whom  the Software  is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE  IS PROVIDED "AS  IS", WITHOUT WARRANTY  OF ANY KIND,  EXPRESS OR
-// IMPLIED,  INCLUDING BUT  NOT  LIMITED TO  THE  WARRANTIES OF  MERCHANTABILITY,
-// FITNESS FOR  A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT  SHALL THE
-// AUTHORS  OR COPYRIGHT  HOLDERS  BE  LIABLE FOR  ANY  CLAIM,  DAMAGES OR  OTHER
-// LIABILITY, WHETHER IN AN ACTION OF  CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE  OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-
-#ifndef NEARGYE_MAGIC_ENUM_HPP
-#define NEARGYE_MAGIC_ENUM_HPP
-
-#define MAGIC_ENUM_VERSION_MAJOR 0
-#define MAGIC_ENUM_VERSION_MINOR 6
-#define MAGIC_ENUM_VERSION_PATCH 6
-
-#include <array>
-#include <cassert>
-#include <cstdint>
-#include <cstddef>
-#include <iosfwd>
-#include <limits>
-#include <type_traits>
-#include <utility>
-
-#if !defined(MAGIC_ENUM_USING_ALIAS_OPTIONAL)
-#include <optional>
-#endif
-#if !defined(MAGIC_ENUM_USING_ALIAS_STRING)
-#include <string>
-#endif
-#if !defined(MAGIC_ENUM_USING_ALIAS_STRING_VIEW)
-#include <string_view>
-#endif
-
-#if defined(__clang__)
-#  pragma clang diagnostic push
-#elif defined(__GNUC__)
-#  pragma GCC diagnostic push
-#  pragma GCC diagnostic ignored "-Wmaybe-uninitialized" // May be used uninitialized 'return {};'.
-#elif defined(_MSC_VER)
-#  pragma warning(push)
-#  pragma warning(disable : 26495) // Variable 'static_string<N>::chars' is uninitialized.
-#endif
-
-// Checks magic_enum compiler compatibility.
-#if defined(__clang__) && __clang_major__ >= 5 || defined(__GNUC__) && __GNUC__ >= 9 || defined(_MSC_VER) && _MSC_VER >= 1910
-#  undef  MAGIC_ENUM_SUPPORTED
-#  define MAGIC_ENUM_SUPPORTED 1
-#endif
-
-// Checks magic_enum compiler aliases compatibility.
-#if defined(__clang__) && __clang_major__ >= 5 || defined(__GNUC__) && __GNUC__ >= 9 || defined(_MSC_VER) && _MSC_VER >= 1920
-#  undef  MAGIC_ENUM_SUPPORTED_ALIASES
-#  define MAGIC_ENUM_SUPPORTED_ALIASES 1
-#endif
-
-// Enum value must be greater or equals than MAGIC_ENUM_RANGE_MIN. By default MAGIC_ENUM_RANGE_MIN = -128.
-// If need another min range for all enum types by default, redefine the macro MAGIC_ENUM_RANGE_MIN.
-#if !defined(MAGIC_ENUM_RANGE_MIN)
-#  define MAGIC_ENUM_RANGE_MIN -128
-#endif
-
-// Enum value must be less or equals than MAGIC_ENUM_RANGE_MAX. By default MAGIC_ENUM_RANGE_MAX = 128.
-// If need another max range for all enum types by default, redefine the macro MAGIC_ENUM_RANGE_MAX.
-#if !defined(MAGIC_ENUM_RANGE_MAX)
-#  define MAGIC_ENUM_RANGE_MAX 128
-#endif
-
-namespace magic_enum {
-
-// If need another optional type, define the macro MAGIC_ENUM_USING_ALIAS_OPTIONAL.
-#if defined(MAGIC_ENUM_USING_ALIAS_OPTIONAL)
-MAGIC_ENUM_USING_ALIAS_OPTIONAL
-#else
-template <typename T>
-using optional = std::optional<T>;
-#endif
-
-// If need another optional type, define the macro MAGIC_ENUM_USING_ALIAS_STRING_VIEW.
-#if defined(MAGIC_ENUM_USING_ALIAS_STRING_VIEW)
-MAGIC_ENUM_USING_ALIAS_STRING_VIEW
-#else
-using string_view = std::string_view;
-#endif
-
-// If need another optional type, define the macro MAGIC_ENUM_USING_ALIAS_STRING.
-#if defined(MAGIC_ENUM_USING_ALIAS_STRING)
-MAGIC_ENUM_USING_ALIAS_STRING
-#else
-using string = std::string;
-#endif
-
-namespace customize {
-
-// Enum value must be in range [MAGIC_ENUM_RANGE_MIN, MAGIC_ENUM_RANGE_MAX]. By default MAGIC_ENUM_RANGE_MIN = -128, MAGIC_ENUM_RANGE_MAX = 128.
-// If need another range for all enum types by default, redefine the macro MAGIC_ENUM_RANGE_MIN and MAGIC_ENUM_RANGE_MAX.
-// If need another range for specific enum type, add specialization enum_range for necessary enum type.
-template <typename E>
-struct enum_range {
-  static_assert(std::is_enum_v<E>, "magic_enum::customize::enum_range requires enum type.");
-  inline static constexpr int min = MAGIC_ENUM_RANGE_MIN;
-  inline static constexpr int max = MAGIC_ENUM_RANGE_MAX;
-  static_assert(max > min, "magic_enum::customize::enum_range requires max > min.");
-};
-
-static_assert(MAGIC_ENUM_RANGE_MIN <= 0, "MAGIC_ENUM_RANGE_MIN must be less or equals than 0.");
-static_assert(MAGIC_ENUM_RANGE_MIN > (std::numeric_limits<std::int16_t>::min)(), "MAGIC_ENUM_RANGE_MIN must be greater than INT16_MIN.");
-
-static_assert(MAGIC_ENUM_RANGE_MAX > 0, "MAGIC_ENUM_RANGE_MAX must be greater than 0.");
-static_assert(MAGIC_ENUM_RANGE_MAX < (std::numeric_limits<std::int16_t>::max)(), "MAGIC_ENUM_RANGE_MAX must be less than INT16_MAX.");
-
-static_assert(MAGIC_ENUM_RANGE_MAX > MAGIC_ENUM_RANGE_MIN, "MAGIC_ENUM_RANGE_MAX must be greater than MAGIC_ENUM_RANGE_MIN.");
-
-// If need cunstom names for enum type, add specialization enum_name for necessary enum type.
-template <typename E>
-constexpr string_view enum_name(E) noexcept {
-  static_assert(std::is_enum_v<E>, "magic_enum::customize::enum_name requires enum type.");
-
-  return {};
-}
-
-} // namespace magic_enum::customize
-
-namespace detail {
-
-template <typename T>
-struct supported
-#if defined(MAGIC_ENUM_SUPPORTED) && MAGIC_ENUM_SUPPORTED || defined(MAGIC_ENUM_NO_CHECK_SUPPORT)
-    : std::true_type {};
-#else
-    : std::false_type {};
-#endif
-
-template <std::size_t N>
-struct static_string {
-  constexpr explicit static_string(string_view str) noexcept : static_string{str, std::make_index_sequence<N>{}} {
-    assert(str.size() == N);
-  }
-
-  constexpr const char* data() const noexcept { return chars.data(); }
-
-  constexpr std::size_t size() const noexcept { return N; }
-
-  constexpr operator string_view() const noexcept { return {data(), size()}; }
-
- private:
-  template <std::size_t... I>
-  constexpr static_string(string_view str, std::index_sequence<I...>) noexcept : chars{{str[I]..., '\0'}} {}
-
-  const std::array<char, N + 1> chars;
-};
-
-template <>
-struct static_string<0> {
-  constexpr explicit static_string(string_view) noexcept {}
-
-  constexpr const char* data() const noexcept { return nullptr; }
-
-  constexpr std::size_t size() const noexcept { return 0; }
-
-  constexpr operator string_view() const noexcept { return {}; }
-};
-
-struct char_equal_to {
-  constexpr bool operator()(char lhs, char rhs) const noexcept {
-    return lhs == rhs;
-  }
-};
-
-constexpr string_view pretty_name(string_view name) noexcept {
-  for (std::size_t i = name.size(); i > 0; --i) {
-    if (!((name[i - 1] >= '0' && name[i - 1] <= '9') ||
-          (name[i - 1] >= 'a' && name[i - 1] <= 'z') ||
-          (name[i - 1] >= 'A' && name[i - 1] <= 'Z') ||
-          (name[i - 1] == '_'))) {
-      name.remove_prefix(i);
-      break;
-    }
-  }
-
-  if (name.size() > 0 && ((name.front() >= 'a' && name.front() <= 'z') ||
-                          (name.front() >= 'A' && name.front() <= 'Z') ||
-                          (name.front() == '_'))) {
-    return name;
-  }
-
-  return {}; // Invalid name.
-}
-
-constexpr std::size_t find(string_view str, char c) noexcept {
-#if defined(__clang__) && __clang_major__ < 9 && defined(__GLIBCXX__) || defined(_MSC_VER) && _MSC_VER < 1920
-// https://stackoverflow.com/questions/56484834/constexpr-stdstring-viewfind-last-of-doesnt-work-on-clang-8-with-libstdc
-// https://developercommunity.visualstudio.com/content/problem/360432/vs20178-regression-c-failed-in-test.html
-  constexpr auto workaroung = true;
-#else
-  constexpr auto workaroung = false;
-#endif
-  if constexpr (workaroung) {
-    for (std::size_t i = 0; i < str.size(); ++i) {
-      if (str[i] == c) {
-        return i;
-      }
-    }
-
-    return string_view::npos;
-  } else {
-    return str.find_first_of(c);
-  }
-}
-
-template <typename BinaryPredicate>
-constexpr bool cmp_equal(string_view lhs, string_view rhs, BinaryPredicate&& p) noexcept(std::is_nothrow_invocable_r_v<bool, BinaryPredicate, char, char>) {
-#if defined(_MSC_VER) && _MSC_VER < 1920
-  // https://developercommunity.visualstudio.com/content/problem/360432/vs20178-regression-c-failed-in-test.html
-  // https://developercommunity.visualstudio.com/content/problem/232218/c-constexpr-string-view.html
-  constexpr auto workaroung = true;
-#else
-  constexpr auto workaroung = false;
-#endif
-  if constexpr (std::is_same_v<std::decay_t<BinaryPredicate>, char_equal_to> && !workaroung) {
-    static_cast<void>(p);
-    return lhs == rhs;
-  } else {
-    if (lhs.size() != rhs.size()) {
-      return false;
-    }
-
-    const auto size = lhs.size();
-    for (std::size_t i = 0; i < size; ++i) {
-      if (!p(lhs[i], rhs[i])) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-}
-
-template <typename L, typename R>
-constexpr bool cmp_less(L lhs, R rhs) noexcept {
-  static_assert(std::is_integral_v<L> && std::is_integral_v<R>, "magic_enum::detail::cmp_less requires integral type.");
-
-  if constexpr (std::is_signed_v<L> == std::is_signed_v<R>) {
-    // If same signedness (both signed or both unsigned).
-    return lhs < rhs;
-  } else if constexpr (std::is_signed_v<R>) {
-    // If 'right' is negative, then result is 'false', otherwise cast & compare.
-    return rhs > 0 && lhs < static_cast<std::make_unsigned_t<R>>(rhs);
-  } else {
-    // If 'left' is negative, then result is 'true', otherwise cast & compare.
-    return lhs < 0 || static_cast<std::make_unsigned_t<L>>(lhs) < rhs;
-  }
-}
-
-template <typename I>
-constexpr I log2(I value) noexcept {
-  static_assert(std::is_integral_v<I>, "magic_enum::detail::log2 requires integral type.");
-
-  auto ret = I{0};
-  for (; value > I{1}; value >>= I{1}, ++ret) {};
-
-  return ret;
-}
-
-template <typename I>
-constexpr bool is_pow2(I x) noexcept {
-  static_assert(std::is_integral_v<I>, "magic_enum::detail::is_pow2 requires integral type.");
-
-  return x != 0 && (x & (x - 1)) == 0;
-}
-
-template <typename T>
-inline constexpr bool is_enum_v = std::is_enum_v<T> && std::is_same_v<T, std::decay_t<T>>;
-
-template <typename E>
-constexpr auto n() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::n requires enum type.");
-#if defined(MAGIC_ENUM_SUPPORTED) && MAGIC_ENUM_SUPPORTED
-#  if defined(__clang__)
-  constexpr string_view name{__PRETTY_FUNCTION__ + 34, sizeof(__PRETTY_FUNCTION__) - 36};
-#  elif defined(__GNUC__)
-  constexpr string_view name{__PRETTY_FUNCTION__ + 49, sizeof(__PRETTY_FUNCTION__) - 51};
-#  elif defined(_MSC_VER)
-  constexpr string_view name{__FUNCSIG__ + 40, sizeof(__FUNCSIG__) - 57};
-#  endif
-  return static_string<name.size()>{name};
-#else
-  return string_view{}; // Unsupported compiler.
-#endif
-}
-
-template <typename E>
-inline constexpr auto type_name_v = n<E>();
-
-template <typename E, E V>
-constexpr auto n() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::n requires enum type.");
-  constexpr auto custom_name = customize::enum_name<E>(V);
-
-  if constexpr (custom_name.empty()) {
-    static_cast<void>(custom_name);
-#if defined(MAGIC_ENUM_SUPPORTED) && MAGIC_ENUM_SUPPORTED
-#  if defined(__clang__) || defined(__GNUC__)
-    constexpr auto name = pretty_name({__PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 2});
-#  elif defined(_MSC_VER)
-    constexpr auto name = pretty_name({__FUNCSIG__, sizeof(__FUNCSIG__) - 17});
-#  endif
-    return static_string<name.size()>{name};
-#else
-    return string_view{}; // Unsupported compiler.
-#endif
-  } else {
-    return static_string<custom_name.size()>{custom_name};
-  }
-}
-
-template <typename E, E V>
-inline constexpr auto enum_name_v = n<E, V>();
-
-template <typename E, auto V>
-constexpr bool is_valid() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::is_valid requires enum type.");
-
-  return n<E, static_cast<E>(V)>().size() != 0;
-}
-
-template <typename E, bool IsFlags, typename U = std::underlying_type_t<E>>
-constexpr int reflected_min() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::reflected_min requires enum type.");
-
-  if constexpr (IsFlags) {
-    return 0;
-  } else {
-    constexpr auto lhs = customize::enum_range<E>::min;
-    static_assert(lhs > (std::numeric_limits<std::int16_t>::min)(), "magic_enum::enum_range requires min must be greater than INT16_MIN.");
-    constexpr auto rhs = (std::numeric_limits<U>::min)();
-
-    if constexpr (cmp_less(lhs, rhs)) {
-      return rhs;
-    } else {
-      return lhs;
-    }
-  }
-}
-
-template <typename E, bool IsFlags, typename U = std::underlying_type_t<E>>
-constexpr int reflected_max() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::reflected_max requires enum type.");
-
-  if constexpr (IsFlags) {
-    return std::numeric_limits<U>::digits - 1;
-  } else {
-    constexpr auto lhs = customize::enum_range<E>::max;
-    static_assert(lhs < (std::numeric_limits<std::int16_t>::max)(), "magic_enum::enum_range requires max must be less than INT16_MAX.");
-    constexpr auto rhs = (std::numeric_limits<U>::max)();
-
-    if constexpr (cmp_less(lhs, rhs)) {
-      return lhs;
-    } else {
-      return rhs;
-    }
-  }
-}
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto reflected_min_v = reflected_min<E, IsFlags>();
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto reflected_max_v = reflected_max<E, IsFlags>();
-
-template <typename E, int O, bool IsFlags = false, typename U = std::underlying_type_t<E>>
-constexpr E value(std::size_t i) noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::value requires enum type.");
-
-  if constexpr (IsFlags) {
-    return static_cast<E>(U{1} << static_cast<U>(static_cast<int>(i) + O));
-  } else {
-    return static_cast<E>(static_cast<int>(i) + O);
-  }
-}
-
-template <typename E, bool IsFlags, int Min, std::size_t... I>
-constexpr auto values(std::index_sequence<I...>) noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::values requires enum type.");
-  constexpr std::array<bool, sizeof...(I)> valid{{is_valid<E, value<E, Min, IsFlags>(I)>()...}};
-  constexpr std::size_t count = ((valid[I] ? std::size_t{1} : std::size_t{0}) + ...);
-
-  std::array<E, count> values{};
-  for (std::size_t i = 0, v = 0; v < count; ++i) {
-    if (valid[i]) {
-      values[v++] = value<E, Min, IsFlags>(i);
-    }
-  }
-
-  return values;
-}
-
-template <typename E, bool IsFlags, typename U = std::underlying_type_t<E>>
-constexpr auto values() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::values requires enum type.");
-  constexpr auto range_size = reflected_max_v<E, IsFlags> - reflected_min_v<E, IsFlags> + 1;
-  static_assert(range_size > 0, "magic_enum::enum_range requires valid size.");
-  static_assert(range_size < (std::numeric_limits<std::uint16_t>::max)(), "magic_enum::enum_range requires valid size.");
-
-  return values<E, IsFlags, reflected_min_v<E, IsFlags>>(std::make_index_sequence<range_size>{});
-}
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto values_v = values<E, IsFlags>();
-
-template <typename E, bool IsFlags = false, typename D = std::decay_t<E>>
-using values_t = decltype((values_v<D, IsFlags>));
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto count_v = values_v<E, IsFlags>.size();
-
-template <typename E, bool IsFlags = false, typename U = std::underlying_type_t<E>>
-inline constexpr auto min_v = static_cast<U>(values_v<E, IsFlags>.front());
-
-template <typename E, bool IsFlags = false, typename U = std::underlying_type_t<E>>
-inline constexpr auto max_v = static_cast<U>(values_v<E, IsFlags>.back());
-
-template <typename E, bool IsFlags, typename U = std::underlying_type_t<E>>
-constexpr std::size_t range_size() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::range_size requires enum type.");
-  constexpr auto max = IsFlags ? log2(max_v<E, IsFlags>) : max_v<E, IsFlags>;
-  constexpr auto min = IsFlags ? log2(min_v<E, IsFlags>) : min_v<E, IsFlags>;
-  constexpr auto range_size = max - min + U{1};
-  static_assert(range_size > 0, "magic_enum::enum_range requires valid size.");
-  static_assert(range_size < (std::numeric_limits<std::uint16_t>::max)(), "magic_enum::enum_range requires valid size.");
-
-  return static_cast<std::size_t>(range_size);
-}
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto range_size_v = range_size<E, IsFlags>();
-
-template <typename E, bool IsFlags = false>
-using index_t = std::conditional_t<range_size_v<E, IsFlags> < (std::numeric_limits<std::uint8_t>::max)(), std::uint8_t, std::uint16_t>;
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto invalid_index_v = (std::numeric_limits<index_t<E, IsFlags>>::max)();
-
-template <typename E, bool IsFlags, std::size_t... I>
-constexpr auto indexes(std::index_sequence<I...>) noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::indexes requires enum type.");
-  constexpr auto min = IsFlags ? log2(min_v<E, IsFlags>) : min_v<E, IsFlags>;
-  [[maybe_unused]] auto i = index_t<E, IsFlags>{0};
-
-  return std::array<decltype(i), sizeof...(I)>{{(is_valid<E, value<E, min, IsFlags>(I)>() ? i++ : invalid_index_v<E, IsFlags>)...}};
-}
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto indexes_v = indexes<E, IsFlags>(std::make_index_sequence<range_size_v<E, IsFlags>>{});
-
-template <typename E, bool IsFlags, std::size_t... I>
-constexpr auto names(std::index_sequence<I...>) noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::names requires enum type.");
-
-  return std::array<string_view, sizeof...(I)>{{enum_name_v<E, values_v<E, IsFlags>[I]>...}};
-}
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto names_v = names<E, IsFlags>(std::make_index_sequence<count_v<E, IsFlags>>{});
-
-template <typename E, bool IsFlags = false, typename D = std::decay_t<E>>
-using names_t = decltype((names_v<D, IsFlags>));
-
-template <typename E, bool IsFlags, std::size_t... I>
-constexpr auto entries(std::index_sequence<I...>) noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::entries requires enum type.");
-
-  return std::array<std::pair<E, string_view>, sizeof...(I)>{{{values_v<E, IsFlags>[I], enum_name_v<E, values_v<E, IsFlags>[I]>}...}};
-}
-
-template <typename E, bool IsFlags = false>
-inline constexpr auto entries_v = entries<E, IsFlags>(std::make_index_sequence<count_v<E, IsFlags>>{});
-
-template <typename E, bool IsFlags = false, typename D = std::decay_t<E>>
-using entries_t = decltype((entries_v<D, IsFlags>));
-
-template <typename E, bool IsFlags, typename U = std::underlying_type_t<E>>
-constexpr bool is_sparse() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::is_sparse requires enum type.");
-
-  return range_size_v<E, IsFlags> != count_v<E, IsFlags>;
-}
-
-template <typename E, bool IsFlags = false>
-inline constexpr bool is_sparse_v = is_sparse<E, IsFlags>();
-
-template <typename E, typename U = std::underlying_type_t<E>>
-constexpr std::size_t undex(U value) noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::undex requires enum type.");
-
-  if (const auto i = static_cast<std::size_t>(value - min_v<E>); value >= min_v<E> && value <= max_v<E>) {
-    if constexpr (is_sparse_v<E>) {
-      if (const auto idx = indexes_v<E>[i]; idx != invalid_index_v<E>) {
-        return idx;
-      }
-    } else {
-      return i;
-    }
-  }
-
-  return invalid_index_v<E>; // Value out of range.
-}
-
-template <typename E, typename U = std::underlying_type_t<E>>
-constexpr std::size_t endex(E value) noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::endex requires enum type.");
-
-  return undex<E>(static_cast<U>(value));
-}
-
-template <typename E, typename U = std::underlying_type_t<E>>
-constexpr U value_ors() noexcept {
-  static_assert(is_enum_v<E>, "magic_enum::detail::endex requires enum type.");
-
-  auto value = U{0};
-  for (std::size_t i = 0; i < count_v<E, true>; ++i) {
-    value |= static_cast<U>(values_v<E, true>[i]);
-  }
-
-  return value;
-}
-
-template <bool, bool, typename T, typename R>
-struct enable_if_enum {};
-
-template <typename T, typename R>
-struct enable_if_enum<true, false, T, R> {
-  using type = R;
-  using D = std::decay_t<T>;
-  static_assert(supported<D>::value, "magic_enum unsupported compiler (https://github.com/Neargye/magic_enum#compiler-compatibility).");
-  static_assert(count_v<D, false> > 0, "magic_enum requires enum implementation and valid max and min.");
-};
-
-template <typename T, typename R>
-struct enable_if_enum<true, true, T, R> {
-  using type = R;
-  using D = std::decay_t<T>;
-  static_assert(supported<D>::value, "magic_enum unsupported compiler (https://github.com/Neargye/magic_enum#compiler-compatibility).");
-  static_assert(count_v<D, true> > 0, "magic_enum::flags requires enum-flags implementation.");
-};
-
-template <typename T, typename R = void>
-using enable_if_enum_t = typename enable_if_enum<std::is_enum_v<std::decay_t<T>>, false, T, R>::type;
-
-template <typename T, typename R = void>
-using enable_if_enum_flags_t = typename enable_if_enum<std::is_enum_v<std::decay_t<T>>, true, T, R>::type;
-
-template <typename T, typename Enable = std::enable_if_t<std::is_enum_v<std::decay_t<T>>>>
-using enum_concept = T;
-
-template <typename T, bool = std::is_enum_v<T>>
-struct is_scoped_enum : std::false_type {};
-
-template <typename T>
-struct is_scoped_enum<T, true> : std::bool_constant<!std::is_convertible_v<T, std::underlying_type_t<T>>> {};
-
-template <typename T, bool = std::is_enum_v<T>>
-struct is_unscoped_enum : std::false_type {};
-
-template <typename T>
-struct is_unscoped_enum<T, true> : std::bool_constant<std::is_convertible_v<T, std::underlying_type_t<T>>> {};
-
-template <typename T, bool = std::is_enum_v<std::decay_t<T>>>
-struct underlying_type {};
-
-template <typename T>
-struct underlying_type<T, true> : std::underlying_type<std::decay_t<T>> {};
-
-} // namespace magic_enum::detail
-
-// Checks is magic_enum supported compiler.
-inline constexpr bool is_magic_enum_supported = detail::supported<void>::value;
-
-template <typename T>
-using Enum = detail::enum_concept<T>;
-
-// Checks whether T is an Unscoped enumeration type.
-// Provides the member constant value which is equal to true, if T is an [Unscoped enumeration](https://en.cppreference.com/w/cpp/language/enum#Unscoped_enumeration) type. Otherwise, value is equal to false.
-template <typename T>
-struct is_unscoped_enum : detail::is_unscoped_enum<T> {};
-
-template <typename T>
-inline constexpr bool is_unscoped_enum_v = is_unscoped_enum<T>::value;
-
-// Checks whether T is an Scoped enumeration type.
-// Provides the member constant value which is equal to true, if T is an [Scoped enumeration](https://en.cppreference.com/w/cpp/language/enum#Scoped_enumerations) type. Otherwise, value is equal to false.
-template <typename T>
-struct is_scoped_enum : detail::is_scoped_enum<T> {};
-
-template <typename T>
-inline constexpr bool is_scoped_enum_v = is_scoped_enum<T>::value;
-
-// If T is a complete enumeration type, provides a member typedef type that names the underlying type of T.
-// Otherwise, if T is not an enumeration type, there is no member type. Otherwise (T is an incomplete enumeration type), the program is ill-formed.
-template <typename T>
-struct underlying_type : detail::underlying_type<T> {};
-
-template <typename T>
-using underlying_type_t = typename underlying_type<T>::type;
-
-// Returns type name of enum.
-template <typename E>
-[[nodiscard]] constexpr auto enum_type_name() noexcept -> std::enable_if_t<std::is_enum_v<std::decay_t<E>>, string_view> {
-  using D = std::decay_t<E>;
-  constexpr string_view name = detail::type_name_v<D>;
-  static_assert(name.size() > 0, "Enum type does not have a name.");
-
-  return name;
-}
-
-// Returns number of enum values.
-template <typename E>
-[[nodiscard]] constexpr auto enum_count() noexcept -> detail::enable_if_enum_t<E, std::size_t> {
-  using D = std::decay_t<E>;
-
-  return detail::count_v<D>;
-}
-
-// Returns enum value at specified index.
-// No bounds checking is performed: the behavior is undefined if index >= number of enum values.
-template <typename E>
-[[nodiscard]] constexpr auto enum_value(std::size_t index) noexcept -> detail::enable_if_enum_t<E, std::decay_t<E>> {
-  using D = std::decay_t<E>;
-
-  if constexpr (detail::is_sparse_v<D>) {
-    return assert((index < detail::count_v<D>)), detail::values_v<D>[index];
-  } else {
-    return assert((index < detail::count_v<D>)), detail::value<D, detail::min_v<D>>(index);
-  }
-}
-
-// Returns std::array with enum values, sorted by enum value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_values() noexcept -> detail::enable_if_enum_t<E, detail::values_t<E>> {
-  using D = std::decay_t<E>;
-
-  return detail::values_v<D>;
-}
-
-// Returns name from static storage enum variable.
-// This version is much lighter on the compile times and is not restricted to the enum_range limitation.
-template <auto V>
-[[nodiscard]] constexpr auto enum_name() noexcept -> std::enable_if_t<std::is_enum_v<std::decay_t<decltype(V)>>, string_view> {
-  using D = std::decay_t<decltype(V)>;
-  constexpr string_view name = detail::enum_name_v<D, V>;
-  static_assert(name.size() > 0, "Enum value does not have a name.");
-
-  return name;
-}
-
-// Returns name from enum value.
-// If enum value does not have name or value out of range, returns empty string.
-template <typename E>
-[[nodiscard]] constexpr auto enum_name(E value) noexcept -> detail::enable_if_enum_t<E, string_view> {
-  using D = std::decay_t<E>;
-
-  if (const auto i = detail::endex<D>(value); i != detail::invalid_index_v<D>) {
-    return detail::names_v<D>[i];
-  }
-
-  return {}; // Invalid value or out of range.
-}
-
-// Returns std::array with names, sorted by enum value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_names() noexcept -> detail::enable_if_enum_t<E, detail::names_t<E>> {
-  using D = std::decay_t<E>;
-
-  return detail::names_v<D>;
-}
-
-// Returns std::array with pairs (value, name), sorted by enum value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_entries() noexcept -> detail::enable_if_enum_t<E, detail::entries_t<E>> {
-  using D = std::decay_t<E>;
-
-  return detail::entries_v<D>;
-}
-
-// Obtains enum value from integer value.
-// Returns optional with enum value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_cast(underlying_type_t<E> value) noexcept -> detail::enable_if_enum_t<E, optional<std::decay_t<E>>> {
-  using D = std::decay_t<E>;
-
-  if (detail::undex<D>(value) != detail::invalid_index_v<D>) {
-    return static_cast<D>(value);
-  }
-
-  return {}; // Invalid value or out of range.
-}
-
-// Obtains enum value from name.
-// Returns optional with enum value.
-template <typename E, typename BinaryPredicate>
-[[nodiscard]] constexpr auto enum_cast(string_view value, BinaryPredicate p) noexcept(std::is_nothrow_invocable_r_v<bool, BinaryPredicate, char, char>) -> detail::enable_if_enum_t<E, optional<std::decay_t<E>>> {
-  static_assert(std::is_invocable_r_v<bool, BinaryPredicate, char, char>, "magic_enum::enum_cast requires bool(char, char) invocable predicate.");
-  using D = std::decay_t<E>;
-
-  for (std::size_t i = 0; i < detail::count_v<D>; ++i) {
-    if (detail::cmp_equal(value, detail::names_v<D>[i], p)) {
-      return enum_value<D>(i);
-    }
-  }
-
-  return {}; // Invalid value or out of range.
-}
-
-// Obtains enum value from name.
-// Returns optional with enum value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_cast(string_view value) noexcept -> detail::enable_if_enum_t<E, optional<std::decay_t<E>>> {
-  using D = std::decay_t<E>;
-
-  return enum_cast<D>(value, detail::char_equal_to{});
-}
-
-// Returns integer value from enum value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_integer(E value) noexcept -> std::enable_if_t<std::is_enum_v<std::decay_t<E>>, underlying_type_t<E>> {
-  return static_cast<underlying_type_t<E>>(value);
-}
-
-// Obtains index in enum values from enum value.
-// Returns optional with index.
-template <typename E>
-[[nodiscard]] constexpr auto enum_index(E value) noexcept -> detail::enable_if_enum_t<E, optional<std::size_t>> {
-  using D = std::decay_t<E>;
-
-  if (const auto i = detail::endex<D>(value); i != detail::invalid_index_v<D>) {
-    return i;
-  }
-
-  return {}; // Invalid value or out of range.
-}
-
-// Checks whether enum contains enumerator with such enum value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_contains(E value) noexcept -> detail::enable_if_enum_t<E, bool> {
-  using D = std::decay_t<E>;
-
-  return detail::endex<D>(value) != detail::invalid_index_v<D>;
-}
-
-// Checks whether enum contains enumerator with such integer value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_contains(underlying_type_t<E> value) noexcept -> detail::enable_if_enum_t<E, bool> {
-  using D = std::decay_t<E>;
-
-  return detail::undex<D>(value) != detail::invalid_index_v<D>;
-}
-
-// Checks whether enum contains enumerator with such name.
-template <typename E, typename BinaryPredicate>
-[[nodiscard]] constexpr auto enum_contains(string_view value, BinaryPredicate p) noexcept(std::is_nothrow_invocable_r_v<bool, BinaryPredicate, char, char>) -> detail::enable_if_enum_t<E, bool> {
-  using D = std::decay_t<E>;
-  static_assert(std::is_invocable_r_v<bool, BinaryPredicate, char, char>, "magic_enum::enum_contains requires bool(char, char) invocable predicate.");
-
-  return enum_cast<D>(value, std::move_if_noexcept(p)).has_value();
-}
-
-// Checks whether enum contains enumerator with such name.
-template <typename E>
-[[nodiscard]] constexpr auto enum_contains(string_view value) noexcept -> detail::enable_if_enum_t<E, bool> {
-  using D = std::decay_t<E>;
-
-  return enum_cast<D>(value).has_value();
-}
-
-namespace ostream_operators {
-
-template <typename Char, typename Traits, typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-std::basic_ostream<Char, Traits>& operator<<(std::basic_ostream<Char, Traits>& os, E value) {
-  using D = std::decay_t<E>;
-  using U = underlying_type_t<D>;
-#if defined(MAGIC_ENUM_SUPPORTED) && MAGIC_ENUM_SUPPORTED
-  if (const auto name = magic_enum::enum_name<D>(value); !name.empty()) {
-    for (const auto c : name) {
-      os.put(c);
-    }
-    return os;
-  }
-#endif
-  return (os << static_cast<U>(value));
-}
-
-template <typename Char, typename Traits, typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-std::basic_ostream<Char, Traits>& operator<<(std::basic_ostream<Char, Traits>& os, optional<E> value) {
-  return value.has_value() ? (os << value.value()) : os;
-}
-
-} // namespace magic_enum::ostream_operators
-
-namespace bitwise_operators {
-
-template <typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-constexpr E operator~(E rhs) noexcept {
-  return static_cast<E>(~static_cast<underlying_type_t<E>>(rhs));
-}
-
-template <typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-constexpr E operator|(E lhs, E rhs) noexcept {
-  return static_cast<E>(static_cast<underlying_type_t<E>>(lhs) | static_cast<underlying_type_t<E>>(rhs));
-}
-
-template <typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-constexpr E operator&(E lhs, E rhs) noexcept {
-  return static_cast<E>(static_cast<underlying_type_t<E>>(lhs) & static_cast<underlying_type_t<E>>(rhs));
-}
-
-template <typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-constexpr E operator^(E lhs, E rhs) noexcept {
-  return static_cast<E>(static_cast<underlying_type_t<E>>(lhs) ^ static_cast<underlying_type_t<E>>(rhs));
-}
-
-template <typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-constexpr E& operator|=(E& lhs, E rhs) noexcept {
-  return lhs = (lhs | rhs);
-}
-
-template <typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-constexpr E& operator&=(E& lhs, E rhs) noexcept {
-  return lhs = (lhs & rhs);
-}
-
-template <typename E, std::enable_if_t<std::is_enum_v<E>, int> = 0>
-constexpr E& operator^=(E& lhs, E rhs) noexcept {
-  return lhs = (lhs ^ rhs);
-}
-
-} // namespace magic_enum::bitwise_operators
-
-namespace flags {
-
-// Returns type name of enum.
-using magic_enum::enum_type_name;
-
-// Returns number of enum-flags values.
-template <typename E>
-[[nodiscard]] constexpr auto enum_count() noexcept -> detail::enable_if_enum_flags_t<E, std::size_t> {
-  using D = std::decay_t<E>;
-
-  return detail::count_v<D, true>;
-}
-
-// Returns enum-flags value at specified index.
-// No bounds checking is performed: the behavior is undefined if index >= number of enum-flags values.
-template <typename E>
-[[nodiscard]] constexpr auto enum_value(std::size_t index) noexcept -> detail::enable_if_enum_flags_t<E, std::decay_t<E>> {
-  using D = std::decay_t<E>;
-
-  if constexpr (detail::is_sparse_v<D, true>) {
-    return assert((index < detail::count_v<D, true>)), detail::values_v<D, true>[index];
-  } else {
-    constexpr auto min = detail::log2(detail::min_v<D, true>);
-
-    return assert((index < detail::count_v<D, true>)), detail::value<D, min, true>(index);
-  }
-}
-
-// Returns std::array with enum-flags values, sorted by enum-flags value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_values() noexcept -> detail::enable_if_enum_flags_t<E, detail::values_t<E, true>> {
-  using D = std::decay_t<E>;
-
-  return detail::values_v<D, true>;
-}
-
-// Returns name from enum-flags value.
-// If enum-flags value does not have name or value out of range, returns empty string.
-template <typename E>
-[[nodiscard]] auto enum_name(E value) -> detail::enable_if_enum_flags_t<E, string> {
-  using D = std::decay_t<E>;
-  using U = underlying_type_t<D>;
-
-  string name;
-  auto check_value = U{0};
-  for (std::size_t i = 0; i < detail::count_v<D, true>; ++i) {
-    if (const auto v = static_cast<U>(enum_value<D>(i)); (static_cast<U>(value) & v) != 0) {
-      check_value |= v;
-      const auto n = detail::names_v<D, true>[i];
-      if (!name.empty()) {
-        name.append(1, '|');
-      }
-      name.append(n.data(), n.size());
-    }
-  }
-
-  if (check_value != 0 && check_value == static_cast<U>(value)) {
-    return name;
-  }
-
-  return {}; // Invalid value or out of range.
-}
-
-// Returns std::array with string names, sorted by enum-flags value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_names() noexcept -> detail::enable_if_enum_flags_t<E, detail::names_t<E, true>> {
-  using D = std::decay_t<E>;
-
-  return detail::names_v<D, true>;
-}
-
-// Returns std::array with pairs (value, name), sorted by enum-flags value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_entries() noexcept -> detail::enable_if_enum_flags_t<E, detail::entries_t<E, true>> {
-  using D = std::decay_t<E>;
-
-  return detail::entries_v<D, true>;
-}
-
-// Obtains enum-flags value from integer value.
-// Returns optional with enum-flags value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_cast(underlying_type_t<E> value) noexcept -> detail::enable_if_enum_flags_t<E, optional<std::decay_t<E>>> {
-  using D = std::decay_t<E>;
-  using U = underlying_type_t<D>;
-
-  if constexpr (detail::is_sparse_v<D, true>) {
-    auto check_value = U{0};
-    for (std::size_t i = 0; i < detail::count_v<D, true>; ++i) {
-      if (const auto v = static_cast<U>(enum_value<D>(i)); (value & v) != 0) {
-        check_value |= v;
-      }
-    }
-
-    if (check_value != 0 && check_value == value) {
-      return static_cast<D>(value);
-    }
-  } else {
-    constexpr auto min = detail::min_v<D, true>;
-    constexpr auto max = detail::value_ors<D>();
-
-    if (value >= min && value <= max) {
-      return static_cast<D>(value);
-    }
-  }
-
-  return {}; // Invalid value or out of range.
-}
-
-// Obtains enum-flags value from name.
-// Returns optional with enum-flags value.
-template <typename E, typename BinaryPredicate>
-[[nodiscard]] constexpr auto enum_cast(string_view value, BinaryPredicate p) noexcept(std::is_nothrow_invocable_r_v<bool, BinaryPredicate, char, char>) -> detail::enable_if_enum_flags_t<E, optional<std::decay_t<E>>> {
-  static_assert(std::is_invocable_r_v<bool, BinaryPredicate, char, char>, "magic_enum::flags::enum_cast requires bool(char, char) invocable predicate.");
-  using D = std::decay_t<E>;
-  using U = underlying_type_t<D>;
-
-  auto result = U{0};
-  while (!value.empty()) {
-    const auto d = detail::find(value, '|');
-    const auto s = (d == string_view::npos) ? value : value.substr(0, d);
-    auto f = U{0};
-    for (std::size_t i = 0; i < detail::count_v<D, true>; ++i) {
-      if (detail::cmp_equal(s, detail::names_v<D, true>[i], p)) {
-        f = static_cast<U>(enum_value<D>(i));
-        result |= f;
-        break;
-      }
-    }
-    if (f == U{0}) {
-      return {}; // Invalid value or out of range.
-    }
-    value.remove_prefix((d == string_view::npos) ? value.size() : d + 1);
-  }
-
-  if (result == U{0}) {
-    return {}; // Invalid value or out of range.
-  } else {
-    return static_cast<D>(result);
-  }
-}
-
-// Obtains enum-flags value from name.
-// Returns optional with enum-flags value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_cast(string_view value) noexcept -> detail::enable_if_enum_flags_t<E, optional<std::decay_t<E>>> {
-  using D = std::decay_t<E>;
-
-  return enum_cast<D>(value, detail::char_equal_to{});
-}
-
-// Returns integer value from enum value.
-using magic_enum::enum_integer;
-
-// Obtains index in enum-flags values from enum-flags value.
-// Returns optional with index.
-template <typename E>
-[[nodiscard]] constexpr auto enum_index(E value) noexcept -> detail::enable_if_enum_flags_t<E, optional<std::size_t>> {
-  using D = std::decay_t<E>;
-  using U = underlying_type_t<D>;
-
-  if (detail::is_pow2(static_cast<U>(value))) {
-    for (std::size_t i = 0; i < detail::count_v<D, true>; ++i) {
-      if (enum_value<D>(i) == value) {
-        return i;
-      }
-    }
-  }
-
-  return {}; // Invalid value or out of range.
-}
-
-// Checks whether enum-flags contains enumerator with such enum-flags value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_contains(E value) noexcept -> detail::enable_if_enum_flags_t<E, bool> {
-  using D = std::decay_t<E>;
-  using U = underlying_type_t<D>;
-
-  return enum_cast<D>(static_cast<U>(value)).has_value();
-}
-
-// Checks whether enum-flags contains enumerator with such integer value.
-template <typename E>
-[[nodiscard]] constexpr auto enum_contains(underlying_type_t<E> value) noexcept -> detail::enable_if_enum_flags_t<E, bool> {
-  using D = std::decay_t<E>;
-
-  return enum_cast<D>(value).has_value();
-}
-
-// Checks whether enum-flags contains enumerator with such name.
-template <typename E, typename BinaryPredicate>
-[[nodiscard]] constexpr auto enum_contains(string_view value, BinaryPredicate p) noexcept(std::is_nothrow_invocable_r_v<bool, BinaryPredicate, char, char>) -> detail::enable_if_enum_flags_t<E, bool> {
-  static_assert(std::is_invocable_r_v<bool, BinaryPredicate, char, char>, "magic_enum::flags::enum_contains requires bool(char, char) invocable predicate.");
-  using D = std::decay_t<E>;
-
-  return enum_cast<D>(value, std::move_if_noexcept(p)).has_value();
-}
-
-// Checks whether enum-flags contains enumerator with such name.
-template <typename E>
-[[nodiscard]] constexpr auto enum_contains(string_view value) noexcept -> detail::enable_if_enum_flags_t<E, bool> {
-  using D = std::decay_t<E>;
-
-  return enum_cast<D>(value).has_value();
-}
-
-} // namespace magic_enum::flags
-
-namespace flags::ostream_operators {
-
-template <typename Char, typename Traits, typename E, detail::enable_if_enum_flags_t<E, int> = 0>
-std::basic_ostream<Char, Traits>& operator<<(std::basic_ostream<Char, Traits>& os, E value) {
-  using D = std::decay_t<E>;
-  using U = underlying_type_t<D>;
-#if defined(MAGIC_ENUM_SUPPORTED) && MAGIC_ENUM_SUPPORTED
-  if (const auto name = magic_enum::flags::enum_name<D>(value); !name.empty()) {
-    for (const auto c : name) {
-      os.put(c);
-    }
-    return os;
-  }
-#endif
-  return (os << static_cast<U>(value));
-}
-
-template <typename Char, typename Traits, typename E, detail::enable_if_enum_flags_t<E, int> = 0>
-std::basic_ostream<Char, Traits>& operator<<(std::basic_ostream<Char, Traits>& os, optional<E> value) {
-  return value.has_value() ? (os << value.value()) : os;
-}
-
-} // namespace magic_enum::flags::ostream_operators
-
-} // namespace magic_enum
-
-#if defined(__clang__)
-#  pragma clang diagnostic pop
-#elif defined(__GNUC__)
-#  pragma GCC diagnostic pop
-#elif defined(_MSC_VER)
-#  pragma warning(pop)
-#endif
-
-#endif // NEARGYE_MAGIC_ENUM_HPP

From 420029d021e89b9367d0614b2b061686c35b7295 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 15 Apr 2023 13:03:51 +0100
Subject: [PATCH 018/139] Avoid race condition in CPUMaxSIMDLevel()

---
 src/FastSIMD/FastSIMD.cpp | 77 ++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 38 deletions(-)

diff --git a/src/FastSIMD/FastSIMD.cpp b/src/FastSIMD/FastSIMD.cpp
index 558d3998..a8acf396 100644
--- a/src/FastSIMD/FastSIMD.cpp
+++ b/src/FastSIMD/FastSIMD.cpp
@@ -79,106 +79,101 @@ static int64_t xgetbv( int ctr )
 }
 #endif
 
-FASTSIMD_API FastSIMD::eLevel FastSIMD::CPUMaxSIMDLevel()
+static FastSIMD::eLevel GetCPUMaxSIMDLevel()
 {
-    static eLevel simdLevel = Level_Null;
-
-    if ( simdLevel > Level_Null )
-    {
-        return simdLevel;
-    }
+    FastSIMD::eLevel simdLevel = FastSIMD::Level_Null;
 
 #if FASTSIMD_x86
-    int abcd[4] = { 0,0,0,0 }; // cpuid results
+    int abcd[4] = { 0, 0, 0, 0 }; // cpuid results
 
 #if !FASTSIMD_64BIT
     simdLevel = Level_Scalar; // default value
 
     cpuid( abcd, 0 ); // call cpuid function 0
-    if ( abcd[0] == 0 )
+    if( abcd[0] == 0 )
         return simdLevel; // no further cpuid function supported
 
     cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
-    if ( (abcd[3] & (1 << 0)) == 0 )
+    if( ( abcd[3] & ( 1 << 0 ) ) == 0 )
         return simdLevel; // no floating point
-    if ( (abcd[3] & (1 << 23)) == 0 )
+    if( ( abcd[3] & ( 1 << 23 ) ) == 0 )
         return simdLevel; // no MMX
-    if ( (abcd[3] & (1 << 15)) == 0 )
+    if( ( abcd[3] & ( 1 << 15 ) ) == 0 )
         return simdLevel; // no conditional move
-    if ( (abcd[3] & (1 << 24)) == 0 )
+    if( ( abcd[3] & ( 1 << 24 ) ) == 0 )
         return simdLevel; // no FXSAVE
-    if ( (abcd[3] & (1 << 25)) == 0 )
+    if( ( abcd[3] & ( 1 << 25 ) ) == 0 )
         return simdLevel; // no SSE
     simdLevel = Level_SSE;
     // 1: SSE supported
 
-    if ( (abcd[3] & (1 << 26)) == 0 )
+    if( ( abcd[3] & ( 1 << 26 ) ) == 0 )
         return simdLevel; // no SSE2
 #else
     cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
 #endif
 
-    simdLevel = Level_SSE2; // default value for 64bit
+    simdLevel = FastSIMD::Level_SSE2; // default value for 64bit
     // 2: SSE2 supported
 
-    if ( (abcd[2] & (1 << 0)) == 0 )
+    if( ( abcd[2] & ( 1 << 0 ) ) == 0 )
         return simdLevel; // no SSE3
-    simdLevel = Level_SSE3;
+    simdLevel = FastSIMD::Level_SSE3;
     // 3: SSE3 supported
 
-    if ( (abcd[2] & (1 << 9)) == 0 )
+    if( ( abcd[2] & ( 1 << 9 ) ) == 0 )
         return simdLevel; // no SSSE3
-    simdLevel = Level_SSSE3;
+    simdLevel = FastSIMD::Level_SSSE3;
     // 4: SSSE3 supported
 
-    if ( (abcd[2] & (1 << 19)) == 0 )
+    if( ( abcd[2] & ( 1 << 19 ) ) == 0 )
         return simdLevel; // no SSE4.1
-    simdLevel = Level_SSE41;
+    simdLevel = FastSIMD::Level_SSE41;
     // 5: SSE4.1 supported
 
-    if ( (abcd[2] & (1 << 23)) == 0 )
+    if( ( abcd[2] & ( 1 << 23 ) ) == 0 )
         return simdLevel; // no POPCNT
-    if ( (abcd[2] & (1 << 20)) == 0 )
+    if( ( abcd[2] & ( 1 << 20 ) ) == 0 )
         return simdLevel; // no SSE4.2
-    simdLevel = Level_SSE42;
+    simdLevel = FastSIMD::Level_SSE42;
     // 6: SSE4.2 supported
 
-    if ( (abcd[2] & (1 << 26)) == 0 )
+    if( ( abcd[2] & ( 1 << 26 ) ) == 0 )
         return simdLevel; // no XSAVE
-    if ( (abcd[2] & (1 << 27)) == 0 )
+    if( ( abcd[2] & ( 1 << 27 ) ) == 0 )
         return simdLevel; // no OSXSAVE
-    if ( (abcd[2] & (1 << 28)) == 0 )
+    if( ( abcd[2] & ( 1 << 28 ) ) == 0 )
         return simdLevel; // no AVX
 
     uint64_t osbv = xgetbv( 0 );
-    if ( (osbv & 6) != 6 )
+    if( ( osbv & 6 ) != 6 )
         return simdLevel; // AVX not enabled in O.S.
-    simdLevel = Level_AVX;
+    simdLevel = FastSIMD::Level_AVX;
     // 7: AVX supported
 
     cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
-    if ( (abcd[1] & (1 << 5)) == 0 )
+    if( ( abcd[1] & ( 1 << 5 ) ) == 0 )
         return simdLevel; // no AVX2
-    simdLevel = Level_AVX2;
+    simdLevel = FastSIMD::Level_AVX2;
     // 8: AVX2 supported
 
-    if( (osbv & (0xE0)) != 0xE0 )
+    if( ( osbv & ( 0xE0 ) ) != 0xE0 )
         return simdLevel; // AVX512 not enabled in O.S.
-    if ( (abcd[1] & (1 << 16)) == 0 )
+    if( ( abcd[1] & ( 1 << 16 ) ) == 0 )
         return simdLevel; // no AVX512
     cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
-    if ( (abcd[0] & 0x60) != 0x60 )
+    if( ( abcd[0] & 0x60 ) != 0x60 )
         return simdLevel; // no AVX512
     // 9: AVX512 supported
 
     cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
-    if ( (abcd[1] & (1 << 31)) == 0 )
+    if( ( abcd[1] & ( 1 << 31 ) ) == 0 )
         return simdLevel; // no AVX512VL
     // 10: AVX512VL supported
 
-    if ( (abcd[1] & 0x40020000) != 0x40020000 )
+    if( ( abcd[1] & 0x40020000 ) != 0x40020000 )
         return simdLevel; // no AVX512BW, AVX512DQ
-    simdLevel = Level_AVX512;
+    simdLevel = FastSIMD::Level_AVX512;
     // 11: AVX512BW & AVX512DQ supported
 #endif
 
@@ -189,6 +184,12 @@ FASTSIMD_API FastSIMD::eLevel FastSIMD::CPUMaxSIMDLevel()
     return simdLevel;
 }
 
+FASTSIMD_API FastSIMD::eLevel FastSIMD::CPUMaxSIMDLevel()
+{
+    static eLevel simdLevel = GetCPUMaxSIMDLevel();
+    return simdLevel;
+}
+
 template<typename CLASS_T, FastSIMD::eLevel SIMD_LEVEL>
 CLASS_T* SIMDLevelSelector( FastSIMD::eLevel maxSIMDLevel, FastSIMD::MemoryAllocator allocator )
 {

From db92c55d35d1942668966b6ee6014e50d3a554c6 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 17 Apr 2023 00:00:38 +0100
Subject: [PATCH 019/139] Switch to linear memory allocator for SmartNodes

---
 include/FastNoise/Generators/Generator.h |  10 +
 include/FastNoise/SmartNode.h            | 112 +++----
 src/FastNoise/SmartNode.cpp              | 372 +++++------------------
 3 files changed, 132 insertions(+), 362 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 354f9c49..a0ce474a 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -2,6 +2,7 @@
 #include <cassert>
 #include <cmath>
 #include <algorithm>
+#include <atomic>
 
 #include "FastNoise/FastNoise_Config.h"
 
@@ -93,6 +94,10 @@ namespace FastNoise
         template<typename T>
         friend struct MetadataT;
 
+        Generator() : mReferences( 0 ) {}
+        Generator( const Generator& ) = delete;
+        Generator( Generator&& ) = delete;
+
         virtual ~Generator() = default;
 
         virtual FastSIMD::eLevel GetSIMDLevel() const = 0;
@@ -146,6 +151,11 @@ namespace FastNoise
 
     private:
         virtual void SetSourceSIMDPtr( const Generator* base, const void** simdPtr ) = 0;
+
+        template<typename>
+        friend class SmartNode;
+
+        mutable std::atomic<uint32_t> mReferences;
     };
 
     using GeneratorSource = GeneratorSourceT<Generator>;
diff --git a/include/FastNoise/SmartNode.h b/include/FastNoise/SmartNode.h
index d966f0c8..9f2d7a40 100644
--- a/include/FastNoise/SmartNode.h
+++ b/include/FastNoise/SmartNode.h
@@ -29,15 +29,9 @@ namespace FastNoise
         template<typename T>
         friend SmartNode<T> New( FastSIMD::eLevel );
 
-        static uint64_t GetReference( const void* ptr );
-
-        static void IncReference( uint64_t id );
-
-        static void DecReference( uint64_t id, void* ptr, void ( *destructorFunc )( void* ) );
-
-        static uint32_t ReferenceCount( uint64_t id );
-
         static void* Allocate( size_t size, size_t align );
+
+        static void Free( const void* ptr );
     };
 
     template<typename T>
@@ -47,62 +41,52 @@ namespace FastNoise
         static_assert( std::is_base_of<Generator, T>::value, "SmartNode should only be used for FastNoise node classes" );
 
         template<typename U>
-        static SmartNode DynamicCast( SmartNode<U> node )
+        static SmartNode DynamicCast( const SmartNode<U>& node )
         {
             if( T* dynamicCast = dynamic_cast<T*>( node.get() ) )
             {
-                return FastNoise::SmartNode<T>( node, dynamicCast );
+                return FastNoise::SmartNode<T>( dynamicCast );
             }
 
             return nullptr;
         }
 
         constexpr SmartNode( std::nullptr_t = nullptr ) noexcept :
-            mReferenceId( SmartNodeManager::kInvalidReferenceId ),
             mPtr( nullptr )
         {}
         
-        SmartNode( const SmartNode& node )
+        SmartNode( const SmartNode& node ) noexcept :
+            mPtr( node.mPtr )
         {
-            TryInc( node.mReferenceId );
-            mReferenceId = node.mReferenceId;
-            mPtr = node.mPtr;
+            TryInc( mPtr );
         }
 
         template<typename U>
-        SmartNode( const SmartNode<U>& node )
+        SmartNode( const SmartNode<U>& node ) noexcept :
+            mPtr( node.mPtr )
         {
-            TryInc( node.mReferenceId );
-            mReferenceId = node.mReferenceId;
-            mPtr = node.mPtr;
+            TryInc( mPtr );
         }
 
         template<typename U>
-        SmartNode( const SmartNode<U>& node, T* ptr )
+        SmartNode( const SmartNode<U>& node, T* ptr ) noexcept :
+            mPtr( ptr )
         {
-            assert( ptr );
+            TryInc( mPtr );
 
-            TryInc( node.mReferenceId );
-            mReferenceId = node.mReferenceId;
-            mPtr = ptr;
+            assert( &node.mPtr->mReferences == &mPtr->mReferences );
         }
 
-        SmartNode( SmartNode&& node ) noexcept
+        SmartNode( SmartNode&& node ) noexcept :
+            mPtr( node.mPtr )
         {
-            mReferenceId = node.mReferenceId;
-            mPtr = node.mPtr;
-
-            node.mReferenceId = SmartNodeManager::kInvalidReferenceId;
             node.mPtr = nullptr;
         }
 
         template<typename U>
-        SmartNode( SmartNode<U>&& node ) noexcept
+        SmartNode( SmartNode<U>&& node ) noexcept :
+            mPtr( node.mPtr )
         {
-            mReferenceId = node.mReferenceId;
-            mPtr = node.mPtr;
-
-            node.mReferenceId = SmartNodeManager::kInvalidReferenceId;
             node.mPtr = nullptr;
         }
 
@@ -120,17 +104,11 @@ namespace FastNoise
         template<typename U>
         SmartNode& operator=( SmartNode<U>&& node ) noexcept
         {
-            if( mReferenceId == node.mReferenceId )
-            {
-                mPtr = node.mPtr;                
-            }
-            else
+            if( mPtr != node.mPtr )            
             {
                 Release();
-                mReferenceId = node.mReferenceId;
                 mPtr = node.mPtr;
 
-                node.mReferenceId = SmartNodeManager::kInvalidReferenceId;
                 node.mPtr = nullptr;
             }
 
@@ -139,13 +117,12 @@ namespace FastNoise
 
         SmartNode& operator=( const SmartNode& node ) noexcept
         {
-            if( mReferenceId != node.mReferenceId )
+            if( mPtr != node.mPtr )
             {
-                TryInc( node.mReferenceId );
+                TryInc( node.mPtr );
                 Release();
-                mReferenceId = node.mReferenceId;
+                mPtr = node.mPtr;
             }
-            mPtr = node.mPtr;
 
             return *this;
         }
@@ -153,13 +130,12 @@ namespace FastNoise
         template<typename U>
         SmartNode& operator=( const SmartNode<U>& node ) noexcept
         {
-            if( mReferenceId != node.mReferenceId )
+            if( mPtr != node.mPtr )
             {
-                TryInc( node.mReferenceId );
+                TryInc( node.mPtr );
                 Release();
-                mReferenceId = node.mReferenceId;
+                mPtr = node.mPtr;
             }
-            mPtr = node.mPtr;
 
             return *this;
         }
@@ -178,11 +154,13 @@ namespace FastNoise
 
         T& operator*() const noexcept
         {
+            assert( mPtr->mReferences );
             return *mPtr;
         }
 
         T* operator->() const noexcept
         {
+            assert( mPtr->mReferences );
             return mPtr;
         }
 
@@ -203,18 +181,17 @@ namespace FastNoise
 
         void swap( SmartNode& node ) noexcept
         {
-            std::swap( mReferenceId, node.mReferenceId );
             std::swap( mPtr, node.mPtr );
         }
 
         long use_count() const noexcept
         {
-            if( mReferenceId == SmartNodeManager::kInvalidReferenceId )
+            if( mPtr )
             {
-                return 0;
+                return mPtr->mReferences;
             }
 
-            return (long)SmartNodeManager::ReferenceCount( mReferenceId );
+            return 0;
         }
 
         bool unique() const noexcept
@@ -233,34 +210,41 @@ namespace FastNoise
         friend class SmartNode;
 
         explicit SmartNode( T* ptr ) :
-            mReferenceId( SmartNodeManager::GetReference( ptr ) ),
             mPtr( ptr )
         {
-            SmartNodeManager::IncReference( mReferenceId );
+            TryInc( ptr );
         }
 
         void Release()
         {
             using U = typename std::remove_const<T>::type;
 
-            if( mReferenceId != SmartNodeManager::kInvalidReferenceId )
+            if( mPtr )
             {
-                SmartNodeManager::DecReference( mReferenceId, const_cast<U*>( mPtr ), []( void* ptr ) { ( (U*)ptr )->~T(); } );
+                uint32_t previousRefCount = mPtr->mReferences.fetch_sub( 1, std::memory_order_relaxed );
+
+                assert( previousRefCount );
+
+                if( previousRefCount == 1 )
+                {
+                    const_cast<U*>( mPtr )->~U();
+
+                    SmartNodeManager::Free( mPtr );
+                }
             }
 
-            mReferenceId = SmartNodeManager::kInvalidReferenceId;
-            mPtr = nullptr;            
+            mPtr = nullptr;
         }
 
-        static void TryInc( uint64_t id )
+        template<typename U>
+        static void TryInc( U* ptr ) noexcept
         {
-            if( id != SmartNodeManager::kInvalidReferenceId )
+            if( ptr )
             {
-                SmartNodeManager::IncReference( id );
+                ptr->mReferences.fetch_add( 1, std::memory_order_relaxed );
             }
         }
-
-        uint64_t mReferenceId;
+        
         T* mPtr;
     };
 } // namespace FastNoise
diff --git a/src/FastNoise/SmartNode.cpp b/src/FastNoise/SmartNode.cpp
index 1b649193..f1fae555 100644
--- a/src/FastNoise/SmartNode.cpp
+++ b/src/FastNoise/SmartNode.cpp
@@ -14,310 +14,126 @@
 
 namespace FastNoise
 {
-    using SmartNodeReference = std::atomic<uint32_t>*;
-    
-    struct SmartNodeManagerPool
+    class SmartNodeManagerPool
     {
-        static constexpr uint32_t kInvalidSlot = (uint32_t)-1;
-
-        struct SlotHeader
-        {
-            std::atomic<uint32_t> references;
-            uint32_t size;
-        };
-
-        struct Slot
-        {
-            uint32_t pos;
-            uint32_t size;            
-        };
-
-        SmartNodeManagerPool( uint32_t size )
-        {
-            size = std::min<uint32_t>( size, INT32_MAX );
-
-            uint32_t alignOffset = size % alignof( SlotHeader );
-            if( alignOffset )
-            {
-                // pool size needs to be multiple of `alignof( SlotHeader )` (likely 4)
-                size += alignof( SlotHeader ) - alignOffset;
-            }
-
-            poolSize = size;
-            pool = (uint8_t*)new SlotHeader[size / sizeof( SlotHeader )];
-
-            freeSlots = { { 0, poolSize } };
-        }
+    public:
+        SmartNodeManagerPool( uint32_t size ) :
+            mAllocState( 0 ), mNextPool( nullptr ), mPoolSize( std::min<uint32_t>( size, INT32_MAX ) )
+        { }
 
         SmartNodeManagerPool( const SmartNodeManagerPool& ) = delete;
         SmartNodeManagerPool( SmartNodeManagerPool&& ) = delete;
-
-        ~SmartNodeManagerPool()
-        {
-            assert( usedSlots.empty() );
-
-            delete[] pool;
-        }
-
-        bool Contains( const void* ptr ) const
-        {
-            return ptr >= pool && ptr < pool + poolSize;
-        }
-
-        auto GetUsedSlotItr( const void* ref ) const
-        {
-            if( ref >= pool && ref < pool + poolSize )
-            {
-                for( auto itr = usedSlots.begin(); itr != usedSlots.end(); ++itr )
-                {
-                    const uint8_t* start = pool + itr->pos;
-
-                    if( start <= ref && start + itr->size > ref )
-                    {
-                        return itr;
-                    }
-                }
-            }
-
-            return usedSlots.end();
-        }
-
-        auto GetUsedSlotItr( uint32_t pos ) const
-        {
-            return std::find_if( usedSlots.begin(), usedSlots.end(), [pos]( const Slot& slot ) 
-            {
-                return slot.pos == pos;    
-            } );
-        }
         
-        bool ValidatePtr( SmartNodeReference pos, const void* ptr ) const
-        {            
-            if( *pos == 0 )
-            {
-                assert( 0 );
-                return false;
-            }
-
-            auto slot = GetUsedSlotItr( ptr );
-
-            // Check pos pointing at garbage data
-            if( slot == usedSlots.end() )
-            {
-                assert( 0 );
-                return false;
-            }
-
-            // Check pos is correct
-            if( pool + slot->pos != (uint8_t*)pos )
-            {
-                assert( 0 );
-                return false;
-            }
-            return true;
-        }
-
-        std::atomic<uint32_t>& GetReferenceCount( uint32_t pos ) const
+        bool Contains( const void* ptr ) const
         {
-            SlotHeader* slot = (SlotHeader*)( pool + pos );
-
-            assert( pos < poolSize );
+            uint8_t* pool = GetPool();
+            uint32_t nextFreeIndex = (uint32_t)( mAllocState.load( std::memory_order_relaxed ) >> 32 );
 
-            return slot->references;
+            return ptr >= pool && ptr < pool + ( nextFreeIndex - 1 );
         }
 
-        SmartNodeReference GetReferenceId( const void* ptr ) const
+        void* TryAlloc( size_t size, size_t align )
         {
-            auto slot = GetUsedSlotItr( ptr );
+            uint8_t* pool = GetPool();
+            uint64_t allocState = mAllocState.load( std::memory_order_relaxed );
+            uint64_t newAllocState;
+            void* startSlot;
 
-            if( slot == usedSlots.end() )
+            do
             {
-                return nullptr;
-            }
+                uint32_t activeAllocs = (uint32_t)allocState;
+                uint32_t nextFreeIndex = (uint32_t)(allocState >> 32);
 
-            return &reinterpret_cast<SlotHeader*>( pool + slot->pos )->references ;
-        }
-
-        void* TryAlloc( size_t size, size_t align )
-        {
-            align = std::max( align, alignof( SlotHeader ) );
+                // Reset pool counter if there are no allocs
+                startSlot    = activeAllocs ? pool      + nextFreeIndex : pool;
+                size_t space = activeAllocs ? mPoolSize - nextFreeIndex : mPoolSize;
 
-            for( uint32_t idx = 0; idx < freeSlots.size(); idx++ )
-            {
-                if( freeSlots[idx].size < size + sizeof( SlotHeader ) )
+                if( !std::align( align, size, startSlot, space ) )
                 {
-                    continue;
+                    return nullptr;
                 }
 
-                uint8_t* startSlot = pool + freeSlots[idx].pos;
-                void* ptr = startSlot + sizeof( SlotHeader );
-                size_t space = freeSlots[idx].size - sizeof( SlotHeader );
-
-                if( std::align( align, size, ptr, space ) )
-                {                   
-                    uint8_t* endSlot = (uint8_t*)ptr + size;
-
-                    // Align next slot correctly for SlotHeader
-                    size_t alignmentOffset = (size_t)endSlot % alignof( SlotHeader );
-
-                    if( alignmentOffset )
-                    {
-                        endSlot += alignof( SlotHeader ) - alignmentOffset;
-                    }
-
-                    uint32_t slotSize = (uint32_t)( endSlot - startSlot );
+                nextFreeIndex = static_cast<uint32_t>( ( (uint8_t*)startSlot + size ) - pool );
+                activeAllocs++;
 
-                    assert( freeSlots[idx].size >= slotSize );
-                    
-                    new( startSlot ) SlotHeader { { 0u }, slotSize };
-                    usedSlots.emplace_back( Slot{ freeSlots[idx].pos, slotSize } );
+                newAllocState = (uint64_t)activeAllocs | ( (uint64_t)nextFreeIndex << 32 );
+                                
+            } while( !mAllocState.compare_exchange_weak( allocState, newAllocState, std::memory_order_relaxed ) );
 
-                    // Check if remaining free slot is empty
-                    if( freeSlots[idx].size <= slotSize )
-                    {
-                        assert( freeSlots[idx].size == slotSize );
-                        freeSlots.erase( freeSlots.cbegin() + idx );
-                        return ptr;
-                    }
-
-                    freeSlots[idx].pos += slotSize;
-                    freeSlots[idx].size -= slotSize;
-
-                    return ptr;
-                }
-            }
-
-            assert( freeSlots.empty() || freeSlots[0].size != poolSize ); // Empty pool not large enough to fit alloc, increase the pool size
-            return nullptr;
+            return startSlot;
         }
 
-        void DeAlloc( SmartNodeReference ref )
+        bool DeAlloc( const void* ptr )
         {
-            SlotHeader* slotHeader = (SlotHeader*)( ref + offsetof( SlotHeader, references ) );
-            auto slot = GetUsedSlotItr( ref );
-
-            assert( slot != usedSlots.end() );            
-            assert( slotHeader->references == 0 );
-            assert( slot->size < poolSize );
-
-            // Merge free slots as necessary
-            Slot* expandedBefore = nullptr;
-            uint32_t idx = 0;
-            uint32_t pos = (uint32_t)((uint8_t*)ref - pool);
-
-            for( ; idx < freeSlots.size(); idx++ )
+            if( Contains( ptr ) )
             {
-                if( freeSlots[idx].pos > pos )
-                {
-                    break;
-                }
+                uint64_t allocState = mAllocState.fetch_sub( 1, std::memory_order_relaxed );
 
-                // Found slot before, expand
-                if( freeSlots[idx].pos + freeSlots[idx].size == pos )
-                {
-                    freeSlots[idx].size += slot->size;
-                    expandedBefore = &freeSlots[idx];
-                    idx++;
-                    break;
-                }
+                assert( (uint32_t)allocState != 0 );
+                return true;
             }
 
-            if( idx < freeSlots.size() && freeSlots[idx].pos == pos + slot->size )
-            {
-                // Found slot before and after, expand before again, delete after
-                if( expandedBefore )
-                {
-                    expandedBefore->size += freeSlots[idx].size;
-                    freeSlots.erase( freeSlots.begin() + idx );
-                }
-                else // Found slot after, expand
-                {
-                    freeSlots[idx].pos = pos;
-                    freeSlots[idx].size += slot->size;
-                }
-            }
-            else if( !expandedBefore ) // No slots before or after, create new
-            {
-                freeSlots.emplace( freeSlots.begin() + idx, Slot { pos, slot->size } );
-            }
-            
-            slotHeader->~SlotHeader();
-            assert( memset( slotHeader, 255, slot->size ) );
+            return false;
+        }
 
-            usedSlots.erase( slot );
+        uint8_t* GetPool() const
+        {
+            return (uint8_t*)this + sizeof( SmartNodeManagerPool );
         }
 
-        uint32_t poolSize;
-        uint8_t* pool;
-        std::vector<Slot> freeSlots;
-        std::vector<Slot> usedSlots;
+        std::atomic<uint64_t> mAllocState;
+        SmartNodeManagerPool* mNextPool;
+        uint32_t mPoolSize;
     };
     
     class SmartNodeMemoryAllocator
     {
     public:
-        static inline uint32_t sNewPoolSize = 256 * 1024;
+        static inline uint32_t sNewPoolSize = 64 * 1024;
 
-        bool ValidatePtr( SmartNodeReference ref, const void* ptr )
+        void* Alloc( size_t size, size_t align ) 
         {
-            std::lock_guard lock( mMutex );
-
-            for(const auto & pool : mPools)
+            if( void* ptr = AllocFromPools( size, align ) )
             {
-                if( pool.Contains( ptr ) )
-                {
-                    return pool.ValidatePtr( ref, ptr );
-                }
+                return ptr;
             }
-            
-            return false;
-        }
 
-        SmartNodeReference GetReference( const void* ptr )
-        {
             std::lock_guard lock( mMutex );
 
-            SmartNodeReference ref = nullptr;
-
-            for( auto& poolItr : mPools )
+            if( void* ptr = AllocFromPools( size, align ) )
             {
-                ref = poolItr.GetReferenceId( ptr );
-                if( ref )
-                {
-                    return ref;
-                }
+                return ptr;
             }
 
-            // Could not find ptr in pools, probably not allocated using this class
-            assert( 0 );
-            return nullptr;
-        }
-
-        void* Alloc( size_t size, size_t align ) 
-        {
-            std::lock_guard lock( mMutex );
+            SmartNodeManagerPool** pool = &mPools;
 
-            if( void* ptr = AllocFromPools( size, align ) )
+            while( *pool )
             {
-                return ptr;
-            }
+                pool = &(*pool)->mNextPool;
+            }            
 
-            mPools.emplace_back( sNewPoolSize );
+            if( void* alloc = std::malloc( sNewPoolSize ) )
+            {        
+                *pool = new( alloc ) SmartNodeManagerPool( sNewPoolSize - (uint32_t)sizeof( SmartNodeManagerPool ) );
+
+                return (*pool)->TryAlloc( size, align );  
+            }
 
-            return AllocFromPools( size, align );
+            return nullptr;
         }
 
-        void Dealloc( SmartNodeReference ref )
+        void Dealloc( const void* ptr )
         {
-            std::lock_guard lock( mMutex );
+            SmartNodeManagerPool* pool = mPools;
 
-            
-            for( auto& poolItr : mPools )
+            while( pool )
             {
-                if( poolItr.Contains( ref ) )
+                if( pool->DeAlloc( ptr ) )
                 {
-                    poolItr.DeAlloc( ref );
                     return;
                 }
+
+                pool = pool->mNextPool;
             }
 
             assert( 0 );
@@ -326,23 +142,21 @@ namespace FastNoise
     private:
         void* AllocFromPools( size_t size, size_t align )
         {
-            uint32_t idx = 0;            
+            SmartNodeManagerPool* pool = mPools;
 
-            for( auto& poolItr : mPools )
+            while( pool )
             {
-                if( void* ptr = poolItr.TryAlloc( size, align ) )
+                if( void* ptr = pool->TryAlloc( size, align ) )
                 {
                     return ptr;
                 }
 
-                idx++;
+                pool = pool->mNextPool;
             }
             return nullptr;
         }
-
-        // std::list is used to allow lock free reads to pools
-        // In most use cases there should only be 1 pool so performance is not a concern
-        std::list<SmartNodeManagerPool> mPools;
+        
+        SmartNodeManagerPool* mPools;
         mutable std::mutex mMutex;
     };
 
@@ -353,52 +167,14 @@ namespace FastNoise
         SmartNodeMemoryAllocator::sNewPoolSize = size;
     }
 
-    uint64_t SmartNodeManager::GetReference( const void* ptr )
-    {
-        assert( ptr );
-
-        return (uint64_t)gMemoryAllocator.GetReference( ptr );
-    }
-
-    void SmartNodeManager::IncReference( uint64_t id )
-    {
-        assert( id != kInvalidReferenceId );
-
-        std::atomic<uint32_t>& refCount = *(SmartNodeReference)id;
-
-        ++refCount;
-    }
-
-    void SmartNodeManager::DecReference( uint64_t id, void* ptr, void ( *destructorFunc )( void* ) )
-    {
-        SmartNodeReference refCount = (SmartNodeReference)id;
-
-        assert( gMemoryAllocator.ValidatePtr( refCount, ptr ) );   
-
-        uint32_t previousRefCount = refCount->fetch_sub( 1 );
-
-        assert( previousRefCount );
-
-        if( previousRefCount == 1 )
-        {
-            destructorFunc( ptr );
-
-            gMemoryAllocator.Dealloc( refCount );
-        }
-    }
-
-    uint32_t SmartNodeManager::ReferenceCount( uint64_t id )
+    void* SmartNodeManager::Allocate( size_t size, size_t align )
     {
-        assert( id != kInvalidReferenceId );
-
-        SmartNodeReference refCount = (SmartNodeReference)id;
-        
-        return *refCount;
+        return gMemoryAllocator.Alloc( size, align );
     }
 
-    void* SmartNodeManager::Allocate( size_t size, size_t align )
+    void SmartNodeManager::Free( const void* ptr )
     {
-        return gMemoryAllocator.Alloc( size, align );
+        gMemoryAllocator.Dealloc( ptr );        
     }
 } // namespace FastNoise
 

From 46a7a3b599e6965e0c3ef5342271d7f3160bd73d Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 17 Apr 2023 21:12:10 +0100
Subject: [PATCH 020/139] Move generator reference inside internal class to
 remove std from header

---
 include/FastNoise/Generators/Generator.h   |  5 ++---
 include/FastNoise/Generators/Generator.inl | 15 +++++++++++++++
 include/FastNoise/SmartNode.h              | 14 ++++++--------
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index a0ce474a..57a91489 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -94,7 +94,7 @@ namespace FastNoise
         template<typename T>
         friend struct MetadataT;
 
-        Generator() : mReferences( 0 ) {}
+        Generator() = default;
         Generator( const Generator& ) = delete;
         Generator( Generator&& ) = delete;
 
@@ -151,11 +151,10 @@ namespace FastNoise
 
     private:
         virtual void SetSourceSIMDPtr( const Generator* base, const void** simdPtr ) = 0;
+        virtual int32_t ReferencesFetchAdd( int32_t add = 0 ) const noexcept = 0;
 
         template<typename>
         friend class SmartNode;
-
-        mutable std::atomic<uint32_t> mReferences;
     };
 
     using GeneratorSource = GeneratorSourceT<Generator>;
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 9ae9110d..2d2039d4 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -23,6 +23,9 @@ public:
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const override { return GenT( seed, x, y, z ); }\
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const override { return GenT( seed, x, y, z, w ); }
 
+    FS_T() : mReferences( 0 )
+    { }
+
     FastSIMD::eLevel GetSIMDLevel() const final
     {
         return FS::SIMD_Level;
@@ -458,4 +461,16 @@ private:
 
         return minMax;
     }
+
+    int32_t ReferencesFetchAdd( int32_t add ) const noexcept final
+    {
+        if( add )
+        {
+            return mReferences.fetch_add( add, std::memory_order_relaxed );
+        }
+
+        return mReferences.load( std::memory_order_relaxed );
+    }
+    
+    mutable std::atomic<uint32_t> mReferences;
 };
diff --git a/include/FastNoise/SmartNode.h b/include/FastNoise/SmartNode.h
index 9f2d7a40..b4267c4b 100644
--- a/include/FastNoise/SmartNode.h
+++ b/include/FastNoise/SmartNode.h
@@ -69,12 +69,10 @@ namespace FastNoise
         }
 
         template<typename U>
-        SmartNode( const SmartNode<U>& node, T* ptr ) noexcept :
+        SmartNode( const SmartNode<U>&, T* ptr ) noexcept :
             mPtr( ptr )
         {
             TryInc( mPtr );
-
-            assert( &node.mPtr->mReferences == &mPtr->mReferences );
         }
 
         SmartNode( SmartNode&& node ) noexcept :
@@ -154,13 +152,13 @@ namespace FastNoise
 
         T& operator*() const noexcept
         {
-            assert( mPtr->mReferences );
+            assert( mPtr->ReferencesFetchAdd() );
             return *mPtr;
         }
 
         T* operator->() const noexcept
         {
-            assert( mPtr->mReferences );
+            assert( mPtr->ReferencesFetchAdd() );
             return mPtr;
         }
 
@@ -188,7 +186,7 @@ namespace FastNoise
         {
             if( mPtr )
             {
-                return mPtr->mReferences;
+                return mPtr->ReferencesFetchAdd();
             }
 
             return 0;
@@ -221,7 +219,7 @@ namespace FastNoise
 
             if( mPtr )
             {
-                uint32_t previousRefCount = mPtr->mReferences.fetch_sub( 1, std::memory_order_relaxed );
+                int32_t previousRefCount = mPtr->ReferencesFetchAdd( -1 );
 
                 assert( previousRefCount );
 
@@ -241,7 +239,7 @@ namespace FastNoise
         {
             if( ptr )
             {
-                ptr->mReferences.fetch_add( 1, std::memory_order_relaxed );
+                ptr->ReferencesFetchAdd( 1 );
             }
         }
         

From 4f728fc8107077a3363a1b81dfcea3522663dafd Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 17 Apr 2023 21:56:16 +0100
Subject: [PATCH 021/139] Free empty pools if there are 2 or more empty

---
 src/FastNoise/SmartNode.cpp | 111 ++++++++++++++++++++++++++++++------
 1 file changed, 92 insertions(+), 19 deletions(-)

diff --git a/src/FastNoise/SmartNode.cpp b/src/FastNoise/SmartNode.cpp
index f1fae555..aa9372a6 100644
--- a/src/FastNoise/SmartNode.cpp
+++ b/src/FastNoise/SmartNode.cpp
@@ -63,17 +63,36 @@ namespace FastNoise
             return startSlot;
         }
 
-        bool DeAlloc( const void* ptr )
+        int32_t Free( const void* ptr )
         {
             if( Contains( ptr ) )
             {
                 uint64_t allocState = mAllocState.fetch_sub( 1, std::memory_order_relaxed );
 
                 assert( (uint32_t)allocState != 0 );
-                return true;
+                return (int32_t)allocState - 1;
             }
 
-            return false;
+            return -1;
+        }
+
+        int32_t AllocCount() const
+        {
+            return (int32_t)mAllocState.load( std::memory_order_relaxed );
+        }
+
+        bool MarkForRemoval()
+        {
+            uint64_t allocState = mAllocState.load( std::memory_order_relaxed );
+
+            if( (uint32_t)allocState != 0 )
+            {
+                return false;
+            }
+
+            uint64_t newAllocState = ( (uint64_t)mPoolSize << 32 ) + 1; // Set as full
+
+            return mAllocState.compare_exchange_strong( allocState, newAllocState, std::memory_order_relaxed );
         }
 
         uint8_t* GetPool() const
@@ -82,7 +101,7 @@ namespace FastNoise
         }
 
         std::atomic<uint64_t> mAllocState;
-        SmartNodeManagerPool* mNextPool;
+        std::atomic<SmartNodeManagerPool*> mNextPool;
         uint32_t mPoolSize;
     };
     
@@ -104,39 +123,57 @@ namespace FastNoise
             {
                 return ptr;
             }
+      
+            if( void* poolAlloc = std::malloc( std::max( (uint32_t)sizeof( SmartNodeManagerPool ), sNewPoolSize ) ) )
+            {        
+                SmartNodeManagerPool* newPool = new( poolAlloc ) SmartNodeManagerPool( sNewPoolSize - (uint32_t)sizeof( SmartNodeManagerPool ) );
 
-            SmartNodeManagerPool** pool = &mPools;
+                void* alloc = newPool->TryAlloc( size, align );
+                assert( alloc ); // Alloc too large to fit in empty pool, increase pool size
 
-            while( *pool )
-            {
-                pool = &(*pool)->mNextPool;
-            }            
+                if( mPools )
+                {
+                    SmartNodeManagerPool* pool = mPools;
 
-            if( void* alloc = std::malloc( sNewPoolSize ) )
-            {        
-                *pool = new( alloc ) SmartNodeManagerPool( sNewPoolSize - (uint32_t)sizeof( SmartNodeManagerPool ) );
+                    while( SmartNodeManagerPool* nextPool = pool->mNextPool.load( std::memory_order_relaxed ) )
+                    {
+                        pool = nextPool;
+                    }  
 
-                return (*pool)->TryAlloc( size, align );  
-            }
+                    pool->mNextPool.store( newPool, std::memory_order_release );
+                }
+                else
+                {
+                    mPools = newPool;
+                }
+
+                return alloc;
+            } 
 
             return nullptr;
         }
 
-        void Dealloc( const void* ptr )
+        void Free( const void* ptr )
         {
             SmartNodeManagerPool* pool = mPools;
 
             while( pool )
             {
-                if( pool->DeAlloc( ptr ) )
+                int32_t allocCount = pool->Free( ptr );
+
+                if( allocCount >= 0 )
                 {
+                    if( allocCount == 0 )
+                    {
+                        RemoveEmptyPool();
+                    }
                     return;
                 }
 
                 pool = pool->mNextPool;
             }
 
-            assert( 0 );
+            assert( 0 ); // Pointer not in any of the pools
         }
         
     private:
@@ -155,8 +192,44 @@ namespace FastNoise
             }
             return nullptr;
         }
+
+        void RemoveEmptyPool()
+        {
+            SmartNodeManagerPool* pool = mPools;
+            SmartNodeManagerPool* emptyPool = mPools->AllocCount() > 0 ? nullptr : mPools;
+
+            while( SmartNodeManagerPool* nextPool = pool->mNextPool.load( std::memory_order_relaxed ) )
+            {
+                int32_t allocCount = nextPool->AllocCount();
+
+                if( allocCount == 0 )
+                {
+                    if( emptyPool ) // Only remove a pool if we have 2 empty pools
+                    {
+                        std::lock_guard lock( mMutex );
+
+                        SmartNodeManagerPool* toRemove = nextPool;
+
+                        if( toRemove->MarkForRemoval() )
+                        {
+                            pool->mNextPool.store( toRemove->mNextPool.load( std::memory_order_relaxed ) );
+
+                            toRemove->~SmartNodeManagerPool();
+
+                            std::free( toRemove );
+                        }
+
+                        return;
+                    }
+
+                    emptyPool = nextPool;                    
+                }
+
+                pool = nextPool;
+            }
+        }
         
-        SmartNodeManagerPool* mPools;
+        SmartNodeManagerPool* mPools = nullptr;
         mutable std::mutex mMutex;
     };
 
@@ -174,7 +247,7 @@ namespace FastNoise
 
     void SmartNodeManager::Free( const void* ptr )
     {
-        gMemoryAllocator.Dealloc( ptr );        
+        gMemoryAllocator.Free( ptr );        
     }
 } // namespace FastNoise
 

From ff33d6f5617a49d8adc0ba53fc79c4deadd41c10 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 17 Apr 2023 22:41:07 +0100
Subject: [PATCH 022/139] Fix hover strings truncating last character

---
 NoiseTool/FastNoiseNodeEditor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NoiseTool/FastNoiseNodeEditor.cpp b/NoiseTool/FastNoiseNodeEditor.cpp
index 22a89955..d5438d7c 100644
--- a/NoiseTool/FastNoiseNodeEditor.cpp
+++ b/NoiseTool/FastNoiseNodeEditor.cpp
@@ -80,7 +80,7 @@ std::string string_format( const char* format, Args... args )
     {
         return "";
     }
-    auto size = static_cast<size_t>( size_s );
+    auto size = static_cast<size_t>( size_s + 1 );
     std::string buf( size, 0 );
     std::snprintf( buf.data(), size, format, args... );
     return buf;

From 7f163ef940b04ed96be1251b99e86cdd81adaa10 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 17 Apr 2023 23:59:42 +0100
Subject: [PATCH 023/139] Don't try to format non-literal strings

---
 NoiseTool/FastNoiseNodeEditor.cpp | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/NoiseTool/FastNoiseNodeEditor.cpp b/NoiseTool/FastNoiseNodeEditor.cpp
index d5438d7c..1c893481 100644
--- a/NoiseTool/FastNoiseNodeEditor.cpp
+++ b/NoiseTool/FastNoiseNodeEditor.cpp
@@ -72,8 +72,8 @@ static std::string TimeWithUnits( int64_t time, int significantDigits = 3 )
     return ss.str();
 }
 
-template<typename... Args>
-std::string string_format( const char* format, Args... args )
+template<size_t N, typename... Args>
+std::string string_format( const char (&format)[N], const Args&... args )
 {
     int size_s = std::snprintf( nullptr, 0, format, args... );
     if( size_s <= 0 )
@@ -86,21 +86,26 @@ std::string string_format( const char* format, Args... args )
     return buf;
 }
 
-template<typename... T>
-static bool DoHoverPopup( const char* format, T... args )
+const char* string_format( const char* txt )
+{
+    return txt;
+}
+
+template<typename T, typename... Args>
+static bool DoHoverPopup( T&& format, const Args&... args )
 {
     if( ImGui::IsItemHovered() )
     {
-        std::string hoverTxt = string_format( format, args... );
+        auto hoverTxt = string_format( format, args... );
 
-        if( hoverTxt.empty() )
+        if( !hoverTxt[0] )
         {
             return false;
         }
 
         ImGui::PushStyleVar( ImGuiStyleVar_WindowPadding, ImVec2( 4.f, 4.f ) );
         ImGui::BeginTooltip();
-        ImGui::TextUnformatted( hoverTxt.c_str() );
+        ImGui::TextUnformatted( &hoverTxt[0] );
         ImGui::EndTooltip();
         ImGui::PopStyleVar();
         return true;

From f6cb106a5630d1a31b03bea41a89f59f7a6a54c5 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Sat, 8 Jul 2023 21:06:51 +0100
Subject: [PATCH 024/139] Update to latest FastSIMD

---
 NoiseTool/CMakeLists.txt                   |  1 +
 NoiseTool/FastNoiseNodeEditor.cpp          | 28 ++--------------------
 NoiseTool/FastNoiseNodeEditor.h            |  4 +---
 NoiseTool/NoiseTexture.cpp                 |  2 +-
 NoiseTool/NoiseToolApp.cpp                 | 20 +++++++---------
 include/FastNoise/Generators/Generator.h   |  4 ++--
 include/FastNoise/Generators/Generator.inl |  4 ++--
 include/FastNoise/Generators/Utils.inl     |  8 +++----
 src/CMakeLists.txt                         |  4 ++--
 src/FastNoise/FastNoise_C.cpp              |  2 +-
 tests/FastNoiseCpp11Include.cpp            |  2 +-
 11 files changed, 25 insertions(+), 54 deletions(-)

diff --git a/NoiseTool/CMakeLists.txt b/NoiseTool/CMakeLists.txt
index f61de475..3160482f 100644
--- a/NoiseTool/CMakeLists.txt
+++ b/NoiseTool/CMakeLists.txt
@@ -111,6 +111,7 @@ target_include_directories(NoiseTool PRIVATE
 
 target_link_libraries(NoiseTool PRIVATE
     FastNoise
+    FastSIMD_FastNoise
     Magnum::Application
     Magnum::Shaders
     Magnum::SceneGraph
diff --git a/NoiseTool/FastNoiseNodeEditor.cpp b/NoiseTool/FastNoiseNodeEditor.cpp
index b76d9c44..d4608bc7 100644
--- a/NoiseTool/FastNoiseNodeEditor.cpp
+++ b/NoiseTool/FastNoiseNodeEditor.cpp
@@ -552,7 +552,7 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
     ImGui::DockSpaceOverViewport( viewport, ImGuiDockNodeFlags_PassthruCentralNode ); 
 
     std::string simdTxt = "Current Feature Set: ";
-    simdTxt += GetFeatureSetName( mActualFeatureSet );
+    simdTxt += FastSIMD::GetFeatureSetString( mActualFeatureSet );
     ImGui::TextUnformatted( simdTxt.c_str() );
 
     ImGui::DragInt( "Node Benchmark Count", &mNodeBenchmarkMax, 8, 8, 64 * 1024 );
@@ -1166,7 +1166,7 @@ FastNoise::SmartNode<> FastNoiseNodeEditor::GenerateSelectedPreview()
 
         if( generator )
         {
-            mActualFeatureSet = generator->GetLiveFeatureSet();
+            mActualFeatureSet = generator->GetActiveFeatureSet();
         }
     }
 
@@ -1247,27 +1247,3 @@ void FastNoiseNodeEditor::ChangeSelectedNode( FastNoise::NodeData* newId )
         mMeshNoisePreview.ReGenerate( generator );
     }
 }
-
-const char* FastNoiseNodeEditor::GetFeatureSetName( FastSIMD::FeatureSet lvl )
-{
-    switch( lvl )
-    {
-    default:
-    case FastSIMD::FeatureSet::Null:   return "NULL";
-    case FastSIMD::FeatureSet::Scalar: return "Scalar";
-    case FastSIMD::FeatureSet::SSE:    return "SSE";
-    case FastSIMD::FeatureSet::SSE2:   return "SSE2";
-    case FastSIMD::FeatureSet::SSE3:   return "SSE3";
-    case FastSIMD::FeatureSet::SSSE3:  return "SSSE3";
-    case FastSIMD::FeatureSet::SSE41:  return "SSE4.1";
-    case FastSIMD::FeatureSet::SSE42:  return "SSE4.2";
-    case FastSIMD::FeatureSet::AVX:    return "AVX";
-    case FastSIMD::FeatureSet::AVX2:   return "AVX2";
-    case FastSIMD::FeatureSet::AVX2_FMA:   return "AVX2_FMA";
-    case FastSIMD::FeatureSet::AVX512_Baseline: return "AVX512";
-    case FastSIMD::FeatureSet::AVX512_Baseline_FMA: return "AVX512_FMA";
-    case FastSIMD::FeatureSet::NEON:   return "NEON";
-    case FastSIMD::FeatureSet::NEON_FMA:   return "NEON_FMA";
-    case FastSIMD::FeatureSet::Max:   return "AUTO";
-    }
-}
diff --git a/NoiseTool/FastNoiseNodeEditor.h b/NoiseTool/FastNoiseNodeEditor.h
index 63c7693e..0de9b95a 100644
--- a/NoiseTool/FastNoiseNodeEditor.h
+++ b/NoiseTool/FastNoiseNodeEditor.h
@@ -25,8 +25,6 @@ namespace Magnum
         void Draw( const Matrix4& transformation, const Matrix4& projection, const Vector3& cameraPosition );
         void SetSIMDLevel( FastSIMD::FeatureSet lvl );
 
-        static const char* GetFeatureSetName( FastSIMD::FeatureSet lvl );
-
     private:
         struct Node
         {
@@ -139,6 +137,6 @@ namespace Magnum
         NoiseTexture::GenType mNodeGenType = NoiseTexture::GenType_2D;
 
         FastSIMD::FeatureSet mMaxFeatureSet    = FastSIMD::FeatureSet::Max;
-        FastSIMD::FeatureSet mActualFeatureSet = FastSIMD::FeatureSet::Null;
+        FastSIMD::FeatureSet mActualFeatureSet = FastSIMD::FeatureSet::Invalid;
     };
 }
\ No newline at end of file
diff --git a/NoiseTool/NoiseTexture.cpp b/NoiseTool/NoiseTexture.cpp
index 4bdc37a4..434700e7 100644
--- a/NoiseTool/NoiseTexture.cpp
+++ b/NoiseTool/NoiseTexture.cpp
@@ -320,7 +320,7 @@ NoiseTexture::TextureData NoiseTexture::BuildTexture( const BuildData& buildData
     static thread_local std::vector<float> noiseData;
     noiseData.resize( (size_t)buildData.size.x() * buildData.size.y() );
 
-    auto gen = FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetLiveFeatureSet() );
+    auto gen = FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetActiveFeatureSet() );
     gen->SetSource( buildData.generator );
 
     FastNoise::OutputMinMax minMax;
diff --git a/NoiseTool/NoiseToolApp.cpp b/NoiseTool/NoiseToolApp.cpp
index 3c04357a..a1561061 100644
--- a/NoiseTool/NoiseToolApp.cpp
+++ b/NoiseTool/NoiseToolApp.cpp
@@ -10,6 +10,7 @@
 
 #include "NoiseToolApp.h"
 #include "ImGuiExtra.h"
+#include "FastSIMD/FastSIMD_FastNoise_config.h"
 
 using namespace Magnum;
 
@@ -62,21 +63,16 @@ NoiseToolApp::NoiseToolApp( const Arguments& arguments ) :
     GL::Renderer::setBlendEquation( GL::Renderer::BlendEquation::Add, GL::Renderer::BlendEquation::Add );
     GL::Renderer::setBlendFunction( GL::Renderer::BlendFunction::SourceAlpha, GL::Renderer::BlendFunction::OneMinusSourceAlpha );
 
-    Debug{} << "FastSIMD detected max CPU supported feature set:" << FastNoiseNodeEditor::GetFeatureSetName( FastSIMD::DetectCpuMaxFeatureSet() );
+    Debug{} << "FastSIMD detected max CPU supported feature set:" << FastSIMD::GetFeatureSetString( FastSIMD::DetectCpuMaxFeatureSet() );
 
-    mFeatureSetSelection = 
-    { 
-        FastSIMD::FeatureSet::Max,
-        FastSIMD::FeatureSet::Scalar,
-        FastSIMD::FeatureSet::SSE2,
-        FastSIMD::FeatureSet::SSE41,
-        FastSIMD::FeatureSet::AVX2_FMA,
-        FastSIMD::FeatureSet::AVX512_Baseline_FMA,
-    };
+    mFeatureSetSelection = { FastSIMD::FeatureSet::Max };
+    mFeatureSetSelection.insert( mFeatureSetSelection.end(),
+        std::begin( FastSIMD::FastSIMD_FastNoise::CompiledFeatureSets::AsArray ), 
+        std::end( FastSIMD::FastSIMD_FastNoise::CompiledFeatureSets::AsArray ) );
 
     for( FastSIMD::FeatureSet featureSet : mFeatureSetSelection )
     {
-        mFeatureSetNames.push_back( FastNoiseNodeEditor::GetFeatureSetName( featureSet ) );
+        mFeatureSetNames.push_back( FastSIMD::GetFeatureSetString( featureSet ) );
     }
 }
 
@@ -116,7 +112,7 @@ void NoiseToolApp::drawEvent()
         ImGui::Text( "Application average %.3f ms/frame (%.1f FPS)",
             1000.0 / Double( ImGui::GetIO().Framerate ), Double( ImGui::GetIO().Framerate ) );
 
-        if( ImGui::Combo( "Max Feature Set", &mMaxFeatureSet, mFeatureSetNames.data(), (int)mFeatureSetSelection.size() ) ||
+        if( ImGui::Combo( "Feature Set", &mMaxFeatureSet, mFeatureSetNames.data(), (int)mFeatureSetSelection.size() ) ||
             ImGuiExtra::ScrollCombo( &mMaxFeatureSet, (int)mFeatureSetSelection.size() ) )
         {   
             FastSIMD::FeatureSet newLevel = mFeatureSetSelection[mMaxFeatureSet];
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index b831bf85..637c6dbe 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -95,7 +95,7 @@ namespace FastNoise
 
         virtual ~Generator() = default;
 
-        virtual FastSIMD::FeatureSet GetLiveFeatureSet() const = 0;
+        virtual FastSIMD::FeatureSet GetActiveFeatureSet() const = 0;
         virtual const Metadata& GetMetadata() const = 0;
 
         virtual OutputMinMax GenUniformGrid2D( float* out,
@@ -138,7 +138,7 @@ namespace FastNoise
         {
             static_assert( std::is_base_of<Generator, T>::value, "T must be child of FastNoise::Generator class" );
 
-            assert( !gen.get() || GetLiveFeatureSet() == gen->GetLiveFeatureSet() ); // Ensure that all SIMD levels match
+            assert( !gen.get() || GetActiveFeatureSet() == gen->GetActiveFeatureSet() ); // Ensure that all SIMD levels match
 
             SetSourceSIMDPtr( dynamic_cast<const Generator*>( gen.get() ), &memberVariable.simdGeneratorPtr );
             memberVariable.base = gen;
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 5c78df26..fc44386a 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -24,9 +24,9 @@ public:
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const override { return GenT( seed, x, y, z ); }\
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const override { return GenT( seed, x, y, z, w ); }
 
-    FastSIMD::FeatureSet GetLiveFeatureSet() const final
+    FastSIMD::FeatureSet GetActiveFeatureSet() const final
     {
-        return FASTSIMD_DEFAULT_FEATURE_SET;
+        return FastSIMD::FeatureSetDefault();
     }
 
     using VoidPtrStorageType = const DispatchClass<Generator, SIMD>*;
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 823bf871..24bfcec2 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -16,7 +16,7 @@ namespace FastNoise
     static constexpr float ROOT2 = 1.4142135623730950488f;
     static constexpr float ROOT3 = 1.7320508075688772935f;
 
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
     {
         int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
@@ -74,7 +74,7 @@ namespace FastNoise
     //}
 
 
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
     {
         // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
@@ -115,7 +115,7 @@ namespace FastNoise
         return FS::FMulAdd( float32v( 1.0f + ROOT2 ), a, b );
     }
     
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
     {        
         /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
@@ -153,7 +153,7 @@ namespace FastNoise
         return ( u ^ h1 ) + ( v ^ h2 );
     }
     
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
     {
         /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6a21b0d4..2c7e9f42 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 26b3593be46cf0bdf6426821435997839b80845f
+    GIT_TAG 8dff5637f6c2ad83a8e6634249c22df01ee9e083
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"
@@ -58,7 +58,7 @@ set_target_properties(FastNoise PROPERTIES
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
-fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl")
+fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl" FEATURE_SETS SSE2 SSE41 AVX2_FMA)
 
 target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
 
diff --git a/src/FastNoise/FastNoise_C.cpp b/src/FastNoise/FastNoise_C.cpp
index 3944c285..bcc39dbb 100644
--- a/src/FastNoise/FastNoise_C.cpp
+++ b/src/FastNoise/FastNoise_C.cpp
@@ -37,7 +37,7 @@ void fnDeleteNodeRef( void* node )
 
 unsigned fnGetSIMDLevel( const void* node )
 {
-    return (unsigned)ToGen( node )->GetLiveFeatureSet();
+    return (unsigned)ToGen( node )->GetActiveFeatureSet();
 }
 
 int fnGetMetadataID( const void* node )
diff --git a/tests/FastNoiseCpp11Include.cpp b/tests/FastNoiseCpp11Include.cpp
index 497e52ed..b7672f86 100644
--- a/tests/FastNoiseCpp11Include.cpp
+++ b/tests/FastNoiseCpp11Include.cpp
@@ -7,7 +7,7 @@ int main()
 {
     auto node = FastNoise::New<FastNoise::FractalFBm>();
 
-    std::cout << (unsigned)node->GetLiveFeatureSet() << std::endl;
+    std::cout << (unsigned)node->GetActiveFeatureSet() << std::endl;
 
     node->SetSource( FastNoise::New<FastNoise::Simplex>() );
     node->SetGain( FastNoise::New<FastNoise::Value>() );

From 1e018390154958d4b29535711ec44091225a108c Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 15 Jul 2023 00:39:06 +0100
Subject: [PATCH 025/139] Update to latest FastSIMD

---
 NoiseTool/FastNoiseNodeEditor.cpp          |  8 ++--
 NoiseTool/FastNoiseNodeEditor.h            |  2 +-
 NoiseTool/NoiseToolApp.cpp                 |  4 +-
 include/FastNoise/Generators/Generator.inl |  2 +-
 include/FastNoise/Generators/Utils.inl     | 45 +++++++++++++---------
 src/CMakeLists.txt                         |  4 +-
 tests/FastNoiseBenchmark.cpp               | 19 +--------
 7 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/NoiseTool/FastNoiseNodeEditor.cpp b/NoiseTool/FastNoiseNodeEditor.cpp
index b76d9c44..db710c28 100644
--- a/NoiseTool/FastNoiseNodeEditor.cpp
+++ b/NoiseTool/FastNoiseNodeEditor.cpp
@@ -1253,8 +1253,8 @@ const char* FastNoiseNodeEditor::GetFeatureSetName( FastSIMD::FeatureSet lvl )
     switch( lvl )
     {
     default:
-    case FastSIMD::FeatureSet::Null:   return "NULL";
-    case FastSIMD::FeatureSet::Scalar: return "Scalar";
+    case FastSIMD::FeatureSet::Invalid:   return "NULL";
+    case FastSIMD::FeatureSet::SCALAR: return "Scalar";
     case FastSIMD::FeatureSet::SSE:    return "SSE";
     case FastSIMD::FeatureSet::SSE2:   return "SSE2";
     case FastSIMD::FeatureSet::SSE3:   return "SSE3";
@@ -1264,8 +1264,8 @@ const char* FastNoiseNodeEditor::GetFeatureSetName( FastSIMD::FeatureSet lvl )
     case FastSIMD::FeatureSet::AVX:    return "AVX";
     case FastSIMD::FeatureSet::AVX2:   return "AVX2";
     case FastSIMD::FeatureSet::AVX2_FMA:   return "AVX2_FMA";
-    case FastSIMD::FeatureSet::AVX512_Baseline: return "AVX512";
-    case FastSIMD::FeatureSet::AVX512_Baseline_FMA: return "AVX512_FMA";
+    case FastSIMD::FeatureSet::AVX512: return "AVX512";
+    case FastSIMD::FeatureSet::AVX512_FMA: return "AVX512_FMA";
     case FastSIMD::FeatureSet::NEON:   return "NEON";
     case FastSIMD::FeatureSet::NEON_FMA:   return "NEON_FMA";
     case FastSIMD::FeatureSet::Max:   return "AUTO";
diff --git a/NoiseTool/FastNoiseNodeEditor.h b/NoiseTool/FastNoiseNodeEditor.h
index 63c7693e..3bae0d18 100644
--- a/NoiseTool/FastNoiseNodeEditor.h
+++ b/NoiseTool/FastNoiseNodeEditor.h
@@ -139,6 +139,6 @@ namespace Magnum
         NoiseTexture::GenType mNodeGenType = NoiseTexture::GenType_2D;
 
         FastSIMD::FeatureSet mMaxFeatureSet    = FastSIMD::FeatureSet::Max;
-        FastSIMD::FeatureSet mActualFeatureSet = FastSIMD::FeatureSet::Null;
+        FastSIMD::FeatureSet mActualFeatureSet = FastSIMD::FeatureSet::Invalid;
     };
 }
\ No newline at end of file
diff --git a/NoiseTool/NoiseToolApp.cpp b/NoiseTool/NoiseToolApp.cpp
index 3c04357a..353ad245 100644
--- a/NoiseTool/NoiseToolApp.cpp
+++ b/NoiseTool/NoiseToolApp.cpp
@@ -67,11 +67,11 @@ NoiseToolApp::NoiseToolApp( const Arguments& arguments ) :
     mFeatureSetSelection = 
     { 
         FastSIMD::FeatureSet::Max,
-        FastSIMD::FeatureSet::Scalar,
+        FastSIMD::FeatureSet::SCALAR,
         FastSIMD::FeatureSet::SSE2,
         FastSIMD::FeatureSet::SSE41,
         FastSIMD::FeatureSet::AVX2_FMA,
-        FastSIMD::FeatureSet::AVX512_Baseline_FMA,
+        FastSIMD::FeatureSet::AVX512_FMA,
     };
 
     for( FastSIMD::FeatureSet featureSet : mFeatureSetSelection )
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 5c78df26..05e8f1a0 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -26,7 +26,7 @@ public:
 
     FastSIMD::FeatureSet GetLiveFeatureSet() const final
     {
-        return FASTSIMD_DEFAULT_FEATURE_SET;
+        return FastSIMD::FeatureSetDefault();
     }
 
     using VoidPtrStorageType = const DispatchClass<Generator, SIMD>*;
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 823bf871..fef0cb51 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -16,21 +16,28 @@ namespace FastNoise
     static constexpr float ROOT2 = 1.4142135623730950488f;
     static constexpr float ROOT3 = 1.7320508075688772935f;
 
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
-    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
+    static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
     {
         int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
         // Bit-3 = Choose X Y ordering
         mask32v bit3;
         
-        if constexpr( (SIMD & FastSIMD::FeatureFlag::SSE41) && !(SIMD & FastSIMD::FeatureFlag::AVX512_F) )
+        if constexpr( ( SIMD & FastSIMD::FeatureFlag::SSE2 ) && !( SIMD & FastSIMD::FeatureFlag::AVX512_F ) )
         {
-            bit3 = FS::Cast<FS::Mask<32>>( index << 29 );
-        }      
+            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+            {
+                bit3 = FS::Cast<FS::Mask<32>>( index << 29 );
+            }
+            else
+            {
+                bit3 = FS::Cast<FS::Mask<32>>( ( index << 29 ) >> 31 );
+            }
+        }   
         else
         {
-            bit3 = ( index & int32v( 1 << 2 ) ) == int32v( 0 );
+            bit3 = ( index & int32v( 1 << 2 ) ) != int32v( 0 );
         }
 
         float32v a = FS::Select( bit3, fY, fX );
@@ -74,7 +81,7 @@ namespace FastNoise
     //}
 
 
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
     {
         // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
@@ -87,13 +94,13 @@ namespace FastNoise
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
-        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        else*/ if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
         {
-            float32v gX = _mm256_permutevar8x32_ps( float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
-            float32v gY = _mm256_permutevar8x32_ps( float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
 
             return FS::FMulAdd( gX, fX, fY * gY );
-        }*/
+        }
 
         int32v bit1 = hash << 31;
         int32v bit2 = (hash >> 1) << 31;
@@ -115,7 +122,7 @@ namespace FastNoise
         return FS::FMulAdd( float32v( 1.0f + ROOT2 ), a, b );
     }
     
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
     {        
         /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
@@ -129,17 +136,17 @@ namespace FastNoise
 
         int32v hasha13 = hash & int32v( 13 );
 
-        //if h < 8 then x, else y
-        mask32v less8;
+        //if h > 7 then y, else x
+        mask32v gt7;
         if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
         {
-            less8 = FS::Cast<FS::Mask<32>>( hasha13 << 28 );
+            gt7 = FS::Cast<FS::Mask<32>>( hash << 28 );
         }
         else
         {
-            less8 = hasha13 < int32v( 8 );    
+            gt7 = hasha13 > int32v( 7 );    
         }
-        float32v u = FS::Select( less8, fX, fY );
+        float32v u = FS::Select( gt7, fY, fX );
 
         //if h < 4 then y else if h is 12 or 14 then x else z
         float32v v = FS::Select( hasha13 == int32v( 12 ), fX, fZ );
@@ -153,7 +160,7 @@ namespace FastNoise
         return ( u ^ h1 ) + ( v ^ h2 );
     }
     
-    template<FastSIMD::FeatureSet SIMD = FASTSIMD_DEFAULT_FEATURE_SET>
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
     {
         /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
@@ -170,7 +177,7 @@ namespace FastNoise
 
         float32v a = FS::Select( p > int32v( 0 ), fX, fY );
         float32v b;
-        if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+        if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 && !( SIMD & FastSIMD::FeatureFlag::AVX512_F ) )
         {
             b = FS::Select( FS::Cast<FS::Mask<32>>( hash << 27 ), fY, fZ );
         }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6a21b0d4..4ae6d189 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 26b3593be46cf0bdf6426821435997839b80845f
+    GIT_TAG be7a50c52786502fe072f37298fa4b482b0c0ee6
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"
@@ -58,7 +58,7 @@ set_target_properties(FastNoise PROPERTIES
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
-fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl")
+fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl" FEATURE_SETS SCALAR SSE2 SSE41 AVX2_FMA)
 
 target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
 
diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index bb330030..924208fd 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -102,24 +102,7 @@ template<typename T>
 void RegisterBenchmarks( FastSIMD::FeatureSet level, const char* groupName, const char* name, T generatorFunc )
 {
     std::string benchName = "0D/";
-
-#ifdef MAGIC_ENUM_SUPPORTED
-    auto enumName = magic_enum::flags::enum_name( level );
-    auto find = enumName.find( '_' );
-    if( find != std::string::npos )
-    {
-        benchName += enumName.data() + find + 1;
-    }
-    else if( *enumName.data() != 0 )
-    {
-        benchName += enumName;
-    }
-    else
-#endif
-    {
-        benchName += std::to_string( (int)level );
-    }
-
+    benchName += FastSIMD::GetFeatureSetString( level );  
     benchName += '/';
     benchName += groupName;
     benchName += '/';

From 706f77755e76c08da3819a6e82767a5ecfd4de93 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Mon, 17 Jul 2023 18:28:27 +0100
Subject: [PATCH 026/139] Fix tuple include for clang

---
 include/FastNoise/Generators/Generator.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 346632f5..cf04d82a 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -4,6 +4,10 @@
 #include <algorithm>
 #include <atomic>
 
+#ifdef FASTNOISE_METADATA
+#include <tuple>
+#endif
+
 #include "FastNoise/FastNoise_Config.h"
 
 #if !defined( FASTNOISE_METADATA ) && defined( __INTELLISENSE__ )

From fda2ea6b5969619f9736931e0be499a5ee3c39d3 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Mon, 17 Jul 2023 21:06:41 +0100
Subject: [PATCH 027/139] Latest FastSIMD

---
 src/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4ae6d189..41d90371 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG be7a50c52786502fe072f37298fa4b482b0c0ee6
+    GIT_TAG 18e06c2e9b39ced08fd607ccab864a01efe23d21
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"

From e0d33d2b8f120568b2f0e5890aea84137b19ae8b Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Mon, 17 Jul 2023 23:09:49 +0100
Subject: [PATCH 028/139] NewFastSIMD bench all feature sets

---
 tests/FastNoiseBenchmark.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index 924208fd..782f1184 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -1,3 +1,6 @@
+#include <iostream>
+#include <ostream>
+
 #include <benchmark/benchmark.h>
 #include "FastNoise/FastNoise.h"
 #include "FastNoise/Metadata.h"
@@ -120,6 +123,8 @@ void RegisterBenchmarks( FastSIMD::FeatureSet level, const char* groupName, cons
 
 int main( int argc, char** argv )
 {
+    std::cout << "FastSIMD Max Supported Feature Set: " << FastSIMD::GetFeatureSetString( FastSIMD::DetectCpuMaxFeatureSet() ) << std::endl;
+
     benchmark::Initialize( &argc, argv );
 
     for( size_t idx = 0; idx < gPositionCount; idx++ )
@@ -127,7 +132,7 @@ int main( int argc, char** argv )
         gPositionFloats[idx] = (float)idx * 0.6f;
     }
 
-    FastSIMD::FeatureSet Levels[] = { FastSIMD::FeatureSet::SSE2, FastSIMD::FeatureSet::SSE41 };
+    FastSIMD::FeatureSet Levels[] = { FastSIMD::FeatureSet::SSE2, FastSIMD::FeatureSet::SSE41, FastSIMD::FeatureSet::AVX2_FMA, FastSIMD::FeatureSet::AVX512_FMA };
     
     for( auto level : Levels )
     {

From c8591478585e15cb189f0bdb77a3cf728e0453c4 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Tue, 18 Jul 2023 13:34:16 +0000
Subject: [PATCH 029/139] Double sized registers

---
 .github/workflows/benchmark.yml            | 1 +
 include/FastNoise/Generators/Generator.inl | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 446421e4..9cdf1179 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -5,6 +5,7 @@ on:
   push:
     branches:
       - 'master'
+      - 'NewFastSIMD'
     paths-ignore:
       - 'NoiseTool/**'
       - '.github/**'
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index a69a7bb7..fc987954 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -7,9 +7,9 @@
 
 using namespace FastNoise;
 
-using float32v = FS::NativeRegister<float>;
-using int32v = FS::NativeRegister<std::int32_t>;
-using mask32v = FS::NativeRegister<FS::Mask<32>>;
+using float32v = FS::Register<float, NativeRegisterCount<float>() * 2>;
+using int32v = FS::Register<std::int32_t, NativeRegisterCount<std::int32_t>() * 2>;
+using mask32v = typename float32v::MaskType;
 
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::Generator, SIMD> : public virtual FastNoise::Generator

From f3f33ca78b7c5345bdb157c5d923925b39fae65a Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Tue, 18 Jul 2023 13:40:30 +0000
Subject: [PATCH 030/139] Double Sized Registers Fix compile

---
 include/FastNoise/Generators/Generator.inl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index fc987954..ff002cef 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -7,8 +7,8 @@
 
 using namespace FastNoise;
 
-using float32v = FS::Register<float, NativeRegisterCount<float>() * 2>;
-using int32v = FS::Register<std::int32_t, NativeRegisterCount<std::int32_t>() * 2>;
+using float32v = FS::Register<float, FS::NativeRegisterCount<float>() * 2>;
+using int32v = FS::Register<std::int32_t, FS::NativeRegisterCount<std::int32_t>() * 2>;
 using mask32v = typename float32v::MaskType;
 
 template<FastSIMD::FeatureSet SIMD>

From 6c7846c8e6cd457d4de7710f0adbd19c5ebfa428 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 19 Jul 2023 19:15:00 +0100
Subject: [PATCH 031/139] Use generated FastNoise compiled feature sets array
 for NoiseTool and benchmarking

---
 NoiseTool/NoiseToolApp.cpp   | 10 +++++-----
 src/CMakeLists.txt           |  2 +-
 tests/FastNoiseBenchmark.cpp |  5 ++---
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/NoiseTool/NoiseToolApp.cpp b/NoiseTool/NoiseToolApp.cpp
index ba777cf0..dc6236c9 100644
--- a/NoiseTool/NoiseToolApp.cpp
+++ b/NoiseTool/NoiseToolApp.cpp
@@ -10,7 +10,7 @@
 
 #include "NoiseToolApp.h"
 #include "ImGuiExtra.h"
-//#include "FastSIMD/FastSIMD_FastNoise_config.h"
+#include "FastSIMD/FastSIMD_FastNoise_config.h"
 
 using namespace Magnum;
 
@@ -66,9 +66,9 @@ NoiseToolApp::NoiseToolApp( const Arguments& arguments ) :
     Debug{} << "FastSIMD detected max CPU supported feature set:" << FastSIMD::GetFeatureSetString( FastSIMD::DetectCpuMaxFeatureSet() );
 
     mFeatureSetSelection = { FastSIMD::FeatureSet::Max };
-    //mFeatureSetSelection.insert( mFeatureSetSelection.end(),
-    //    std::begin( FastSIMD::FastSIMD_FastNoise::CompiledFeatureSets::AsArray ), 
-    //    std::end( FastSIMD::FastSIMD_FastNoise::CompiledFeatureSets::AsArray ) );
+    mFeatureSetSelection.insert( mFeatureSetSelection.end(),
+        std::rbegin( FastSIMD::FastSIMD_FastNoise::CompiledFeatureSets::AsArray ), 
+        std::rend( FastSIMD::FastSIMD_FastNoise::CompiledFeatureSets::AsArray ) );
 
     for( FastSIMD::FeatureSet featureSet : mFeatureSetSelection )
     {
@@ -112,7 +112,7 @@ void NoiseToolApp::drawEvent()
         ImGui::Text( "Application average %.3f ms/frame (%.1f FPS)",
             1000.0 / Double( ImGui::GetIO().Framerate ), Double( ImGui::GetIO().Framerate ) );
 
-        if( ImGui::Combo( "Feature Set", &mMaxFeatureSet, mFeatureSetNames.data(), (int)mFeatureSetSelection.size() ) ||
+        if( ImGui::Combo( "Max Feature Set", &mMaxFeatureSet, mFeatureSetNames.data(), (int)mFeatureSetSelection.size() ) ||
             ImGuiExtra::ScrollCombo( &mMaxFeatureSet, (int)mFeatureSetSelection.size() ) )
         {   
             FastSIMD::FeatureSet newLevel = mFeatureSetSelection[mMaxFeatureSet];
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 41d90371..d3524625 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -66,7 +66,7 @@ if(NOT BUILD_SHARED_LIBS)
     target_compile_definitions(FastNoise PUBLIC FASTNOISE_STATIC_LIB)
 endif()
 
-target_link_libraries(FastNoise PUBLIC FastSIMD PRIVATE FastSIMD_FastNoise)
+target_link_libraries(FastNoise PUBLIC FastSIMD FastSIMD_FastNoise)
 
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
     target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast /wd4251)
diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index 782f1184..c40c88f0 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -4,6 +4,7 @@
 #include <benchmark/benchmark.h>
 #include "FastNoise/FastNoise.h"
 #include "FastNoise/Metadata.h"
+#include "FastSIMD/FastSIMD_FastNoise_config.h"
 
 #include "../NoiseTool/DemoNodeTrees.inl"
 
@@ -131,10 +132,8 @@ int main( int argc, char** argv )
     {
         gPositionFloats[idx] = (float)idx * 0.6f;
     }
-
-    FastSIMD::FeatureSet Levels[] = { FastSIMD::FeatureSet::SSE2, FastSIMD::FeatureSet::SSE41, FastSIMD::FeatureSet::AVX2_FMA, FastSIMD::FeatureSet::AVX512_FMA };
     
-    for( auto level : Levels )
+    for( auto level : FastSIMD::FastSIMD_FastNoise::CompiledFeatureSets::AsArray )
     {
         for( const FastNoise::Metadata* metadata : FastNoise::Metadata::GetAll() )
         {

From 5aa5e1ffe862dc8b5fc4304112f42b426a301258 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 19 Jul 2023 19:39:10 +0100
Subject: [PATCH 032/139] GetGradientDotFancy AVX2 optimisation

---
 include/FastNoise/Generators/Utils.inl | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index d727decf..0ad69326 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -21,6 +21,15 @@ namespace FastNoise
     {
         int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        {
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
+
+            // Bit-8 = Flip sign of a + b 
+            return FS::FMulAdd( gX, fX, fY * gY ) ^ FS::Cast<float>( ( index >> 3 ) << 31 );
+        }
+
         // Bit-3 = Choose X Y ordering
         mask32v bit3;
         
@@ -57,18 +66,6 @@ namespace FastNoise
         return c ^ FS::Cast<float>( (index >> 3) << 31 );
     }
 
-    //template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
-    //FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
-    //{
-    //    int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
-
-    //    float32v gX = _mm256_permutevar8x32_ps( float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
-    //    float32v gY = _mm256_permutevar8x32_ps( float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
-
-    //    // Bit-8 = Flip sign of a + b
-    //    return FS::FMulAdd( gX, fX, fY * gY ) ^ FS::Cast<float>( (index >> 3) << 31 );
-    //}
-
     //template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
     //FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
     //{

From cecbf71e4ca886c744ac239ed8b3e29799b802c6 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 19 Jul 2023 19:39:28 +0100
Subject: [PATCH 033/139] Bump Scalar register size to 4

---
 include/FastNoise/Generators/Generator.inl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index ff002cef..4f0a1292 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -7,8 +7,9 @@
 
 using namespace FastNoise;
 
-using float32v = FS::Register<float, FS::NativeRegisterCount<float>() * 2>;
-using int32v = FS::Register<std::int32_t, FS::NativeRegisterCount<std::int32_t>() * 2>;
+static constexpr size_t kRegisterSize = std::max<size_t>( 4, FS::NativeRegisterCount<float>() * 2 );
+using float32v = FS::Register<float, kRegisterSize>;
+using int32v = FS::Register<std::int32_t, kRegisterSize>;
 using mask32v = typename float32v::MaskType;
 
 template<FastSIMD::FeatureSet SIMD>

From b2d63baabdca70ef06a5e1f89abf5284e1686ea8 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 19 Jul 2023 21:51:47 +0100
Subject: [PATCH 034/139] Fixup and enable AVX512

---
 include/FastNoise/Generators/Utils.inl | 211 +++++++++++++------------
 src/CMakeLists.txt                     |   2 +-
 2 files changed, 108 insertions(+), 105 deletions(-)

diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 0ad69326..113f2987 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -21,7 +21,14 @@ namespace FastNoise
     {
         int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
 
-        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );
+
+            return FS::FMulAdd( gX, fX, fY * gY );
+        }
+        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
         {
             float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
             float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
@@ -29,166 +36,162 @@ namespace FastNoise
             // Bit-8 = Flip sign of a + b 
             return FS::FMulAdd( gX, fX, fY * gY ) ^ FS::Cast<float>( ( index >> 3 ) << 31 );
         }
-
-        // Bit-3 = Choose X Y ordering
-        mask32v bit3;
-        
-        if constexpr( ( SIMD & FastSIMD::FeatureFlag::SSE2 ) && !( SIMD & FastSIMD::FeatureFlag::AVX512_F ) )
+        else
         {
-            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+            // Bit-3 = Choose X Y ordering
+            mask32v bit3;
+
+            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE2 )
             {
-                bit3 = FS::Cast<FS::Mask<32>>( index << 29 );
+                if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+                {
+                    bit3 = FS::Cast<FS::Mask<32>>( index << 29 );
+                }
+                else
+                {
+                    bit3 = FS::Cast<FS::Mask<32>>( ( index << 29 ) >> 31 );
+                }
             }
             else
             {
-                bit3 = FS::Cast<FS::Mask<32>>( ( index << 29 ) >> 31 );
+                bit3 = ( index & int32v( 1 << 2 ) ) != int32v( 0 );
             }
-        }   
-        else
-        {
-            bit3 = ( index & int32v( 1 << 2 ) ) != int32v( 0 );
-        }
 
-        float32v a = FS::Select( bit3, fY, fX );
-        float32v b = FS::Select( bit3, fX, fY );
+            float32v a = FS::Select( bit3, fY, fX );
+            float32v b = FS::Select( bit3, fX, fY );
 
-        // Bit-1 = b flip sign
-        b ^= FS::Cast<float>( index << 31 );
+            // Bit-1 = b flip sign
+            b ^= FS::Cast<float>( index << 31 );
 
-        // Bit-2 = Mul a by 2 or Root3     
-        mask32v bit2 = ( index & int32v( 2 ) ) == int32v( 0 );   
+            // Bit-2 = Mul a by 2 or Root3
+            mask32v bit2 = ( index & int32v( 2 ) ) == int32v( 0 );
 
-        a *= FS::Select( bit2, float32v( 2 ), float32v( ROOT3 ) );
-        // b zero value if a mul 2
-        float32v c = FS::MaskedAdd( bit2, a, b );
+            a *= FS::Select( bit2, float32v( 2 ), float32v( ROOT3 ) );
+            // b zero value if a mul 2
+            float32v c = FS::MaskedAdd( bit2, a, b );
 
-        // Bit-4 = Flip sign of a + b
-        return c ^ FS::Cast<float>( (index >> 3) << 31 );
+            // Bit-4 = Flip sign of a + b
+            return c ^ FS::Cast<float>( ( index >> 3 ) << 31 );
+        }
     }
 
-    //template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
-    //FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
-    //{
-    //    int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
-
-    //    float32v gX = _mm512_permutexvar_ps( index, float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
-    //    float32v gY = _mm512_permutexvar_ps( index, float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );
-
-    //    return FS::FMulAdd( gX, fX, fY * gY );
-    //}
-
-
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
     {
         // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
         // ( 1, 1+R2 ) ( 1, -1-R2 ) ( -1, 1+R2 ) ( -1, -1-R2 )
 
-        /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = _mm512_permutexvar_ps( hash, float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
-            float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
-        else*/ if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
         {
             float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
             float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
+        else
+        {
+            int32v bit1 = hash << 31;
+            int32v bit2 = ( hash >> 1 ) << 31;
+            int32v bit4 = hash << 29;
 
-        int32v bit1 = hash << 31;
-        int32v bit2 = (hash >> 1) << 31;
-        int32v bit4 = hash << 29;
+            if constexpr( !( SIMD & FastSIMD::FeatureFlag::SSE41 ) )
+            {
+                bit4 >>= 31;
+            }
 
-        if constexpr( !( SIMD & FastSIMD::FeatureFlag::SSE41 ) )
-        {
-            bit4 >>= 31;
-        }
+            auto bit4Mask = FS::Cast<FS::Mask<32, false>>( bit4 );
 
-        auto bit4Mask = FS::Cast<FS::Mask<32, false>>( bit4 );
+            fX ^= FS::Cast<float>( bit1 );
+            fY ^= FS::Cast<float>( bit2 );
 
-        fX ^= FS::Cast<float>( bit1 );
-        fY ^= FS::Cast<float>( bit2 );
-        
-        float32v a = FS::Select( bit4Mask, fY, fX );
-        float32v b = FS::Select( bit4Mask, fX, fY );
-        
-        return FS::FMulAdd( float32v( 1.0f + ROOT2 ), a, b );
+            float32v a = FS::Select( bit4Mask, fY, fX );
+            float32v b = FS::Select( bit4Mask, fX, fY );
+
+            return FS::FMulAdd( float32v( 1.0f + ROOT2 ), a, b );
+        }
     }
     
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
     {        
-        /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = _mm512_permutexvar_ps( hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
-            float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
-            float32v gZ = _mm512_permutexvar_ps( hash, float32v( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
+            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
 
             return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, fZ * gZ ));
-        }*/
-
-        int32v hasha13 = hash & int32v( 13 );
-
-        //if h > 7 then y, else x
-        mask32v gt7;
-        if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
-        {
-            gt7 = FS::Cast<FS::Mask<32>>( hash << 28 );
         }
         else
         {
-            gt7 = hasha13 > int32v( 7 );    
+            int32v hasha13 = hash & int32v( 13 );
+
+            // if h > 7 then y, else x
+            mask32v gt7;
+            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+            {
+                gt7 = FS::Cast<FS::Mask<32>>( hash << 28 );
+            }
+            else
+            {
+                gt7 = hasha13 > int32v( 7 );
+            }
+            float32v u = FS::Select( gt7, fY, fX );
+
+            // if h < 4 then y else if h is 12 or 14 then x else z
+            float32v v = FS::Select( hasha13 == int32v( 12 ), fX, fZ );
+            v = FS::Select( hasha13 < int32v( 2 ), fY, v );
+
+            // if h1 then -u else u
+            // if h2 then -v else v
+            float32v h1 = FS::Cast<float>( hash << 31 );
+            float32v h2 = FS::Cast<float>( ( hash >> 1 ) << 31 );
+            // then add them
+            return ( u ^ h1 ) + ( v ^ h2 );
         }
-        float32v u = FS::Select( gt7, fY, fX );
-
-        //if h < 4 then y else if h is 12 or 14 then x else z
-        float32v v = FS::Select( hasha13 == int32v( 12 ), fX, fZ );
-        v = FS::Select( hasha13 < int32v( 2 ), fY, v );
-
-        //if h1 then -u else u
-        //if h2 then -v else v
-        float32v h1 = FS::Cast<float>( hash << 31 );
-        float32v h2 = FS::Cast<float>( (hash >> 1) << 31 );
-        //then add them
-        return ( u ^ h1 ) + ( v ^ h2 );
     }
     
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
     {
-        /*if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = _mm512_permutex2var_ps( float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
-            float32v gY = _mm512_permutex2var_ps( float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
-            float32v gZ = _mm512_permutex2var_ps( float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
-            float32v gW = _mm512_permutex2var_ps( float32v( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, float32v( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, FS::Constant<float>( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
+            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, FS::Constant<float>( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
+            float32v gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, FS::Constant<float>( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
 
             return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, FS::FMulAdd( fZ, gZ, fW * gW ) ));
-        }*/
-
-        int32v p = hash & int32v( 3 << 3 );
-
-        float32v a = FS::Select( p > int32v( 0 ), fX, fY );
-        float32v b;
-        if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 && !( SIMD & FastSIMD::FeatureFlag::AVX512_F ) )
-        {
-            b = FS::Select( FS::Cast<FS::Mask<32>>( hash << 27 ), fY, fZ );
         }
         else
         {
-            b = FS::Select( p > int32v( 1 << 3 ), fY, fZ );        
-        }
-        float32v c = FS::Select( p > int32v( 2 << 3 ), fZ, fW );
+            int32v p = hash & int32v( 3 << 3 );
+
+            float32v a = FS::Select( p > int32v( 0 ), fX, fY );
+            float32v b;
+            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+            {
+                b = FS::Select( FS::Cast<FS::Mask<32>>( hash << 27 ), fY, fZ );
+            }
+            else
+            {
+                b = FS::Select( p > int32v( 1 << 3 ), fY, fZ );
+            }
+            float32v c = FS::Select( p > int32v( 2 << 3 ), fZ, fW );
 
-        float32v aSign = FS::Cast<float>( hash << 31 );
-        float32v bSign = FS::Cast<float>( (hash >> 1) << 31 );
-        float32v cSign = FS::Cast<float>( (hash >> 2) << 31 );
+            float32v aSign = FS::Cast<float>( hash << 31 );
+            float32v bSign = FS::Cast<float>( ( hash >> 1 ) << 31 );
+            float32v cSign = FS::Cast<float>( ( hash >> 2 ) << 31 );
 
-        return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
+            return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
+        }
     }
 
     template<typename... P>
@@ -218,7 +221,7 @@ namespace FastNoise
         hash ^= (primedPos ^ ...);
         
         hash *= hash * int32v( 0x27d4eb2d );
-        return FS::Convert<float>( hash ) * float32v( 1.0f / (float)INT_MAX );
+        return FS::Convert<float>( hash ) * float32v( 1.0f / (float)-INT_MIN );
     }
      
     FS_FORCEINLINE static float32v Lerp( float32v a, float32v b, float32v t )
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d3524625..f64b587d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -58,7 +58,7 @@ set_target_properties(FastNoise PROPERTIES
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
-fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl" FEATURE_SETS SCALAR SSE2 SSE41 AVX2_FMA)
+fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl")
 
 target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
 

From c93db4acc5c36dab6e2f3ad65fea6fad5d8b979e Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 20 Jul 2023 10:39:15 +0100
Subject: [PATCH 035/139] Test disable VECTORCALL

---
 include/FastNoise/Generators/Generator.inl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 4f0a1292..1cf67f5b 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -3,6 +3,9 @@
 
 #include "Generator.h"
 
+#undef FS_VECTORCALL
+#define FS_VECTORCALL
+
 #pragma warning( disable:4250 )
 
 using namespace FastNoise;

From 76226297325700ec43da88005549127983ffbf24 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 20 Jul 2023 23:07:11 +0100
Subject: [PATCH 036/139] Enable vectorcall for MSVC and Clang

---
 include/FastNoise/Generators/Generator.inl | 3 ---
 src/CMakeLists.txt                         | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 1cf67f5b..4f0a1292 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -3,9 +3,6 @@
 
 #include "Generator.h"
 
-#undef FS_VECTORCALL
-#define FS_VECTORCALL
-
 #pragma warning( disable:4250 )
 
 using namespace FastNoise;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f64b587d..2dcbaa2c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 18e06c2e9b39ced08fd607ccab864a01efe23d21
+    GIT_TAG 08041995183d52ef8f3504429ec9150f0f89a9f5
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"

From 627aa801f1b5f3368f916e309d5f2ec546f1e1c8 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 20 Jul 2023 23:16:22 +0100
Subject: [PATCH 037/139] Remove ZeroUpper asm generation

---
 src/CMakeLists.txt | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2dcbaa2c..6d935f1e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -69,14 +69,20 @@ endif()
 target_link_libraries(FastNoise PUBLIC FastSIMD FastSIMD_FastNoise)
 
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
-    target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast /wd4251)
+    target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast /wd4251 /d2vzeroupper-)
     
 elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
     if(MSVC)
-        target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast)
+        target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast -mllvm -x86-use-vzeroupper=0)
     else()
         target_compile_options(FastSIMD_FastNoise PRIVATE -ffast-math -fno-stack-protector)        
     endif()
 
+    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+        target_compile_options(FastSIMD_FastNoise PRIVATE -mno-vzeroupper)
+    else()
+        target_compile_options(FastSIMD_FastNoise PRIVATE -mllvm -x86-use-vzeroupper=0)        
+    endif()
+
 endif()
 

From b195868732d09d578f1b024ff4b9cc0c238b96d7 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 22 Jul 2023 12:48:01 +0100
Subject: [PATCH 038/139] Latest FastSIMD regcall on ClangCl

---
 src/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6d935f1e..4ca0bce1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 08041995183d52ef8f3504429ec9150f0f89a9f5
+    GIT_TAG a09c99d3a25ba16fed75d1312efa3b3f570cc15d
     EXCLUDE_FROM_ALL YES
     #OPTIONS
     #    "BUILD_SHARED_LIBS OFF"

From e4a22ddb830b0de8ba785a832fd08da6070c15f4 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 26 Jul 2023 23:15:55 +0100
Subject: [PATCH 039/139] Update FastSIMD with ARM support

---
 src/CMakeLists.txt | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4ca0bce1..a7266102 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,12 +1,8 @@
-set(CMAKE_CXX_STANDARD 17)
 
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG a09c99d3a25ba16fed75d1312efa3b3f570cc15d
-    EXCLUDE_FROM_ALL YES
-    #OPTIONS
-    #    "BUILD_SHARED_LIBS OFF"
+    GIT_TAG 5731a1d26f5182f0f89f449c252f9bba2f36b933
 )
 
 set(install_targets ${install_targets}

From 4f5c040246d161c7afa099cba7deb6dfca5177eb Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 27 Jul 2023 21:55:41 +0100
Subject: [PATCH 040/139] Mark nodes as final

---
 .../FastNoise/Generators/BasicGenerators.inl  | 12 +++++-----
 include/FastNoise/Generators/Blends.inl       | 22 +++++++++----------
 include/FastNoise/Generators/Cellular.inl     |  6 ++---
 include/FastNoise/Generators/DomainWarp.inl   |  2 +-
 .../Generators/DomainWarpFractal.inl          |  4 ++--
 include/FastNoise/Generators/Fractal.inl      |  6 ++---
 include/FastNoise/Generators/Modifiers.inl    | 22 +++++++++----------
 include/FastNoise/Generators/Perlin.inl       |  2 +-
 include/FastNoise/Generators/Simplex.inl      |  6 ++---
 include/FastNoise/Generators/Value.inl        |  2 +-
 10 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index 769b7e35..e0443b66 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -2,7 +2,7 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Constant, SIMD> : public virtual FastNoise::Constant, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Constant, SIMD> final : public virtual FastNoise::Constant, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -14,7 +14,7 @@ class FastSIMD::DispatchClass<FastNoise::Constant, SIMD> : public virtual FastNo
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::White, SIMD> : public virtual FastNoise::White, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::White, SIMD> final : public virtual FastNoise::White, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -29,7 +29,7 @@ class FastSIMD::DispatchClass<FastNoise::White, SIMD> : public virtual FastNoise
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> : public virtual FastNoise::Checkerboard, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> final : public virtual FastNoise::Checkerboard, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -43,7 +43,7 @@ class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> : public virtual Fa
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> : public virtual FastNoise::SineWave, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> final : public virtual FastNoise::SineWave, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -55,7 +55,7 @@ class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> : public virtual FastNo
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::PositionOutput, SIMD> : public virtual FastNoise::PositionOutput, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::PositionOutput, SIMD> final : public virtual FastNoise::PositionOutput, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -71,7 +71,7 @@ class FastSIMD::DispatchClass<FastNoise::PositionOutput, SIMD> : public virtual
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DistanceToPoint, SIMD> : public virtual FastNoise::DistanceToPoint, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DistanceToPoint, SIMD> final : public virtual FastNoise::DistanceToPoint, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
diff --git a/include/FastNoise/Generators/Blends.inl b/include/FastNoise/Generators/Blends.inl
index 1c81d6ae..e70f6209 100644
--- a/include/FastNoise/Generators/Blends.inl
+++ b/include/FastNoise/Generators/Blends.inl
@@ -1,7 +1,7 @@
 #include "Blends.h"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Add, SIMD> : public virtual FastNoise::Add, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Add, SIMD> final : public virtual FastNoise::Add, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -13,7 +13,7 @@ class FastSIMD::DispatchClass<FastNoise::Add, SIMD> : public virtual FastNoise::
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Subtract, SIMD> : public virtual FastNoise::Subtract, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Subtract, SIMD> final : public virtual FastNoise::Subtract, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -25,7 +25,7 @@ class FastSIMD::DispatchClass<FastNoise::Subtract, SIMD> : public virtual FastNo
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Multiply, SIMD> : public virtual FastNoise::Multiply, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Multiply, SIMD> final : public virtual FastNoise::Multiply, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -37,7 +37,7 @@ class FastSIMD::DispatchClass<FastNoise::Multiply, SIMD> : public virtual FastNo
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Divide, SIMD> : public virtual FastNoise::Divide, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Divide, SIMD> final : public virtual FastNoise::Divide, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -49,7 +49,7 @@ class FastSIMD::DispatchClass<FastNoise::Divide, SIMD> : public virtual FastNois
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::PowFloat, SIMD> : public virtual FastNoise::PowFloat, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::PowFloat, SIMD> final : public virtual FastNoise::PowFloat, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -61,7 +61,7 @@ class FastSIMD::DispatchClass<FastNoise::PowFloat, SIMD> : public virtual FastNo
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::PowInt, SIMD> : public virtual FastNoise::PowInt, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::PowInt, SIMD> final : public virtual FastNoise::PowInt, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -81,7 +81,7 @@ class FastSIMD::DispatchClass<FastNoise::PowInt, SIMD> : public virtual FastNois
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Min, SIMD> : public virtual FastNoise::Min, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Min, SIMD> final : public virtual FastNoise::Min, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -93,7 +93,7 @@ class FastSIMD::DispatchClass<FastNoise::Min, SIMD> : public virtual FastNoise::
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Max, SIMD> : public virtual FastNoise::Max, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Max, SIMD> final : public virtual FastNoise::Max, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -105,7 +105,7 @@ class FastSIMD::DispatchClass<FastNoise::Max, SIMD> : public virtual FastNoise::
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::MinSmooth, SIMD> : public virtual FastNoise::MinSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::MinSmooth, SIMD> final : public virtual FastNoise::MinSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -125,7 +125,7 @@ class FastSIMD::DispatchClass<FastNoise::MinSmooth, SIMD> : public virtual FastN
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::MaxSmooth, SIMD> : public virtual FastNoise::MaxSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::MaxSmooth, SIMD> final : public virtual FastNoise::MaxSmooth, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -145,7 +145,7 @@ class FastSIMD::DispatchClass<FastNoise::MaxSmooth, SIMD> : public virtual FastN
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Fade, SIMD> : public virtual FastNoise::Fade, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Fade, SIMD> final : public virtual FastNoise::Fade, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index 31793036..27d47582 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -15,7 +15,7 @@ protected:
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> : public virtual FastNoise::CellularValue, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public virtual FastNoise::CellularValue, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
@@ -247,7 +247,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> : public virtual F
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> : public virtual FastNoise::CellularDistance, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public virtual FastNoise::CellularDistance, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
@@ -468,7 +468,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> : public virtua
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> : public virtual FastNoise::CellularLookup, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public virtual FastNoise::CellularLookup, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
diff --git a/include/FastNoise/Generators/DomainWarp.inl b/include/FastNoise/Generators/DomainWarp.inl
index d442330d..4be8dac1 100644
--- a/include/FastNoise/Generators/DomainWarp.inl
+++ b/include/FastNoise/Generators/DomainWarp.inl
@@ -25,7 +25,7 @@ public:
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, SIMD> : public virtual FastNoise::DomainWarpGradient, public FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, SIMD> final : public virtual FastNoise::DomainWarpGradient, public FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD>
 {
 public:
     float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
diff --git a/include/FastNoise/Generators/DomainWarpFractal.inl b/include/FastNoise/Generators/DomainWarpFractal.inl
index 5765527b..56186148 100644
--- a/include/FastNoise/Generators/DomainWarpFractal.inl
+++ b/include/FastNoise/Generators/DomainWarpFractal.inl
@@ -1,7 +1,7 @@
 #include "DomainWarpFractal.h"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalProgressive, SIMD> : public virtual FastNoise::DomainWarpFractalProgressive, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalProgressive, SIMD> final : public virtual FastNoise::DomainWarpFractalProgressive, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -34,7 +34,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalProgressive, SIMD> : p
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalIndependant, SIMD> : public virtual FastNoise::DomainWarpFractalIndependant, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpFractalIndependant, SIMD> final : public virtual FastNoise::DomainWarpFractalIndependant, public FastSIMD::DispatchClass<FastNoise::Fractal<FastNoise::DomainWarp>, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
diff --git a/include/FastNoise/Generators/Fractal.inl b/include/FastNoise/Generators/Fractal.inl
index b16ae23c..a877a900 100644
--- a/include/FastNoise/Generators/Fractal.inl
+++ b/include/FastNoise/Generators/Fractal.inl
@@ -7,7 +7,7 @@ class FastSIMD::DispatchClass<FastNoise::Fractal<T>, SIMD> : public virtual Fast
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::FractalFBm, SIMD> : public virtual FastNoise::FractalFBm, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
+class FastSIMD::DispatchClass<FastNoise::FractalFBm, SIMD> final : public virtual FastNoise::FractalFBm, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -37,7 +37,7 @@ class FastSIMD::DispatchClass<FastNoise::FractalFBm, SIMD> : public virtual Fast
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::FractalRidged, SIMD> : public virtual FastNoise::FractalRidged, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
+class FastSIMD::DispatchClass<FastNoise::FractalRidged, SIMD> final : public virtual FastNoise::FractalRidged, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -67,7 +67,7 @@ class FastSIMD::DispatchClass<FastNoise::FractalRidged, SIMD> : public virtual F
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::FractalPingPong, SIMD> : public virtual FastNoise::FractalPingPong, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
+class FastSIMD::DispatchClass<FastNoise::FractalPingPong, SIMD> final : public virtual FastNoise::FractalPingPong, public FastSIMD::DispatchClass<FastNoise::Fractal<>, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
diff --git a/include/FastNoise/Generators/Modifiers.inl b/include/FastNoise/Generators/Modifiers.inl
index 2cd0a7d6..7b37b994 100644
--- a/include/FastNoise/Generators/Modifiers.inl
+++ b/include/FastNoise/Generators/Modifiers.inl
@@ -1,7 +1,7 @@
 #include "Modifiers.h"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainScale, SIMD> : public virtual FastNoise::DomainScale, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainScale, SIMD> final : public virtual FastNoise::DomainScale, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -13,7 +13,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainScale, SIMD> : public virtual Fas
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainOffset, SIMD> : public virtual FastNoise::DomainOffset, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainOffset, SIMD> final : public virtual FastNoise::DomainOffset, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
     
@@ -31,7 +31,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainOffset, SIMD> : public virtual Fa
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> : public virtual FastNoise::DomainRotate, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> final : public virtual FastNoise::DomainRotate, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
@@ -61,7 +61,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> : public virtual Fa
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::SeedOffset, SIMD> : public virtual FastNoise::SeedOffset, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::SeedOffset, SIMD> final : public virtual FastNoise::SeedOffset, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -73,7 +73,7 @@ class FastSIMD::DispatchClass<FastNoise::SeedOffset, SIMD> : public virtual Fast
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Remap, SIMD> : public virtual FastNoise::Remap, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Remap, SIMD> final : public virtual FastNoise::Remap, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -87,7 +87,7 @@ class FastSIMD::DispatchClass<FastNoise::Remap, SIMD> : public virtual FastNoise
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::ConvertRGBA8, SIMD> : public virtual FastNoise::ConvertRGBA8, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::ConvertRGBA8, SIMD> final : public virtual FastNoise::ConvertRGBA8, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -114,7 +114,7 @@ class FastSIMD::DispatchClass<FastNoise::ConvertRGBA8, SIMD> : public virtual Fa
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Terrace, SIMD> : public virtual FastNoise::Terrace, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Terrace, SIMD> final : public virtual FastNoise::Terrace, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -146,7 +146,7 @@ class FastSIMD::DispatchClass<FastNoise::Terrace, SIMD> : public virtual FastNoi
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainAxisScale, SIMD> : public virtual FastNoise::DomainAxisScale, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainAxisScale, SIMD> final : public virtual FastNoise::DomainAxisScale, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -161,7 +161,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainAxisScale, SIMD> : public virtual
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::AddDimension, SIMD> : public virtual FastNoise::AddDimension, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::AddDimension, SIMD> final : public virtual FastNoise::AddDimension, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -180,7 +180,7 @@ class FastSIMD::DispatchClass<FastNoise::AddDimension, SIMD> : public virtual Fa
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::RemoveDimension, SIMD> : public virtual FastNoise::RemoveDimension, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::RemoveDimension, SIMD> final : public virtual FastNoise::RemoveDimension, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
@@ -221,7 +221,7 @@ class FastSIMD::DispatchClass<FastNoise::RemoveDimension, SIMD> : public virtual
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::GeneratorCache, SIMD> : public virtual FastNoise::GeneratorCache, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::GeneratorCache, SIMD> final : public virtual FastNoise::GeneratorCache, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
diff --git a/include/FastNoise/Generators/Perlin.inl b/include/FastNoise/Generators/Perlin.inl
index fced7329..674c1143 100644
--- a/include/FastNoise/Generators/Perlin.inl
+++ b/include/FastNoise/Generators/Perlin.inl
@@ -2,7 +2,7 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
         float32v xs = FS::Floor( x );
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index f2ba24e5..b08c72af 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -2,7 +2,7 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
@@ -254,7 +254,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> : public virtual FastNoi
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
@@ -366,7 +366,7 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> : public virtual Fa
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> : public virtual FastNoise::OpenSimplex2S, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public virtual FastNoise::OpenSimplex2S, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
diff --git a/include/FastNoise/Generators/Value.inl b/include/FastNoise/Generators/Value.inl
index 3d304827..922eec89 100644
--- a/include/FastNoise/Generators/Value.inl
+++ b/include/FastNoise/Generators/Value.inl
@@ -2,7 +2,7 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Value, SIMD> : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {

From 5b0022701026791bfbf3cdd2d8e8c4cc69552f01 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Fri, 20 Oct 2023 00:45:42 +0100
Subject: [PATCH 041/139] Use ScalableGenerator base class for coherent noise
 nodes

---
 include/FastNoise/FastNoise_BuildList.inl     |  4 +-
 .../FastNoise/Generators/BasicGenerators.h    | 52 +++++++++++-----
 .../FastNoise/Generators/BasicGenerators.inl  | 24 +++++--
 include/FastNoise/Generators/Blends.inl       |  8 ++-
 include/FastNoise/Generators/Cellular.h       |  7 +--
 include/FastNoise/Generators/Cellular.inl     | 62 ++++++++++++-------
 include/FastNoise/Generators/DomainWarp.h     | 11 ++--
 include/FastNoise/Generators/DomainWarp.inl   | 12 ++--
 include/FastNoise/Generators/Modifiers.h      | 25 ++++++++
 include/FastNoise/Generators/Modifiers.inl    | 26 ++++++--
 include/FastNoise/Generators/Perlin.h         |  4 +-
 include/FastNoise/Generators/Perlin.inl       | 14 +++--
 include/FastNoise/Generators/Simplex.h        | 12 ++--
 include/FastNoise/Generators/Simplex.inl      | 34 +++++++---
 include/FastNoise/Generators/Utils.inl        |  3 +-
 include/FastNoise/Generators/Value.h          |  4 +-
 include/FastNoise/Generators/Value.inl        | 14 +++--
 src/CMakeLists.txt                            |  2 +-
 tests/FastNoiseCpp11Include.cpp               |  2 +-
 19 files changed, 221 insertions(+), 99 deletions(-)

diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index 76266734..db909d61 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -2,7 +2,8 @@
 
 #ifndef FASTNOISE_REGISTER_NODE
 #define FASTNOISE_REGISTER_NODE( CLASS ) \
-template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>
+template class FastSIMD::RegisterDispatchClass<FastNoise::CLASS>;\
+static_assert( std::is_final_v<FastSIMD::DispatchClass<CLASS, FastSIMD::FeatureSetDefault()>> )
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
@@ -124,5 +125,6 @@ FASTNOISE_REGISTER_NODE( DomainAxisScale );
 FASTNOISE_REGISTER_NODE( AddDimension );
 FASTNOISE_REGISTER_NODE( RemoveDimension );
 FASTNOISE_REGISTER_NODE( GeneratorCache );
+FASTNOISE_REGISTER_NODE( SquareRoot );
 
 FASTNOISE_REGISTER_NODE( OpenSimplex2S );
diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index f9f4bb64..0ba4388f 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -3,6 +3,31 @@
 
 namespace FastNoise
 {
+    class ScalableGenerator : public virtual Generator
+    {
+    public:
+        void SetScale( float value )
+        {
+            mScale = value;
+            mFrequency = 1.0f / value;
+        }
+
+    protected:
+        float mScale = 100;
+        float mFrequency = 1.0f / 100;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<ScalableGenerator> : MetadataT<Generator>
+    {
+        MetadataT()
+        {
+            this->AddVariable( "Scale", 100.0f, &ScalableGenerator::SetScale );
+        }
+    };
+#endif
+
     class Constant : public virtual Generator
     {
     public:
@@ -47,52 +72,51 @@ namespace FastNoise
     };
 #endif
 
-    class Checkerboard : public virtual Generator
+    class Checkerboard : public virtual ScalableGenerator // Scaling now comes from ScalableGenerator::SetScale instead of the removed SetSize
     {
     public:
         const Metadata& GetMetadata() const override;
-
-        void SetSize( float value ) { mSizeInv = 1 / value; }
+        
+        void SetHigh( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mHigh, gen ); } // Sets mHigh from a generator node
+        void SetHigh( float value ) { mHigh = value; } // Sets mHigh to a constant
+        void SetLow( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLow, gen ); } // Sets mLow from a generator node
+        void SetLow( float value ) { mLow = value; } // Sets mLow to a constant
 
     protected:
-        float mSizeInv = 1.0f;
+        HybridSource mHigh = 1.0f; // NOTE(review): the Checkerboard GenT in BasicGenerators.inl of this patch does not read mHigh - confirm it is consumed elsewhere
+        HybridSource mLow = -1.0f; // NOTE(review): likewise not read by that GenT, which still returns float32v( 1.0f ) with only its sign bit toggled
     };
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Checkerboard> : MetadataT<Generator>
+    struct MetadataT<Checkerboard> : MetadataT<ScalableGenerator> // Base constructor registers the "Scale" variable that replaces "Size"
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
-            this->AddVariable( "Size", 1.0f, &Checkerboard::SetSize );
+            this->AddHybridSource( "High", 1.0f, &Checkerboard::SetHigh, &Checkerboard::SetHigh ); // SetHigh is overloaded; presumably AddHybridSource's parameter types select the generator and float overloads - TODO confirm
+            this->AddHybridSource( "Low", -1.0f, &Checkerboard::SetLow, &Checkerboard::SetLow ); // Same pattern as "High", using the SetLow overloads
         }
     };
 #endif
 
-    class SineWave : public virtual Generator
+    class SineWave : public virtual ScalableGenerator // SetScale and the scale storage are inherited from ScalableGenerator; the local mScaleInv is removed
     {
     public:
         const Metadata& GetMetadata() const override;
-
-        void SetScale( float value ) { mScaleInv = 1 / value; }
-
-    protected:
-        float mScaleInv = 1.0f;
     };
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<SineWave> : MetadataT<Generator>
+    struct MetadataT<SineWave> : MetadataT<ScalableGenerator> // "Scale" is now registered by MetadataT<ScalableGenerator> (with 100.0f rather than the removed 1.0f)
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
-            this->AddVariable( "Scale", 1.0f, &SineWave::SetScale );
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index e0443b66..4119f6cf 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -1,6 +1,18 @@
 #include "BasicGenerators.h"
 #include "Utils.inl"
 
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD> : public virtual FastNoise::ScalableGenerator, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD> // Not final: the dispatch classes of scalable nodes derive from it
+{
+protected:
+    template<typename... P>
+    FS_FORCEINLINE void ScalePositions( P&... pos ) const // Multiplies every passed coordinate in place by mFrequency
+    {
+        float32v vFrequency( mFrequency ); // Vector built once from the scalar frequency
+        ( (pos *= vFrequency), ... ); // Comma fold: applies the multiplication to each argument in order
+    }
+};
+
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::Constant, SIMD> final : public virtual FastNoise::Constant, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
@@ -29,28 +41,32 @@ class FastSIMD::DispatchClass<FastNoise::White, SIMD> final : public virtual Fas
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> final : public virtual FastNoise::Checkerboard, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> final : public virtual FastNoise::Checkerboard, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD> // Base switched to the ScalableGenerator dispatch class, which provides ScalePositions
 {
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        int32v value = (FS::Convert<int32_t>( pos * float32v( mSizeInv ) ) ^ ...);
+        this->ScalePositions( pos... ); // pos is taken by value, so only these local copies are scaled
+
+        int32v value = (FS::Convert<int32_t>( pos ) ^ ...); // XOR fold over the integer-converted, already scaled coordinates
 
         return float32v( 1.0f ) ^ FS::Cast<float>( value << 31 );
     }
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> final : public virtual FastNoise::SineWave, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> final : public virtual FastNoise::SineWave, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD> // Base switched to the ScalableGenerator dispatch class, which provides ScalePositions
 {
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        return (FS::Sin( pos * float32v( mScaleInv ) ) * ...);
+        this->ScalePositions( pos... ); // Scales the by-value coordinate copies by mFrequency before taking the sine
+
+        return (FS::Sin( pos ) * ...); // Product of the sines of all scaled coordinates
     }
 };
 
diff --git a/include/FastNoise/Generators/Blends.inl b/include/FastNoise/Generators/Blends.inl
index e70f6209..6efae578 100644
--- a/include/FastNoise/Generators/Blends.inl
+++ b/include/FastNoise/Generators/Blends.inl
@@ -56,7 +56,9 @@ class FastSIMD::DispatchClass<FastNoise::PowFloat, SIMD> final : public virtual
     template<typename... P> 
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        return Pow( this->GetSourceValue( mValue, seed, pos... ), this->GetSourceValue( mPow, seed, pos... ) );
+        float32v value = FS::Max( FS::Abs( this->GetSourceValue( mValue, seed, pos... ) ), float32v( FLT_MIN ) ); // Base is made non-negative and clamped to at least FLT_MIN before Pow
+
+        return Pow( value, this->GetSourceValue( mPow, seed, pos... ) ); // NOTE(review): the sign of mValue is discarded by the Abs above - confirm that is intended
     }
 };
 
@@ -114,7 +116,7 @@ class FastSIMD::DispatchClass<FastNoise::MinSmooth, SIMD> final : public virtual
     {
         float32v a = this->GetSourceValue( mLHS, seed, pos... );
         float32v b = this->GetSourceValue( mRHS, seed, pos... );
-        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
+        float32v smoothness = FS::Max( float32v( FLT_MIN ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
 
         float32v h = FS::Max( smoothness - FS::Abs( a - b ), float32v( 0.0f ) );
 
@@ -134,7 +136,7 @@ class FastSIMD::DispatchClass<FastNoise::MaxSmooth, SIMD> final : public virtual
     {
         float32v a = -this->GetSourceValue( mLHS, seed, pos... );
         float32v b = -this->GetSourceValue( mRHS, seed, pos... );
-        float32v smoothness = FS::Max( float32v( 1.175494351e-38f ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
+        float32v smoothness = FS::Max( float32v( FLT_MIN ), FS::Abs( this->GetSourceValue( mSmoothness, seed, pos... ) ) );
 
         float32v h = FS::Max( smoothness - FS::Abs( a - b ), float32v( 0.0f ) );
 
diff --git a/include/FastNoise/Generators/Cellular.h b/include/FastNoise/Generators/Cellular.h
index 0676faa5..5fad191e 100644
--- a/include/FastNoise/Generators/Cellular.h
+++ b/include/FastNoise/Generators/Cellular.h
@@ -5,7 +5,7 @@
 
 namespace FastNoise
 {
-    class Cellular : public virtual Generator
+    class Cellular : public virtual ScalableGenerator
     {
     public:
         void SetJitterModifier( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mJitterModifier, gen ); }
@@ -19,7 +19,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Cellular> : MetadataT<Generator>
+    struct MetadataT<Cellular> : MetadataT<ScalableGenerator>
     {
         MetadataT()
         {
@@ -114,11 +114,9 @@ namespace FastNoise
         const Metadata& GetMetadata() const override;
 
         void SetLookup( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLookup, gen ); }
-        void SetLookupFrequency( float freq ) { mLookupFreq = freq; }
 
     protected:
         GeneratorSource mLookup;
-        float mLookupFreq = 0.1f;
     };
 
 #ifdef FASTNOISE_METADATA
@@ -130,7 +128,6 @@ namespace FastNoise
         MetadataT()
         {
             this->AddGeneratorSource( { "Lookup", "Used to generate cell values" }, &CellularLookup::SetLookup );
-            this->AddVariable( { "Lookup Frequency", "Relative to the cellular frequency" }, 0.1f, &CellularLookup::SetLookupFrequency );
             
             description = 
                 "Returns value of closest cell\n"
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index 27d47582..daae9795 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -5,7 +5,7 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Cellular, SIMD> : public virtual FastNoise::Cellular, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Cellular, SIMD> : public virtual FastNoise::Cellular, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
 {
 protected:
     const float kJitter2D = 0.437016f;
@@ -17,7 +17,7 @@ protected:
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public virtual FastNoise::CellularValue, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
         float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
         std::array<float32v, kMaxDistanceCount> value;
@@ -26,6 +26,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
         value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
 
+        this->ScalePositions( x, y );
+
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
 
@@ -50,7 +52,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
                 float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd );
 
                 for( int i = 0; ; i++ )
                 {
@@ -81,7 +83,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
         return value[mValueIndex];
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
         float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
         std::array<float32v, kMaxDistanceCount> value;
@@ -89,6 +91,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
         
         value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
+
+        this->ScalePositions( x, y, z );
         
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
@@ -123,7 +127,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                     zd = FS::FMulAdd( zd, invMag, zcf );
                 
                     float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd );
                 
                     for( int i = 0; ; i++ )
                     {
@@ -157,7 +161,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
         return value[mValueIndex];
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z , float32v w ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z , float32v w ) const
     {
         float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
         std::array<float32v, kMaxDistanceCount> value;
@@ -165,6 +169,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
         
         value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
+
+        this->ScalePositions( x, y, z, w );
         
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
@@ -208,7 +214,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
                         float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
-                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd, wd );
 
                         for( int i = 0; ; i++ )
                         {
@@ -249,13 +255,15 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public virtual FastNoise::CellularDistance, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
         float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
 
         std::array<float32v, kMaxDistanceCount> distance;
         distance.fill( float32v( INFINITY ) );
 
+        this->ScalePositions( x, y );
+
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
 
@@ -279,7 +287,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                 xd = FS::FMulAdd( xd, invMag, xcf );
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
-                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd );
 
                 for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                 {
@@ -298,13 +306,15 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
         return GetReturn( distance );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
         float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
 
         std::array<float32v, kMaxDistanceCount> distance;
         distance.fill( float32v( INFINITY ) );
 
+        this->ScalePositions( x, y, z );
+
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
         int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
@@ -337,7 +347,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                     yd = FS::FMulAdd( yd, invMag, ycf );
                     zd = FS::FMulAdd( zd, invMag, zcf );
 
-                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd );
 
                     for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                     {
@@ -359,13 +369,15 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
         return GetReturn( distance );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
         float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
 
         std::array<float32v, kMaxDistanceCount> distance;
         distance.fill( float32v( INFINITY ) );
 
+        this->ScalePositions( x, y, z, w );
+
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
         int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
@@ -407,7 +419,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                         zd = FS::FMulAdd( zd, invMag, zcf );
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
-                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd, wd );
 
                         for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                         {
@@ -470,12 +482,14 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public virtual FastNoise::CellularLookup, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
         float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
         float32v distance( FLT_MAX );
         float32v cellX, cellY;
 
+        this->ScalePositions( x, y );
+
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
 
@@ -499,7 +513,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                 xd = FS::FMulAdd( xd, invMag, xcf );
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
-                float32v newDistance = CalcDistance( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd );
 
                 mask32v closer = newDistance < distance;
                 distance = FS::Min( newDistance, distance );
@@ -514,15 +528,17 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
             xc += int32v( Primes::X );
         }
 
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ) );
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mScale ), cellY * float32v( mScale ) );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
         float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
         float32v distance( FLT_MAX );
         float32v cellX, cellY, cellZ;
 
+        this->ScalePositions( x, y, z );
+
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
         int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
@@ -555,7 +571,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                     yd = FS::FMulAdd( yd, invMag, ycf );
                     zd = FS::FMulAdd( zd, invMag, zcf );
 
-                    float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd );
 
                     mask32v closer = newDistance < distance;
                     distance = FS::Min( newDistance, distance );
@@ -574,15 +590,17 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
             xc += int32v( Primes::X );
         }
 
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ) );
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mScale ), cellY * float32v( mScale ), cellZ * float32v( mScale ) );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
         float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
         float32v distance( FLT_MAX );
         float32v cellX, cellY, cellZ, cellW;
 
+        this->ScalePositions( x, y, z, w );
+
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
         int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
@@ -624,7 +642,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                         zd = FS::FMulAdd( zd, invMag, zcf );
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
-                        float32v newDistance = CalcDistance( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd, wd );
 
                         mask32v closer = newDistance < distance;
                         distance = FS::Min( newDistance, distance );
@@ -647,6 +665,6 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
             xc += int32v( Primes::X );
         }
 
-        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ), cellW * float32v( mLookupFreq ) );
+        return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mScale ), cellY * float32v( mScale ), cellZ * float32v( mScale ), cellW * float32v( mScale ) );
     }
 };
diff --git a/include/FastNoise/Generators/DomainWarp.h b/include/FastNoise/Generators/DomainWarp.h
index 1bbfe095..f86f76a3 100644
--- a/include/FastNoise/Generators/DomainWarp.h
+++ b/include/FastNoise/Generators/DomainWarp.h
@@ -3,30 +3,27 @@
 
 namespace FastNoise
 {
-    class DomainWarp : public virtual Generator
+    class DomainWarp : public virtual ScalableGenerator // The warp frequency is now the inherited mFrequency; SetWarpFrequency and mWarpFrequency are removed
     {
     public:
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
         void SetWarpAmplitude( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWarpAmplitude, gen ); }
         void SetWarpAmplitude( float value ) { mWarpAmplitude = value; } 
-        void SetWarpFrequency( float value ) { mWarpFrequency = value; }
 
     protected:
         GeneratorSource mSource;
-        HybridSource mWarpAmplitude = 1.0f;
-        float mWarpFrequency = 0.5f;
+        HybridSource mWarpAmplitude = 50.0f; // Initial amplitude changed from 1.0f to 50.0f by this patch
     };
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<DomainWarp> : MetadataT<Generator>
+    struct MetadataT<DomainWarp> : MetadataT<ScalableGenerator> // Base constructor registers "Scale", replacing the removed "Warp Frequency" variable
     {
         MetadataT()
         {
             groups.push_back( "Domain Warp" );
             this->AddGeneratorSource( "Source", &DomainWarp::SetSource );
-            this->AddHybridSource( "Warp Amplitude", 1.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude );
-            this->AddVariable( "Warp Frequency", 0.5f, &DomainWarp::SetWarpFrequency );
+            this->AddHybridSource( "Warp Amplitude", 50.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude ); // 50.0f matches the mWarpAmplitude initialiser in DomainWarp
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/DomainWarp.inl b/include/FastNoise/Generators/DomainWarp.inl
index 4be8dac1..e086163a 100644
--- a/include/FastNoise/Generators/DomainWarp.inl
+++ b/include/FastNoise/Generators/DomainWarp.inl
@@ -2,20 +2,20 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD> : public virtual FastNoise::DomainWarp, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD> : public virtual FastNoise::DomainWarp, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
     template<typename... P>
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        Warp( seed, this->GetSourceValue( mWarpAmplitude, seed, pos... ), (pos * float32v( mWarpFrequency ))..., pos... );
+        Warp( seed, this->GetSourceValue( mWarpAmplitude, seed, pos... ), ( pos * float32v( this->mFrequency ) )..., pos... ); // Frequency-scaled coordinates are Warp's inputs; the unscaled pos copies are passed as its reference out-parameters
 
         return this->GetSourceValue( mSource, seed, pos...);
     }
 
 public:
-    float GetWarpFrequency() const { return mWarpFrequency; }
+    float GetWarpFrequency() const { return this->mFrequency; } // Returns the mFrequency inherited from ScalableGenerator (set to 1 / scale by SetScale)
     const FastNoise::HybridSource& GetWarpAmplitude() const { return mWarpAmplitude; }
     const FastNoise::GeneratorSource& GetWarpSource() const { return mSource; }
 
@@ -28,7 +28,7 @@ template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::DomainWarpGradient, SIMD> final : public virtual FastNoise::DomainWarpGradient, public FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD>
 {
 public:
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const
     {
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
@@ -66,7 +66,7 @@ public:
         return warpLengthSq * FS::InvSqrt( warpLengthSq );
     }
             
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const
     {
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
@@ -123,7 +123,7 @@ public:
         return warpLengthSq * FS::InvSqrt( warpLengthSq );
     }
             
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
     {
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
diff --git a/include/FastNoise/Generators/Modifiers.h b/include/FastNoise/Generators/Modifiers.h
index 595bdab1..d7e42060 100644
--- a/include/FastNoise/Generators/Modifiers.h
+++ b/include/FastNoise/Generators/Modifiers.h
@@ -405,5 +405,30 @@ namespace FastNoise
         }
     };
 #endif
+
+    class SquareRoot : public virtual Generator // Node with a single generator source; the computation lives in its DispatchClass specialisation
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); } // Sets mSource from a generator node
+
+    protected:
+        GeneratorSource mSource; // Source read by the dispatch class's GenT
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<SquareRoot> : MetadataT<Generator> // Metadata for the SquareRoot node
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Modifiers" ); // Listed under the "Modifiers" group
+            this->AddGeneratorSource( "Source", &SquareRoot::SetSource ); // Registers the generator source "Source", bound to SetSource
+        }
+    };
+#endif
     
 }
diff --git a/include/FastNoise/Generators/Modifiers.inl b/include/FastNoise/Generators/Modifiers.inl
index 7b37b994..e8ca0257 100644
--- a/include/FastNoise/Generators/Modifiers.inl
+++ b/include/FastNoise/Generators/Modifiers.inl
@@ -33,7 +33,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainOffset, SIMD> final : public virt
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> final : public virtual FastNoise::DomainRotate, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
         if( mPitchSin == 0.0f && mRollSin == 0.0f )
         {
@@ -45,7 +45,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> final : public virt
         return Gen( seed, x, y, float32v( 0 ) );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
         return this->GetSourceValue( mSource, seed,
             FS::FMulAdd( x, float32v( mXa ), FS::FMulAdd( y, float32v( mXb ), z * float32v( mXc ) ) ),
@@ -53,7 +53,7 @@ class FastSIMD::DispatchClass<FastNoise::DomainRotate, SIMD> final : public virt
             FS::FMulAdd( x, float32v( mZa ), FS::FMulAdd( y, float32v( mZb ), z * float32v( mZc ) ) ) );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
         // No rotation for 4D yet
         return this->GetSourceValue( mSource, seed, x, y, z, w );
@@ -182,12 +182,12 @@ class FastSIMD::DispatchClass<FastNoise::AddDimension, SIMD> final : public virt
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::RemoveDimension, SIMD> final : public virtual FastNoise::RemoveDimension, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const // Method-level `final` dropped; the enclosing dispatch class is itself declared final
     {
         return this->GetSourceValue( mSource, seed, x, y );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
         switch( mRemoveDimension )
         {
@@ -202,7 +202,7 @@ class FastSIMD::DispatchClass<FastNoise::RemoveDimension, SIMD> final : public v
         }
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
         switch( mRemoveDimension )
         {
@@ -264,3 +264,17 @@ class FastSIMD::DispatchClass<FastNoise::GeneratorCache, SIMD> final : public vi
         return FS::Load<float32v>( CachedValue );
     }
 };
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::SquareRoot, SIMD> final : public virtual FastNoise::SquareRoot, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD> // SIMD implementation of the SquareRoot node
+{
+    FASTNOISE_IMPL_GEN_T;
+
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const // Same body serves every dimension through the coordinate pack
+    {
+        float32v value = this->GetSourceValue( mSource, seed, pos... );
+        
+        return FS::InvSqrt( FS::Max( FS::Abs( value ), float32v( FLT_MIN ) ) ) * value; // value * 1/sqrt(|value|), i.e. a sign-preserving square root; the FLT_MIN clamp keeps InvSqrt's argument above zero
+    }
+};
diff --git a/include/FastNoise/Generators/Perlin.h b/include/FastNoise/Generators/Perlin.h
index fcce39b7..8096537a 100644
--- a/include/FastNoise/Generators/Perlin.h
+++ b/include/FastNoise/Generators/Perlin.h
@@ -3,7 +3,7 @@
 
 namespace FastNoise
 {
-    class Perlin : public virtual Generator
+    class Perlin : public virtual ScalableGenerator
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -11,7 +11,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Perlin> : MetadataT<Generator>
+    struct MetadataT<Perlin> : MetadataT<ScalableGenerator>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
diff --git a/include/FastNoise/Generators/Perlin.inl b/include/FastNoise/Generators/Perlin.inl
index 674c1143..b9a3190d 100644
--- a/include/FastNoise/Generators/Perlin.inl
+++ b/include/FastNoise/Generators/Perlin.inl
@@ -2,9 +2,11 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
-{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+{    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
+        this->ScalePositions( x, y );
+
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
 
@@ -26,8 +28,10 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
             Lerp( GetGradientDot( HashPrimes( seed, x0, y1 ), xf0, yf1 ), GetGradientDot( HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
+        this->ScalePositions( x, y, z );
+
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
         float32v zs = FS::Floor( z );
@@ -58,8 +62,10 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
             Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
+        this->ScalePositions( x, y, z, w );
+
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
         float32v zs = FS::Floor( z );
diff --git a/include/FastNoise/Generators/Simplex.h b/include/FastNoise/Generators/Simplex.h
index ca518440..00fd17e0 100644
--- a/include/FastNoise/Generators/Simplex.h
+++ b/include/FastNoise/Generators/Simplex.h
@@ -3,7 +3,7 @@
 
 namespace FastNoise
 {
-    class Simplex : public virtual Generator
+    class Simplex : public virtual ScalableGenerator
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -11,7 +11,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Simplex> : MetadataT<Generator>
+    struct MetadataT<Simplex> : MetadataT<ScalableGenerator>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
@@ -26,7 +26,7 @@ namespace FastNoise
     };
 #endif
 
-    class OpenSimplex2 : public virtual Generator
+    class OpenSimplex2 : public virtual ScalableGenerator
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -34,7 +34,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<OpenSimplex2> : MetadataT<Generator>
+    struct MetadataT<OpenSimplex2> : MetadataT<ScalableGenerator>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
@@ -49,7 +49,7 @@ namespace FastNoise
     };
 #endif
 
-    class OpenSimplex2S : public virtual Generator
+    class OpenSimplex2S : public virtual ScalableGenerator
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -57,7 +57,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<OpenSimplex2S> : MetadataT<Generator>
+    struct MetadataT<OpenSimplex2S> : MetadataT<ScalableGenerator>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index b08c72af..f4b1d60e 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -2,10 +2,12 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
+        this->ScalePositions( x, y );
+
         const float SQRT3 = 1.7320508075688772935274463415059f;
         const float F2 = 0.5f * (SQRT3 - 1.0f);
         const float G2 = (3.0f - SQRT3) / 6.0f;
@@ -49,8 +51,10 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         return float32v( 38.283687591552734375f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
+        this->ScalePositions( x, y, z );
+
         const float F3 = 1.0f / 3.0f;
         const float G3 = 1.0f / 2.0f;
 
@@ -120,8 +124,10 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         return float32v( 32.69428253173828125f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, n3 * t3 ) ) );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
+        this->ScalePositions( x, y, z, w );
+
         const float SQRT5 = 2.236067977499f;
         const float F4 = (SQRT5 - 1.0f) / 4.0f;
         const float G4 = (5.0f - SQRT5) / 20.0f;
@@ -254,10 +260,12 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
+        this->ScalePositions( x, y );
+
         const float SQRT3 = 1.7320508075f;
         const float F2 = 0.5f * (SQRT3 - 1.0f);
         const float G2 = (3.0f - SQRT3) / 6.0f;
@@ -300,8 +308,10 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virt
         return float32v( 49.918426513671875f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
+        this->ScalePositions( x, y, z );
+
         float32v f = float32v( 2.0f / 3.0f ) * (x + y + z);
         float32v xr = f - x;
         float32v yr = f - y;
@@ -366,10 +376,12 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virt
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public virtual FastNoise::OpenSimplex2S, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public virtual FastNoise::OpenSimplex2S, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
+        this->ScalePositions( x, y );
+
         const float SQRT3 = 1.7320508075688772935274463415059f;
         const float F2 = 0.5f * ( SQRT3 - 1.0f );
         const float G2 = ( SQRT3 - 3.0f ) / 6.0f;
@@ -432,8 +444,10 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public vir
         return float32v( 9.28993664146183f ) * value;
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
+        this->ScalePositions( x, y, z );
+
         float32v f = float32v( 2.0f / 3.0f ) * ( x + y + z );
         float32v xr = f - x;
         float32v yr = f - y;
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 113f2987..0470b37d 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -239,13 +239,14 @@ namespace FastNoise
         return t * t * t * FS::FMulAdd( t, FS::FMulAdd( t, float32v( 6 ), float32v( -15 )), float32v( 10 ) );
     }
 
-    template<typename... P>
+    template<bool DO_SQRT = true, typename... P>
     FS_FORCEINLINE static float32v CalcDistance( DistanceFunction distFunc, float32v dX, P... d )
     {
         switch( distFunc )
         {
             default:
             case DistanceFunction::Euclidean:
+            if constexpr( DO_SQRT )
             {
                 float32v distSqr = dX * dX;
                 ((distSqr = FS::FMulAdd( d, d, distSqr )), ...);
diff --git a/include/FastNoise/Generators/Value.h b/include/FastNoise/Generators/Value.h
index e23dabcb..87b55603 100644
--- a/include/FastNoise/Generators/Value.h
+++ b/include/FastNoise/Generators/Value.h
@@ -3,14 +3,14 @@
 
 namespace FastNoise
 {
-    class Value : public virtual Generator
+    class Value : public virtual ScalableGenerator
     {
     public:        const Metadata& GetMetadata() const override;
     };
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Value> : MetadataT<Generator>
+    struct MetadataT<Value> : MetadataT<ScalableGenerator>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
diff --git a/include/FastNoise/Generators/Value.inl b/include/FastNoise/Generators/Value.inl
index 922eec89..0d162b54 100644
--- a/include/FastNoise/Generators/Value.inl
+++ b/include/FastNoise/Generators/Value.inl
@@ -2,10 +2,12 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
+        this->ScalePositions( x, y );
+
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
 
@@ -22,8 +24,10 @@ class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual Fas
             Lerp( GetValueCoord( seed, x0, y1 ), GetValueCoord( seed, x1, y1 ), xs ), ys );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
+        this->ScalePositions( x, y, z );
+
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
         float32v zs = FS::Floor( z );
@@ -47,8 +51,10 @@ class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual Fas
             Lerp( GetValueCoord( seed, x0, y1, z1 ), GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
+        this->ScalePositions( x, y, z, w );
+
         float32v xs = FS::Floor( x );
         float32v ys = FS::Floor( y );
         float32v zs = FS::Floor( z );
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a7266102..030501e8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,7 +2,7 @@
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 5731a1d26f5182f0f89f449c252f9bba2f36b933
+    GIT_TAG 5bcddffbefa144138f572fe32e4e1d50a1d32deb
 )
 
 set(install_targets ${install_targets}
diff --git a/tests/FastNoiseCpp11Include.cpp b/tests/FastNoiseCpp11Include.cpp
index b7672f86..69d542e7 100644
--- a/tests/FastNoiseCpp11Include.cpp
+++ b/tests/FastNoiseCpp11Include.cpp
@@ -38,7 +38,7 @@ int main()
         auto checkerboard = FastNoise::SmartNode<FastNoise::Checkerboard>::DynamicCast( base );
 
         // Ok
-        checkerboard->SetSize( 8.0f );
+        checkerboard->SetScale( 8.0f );
 
         // Down cast to wrong type will return nullptr
         auto simplex = FastNoise::SmartNode<FastNoise::Simplex>::DynamicCast( base );

From e4abb290703b6458d03b613387b78fcf68952c14 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 23 Oct 2023 22:44:42 +0100
Subject: [PATCH 042/139] Updated base64 encoding to compress consecutive As

---
 include/FastNoise/FastNoise_BuildList.inl |   3 +-
 src/FastNoise/Base64.h                    | 186 +++++++++++++++-------
 2 files changed, 127 insertions(+), 62 deletions(-)

diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index db909d61..040ddd42 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -88,6 +88,7 @@ FASTNOISE_REGISTER_NODE( Value );
 FASTNOISE_REGISTER_NODE( Perlin );
 FASTNOISE_REGISTER_NODE( Simplex );
 FASTNOISE_REGISTER_NODE( OpenSimplex2 );
+FASTNOISE_REGISTER_NODE( OpenSimplex2S );
                        
 FASTNOISE_REGISTER_NODE( CellularValue );
 FASTNOISE_REGISTER_NODE( CellularDistance );
@@ -126,5 +127,3 @@ FASTNOISE_REGISTER_NODE( AddDimension );
 FASTNOISE_REGISTER_NODE( RemoveDimension );
 FASTNOISE_REGISTER_NODE( GeneratorCache );
 FASTNOISE_REGISTER_NODE( SquareRoot );
-
-FASTNOISE_REGISTER_NODE( OpenSimplex2S );
diff --git a/src/FastNoise/Base64.h b/src/FastNoise/Base64.h
index 9d7449fa..a2f52322 100644
--- a/src/FastNoise/Base64.h
+++ b/src/FastNoise/Base64.h
@@ -1,40 +1,17 @@
 #pragma once
 
+#include <cstdint>
 #include <cstring>
 #include <string>
 #include <vector>
-#include <cstdint>
 
 namespace FastNoise
 {
-    /** https://gist.github.com/tomykaira/f0fd86b6c73063283afe550bc5d77594
-     * The MIT License (MIT)
-     * Copyright (c) 2016 tomykaira
-     *
-     * Permission is hereby granted, free of charge, to any person obtaining
-     * a copy of this software and associated documentation files (the
-     * "Software"), to deal in the Software without restriction, including
-     * without limitation the rights to use, copy, modify, merge, publish,
-     * distribute, sublicense, and/or sell copies of the Software, and to
-     * permit persons to whom the Software is furnished to do so, subject to
-     * the following conditions:
-     *
-     * The above copyright notice and this permission notice shall be
-     * included in all copies or substantial portions of the Software.
-     *
-     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-     * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-     * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-     * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-     * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-     * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-     * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-     */
     namespace Base64
     {
         static std::string Encode( const std::vector<uint8_t>& data )
         {
-            static constexpr char sEncodingTable[] = {
+            static constexpr char kEncodingTable[] = {
                 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
                 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
                 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
@@ -45,33 +22,70 @@ namespace FastNoise
                 '4', '5', '6', '7', '8', '9', '+', '/'
             };
 
-            size_t in_len = data.size();
-            size_t out_len = 4 * ((in_len + 2) / 3);
-            std::string ret( out_len, '\0' );
+            size_t inLen = data.size();
+            std::string ret;
+            size_t consecutiveAs = 0;
+
+            auto appendChar = [&]( char c ) { // Appends c to ret, run-length compressing 'A's; '\0' only flushes a pending run
+                if( c == 'A' ) // Compress "A"s into @ with count in following char
+                {
+                    if( consecutiveAs++ <= 1 ) // First two As are written literally; a marker later overwrites these 2 chars
+                    {
+                        ret += 'A';
+                    }
+                    else if( consecutiveAs >= std::size( kEncodingTable ) + 3 ) // This is the 67th A: one marker holds at most 66
+                    {
+                        ret[ret.length() - 2] = '@';
+                        ret[ret.length() - 1] = kEncodingTable[consecutiveAs - 4]; // Encode only the 66 previous As
+                        // The current A is not part of the marker above: it is emitted literally and starts a new run
+                        ret += 'A';
+                        consecutiveAs = 1;
+                    }
+                }
+                else
+                {
+                    if( consecutiveAs >= 3 ) // Runs of 1-2 stay literal; longer runs become '@' + kEncodingTable[run - 3]
+                    {
+                        ret[ret.length() - 2] = '@';
+                        ret[ret.length() - 1] = kEncodingTable[consecutiveAs - 3];
+                    }
+                    if( c != '\0' )
+                    {
+                        ret += c;
+                    }
+
+                    consecutiveAs = 0;
+                }
+            };
+
             size_t i;
-            char* p = const_cast<char*>(ret.c_str());
 
-            for( i = 0; i < in_len - 2; i += 3 )
+            for( i = 0; i + 2 < inLen; i += 3 ) // i + 2 < inLen avoids the size_t wrap-around of inLen - 2 for inputs shorter than 2 bytes
             {
-                *p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
-                *p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
-                *p++ = sEncodingTable[((data[i + 1] & 0xF) << 2) | ((int)(data[i + 2] & 0xC0) >> 6)];
-                *p++ = sEncodingTable[data[i + 2] & 0x3F];
+                appendChar( kEncodingTable[( data[i] >> 2 ) & 0x3F] );
+                appendChar( kEncodingTable[( ( data[i] & 0x3 ) << 4 ) | ( ( data[i + 1] & 0xF0 ) >> 4 )] );
+                appendChar( kEncodingTable[( ( data[i + 1] & 0xF ) << 2 ) | ( ( data[i + 2] & 0xC0 ) >> 6 )] );
+                appendChar( kEncodingTable[data[i + 2] & 0x3F] );
             }
-            if( i < in_len )
+            if( i < inLen )
             {
-                *p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
-                if( i == (in_len - 1) )
+                appendChar( kEncodingTable[( data[i] >> 2 ) & 0x3F] );
+                if( i == ( inLen - 1 ) )
                 {
-                    *p++ = sEncodingTable[((data[i] & 0x3) << 4)];
-                    *p++ = '=';
+                    appendChar( kEncodingTable[( ( data[i] & 0x3 ) << 4 )] );
+                    appendChar( '=' );
                 }
                 else
                 {
-                    *p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
-                    *p++ = sEncodingTable[((data[i + 1] & 0xF) << 2)];
+                    appendChar( kEncodingTable[( ( data[i] & 0x3 ) << 4 ) | ( ( data[i + 1] & 0xF0 ) >> 4 )] );
+                    appendChar( kEncodingTable[( ( data[i + 1] & 0xF ) << 2 )] );
                 }
-                *p++ = '=';
+                appendChar( '=' );
+            }
+            else
+            {
+                // Handle any trailing As
+                appendChar( '\0' );
             }
 
             return ret;
@@ -83,7 +97,7 @@ namespace FastNoise
                 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
                 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
                 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63,
-                52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64,
+                52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 0, 64, 64,
                 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64,
                 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
@@ -98,31 +112,83 @@ namespace FastNoise
                 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
             };
 
-            size_t in_len = std::strlen( input );
-            size_t out_len = in_len / 4 * 3;
+            size_t rawLen = 0, decompLen = 0;
+
+            // Check string length with decompress
+            while( input[rawLen] )
+            {
+                if( input[rawLen] == '@' )
+                {
+                    unsigned char aExtra = kDecodingTable[static_cast<unsigned char>( input[++rawLen] )];
 
-            if( out_len == 0 || in_len % 4 != 0 ) return {};
+                    if( aExtra == 64 ) // Error
+                    {
+                        return {};
+                    }
 
-            if( input[in_len - 1] == '=' ) out_len--;
-            if( input[in_len - 2] == '=' ) out_len--;
+                    decompLen += aExtra + 2;
+                }
+                else
+                {
+                    decompLen++;
+                    rawLen++;
+                }
+            }
 
-            std::vector<uint8_t> out( out_len );
+            size_t outLen = decompLen / 4 * 3;
 
-            for( size_t i = 0, j = 0; i < in_len; )
+            if( outLen == 0 || decompLen % 4 != 0 )
+                return {};
+                        
+            if( input[rawLen - 1] == '=' )
             {
-                uint32_t a = input[i] == '=' ? 0 & i++ : kDecodingTable[static_cast<int>(input[i++])];
-                uint32_t b = input[i] == '=' ? 0 & i++ : kDecodingTable[static_cast<int>(input[i++])];
-                uint32_t c = input[i] == '=' ? 0 & i++ : kDecodingTable[static_cast<int>(input[i++])];
-                uint32_t d = input[i] == '=' ? 0 & i++ : kDecodingTable[static_cast<int>(input[i++])];
+                outLen--;
+                if( input[rawLen - 2] == '=' )
+                    outLen--;
+            }
+
+            std::vector<uint8_t> out( outLen );
+            size_t i = 0, j = 0, consecutiveAs = 0;
+
+            while( i < rawLen || consecutiveAs > 0 )
+            {
+                char currentBlock[4] = { 0 };
+
+                for( int k = 0; k < 4; k++ )
+                {
+                    if( consecutiveAs > 0 )
+                    {
+                        currentBlock[k] = 'A';
+                        consecutiveAs--;
+                    }
+                    else if( input[i] == '@' )
+                    {
+                        currentBlock[k] = 'A';
+                        i++;
+                        consecutiveAs = kDecodingTable[static_cast<unsigned char>( input[i++] )] + 2;
+                    }
+                    else
+                    {
+                        currentBlock[k] = input[i++];
+                    }
+                }
+
+                uint32_t a = kDecodingTable[static_cast<unsigned char>( currentBlock[0] )];
+                uint32_t b = kDecodingTable[static_cast<unsigned char>( currentBlock[1] )];
+                uint32_t c = kDecodingTable[static_cast<unsigned char>( currentBlock[2] )];
+                uint32_t d = kDecodingTable[static_cast<unsigned char>( currentBlock[3] )];
 
-                uint32_t triple = (a << 3 * 6) + (b << 2 * 6) + (c << 1 * 6) + (d << 0 * 6);
+                uint32_t triple = ( a << 3 * 6 ) + ( b << 2 * 6 ) + ( c << 1 * 6 ) + ( d << 0 * 6 );
 
-                if( j < out_len ) out[j++] = (triple >> 2 * 8) & 0xFF;
-                if( j < out_len ) out[j++] = (triple >> 1 * 8) & 0xFF;
-                if( j < out_len ) out[j++] = (triple >> 0 * 8) & 0xFF;
+                if( j < outLen )
+                    out[j++] = ( triple >> 2 * 8 ) & 0xFF;
+                if( j < outLen )
+                    out[j++] = ( triple >> 1 * 8 ) & 0xFF;
+                if( j < outLen )
+                    out[j++] = ( triple >> 0 * 8 ) & 0xFF;
             }
 
             return out;
         }
-    };
-}
+    }; // namespace Base64
+} // namespace FastNoise

From 26c3d6ee4eb79dc930e8bf660c82a2a15b05755e Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 23 Oct 2023 23:47:59 +0100
Subject: [PATCH 043/139] Update demo node trees for new scaling generators

---
 NoiseTool/DemoNodeTrees.inl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/NoiseTool/DemoNodeTrees.inl b/NoiseTool/DemoNodeTrees.inl
index 85b6add5..1cbf2a33 100644
--- a/NoiseTool/DemoNodeTrees.inl
+++ b/NoiseTool/DemoNodeTrees.inl
@@ -2,6 +2,6 @@
 
 inline const char* gDemoNodeTrees[][2] =
 {
-    { "Simple Terrain", "EQACAAAAAAAgQBAAAAAAQBkAEwDD9Sg/DQAEAAAAAAAgQAkAAGZmJj8AAAAAPwEEAAAAAAAAAEBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM3MTD4AMzMzPwAAAAA/" },
-    { "Cellular Caves", "EwCamZk+GgABEQACAAAAAADgQBAAAACIQR8AFgABAAAACwADAAAAAgAAAAMAAAAEAAAAAAAAAD8BFAD//wAAAAAAAD8AAAAAPwAAAAA/AAAAAD8BFwAAAIC/AACAPz0KF0BSuB5AEwAAAKBABgAAj8J1PACamZk+AAAAAAAA4XoUPw==" },
+    { "Simple Terrain", "EgAC@EgQBE@ADIQhoADgAE@EgQAk@ACWQwBmZiY/@CD8BB@Ej8J1P@hCBCADMzMz8@E==" },
+    { "Cellular Caves", "EwAC@DDgQBE@BgQhsAASAAFwAB@BD@BCVEAw@BI@BD@BB@H/ARUA//8@DKVD@BpUM@AClQw@ERg@ACAvwAAgD89ChdAUrgeQAY@BCQwDhehQ/@BIEEAmpmZPg@D" },
 };

From cf7a2ec518d3fe3ab9bf9f9f9f06ed6553666551 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 25 Oct 2023 00:45:08 +0100
Subject: [PATCH 044/139] Rename NoiseTool to Node Visualiser

---
 .github/workflows/benchmark.yml               |  2 +-
 .github/workflows/main.yml                    |  6 +--
 CMakeLists.txt                                |  6 +--
 CMakePresets.json                             | 24 ++++++------
 README.md                                     |  8 ++--
 include/FastNoise/FastNoise.h                 |  2 +-
 tests/FastNoiseBenchmark.cpp                  |  2 +-
 {NoiseTool => tools}/CMakeLists.txt           | 28 +++++++-------
 {NoiseTool => tools}/DemoNodeTrees.inl        |  0
 {NoiseTool => tools}/FastNoiseNodeEditor.cpp  |  4 +-
 {NoiseTool => tools}/FastNoiseNodeEditor.h    |  0
 {NoiseTool => tools}/ImGuiExtra.h             |  0
 {NoiseTool => tools}/MeshNoisePreview.cpp     |  8 ++--
 {NoiseTool => tools}/MeshNoisePreview.h       |  0
 {NoiseTool => tools}/MultiThreadQueues.h      |  0
 .../NodeVisualiserApp.cpp                     | 38 +++++++++----------
 .../NodeVisualiserApp.h                       |  6 +--
 {NoiseTool => tools}/NoiseTexture.cpp         |  2 +-
 {NoiseTool => tools}/NoiseTexture.h           |  0
 {NoiseTool => tools}/VertexLight.frag         |  0
 {NoiseTool => tools}/VertexLight.vert         |  0
 {NoiseTool => tools}/WindowsHiDPI.manifest    |  0
 {NoiseTool => tools}/resources.conf           |  6 +--
 23 files changed, 71 insertions(+), 71 deletions(-)
 rename {NoiseTool => tools}/CMakeLists.txt (81%)
 rename {NoiseTool => tools}/DemoNodeTrees.inl (100%)
 rename {NoiseTool => tools}/FastNoiseNodeEditor.cpp (99%)
 rename {NoiseTool => tools}/FastNoiseNodeEditor.h (100%)
 rename {NoiseTool => tools}/ImGuiExtra.h (100%)
 rename {NoiseTool => tools}/MeshNoisePreview.cpp (98%)
 rename {NoiseTool => tools}/MeshNoisePreview.h (100%)
 rename {NoiseTool => tools}/MultiThreadQueues.h (100%)
 rename NoiseTool/NoiseToolApp.cpp => tools/NodeVisualiserApp.cpp (89%)
 rename NoiseTool/NoiseToolApp.h => tools/NodeVisualiserApp.h (92%)
 rename {NoiseTool => tools}/NoiseTexture.cpp (99%)
 rename {NoiseTool => tools}/NoiseTexture.h (100%)
 rename {NoiseTool => tools}/VertexLight.frag (100%)
 rename {NoiseTool => tools}/VertexLight.vert (100%)
 rename {NoiseTool => tools}/WindowsHiDPI.manifest (100%)
 rename {NoiseTool => tools}/resources.conf (50%)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 9cdf1179..8a2f7219 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -37,7 +37,7 @@ jobs:
       uses: actions/checkout@v3            
     
     - name: 'CMake Configure'
-      run: cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release -DFASTNOISE2_NOISETOOL=OFF -DFASTNOISE2_TESTS=ON ${{ matrix.cmake_options }}
+      run: cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release -DFASTNOISE2_TOOLS=OFF -DFASTNOISE2_TESTS=ON ${{ matrix.cmake_options }}
    
     - name: 'CMake Build'
       run: cmake --build ${{ github.workspace }}/build --config Release --target FastNoiseBenchmark --parallel 4
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 0f498f38..cb1e06e6 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -49,19 +49,19 @@ jobs:
       uses: actions/checkout@v3
    
     - name: 'CMake Build Debug'
-      run: cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/debug -DCMAKE_BUILD_TYPE=Debug -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="${{ github.workspace }}/install/FastNoise2" -DFASTNOISE2_NOISETOOL=OFF -DFASTNOISE2_TESTS=OFF ${{ matrix.cmake_options }}
+      run: cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/debug -DCMAKE_BUILD_TYPE=Debug -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="${{ github.workspace }}/install/FastNoise2" -DFASTNOISE2_TOOLS=OFF -DFASTNOISE2_TESTS=OFF ${{ matrix.cmake_options }}
    
     - name: 'CMake Install Debug'
       run: cmake --build ${{ github.workspace }}/debug --config Debug --target install --parallel 4
    
     - name: 'CMake Build Release'
-      run: cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/release -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="${{ github.workspace }}/install/FastNoise2" -DFASTNOISE2_NOISETOOL=ON -DFASTNOISE2_TESTS=ON ${{ matrix.cmake_options }}
+      run: cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/release -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="${{ github.workspace }}/install/FastNoise2" -DFASTNOISE2_TOOLS=ON -DFASTNOISE2_TESTS=ON ${{ matrix.cmake_options }}
    
     - name: 'CMake Install Release'
       run: cmake --build ${{ github.workspace }}/release --config Release --target install --parallel 4
     
     - if: runner.os != 'Windows'
-      run: chmod +x ${{ github.workspace }}/install/FastNoise2/bin/NoiseTool
+      run: chmod +x ${{ github.workspace }}/install/FastNoise2/bin/NodeVisualiser
     
     - name: 'Upload artifact'
       uses: actions/upload-artifact@v3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 325ee9b5..37a779fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ endif()
 # Build DLL
 #set(BUILD_SHARED_LIBS ON) 
 
-option(FASTNOISE2_NOISETOOL "Build NoiseTool application" ${FASTNOISE2_STANDALONE_PROJECT})
+option(FASTNOISE2_TOOLS "Build \"Node Visualiser\" executable" ${FASTNOISE2_STANDALONE_PROJECT})
 option(FASTNOISE2_TESTS "Build tests" OFF)
 
 if(MSVC)
@@ -39,8 +39,8 @@ set(install_targets "")
 include(cmake/CPM.cmake)
 add_subdirectory(src)
 
-if(FASTNOISE2_NOISETOOL)
-    add_subdirectory(NoiseTool)
+if(FASTNOISE2_TOOLS)
+    add_subdirectory(tools)
 endif()
 
 if(FASTNOISE2_TESTS)
diff --git a/CMakePresets.json b/CMakePresets.json
index 92ea1c3e..39f8a0cf 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -2,8 +2,8 @@
   "version": 3,
   "configurePresets": [
     {
-      "name": "noisetool",
-      "displayName": "NoiseTool",
+      "name": "tools",
+      "displayName": "Tools",
       "generator": "Ninja Multi-Config",
       "binaryDir": "${sourceDir}/out/build/${presetName}",
       "installDir": "${sourceDir}/out/install/${presetName}",
@@ -21,9 +21,9 @@
     {
       "name": "minimal",
       "displayName": "Minimal",
-      "inherits": "noisetool",
+      "inherits": "tools",
       "cacheVariables": {
-        "FASTNOISE2_NOISETOOL": {
+        "FASTNOISE2_TOOLS": {
           "value": "False",
           "type": "BOOL"
         },
@@ -36,9 +36,9 @@
     {
       "name": "all",
       "displayName": "All",
-      "inherits": "noisetool",
+      "inherits": "tools",
       "cacheVariables": {
-        "FASTNOISE2_NOISETOOL": {
+        "FASTNOISE2_TOOLS": {
           "value": "True",
           "type": "BOOL"
         },
@@ -51,14 +51,14 @@
   ],
   "buildPresets": [
     {
-      "name": "noisetool-debug",
-      "displayName": "NoiseTool Debug",
-      "configurePreset": "noisetool"
+      "name": "tools-debug",
+      "displayName": "tools Debug",
+      "configurePreset": "tools"
     },
     {
-      "name": "noisetool-release",
-      "displayName": "NoiseTool Release",
-      "configurePreset": "noisetool",
+      "name": "tools-release",
+      "displayName": "tools Release",
+      "configurePreset": "tools",
       "configuration": "Release"
     },
     {
diff --git a/README.md b/README.md
index 358805f5..f5deb77f 100644
--- a/README.md
+++ b/README.md
@@ -36,13 +36,13 @@ Bindings:
 Roadmap:
 - [Vague collection of ideas](https://github.com/users/Auburn/projects/1)
 
-## Noise Tool
+## Node Visualiser
 
-The FastNoise2 noise tool provides a node graph editor to create trees of FastNoise2 nodes. Node trees can be exported as serialised strings and loaded into the FastNoise2 library in your own code. The noise tool has 2D and 3D previews for the node graph output, see screenshots below for examples.
+The FastNoise2 Node Visualiser tool provides a node graph editor to create trees of FastNoise2 nodes. Node trees can be exported as serialised strings and loaded into the FastNoise2 library in your own code. Node Visualiser has 2D and 3D previews for the node graph output, see screenshots below for examples.
 
-Check the [Releases](https://github.com/Auburn/FastNoise2/releases/latest) for compiled NoiseTool binaries
+Check the [Releases](https://github.com/Auburn/FastNoise2/releases/latest) for compiled Node Visualiser binaries
 
-![NoiseTool](https://user-images.githubusercontent.com/1349548/90967950-4e8da600-e4de-11ea-902a-94e72cb86481.png)
+![Node Visualiser](https://user-images.githubusercontent.com/1349548/90967950-4e8da600-e4de-11ea-902a-94e72cb86481.png)
 
 ## Performance
 
diff --git a/include/FastNoise/FastNoise.h b/include/FastNoise/FastNoise.h
index ba18c1d1..4253ca8d 100644
--- a/include/FastNoise/FastNoise.h
+++ b/include/FastNoise/FastNoise.h
@@ -43,7 +43,7 @@ namespace FastNoise
     /// <example>
     /// FastNoise::SmartNode<> rootNode = FastNoise::NewFromEncodedNodeTree( "DQAFAAAAAAAAQAgAAAAAAD8AAAAAAA==" );
     /// </example>
-    /// <param name="encodedNodeTreeString">Can be generated using the NoiseTool</param>
+    /// <param name="encodedNodeTreeString">Can be generated using the Node Visualiser tool</param>
     /// <param name="maxSimdLevel">Max SIMD level, Max = Auto</param>
     /// <returns>Root node of the tree, nullptr for invalid strings</returns>
     FASTNOISE_API SmartNode<> NewFromEncodedNodeTree( const char* encodedNodeTreeString, FastSIMD::FeatureSet maxFeatureSet = FastSIMD::FeatureSet::Max );
diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index c40c88f0..73c2c0ab 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -6,7 +6,7 @@
 #include "FastNoise/Metadata.h"
 #include "FastSIMD/FastSIMD_FastNoise_config.h"
 
-#include "../NoiseTool/DemoNodeTrees.inl"
+#include "../tools/DemoNodeTrees.inl"
 
 static const size_t gPositionCount = 8192;
 static float gPositionFloats[gPositionCount]; 
diff --git a/NoiseTool/CMakeLists.txt b/tools/CMakeLists.txt
similarity index 81%
rename from NoiseTool/CMakeLists.txt
rename to tools/CMakeLists.txt
index 83435f5d..2ae840a2 100644
--- a/NoiseTool/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -88,7 +88,7 @@ CPMAddPackage(
     EXCLUDE_FROM_ALL YES
 )
 
-# Ensure FastNoise.dll is built into the same dir as NoiseTool.exe
+# Ensure FastNoise.dll is built into the same dir as NodeVisualiser.exe
 set_target_properties(FastNoise
     PROPERTIES
     ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}
@@ -98,19 +98,19 @@ set_target_properties(FastNoise
 
 # Bundle a better font
 # Configure resource file for imgui source dir variable
-set(NoiseTool_RESOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(NodeVisualiser_RESOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 configure_file("resources.conf" "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
-corrade_add_resource(NoiseTool_RESOURCES "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
+corrade_add_resource(NodeVisualiser_RESOURCES "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
 
-add_executable(NoiseTool
-    "NoiseToolApp.cpp"
+add_executable(NodeVisualiser
+    "NodeVisualiserApp.cpp"
     "FastNoiseNodeEditor.cpp"
     "MeshNoisePreview.cpp"
     "NoiseTexture.cpp"
-    ${NoiseTool_RESOURCES}
+    ${NodeVisualiser_RESOURCES}
 ) 
 
-target_link_libraries(NoiseTool PRIVATE
+target_link_libraries(NodeVisualiser PRIVATE
     FastNoise
     #FastSIMD_FastNoise
     Magnum::Application
@@ -124,26 +124,26 @@ target_link_libraries(NoiseTool PRIVATE
 
 # Windows HiDPI support
 if(CORRADE_TARGET_WINDOWS)
-    target_sources(NoiseTool PRIVATE WindowsHiDPI.manifest)
+    target_sources(NodeVisualiser PRIVATE WindowsHiDPI.manifest)
 endif()
 
 if (UNIX)
-    target_link_options(NoiseTool PRIVATE -pthread)
+    target_link_options(NodeVisualiser PRIVATE -pthread)
 
     if(APPLE)
-        set_property(TARGET NoiseTool PROPERTY
+        set_property(TARGET NodeVisualiser PROPERTY
             INSTALL_RPATH "@loader_path/../lib")
     else()
-        set_property(TARGET NoiseTool PROPERTY
+        set_property(TARGET NodeVisualiser PROPERTY
             INSTALL_RPATH "\$ORIGIN/../lib")
     endif()
 endif()
 
 if (MSVC)
-    target_compile_definitions(NoiseTool PRIVATE _CRT_SECURE_NO_WARNINGS=1)
+    target_compile_definitions(NodeVisualiser PRIVATE _CRT_SECURE_NO_WARNINGS=1)
 endif()
 
-set(install_targets ${install_targets} NoiseTool PARENT_SCOPE)
+set(install_targets ${install_targets} NodeVisualiser PARENT_SCOPE)
 
 # Make the executable a default target to build & run in Visual Studio
-set_property(DIRECTORY ${PROJECT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT NoiseTool)
+set_property(DIRECTORY ${PROJECT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT NodeVisualiser)
diff --git a/NoiseTool/DemoNodeTrees.inl b/tools/DemoNodeTrees.inl
similarity index 100%
rename from NoiseTool/DemoNodeTrees.inl
rename to tools/DemoNodeTrees.inl
diff --git a/NoiseTool/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
similarity index 99%
rename from NoiseTool/FastNoiseNodeEditor.cpp
rename to tools/FastNoiseNodeEditor.cpp
index da1bdb57..950edde2 100644
--- a/NoiseTool/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -372,7 +372,7 @@ void FastNoiseNodeEditor::Node::SerialiseIncludingDependancies( ImGuiSettingsHan
 void FastNoiseNodeEditor::SetupSettingsHandlers()
 {
     ImGuiSettingsHandler nodeSettings;
-    nodeSettings.TypeName = "NoiseToolNodeData";
+    nodeSettings.TypeName = "NodeVisualiserNodeData";
     nodeSettings.TypeHash = ImHashStr( nodeSettings.TypeName );
     nodeSettings.UserData = this;
     nodeSettings.WriteAllFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* outBuf )
@@ -471,7 +471,7 @@ void FastNoiseNodeEditor::SetupSettingsHandlers()
 
 
     ImGuiSettingsHandler editorSettings;
-    editorSettings.TypeName = "NoiseToolNodeGraph";
+    editorSettings.TypeName = "NodeVisualiserNodeGraph";
     editorSettings.TypeHash = ImHashStr( editorSettings.TypeName );
     editorSettings.UserData = this;
     editorSettings.WriteAllFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* outBuf )
diff --git a/NoiseTool/FastNoiseNodeEditor.h b/tools/FastNoiseNodeEditor.h
similarity index 100%
rename from NoiseTool/FastNoiseNodeEditor.h
rename to tools/FastNoiseNodeEditor.h
diff --git a/NoiseTool/ImGuiExtra.h b/tools/ImGuiExtra.h
similarity index 100%
rename from NoiseTool/ImGuiExtra.h
rename to tools/ImGuiExtra.h
diff --git a/NoiseTool/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
similarity index 98%
rename from NoiseTool/MeshNoisePreview.cpp
rename to tools/MeshNoisePreview.cpp
index a5a56fb7..d77d4513 100644
--- a/NoiseTool/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -586,7 +586,7 @@ MeshNoisePreview::Chunk::Chunk( MeshData& meshData )
 
 MeshNoisePreview::VertexLightShader::VertexLightShader()
 {
-    Utility::Resource noiseToolResources( "NoiseTool" );
+    Utility::Resource NodeVisualiserResources( "NodeVisualiser" );
 
 #ifndef MAGNUM_TARGET_GLES
     const GL::Version version = GL::Context::current().supportedVersion( { GL::Version::GL320, GL::Version::GL310, GL::Version::GL300, GL::Version::GL210 } );
@@ -598,9 +598,9 @@ MeshNoisePreview::VertexLightShader::VertexLightShader()
     GL::Shader frag = CreateShader( version, GL::Shader::Type::Fragment );
     
     CORRADE_INTERNAL_ASSERT_OUTPUT(
-        vert.addSource( noiseToolResources.getString( "VertexLight.vert" ) ).compile() );
+        vert.addSource( NodeVisualiserResources.getString( "VertexLight.vert" ) ).compile() );
     CORRADE_INTERNAL_ASSERT_OUTPUT( 
-        frag.addSource( noiseToolResources.getString( "VertexLight.frag" ) ).compile() );
+        frag.addSource( NodeVisualiserResources.getString( "VertexLight.frag" ) ).compile() );
 
     attachShader( vert );
     attachShader( frag );
@@ -685,7 +685,7 @@ float MeshNoisePreview::GetTimerDurationMs()
 void MeshNoisePreview::SetupSettingsHandlers()
 {
     ImGuiSettingsHandler editorSettings;
-    editorSettings.TypeName = "NoiseToolMeshNoisePreview";
+    editorSettings.TypeName = "NodeVisualiserMeshNoisePreview";
     editorSettings.TypeHash = ImHashStr( editorSettings.TypeName );
     editorSettings.UserData = this;
     editorSettings.WriteAllFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* outBuf ) {
diff --git a/NoiseTool/MeshNoisePreview.h b/tools/MeshNoisePreview.h
similarity index 100%
rename from NoiseTool/MeshNoisePreview.h
rename to tools/MeshNoisePreview.h
diff --git a/NoiseTool/MultiThreadQueues.h b/tools/MultiThreadQueues.h
similarity index 100%
rename from NoiseTool/MultiThreadQueues.h
rename to tools/MultiThreadQueues.h
diff --git a/NoiseTool/NoiseToolApp.cpp b/tools/NodeVisualiserApp.cpp
similarity index 89%
rename from NoiseTool/NoiseToolApp.cpp
rename to tools/NodeVisualiserApp.cpp
index dc6236c9..e88a621c 100644
--- a/NoiseTool/NoiseToolApp.cpp
+++ b/tools/NodeVisualiserApp.cpp
@@ -8,7 +8,7 @@
 #include <Magnum/GL/DefaultFramebuffer.h>
 #include <Magnum/GL/Renderer.h>
 
-#include "NoiseToolApp.h"
+#include "NodeVisualiserApp.h"
 #include "ImGuiExtra.h"
 #include "FastSIMD/FastSIMD_FastNoise_config.h"
 
@@ -17,14 +17,14 @@ using namespace Magnum;
 void InitResources()
 {
 #ifdef MAGNUM_BUILD_STATIC
-    CORRADE_RESOURCE_INITIALIZE( NoiseTool_RESOURCES )
+    CORRADE_RESOURCE_INITIALIZE( NodeVisualiser_RESOURCES )
 #endif
 }
 
-NoiseToolApp::NoiseToolApp( const Arguments& arguments ) :
+NodeVisualiserApp::NodeVisualiserApp( const Arguments& arguments ) :
     Platform::Application{ arguments,
     Configuration{}
-    .setTitle( "FastNoise2 NoiseTool" )
+    .setTitle( "FastNoise2 Node Visualiser" )
     .setSize( Vector2i( 1280, 720 ) )
     .setWindowFlags( Configuration::WindowFlag::Resizable | Configuration::WindowFlag::Maximized ),
     GLConfiguration{}
@@ -41,11 +41,11 @@ NoiseToolApp::NoiseToolApp( const Arguments& arguments ) :
     {
         ImFontConfig fontConfig;
         fontConfig.FontDataOwnedByAtlas = false;
-        const auto font = Utility::Resource{ "NoiseTool" }.getRaw( "Font.ttf" );
+        const auto font = Utility::Resource{ "NodeVisualiser" }.getRaw( "Font.ttf" );
         ImGui::GetIO().Fonts->AddFontFromMemoryTTF( const_cast<char*>( font.data() ), (int)font.size(), 14.0f * framebufferSize().x() / size.x(), &fontConfig );
     }
 
-    ImGui::GetIO().IniFilename = "NoiseTool.ini";
+    ImGui::GetIO().IniFilename = "NodeVisualiser.ini";
     mImGuiIntegrationContext = ImGuiIntegration::Context( *mImGuiContext, size, windowSize(), framebufferSize() );
 
     GL::Renderer::enable( GL::Renderer::Feature::DepthTest );
@@ -76,14 +76,14 @@ NoiseToolApp::NoiseToolApp( const Arguments& arguments ) :
     }
 }
 
-NoiseToolApp::~NoiseToolApp()
+NodeVisualiserApp::~NodeVisualiserApp()
 {
     // Avoid trying to save settings after node editor is already destroyed
     ImGui::SaveIniSettingsToDisk( ImGui::GetIO().IniFilename );
     ImGui::GetIO().IniFilename = nullptr;
 }
 
-void NoiseToolApp::drawEvent()
+void NodeVisualiserApp::drawEvent()
 {
     GL::defaultFramebuffer.clear( GL::FramebufferClear::Color | GL::FramebufferClear::Depth );
 
@@ -188,7 +188,7 @@ void NoiseToolApp::drawEvent()
     mFrameTime.nextFrame();
 }
 
-void NoiseToolApp::viewportEvent( ViewportEvent& event )
+void NodeVisualiserApp::viewportEvent( ViewportEvent& event )
 {
     GL::defaultFramebuffer.setViewport( { {}, event.framebufferSize() } );
 
@@ -197,7 +197,7 @@ void NoiseToolApp::viewportEvent( ViewportEvent& event )
     mImGuiIntegrationContext.relayout( Vector2 { event.windowSize() } / event.dpiScaling(), event.windowSize(), event.framebufferSize() );
 }
 
-void NoiseToolApp::keyPressEvent( KeyEvent& event )
+void NodeVisualiserApp::keyPressEvent( KeyEvent& event )
 {
     if( mImGuiIntegrationContext.handleKeyPressEvent( event ) )
         return;
@@ -205,7 +205,7 @@ void NoiseToolApp::keyPressEvent( KeyEvent& event )
     HandleKeyEvent( event.key(), true );
 }
 
-void NoiseToolApp::keyReleaseEvent( KeyEvent& event )
+void NodeVisualiserApp::keyReleaseEvent( KeyEvent& event )
 {
     if( mImGuiIntegrationContext.handleKeyReleaseEvent( event ) )
         return;
@@ -213,7 +213,7 @@ void NoiseToolApp::keyReleaseEvent( KeyEvent& event )
     HandleKeyEvent( event.key(), false );
 }
 
-void NoiseToolApp::HandleKeyEvent( KeyEvent::Key key, bool value )
+void NodeVisualiserApp::HandleKeyEvent( KeyEvent::Key key, bool value )
 {
     switch( key )
     {
@@ -264,7 +264,7 @@ void NoiseToolApp::HandleKeyEvent( KeyEvent::Key key, bool value )
     }
 }
 
-void NoiseToolApp::mousePressEvent( MouseEvent& event )
+void NodeVisualiserApp::mousePressEvent( MouseEvent& event )
 {
     if( mImGuiIntegrationContext.handleMousePressEvent( event ) )
         return;
@@ -274,7 +274,7 @@ void NoiseToolApp::mousePressEvent( MouseEvent& event )
     event.setAccepted();
 }
 
-void NoiseToolApp::mouseReleaseEvent( MouseEvent& event )
+void NodeVisualiserApp::mouseReleaseEvent( MouseEvent& event )
 {
     if( mImGuiIntegrationContext.handleMouseReleaseEvent( event ) )
         return;
@@ -282,7 +282,7 @@ void NoiseToolApp::mouseReleaseEvent( MouseEvent& event )
     event.setAccepted();
 }
 
-void NoiseToolApp::mouseScrollEvent( MouseScrollEvent& event ) {
+void NodeVisualiserApp::mouseScrollEvent( MouseScrollEvent& event ) {
     if( mImGuiIntegrationContext.handleMouseScrollEvent( event ) )
     {
         /* Prevent scrolling the page */
@@ -291,7 +291,7 @@ void NoiseToolApp::mouseScrollEvent( MouseScrollEvent& event ) {
     }
 }
 
-void NoiseToolApp::mouseMoveEvent( MouseMoveEvent& event )
+void NodeVisualiserApp::mouseMoveEvent( MouseMoveEvent& event )
 {
     if( mImGuiIntegrationContext.handleMouseMoveEvent( event ) )
         return;
@@ -315,16 +315,16 @@ void NoiseToolApp::mouseMoveEvent( MouseMoveEvent& event )
     event.setAccepted();
 }
 
-void NoiseToolApp::textInputEvent( TextInputEvent& event )
+void NodeVisualiserApp::textInputEvent( TextInputEvent& event )
 {
     if( mImGuiIntegrationContext.handleTextInputEvent( event ) )
         return;
 }
 
-void NoiseToolApp::UpdatePespectiveProjection()
+void NodeVisualiserApp::UpdatePespectiveProjection()
 {
     mCamera.setProjectionMatrix( Matrix4::perspectiveProjection( Deg( 70.0f ), Vector2{ windowSize() }.aspectRatio(), 2.0f, 3500.0f ) );
 }
 
 
-MAGNUM_APPLICATION_MAIN( NoiseToolApp )
+MAGNUM_APPLICATION_MAIN( NodeVisualiserApp )
diff --git a/NoiseTool/NoiseToolApp.h b/tools/NodeVisualiserApp.h
similarity index 92%
rename from NoiseTool/NoiseToolApp.h
rename to tools/NodeVisualiserApp.h
index bae0f347..c85a1571 100644
--- a/NoiseTool/NoiseToolApp.h
+++ b/tools/NodeVisualiserApp.h
@@ -12,11 +12,11 @@
 
 namespace Magnum
 {
-    class NoiseToolApp : public Platform::Application
+    class NodeVisualiserApp : public Platform::Application
     {
     public:
-        explicit NoiseToolApp( const Arguments& arguments );
-        ~NoiseToolApp();
+        explicit NodeVisualiserApp( const Arguments& arguments );
+        ~NodeVisualiserApp();
 
     private:
         void drawEvent() override;
diff --git a/NoiseTool/NoiseTexture.cpp b/tools/NoiseTexture.cpp
similarity index 99%
rename from NoiseTool/NoiseTexture.cpp
rename to tools/NoiseTexture.cpp
index 434700e7..d894f23c 100644
--- a/NoiseTool/NoiseTexture.cpp
+++ b/tools/NoiseTexture.cpp
@@ -383,7 +383,7 @@ void NoiseTexture::GenerateLoopThread( GenerateQueue<BuildData>& generateQueue,
 void NoiseTexture::SetupSettingsHandlers()
 {
     ImGuiSettingsHandler editorSettings;
-    editorSettings.TypeName = "NoiseToolNoiseTexture";
+    editorSettings.TypeName = "NodeVisualiserNoiseTexture";
     editorSettings.TypeHash = ImHashStr( editorSettings.TypeName );
     editorSettings.UserData = this;
     editorSettings.WriteAllFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* outBuf ) {
diff --git a/NoiseTool/NoiseTexture.h b/tools/NoiseTexture.h
similarity index 100%
rename from NoiseTool/NoiseTexture.h
rename to tools/NoiseTexture.h
diff --git a/NoiseTool/VertexLight.frag b/tools/VertexLight.frag
similarity index 100%
rename from NoiseTool/VertexLight.frag
rename to tools/VertexLight.frag
diff --git a/NoiseTool/VertexLight.vert b/tools/VertexLight.vert
similarity index 100%
rename from NoiseTool/VertexLight.vert
rename to tools/VertexLight.vert
diff --git a/NoiseTool/WindowsHiDPI.manifest b/tools/WindowsHiDPI.manifest
similarity index 100%
rename from NoiseTool/WindowsHiDPI.manifest
rename to tools/WindowsHiDPI.manifest
diff --git a/NoiseTool/resources.conf b/tools/resources.conf
similarity index 50%
rename from NoiseTool/resources.conf
rename to tools/resources.conf
index 86773924..4c4867f5 100644
--- a/NoiseTool/resources.conf
+++ b/tools/resources.conf
@@ -1,11 +1,11 @@
-group=NoiseTool
+group=NodeVisualiser
 
 [file]
-filename=${NoiseTool_RESOURCES_DIR}/VertexLight.frag
+filename=${NodeVisualiser_RESOURCES_DIR}/VertexLight.frag
 alias=VertexLight.frag
 
 [file]
-filename=${NoiseTool_RESOURCES_DIR}/VertexLight.vert
+filename=${NodeVisualiser_RESOURCES_DIR}/VertexLight.vert
 alias=VertexLight.vert
 
 [file]

From 2ca53fa6216369cd2d8af98c1ed4cdd9ac274563 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 25 Oct 2023 22:37:21 +0100
Subject: [PATCH 045/139] Rename Node Visualiser to Node Editor

I know
---
 .github/workflows/main.yml                    |  2 +-
 CMakeLists.txt                                |  2 +-
 README.md                                     |  8 ++--
 include/FastNoise/FastNoise.h                 |  2 +-
 tools/CMakeLists.txt                          | 29 +++++++-------
 tools/FastNoiseNodeEditor.cpp                 |  4 +-
 tools/MeshNoisePreview.cpp                    |  8 ++--
 ...odeVisualiserApp.cpp => NodeEditorApp.cpp} | 38 +++++++++----------
 .../{NodeVisualiserApp.h => NodeEditorApp.h}  |  6 +--
 tools/NoiseTexture.cpp                        |  2 +-
 tools/resources.conf                          |  6 +--
 11 files changed, 54 insertions(+), 53 deletions(-)
 rename tools/{NodeVisualiserApp.cpp => NodeEditorApp.cpp} (89%)
 rename tools/{NodeVisualiserApp.h => NodeEditorApp.h} (92%)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index cb1e06e6..672c0134 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -61,7 +61,7 @@ jobs:
       run: cmake --build ${{ github.workspace }}/release --config Release --target install --parallel 4
     
     - if: runner.os != 'Windows'
-      run: chmod +x ${{ github.workspace }}/install/FastNoise2/bin/NodeVisualiser
+      run: chmod +x ${{ github.workspace }}/install/FastNoise2/bin/NodeEditor
     
     - name: 'Upload artifact'
       uses: actions/upload-artifact@v3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 37a779fe..2111ebb7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ endif()
 # Build DLL
 #set(BUILD_SHARED_LIBS ON) 
 
-option(FASTNOISE2_TOOLS "Build \"Node Visualiser\" executable" ${FASTNOISE2_STANDALONE_PROJECT})
+option(FASTNOISE2_TOOLS "Build \"Node Editor\" executable" ${FASTNOISE2_STANDALONE_PROJECT})
 option(FASTNOISE2_TESTS "Build tests" OFF)
 
 if(MSVC)
diff --git a/README.md b/README.md
index f5deb77f..518361e8 100644
--- a/README.md
+++ b/README.md
@@ -36,13 +36,13 @@ Bindings:
 Roadmap:
 - [Vague collection of ideas](https://github.com/users/Auburn/projects/1)
 
-## Node Visualiser
+## Node Editor
 
-The FastNoise2 Node Visualiser tool provides a node graph editor to create trees of FastNoise2 nodes. Node trees can be exported as serialised strings and loaded into the FastNoise2 library in your own code. Node Visualiser has 2D and 3D previews for the node graph output, see screenshots below for examples.
+The FastNoise2 Node Editor tool provides a node graph editor to create trees of FastNoise2 nodes. Node trees can be exported as serialised strings and loaded into the FastNoise2 library in your own code. Node Editor has 2D and 3D previews for the node graph output, see screenshots below for examples.
 
-Check the [Releases](https://github.com/Auburn/FastNoise2/releases/latest) for compiled Node Visualiser binaries
+Check the [Releases](https://github.com/Auburn/FastNoise2/releases/latest) for compiled Node Editor binaries
 
-![Node Visualiser](https://user-images.githubusercontent.com/1349548/90967950-4e8da600-e4de-11ea-902a-94e72cb86481.png)
+![Node Editor](https://user-images.githubusercontent.com/1349548/90967950-4e8da600-e4de-11ea-902a-94e72cb86481.png)
 
 ## Performance
 
diff --git a/include/FastNoise/FastNoise.h b/include/FastNoise/FastNoise.h
index 4253ca8d..2163c02a 100644
--- a/include/FastNoise/FastNoise.h
+++ b/include/FastNoise/FastNoise.h
@@ -43,7 +43,7 @@ namespace FastNoise
     /// <example>
     /// FastNoise::SmartNode<> rootNode = FastNoise::NewFromEncodedNodeTree( "DQAFAAAAAAAAQAgAAAAAAD8AAAAAAA==" );
     /// </example>
-    /// <param name="encodedNodeTreeString">Can be generated using the Node Visualiser tool</param>
+    /// <param name="encodedNodeTreeString">Can be generated using the Node Editor tool</param>
     /// <param name="maxSimdLevel">Max SIMD level, Max = Auto</param>
     /// <returns>Root node of the tree, nullptr for invalid strings</returns>
     FASTNOISE_API SmartNode<> NewFromEncodedNodeTree( const char* encodedNodeTreeString, FastSIMD::FeatureSet maxFeatureSet = FastSIMD::FeatureSet::Max );
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 2ae840a2..0b5a423c 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -88,7 +88,7 @@ CPMAddPackage(
     EXCLUDE_FROM_ALL YES
 )
 
-# Ensure FastNoise.dll is built into the same dir as NodeVisualiser.exe
+# Ensure FastNoise.dll is built into the same dir as NodeEditor.exe
 set_target_properties(FastNoise
     PROPERTIES
     ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}
@@ -98,19 +98,20 @@ set_target_properties(FastNoise
 
 # Bundle a better font
 # Configure resource file for imgui source dir variable
-set(NodeVisualiser_RESOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(NodeEditor_RESOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 configure_file("resources.conf" "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
-corrade_add_resource(NodeVisualiser_RESOURCES "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
+corrade_add_resource(NodeEditor_RESOURCES "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
 
-add_executable(NodeVisualiser
-    "NodeVisualiserApp.cpp"
+add_executable(NodeEditor
+    
+    "NodeEditorApp.cpp"
     "FastNoiseNodeEditor.cpp"
     "MeshNoisePreview.cpp"
     "NoiseTexture.cpp"
-    ${NodeVisualiser_RESOURCES}
+    ${NodeEditor_RESOURCES}
 ) 
 
-target_link_libraries(NodeVisualiser PRIVATE
+target_link_libraries(NodeEditor PRIVATE
     FastNoise
     #FastSIMD_FastNoise
     Magnum::Application
@@ -124,26 +125,26 @@ target_link_libraries(NodeVisualiser PRIVATE
 
 # Windows HiDPI support
 if(CORRADE_TARGET_WINDOWS)
-    target_sources(NodeVisualiser PRIVATE WindowsHiDPI.manifest)
+    target_sources(NodeEditor PRIVATE WindowsHiDPI.manifest)
 endif()
 
 if (UNIX)
-    target_link_options(NodeVisualiser PRIVATE -pthread)
+    target_link_options(NodeEditor PRIVATE -pthread)
 
     if(APPLE)
-        set_property(TARGET NodeVisualiser PROPERTY
+        set_property(TARGET NodeEditor PROPERTY
             INSTALL_RPATH "@loader_path/../lib")
     else()
-        set_property(TARGET NodeVisualiser PROPERTY
+        set_property(TARGET NodeEditor PROPERTY
             INSTALL_RPATH "\$ORIGIN/../lib")
     endif()
 endif()
 
 if (MSVC)
-    target_compile_definitions(NodeVisualiser PRIVATE _CRT_SECURE_NO_WARNINGS=1)
+    target_compile_definitions(NodeEditor PRIVATE _CRT_SECURE_NO_WARNINGS=1)
 endif()
 
-set(install_targets ${install_targets} NodeVisualiser PARENT_SCOPE)
+set(install_targets ${install_targets} NodeEditor PARENT_SCOPE)
 
 # Make the executable a default target to build & run in Visual Studio
-set_property(DIRECTORY ${PROJECT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT NodeVisualiser)
+set_property(DIRECTORY ${PROJECT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT NodeEditor)
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 950edde2..7b2eb067 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -372,7 +372,7 @@ void FastNoiseNodeEditor::Node::SerialiseIncludingDependancies( ImGuiSettingsHan
 void FastNoiseNodeEditor::SetupSettingsHandlers()
 {
     ImGuiSettingsHandler nodeSettings;
-    nodeSettings.TypeName = "NodeVisualiserNodeData";
+    nodeSettings.TypeName = "NodeEditorNodeData";
     nodeSettings.TypeHash = ImHashStr( nodeSettings.TypeName );
     nodeSettings.UserData = this;
     nodeSettings.WriteAllFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* outBuf )
@@ -471,7 +471,7 @@ void FastNoiseNodeEditor::SetupSettingsHandlers()
 
 
     ImGuiSettingsHandler editorSettings;
-    editorSettings.TypeName = "NodeVisualiserNodeGraph";
+    editorSettings.TypeName = "NodeEditorNodeGraph";
     editorSettings.TypeHash = ImHashStr( editorSettings.TypeName );
     editorSettings.UserData = this;
     editorSettings.WriteAllFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* outBuf )
diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index d77d4513..9433115a 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -586,7 +586,7 @@ MeshNoisePreview::Chunk::Chunk( MeshData& meshData )
 
 MeshNoisePreview::VertexLightShader::VertexLightShader()
 {
-    Utility::Resource NodeVisualiserResources( "NodeVisualiser" );
+    Utility::Resource NodeEditorResources( "NodeEditor" );
 
 #ifndef MAGNUM_TARGET_GLES
     const GL::Version version = GL::Context::current().supportedVersion( { GL::Version::GL320, GL::Version::GL310, GL::Version::GL300, GL::Version::GL210 } );
@@ -598,9 +598,9 @@ MeshNoisePreview::VertexLightShader::VertexLightShader()
     GL::Shader frag = CreateShader( version, GL::Shader::Type::Fragment );
     
     CORRADE_INTERNAL_ASSERT_OUTPUT(
-        vert.addSource( NodeVisualiserResources.getString( "VertexLight.vert" ) ).compile() );
+        vert.addSource( NodeEditorResources.getString( "VertexLight.vert" ) ).compile() );
     CORRADE_INTERNAL_ASSERT_OUTPUT( 
-        frag.addSource( NodeVisualiserResources.getString( "VertexLight.frag" ) ).compile() );
+        frag.addSource( NodeEditorResources.getString( "VertexLight.frag" ) ).compile() );
 
     attachShader( vert );
     attachShader( frag );
@@ -685,7 +685,7 @@ float MeshNoisePreview::GetTimerDurationMs()
 void MeshNoisePreview::SetupSettingsHandlers()
 {
     ImGuiSettingsHandler editorSettings;
-    editorSettings.TypeName = "NodeVisualiserMeshNoisePreview";
+    editorSettings.TypeName = "NodeEditorMeshNoisePreview";
     editorSettings.TypeHash = ImHashStr( editorSettings.TypeName );
     editorSettings.UserData = this;
     editorSettings.WriteAllFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* outBuf ) {
diff --git a/tools/NodeVisualiserApp.cpp b/tools/NodeEditorApp.cpp
similarity index 89%
rename from tools/NodeVisualiserApp.cpp
rename to tools/NodeEditorApp.cpp
index e88a621c..4a9fa558 100644
--- a/tools/NodeVisualiserApp.cpp
+++ b/tools/NodeEditorApp.cpp
@@ -8,7 +8,7 @@
 #include <Magnum/GL/DefaultFramebuffer.h>
 #include <Magnum/GL/Renderer.h>
 
-#include "NodeVisualiserApp.h"
+#include "NodeEditorApp.h"
 #include "ImGuiExtra.h"
 #include "FastSIMD/FastSIMD_FastNoise_config.h"
 
@@ -17,14 +17,14 @@ using namespace Magnum;
 void InitResources()
 {
 #ifdef MAGNUM_BUILD_STATIC
-    CORRADE_RESOURCE_INITIALIZE( NodeVisualiser_RESOURCES )
+    CORRADE_RESOURCE_INITIALIZE( NodeEditor_RESOURCES )
 #endif
 }
 
-NodeVisualiserApp::NodeVisualiserApp( const Arguments& arguments ) :
+NodeEditorApp::NodeEditorApp( const Arguments& arguments ) :
     Platform::Application{ arguments,
     Configuration{}
-    .setTitle( "FastNoise2 Node Visualiser" )
+    .setTitle( "FastNoise2 Node Editor" )
     .setSize( Vector2i( 1280, 720 ) )
     .setWindowFlags( Configuration::WindowFlag::Resizable | Configuration::WindowFlag::Maximized ),
     GLConfiguration{}
@@ -41,11 +41,11 @@ NodeVisualiserApp::NodeVisualiserApp( const Arguments& arguments ) :
     {
         ImFontConfig fontConfig;
         fontConfig.FontDataOwnedByAtlas = false;
-        const auto font = Utility::Resource{ "NodeVisualiser" }.getRaw( "Font.ttf" );
+        const auto font = Utility::Resource{ "NodeEditor" }.getRaw( "Font.ttf" );
         ImGui::GetIO().Fonts->AddFontFromMemoryTTF( const_cast<char*>( font.data() ), (int)font.size(), 14.0f * framebufferSize().x() / size.x(), &fontConfig );
     }
 
-    ImGui::GetIO().IniFilename = "NodeVisualiser.ini";
+    ImGui::GetIO().IniFilename = "NodeEditor.ini";
     mImGuiIntegrationContext = ImGuiIntegration::Context( *mImGuiContext, size, windowSize(), framebufferSize() );
 
     GL::Renderer::enable( GL::Renderer::Feature::DepthTest );
@@ -76,14 +76,14 @@ NodeVisualiserApp::NodeVisualiserApp( const Arguments& arguments ) :
     }
 }
 
-NodeVisualiserApp::~NodeVisualiserApp()
+NodeEditorApp::~NodeEditorApp()
 {
     // Avoid trying to save settings after node editor is already destroyed
     ImGui::SaveIniSettingsToDisk( ImGui::GetIO().IniFilename );
     ImGui::GetIO().IniFilename = nullptr;
 }
 
-void NodeVisualiserApp::drawEvent()
+void NodeEditorApp::drawEvent()
 {
     GL::defaultFramebuffer.clear( GL::FramebufferClear::Color | GL::FramebufferClear::Depth );
 
@@ -188,7 +188,7 @@ void NodeVisualiserApp::drawEvent()
     mFrameTime.nextFrame();
 }
 
-void NodeVisualiserApp::viewportEvent( ViewportEvent& event )
+void NodeEditorApp::viewportEvent( ViewportEvent& event )
 {
     GL::defaultFramebuffer.setViewport( { {}, event.framebufferSize() } );
 
@@ -197,7 +197,7 @@ void NodeVisualiserApp::viewportEvent( ViewportEvent& event )
     mImGuiIntegrationContext.relayout( Vector2 { event.windowSize() } / event.dpiScaling(), event.windowSize(), event.framebufferSize() );
 }
 
-void NodeVisualiserApp::keyPressEvent( KeyEvent& event )
+void NodeEditorApp::keyPressEvent( KeyEvent& event )
 {
     if( mImGuiIntegrationContext.handleKeyPressEvent( event ) )
         return;
@@ -205,7 +205,7 @@ void NodeVisualiserApp::keyPressEvent( KeyEvent& event )
     HandleKeyEvent( event.key(), true );
 }
 
-void NodeVisualiserApp::keyReleaseEvent( KeyEvent& event )
+void NodeEditorApp::keyReleaseEvent( KeyEvent& event )
 {
     if( mImGuiIntegrationContext.handleKeyReleaseEvent( event ) )
         return;
@@ -213,7 +213,7 @@ void NodeVisualiserApp::keyReleaseEvent( KeyEvent& event )
     HandleKeyEvent( event.key(), false );
 }
 
-void NodeVisualiserApp::HandleKeyEvent( KeyEvent::Key key, bool value )
+void NodeEditorApp::HandleKeyEvent( KeyEvent::Key key, bool value )
 {
     switch( key )
     {
@@ -264,7 +264,7 @@ void NodeVisualiserApp::HandleKeyEvent( KeyEvent::Key key, bool value )
     }
 }
 
-void NodeVisualiserApp::mousePressEvent( MouseEvent& event )
+void NodeEditorApp::mousePressEvent( MouseEvent& event )
 {
     if( mImGuiIntegrationContext.handleMousePressEvent( event ) )
         return;
@@ -274,7 +274,7 @@ void NodeVisualiserApp::mousePressEvent( MouseEvent& event )
     event.setAccepted();
 }
 
-void NodeVisualiserApp::mouseReleaseEvent( MouseEvent& event )
+void NodeEditorApp::mouseReleaseEvent( MouseEvent& event )
 {
     if( mImGuiIntegrationContext.handleMouseReleaseEvent( event ) )
         return;
@@ -282,7 +282,7 @@ void NodeVisualiserApp::mouseReleaseEvent( MouseEvent& event )
     event.setAccepted();
 }
 
-void NodeVisualiserApp::mouseScrollEvent( MouseScrollEvent& event ) {
+void NodeEditorApp::mouseScrollEvent( MouseScrollEvent& event ) {
     if( mImGuiIntegrationContext.handleMouseScrollEvent( event ) )
     {
         /* Prevent scrolling the page */
@@ -291,7 +291,7 @@ void NodeVisualiserApp::mouseScrollEvent( MouseScrollEvent& event ) {
     }
 }
 
-void NodeVisualiserApp::mouseMoveEvent( MouseMoveEvent& event )
+void NodeEditorApp::mouseMoveEvent( MouseMoveEvent& event )
 {
     if( mImGuiIntegrationContext.handleMouseMoveEvent( event ) )
         return;
@@ -315,16 +315,16 @@ void NodeVisualiserApp::mouseMoveEvent( MouseMoveEvent& event )
     event.setAccepted();
 }
 
-void NodeVisualiserApp::textInputEvent( TextInputEvent& event )
+void NodeEditorApp::textInputEvent( TextInputEvent& event )
 {
     if( mImGuiIntegrationContext.handleTextInputEvent( event ) )
         return;
 }
 
-void NodeVisualiserApp::UpdatePespectiveProjection()
+void NodeEditorApp::UpdatePespectiveProjection()
 {
     mCamera.setProjectionMatrix( Matrix4::perspectiveProjection( Deg( 70.0f ), Vector2{ windowSize() }.aspectRatio(), 2.0f, 3500.0f ) );
 }
 
 
-MAGNUM_APPLICATION_MAIN( NodeVisualiserApp )
+MAGNUM_APPLICATION_MAIN( NodeEditorApp )
diff --git a/tools/NodeVisualiserApp.h b/tools/NodeEditorApp.h
similarity index 92%
rename from tools/NodeVisualiserApp.h
rename to tools/NodeEditorApp.h
index c85a1571..60ce3df0 100644
--- a/tools/NodeVisualiserApp.h
+++ b/tools/NodeEditorApp.h
@@ -12,11 +12,11 @@
 
 namespace Magnum
 {
-    class NodeVisualiserApp : public Platform::Application
+    class NodeEditorApp : public Platform::Application
     {
     public:
-        explicit NodeVisualiserApp( const Arguments& arguments );
-        ~NodeVisualiserApp();
+        explicit NodeEditorApp( const Arguments& arguments );
+        ~NodeEditorApp();
 
     private:
         void drawEvent() override;
diff --git a/tools/NoiseTexture.cpp b/tools/NoiseTexture.cpp
index d894f23c..2c760995 100644
--- a/tools/NoiseTexture.cpp
+++ b/tools/NoiseTexture.cpp
@@ -383,7 +383,7 @@ void NoiseTexture::GenerateLoopThread( GenerateQueue<BuildData>& generateQueue,
 void NoiseTexture::SetupSettingsHandlers()
 {
     ImGuiSettingsHandler editorSettings;
-    editorSettings.TypeName = "NodeVisualiserNoiseTexture";
+    editorSettings.TypeName = "NodeEditorNoiseTexture";
     editorSettings.TypeHash = ImHashStr( editorSettings.TypeName );
     editorSettings.UserData = this;
     editorSettings.WriteAllFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* outBuf ) {
diff --git a/tools/resources.conf b/tools/resources.conf
index 4c4867f5..1aa73822 100644
--- a/tools/resources.conf
+++ b/tools/resources.conf
@@ -1,11 +1,11 @@
-group=NodeVisualiser
+group=NodeEditor
 
 [file]
-filename=${NodeVisualiser_RESOURCES_DIR}/VertexLight.frag
+filename=${NodeEditor_RESOURCES_DIR}/VertexLight.frag
 alias=VertexLight.frag
 
 [file]
-filename=${NodeVisualiser_RESOURCES_DIR}/VertexLight.vert
+filename=${NodeEditor_RESOURCES_DIR}/VertexLight.vert
 alias=VertexLight.vert
 
 [file]

From 871f56446788aba77fb883e78141a3f4a5d19037 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 28 Oct 2023 20:48:13 +0100
Subject: [PATCH 046/139] Remove frequency param from GenUniformGrid functions

---
 include/FastNoise/FastNoise_C.h               |  12 +-
 .../FastNoise/Generators/BasicGenerators.h    |   2 +-
 include/FastNoise/Generators/Generator.h      |   8 +-
 include/FastNoise/Generators/Generator.inl    |  54 +++---
 include/FastNoise/Generators/Modifiers.h      |   8 +-
 src/FastNoise/FastNoise_C.cpp                 |  16 +-
 tests/FastNoiseCpp11Include.cpp               |   2 +-
 tools/FastNoiseNodeEditor.cpp                 |  24 +--
 tools/FastNoiseNodeEditor.h                   |   2 +-
 tools/MeshNoisePreview.cpp                    | 155 +++++++++---------
 tools/MeshNoisePreview.h                      |   3 +-
 tools/NoiseTexture.cpp                        |  36 ++--
 tools/NoiseTexture.h                          |   2 +-
 13 files changed, 163 insertions(+), 161 deletions(-)

diff --git a/include/FastNoise/FastNoise_C.h b/include/FastNoise/FastNoise_C.h
index 1ee53e58..13839a29 100644
--- a/include/FastNoise/FastNoise_C.h
+++ b/include/FastNoise/FastNoise_C.h
@@ -7,7 +7,7 @@
 extern "C" {
 #endif
 
-FASTNOISE_API void* fnNewFromEncodedNodeTree( const char* encodedString, unsigned /*FastSIMD::FeatureSet*/ simdLevel /*0 = Auto*/ );
+FASTNOISE_API void* fnNewFromEncodedNodeTree( const char* encodedString, unsigned /*FastSIMD::FeatureSet*/ simdLevel /*~0u = Auto*/ );
 
 FASTNOISE_API void fnDeleteNodeRef( void* node );
 
@@ -17,17 +17,17 @@ FASTNOISE_API int fnGetMetadataID( const void* node );
 FASTNOISE_API void fnGenUniformGrid2D( const void* node, float* noiseOut,
                                        int xStart, int yStart,
                                        int xSize, int ySize,
-                                       float frequency, int seed, float* outputMinMax /*nullptr or float[2]*/ );
+                                       int seed, float* outputMinMax /*nullptr or float[2]*/ );
 
 FASTNOISE_API void fnGenUniformGrid3D( const void* node, float* noiseOut,
                                        int xStart, int yStart, int zStart,
                                        int xSize, int ySize, int zSize,
-                                       float frequency, int seed, float* outputMinMax /*nullptr or float[2]*/ );
+                                       int seed, float* outputMinMax /*nullptr or float[2]*/ );
 
 FASTNOISE_API void fnGenUniformGrid4D( const void* node, float* noiseOut,
                                        int xStart, int yStart, int zStart, int wStart,
                                        int xSize, int ySize, int zSize, int wSize,
-                                       float frequency, int seed, float* outputMinMax /*nullptr or float[2]*/ );
+                                       int seed, float* outputMinMax /*nullptr or float[2]*/ );
 
 FASTNOISE_API void fnGenPositionArray2D( const void* node, float* noiseOut, int count,
                                          const float* xPosArray, const float* yPosArray,
@@ -46,7 +46,7 @@ FASTNOISE_API void fnGenPositionArray4D( const void* node, float* noiseOut, int
 
 FASTNOISE_API void fnGenTileable2D( const void* node, float* noiseOut,
                                     int xSize, int ySize,
-                                    float frequency, int seed, float* outputMinMax /*nullptr or float[2]*/ );
+                                    int seed, float* outputMinMax /*nullptr or float[2]*/ );
 
 FASTNOISE_API float fnGenSingle2D( const void* node, float x, float y, int seed );
 FASTNOISE_API float fnGenSingle3D( const void* node, float x, float y, float z, int seed );
@@ -54,7 +54,7 @@ FASTNOISE_API float fnGenSingle4D( const void* node, float x, float y, float z,
 
 FASTNOISE_API int fnGetMetadataCount();
 FASTNOISE_API const char* fnGetMetadataName( int id ); // valid IDs up to `fnGetMetadataCount() - 1`
-FASTNOISE_API void* fnNewFromMetadata( int id, unsigned /*FastSIMD::FeatureSet*/ simdLevel /*0 = Auto*/ );
+FASTNOISE_API void* fnNewFromMetadata( int id, unsigned /*FastSIMD::FeatureSet*/ simdLevel /*~0u = Auto*/ );
 
 FASTNOISE_API int fnGetMetadataVariableCount( int id );
 FASTNOISE_API const char* fnGetMetadataVariableName( int id, int variableIndex );
diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 0ba4388f..f900d379 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -23,7 +23,7 @@ namespace FastNoise
     {
         MetadataT()
         {
-            this->AddVariable( "Scale", 100.0f, &ScalableGenerator::SetScale );
+            this->AddVariable( "Feature Scale", 100.0f, &ScalableGenerator::SetScale );
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index cf04d82a..2acf196d 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -110,20 +110,20 @@ namespace FastNoise
         virtual OutputMinMax GenUniformGrid2D( float* out,
             int xStart, int yStart,
             int xSize,  int ySize,
-            float frequency, int seed ) const = 0;
+            int seed ) const = 0;
 
         virtual OutputMinMax GenUniformGrid3D( float* out,
             int xStart, int yStart, int zStart, 
             int xSize,  int ySize,  int zSize, 
-            float frequency, int seed ) const = 0;
+            int seed ) const = 0;
 
         virtual OutputMinMax GenUniformGrid4D( float* out,
             int xStart, int yStart, int zStart, int wStart,
             int xSize,  int ySize,  int zSize,  int wSize,
-            float frequency, int seed ) const = 0;
+            int seed ) const = 0;
 
         virtual OutputMinMax GenTileable2D( float* out,
-            int xSize, int ySize, float frequency, int seed ) const = 0; 
+            int xSize, int ySize, int seed ) const = 0; 
 
         virtual OutputMinMax GenPositionArray2D( float* out, int count,
             const float* xPosArray, const float* yPosArray,
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 4f0a1292..8366ff3f 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -76,7 +76,7 @@ public:
         return simdT;
     }
 
-    FastNoise::OutputMinMax GenUniformGrid2D( float* noiseOut, int xStart, int yStart, int xSize, int ySize, float frequency, int seed ) const final
+    FastNoise::OutputMinMax GenUniformGrid2D( float* noiseOut, int xStart, int yStart, int xSize, int ySize, int seed ) const final
     {
         float32v min( INFINITY );
         float32v max( -INFINITY );
@@ -84,8 +84,6 @@ public:
         int32v xIdx( xStart );
         int32v yIdx( yStart );
 
-        float32v freqV( frequency );
-
         int32v xSizeV( xSize );
         int32v xMax = xSizeV + xIdx + int32v( -1 );
 
@@ -98,8 +96,8 @@ public:
 
         while( index < totalValues - (intptr_t)int32v::ElementCount )
         {
-            float32v xPos = FS::Convert<float>( xIdx ) * freqV;
-            float32v yPos = FS::Convert<float>( yIdx ) * freqV;
+            float32v xPos = FS::Convert<float>( xIdx );
+            float32v yPos = FS::Convert<float>( yIdx );
 
             float32v gen = Gen( int32v( seed ), xPos, yPos );
             FS::Store( &noiseOut[index], gen );
@@ -115,15 +113,15 @@ public:
             AxisReset<false>( xIdx, yIdx, xMax, xSizeV, xSize );
         }
 
-        float32v xPos = FS::Convert<float>( xIdx ) * freqV;
-        float32v yPos = FS::Convert<float>( yIdx ) * freqV;
+        float32v xPos = FS::Convert<float>( xIdx );
+        float32v yPos = FS::Convert<float>( yIdx );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos );
 
         return DoRemaining( noiseOut, totalValues, index, min, max, gen );
     }
 
-    FastNoise::OutputMinMax GenUniformGrid3D( float* noiseOut, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float frequency, int seed ) const final
+    FastNoise::OutputMinMax GenUniformGrid3D( float* noiseOut, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int seed ) const final
     {
         float32v min( INFINITY );
         float32v max( -INFINITY );
@@ -132,8 +130,6 @@ public:
         int32v yIdx( yStart );
         int32v zIdx( zStart );
 
-        float32v freqV( frequency );
-
         int32v xSizeV( xSize );
         int32v xMax = xSizeV + xIdx + int32v( -1 );
         int32v ySizeV( ySize );
@@ -149,9 +145,9 @@ public:
 
         while( index < totalValues - (intptr_t)int32v::ElementCount )
         {
-            float32v xPos = FS::Convert<float>( xIdx ) * freqV;
-            float32v yPos = FS::Convert<float>( yIdx ) * freqV;
-            float32v zPos = FS::Convert<float>( zIdx ) * freqV;
+            float32v xPos = FS::Convert<float>( xIdx );
+            float32v yPos = FS::Convert<float>( yIdx );
+            float32v zPos = FS::Convert<float>( zIdx );
 
             float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
             FS::Store( &noiseOut[index], gen );
@@ -168,16 +164,16 @@ public:
             AxisReset<false>( yIdx, zIdx, yMax, ySizeV, xSize * ySize );
         }
 
-        float32v xPos = FS::Convert<float>( xIdx ) * freqV;
-        float32v yPos = FS::Convert<float>( yIdx ) * freqV;
-        float32v zPos = FS::Convert<float>( zIdx ) * freqV;
+        float32v xPos = FS::Convert<float>( xIdx );
+        float32v yPos = FS::Convert<float>( yIdx );
+        float32v zPos = FS::Convert<float>( zIdx );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
 
         return DoRemaining( noiseOut, totalValues, index, min, max, gen );
     }
 
-    FastNoise::OutputMinMax GenUniformGrid4D( float* noiseOut, int xStart, int yStart, int zStart, int wStart, int xSize, int ySize, int zSize, int wSize, float frequency, int seed ) const final
+    FastNoise::OutputMinMax GenUniformGrid4D( float* noiseOut, int xStart, int yStart, int zStart, int wStart, int xSize, int ySize, int zSize, int wSize, int seed ) const final
     {
         float32v min( INFINITY );
         float32v max( -INFINITY );
@@ -187,8 +183,6 @@ public:
         int32v zIdx( zStart );
         int32v wIdx( wStart );
 
-        float32v freqV( frequency );
-
         int32v xSizeV( xSize );
         int32v xMax = xSizeV + xIdx + int32v( -1 );
         int32v ySizeV( ySize );
@@ -207,10 +201,10 @@ public:
 
         while( index < totalValues - (intptr_t)int32v::ElementCount )
         {
-            float32v xPos = FS::Convert<float>( xIdx ) * freqV;
-            float32v yPos = FS::Convert<float>( yIdx ) * freqV;
-            float32v zPos = FS::Convert<float>( zIdx ) * freqV;
-            float32v wPos = FS::Convert<float>( wIdx ) * freqV;
+            float32v xPos = FS::Convert<float>( xIdx );
+            float32v yPos = FS::Convert<float>( yIdx );
+            float32v zPos = FS::Convert<float>( zIdx );
+            float32v wPos = FS::Convert<float>( wIdx );
 
             float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
             FS::Store( &noiseOut[index], gen );
@@ -228,10 +222,10 @@ public:
             AxisReset<false>( zIdx, wIdx, zMax, zSizeV, xSize * ySize * zSize );
         }
 
-        float32v xPos = FS::Convert<float>( xIdx ) * freqV;
-        float32v yPos = FS::Convert<float>( yIdx ) * freqV;
-        float32v zPos = FS::Convert<float>( zIdx ) * freqV;
-        float32v wPos = FS::Convert<float>( wIdx ) * freqV;
+        float32v xPos = FS::Convert<float>( xIdx );
+        float32v yPos = FS::Convert<float>( yIdx );
+        float32v zPos = FS::Convert<float>( zIdx );
+        float32v wPos = FS::Convert<float>( wIdx );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
 
@@ -346,7 +340,7 @@ public:
         return FS::Extract0( Gen( int32v( seed ), float32v( x ), float32v( y ), float32v( z ), float32v( w ) ) );
     }
 
-    FastNoise::OutputMinMax GenTileable2D( float* noiseOut, int xSize, int ySize, float frequency, int seed ) const final
+    FastNoise::OutputMinMax GenTileable2D( float* noiseOut, int xSize, int ySize, int seed ) const final
     {
         float32v min( INFINITY );
         float32v max( -INFINITY );
@@ -364,8 +358,8 @@ public:
         float pi2Recip( 0.15915493667f );
         float xSizePi = (float)xSize * pi2Recip;
         float ySizePi = (float)ySize * pi2Recip;
-        float32v xFreq = float32v( frequency * xSizePi );
-        float32v yFreq = float32v( frequency * ySizePi );
+        float32v xFreq = float32v( xSizePi );
+        float32v yFreq = float32v( ySizePi );
         float32v xMul = float32v( 1 / xSizePi );
         float32v yMul = float32v( 1 / ySizePi );
 
diff --git a/include/FastNoise/Generators/Modifiers.h b/include/FastNoise/Generators/Modifiers.h
index d7e42060..9018e61c 100644
--- a/include/FastNoise/Generators/Modifiers.h
+++ b/include/FastNoise/Generators/Modifiers.h
@@ -9,7 +9,7 @@ namespace FastNoise
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
-        void SetScale( float value ) { mScale = value; }
+        void SetScaling( float value ) { mScale = value; }
 
     protected:
         GeneratorSource mSource;
@@ -26,7 +26,7 @@ namespace FastNoise
         {
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &DomainScale::SetSource );
-            this->AddVariable( "Scale", 1.0f, &DomainScale::SetScale );
+            this->AddVariable( "Scaling", 1.0f, &DomainScale::SetScaling );
         }
     };
 #endif
@@ -299,7 +299,7 @@ namespace FastNoise
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
 
         template<Dim D>
-        void SetScale( float value ) { mScale[(int)D] = value; }
+        void SetScaling( float value ) { mScale[(int)D] = value; }
 
     protected:
         GeneratorSource mSource;
@@ -319,7 +319,7 @@ namespace FastNoise
         {
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &DomainAxisScale::SetSource );
-            this->AddPerDimensionVariable( "Scale", 1.0f, []( DomainAxisScale* p ) { return std::ref( p->mScale ); } );
+            this->AddPerDimensionVariable( "Scaling", 1.0f, []( DomainAxisScale* p ) { return std::ref( p->mScale ); } );
         }
     };
 #endif
diff --git a/src/FastNoise/FastNoise_C.cpp b/src/FastNoise/FastNoise_C.cpp
index bcc39dbb..954fbcd7 100644
--- a/src/FastNoise/FastNoise_C.cpp
+++ b/src/FastNoise/FastNoise_C.cpp
@@ -45,19 +45,19 @@ int fnGetMetadataID( const void* node )
     return ToGen( node )->GetMetadata().id;
 }
 
-void fnGenUniformGrid2D( const void* node, float* noiseOut, int xStart, int yStart, int xSize, int ySize, float frequency, int seed, float* outputMinMax )
+void fnGenUniformGrid2D( const void* node, float* noiseOut, int xStart, int yStart, int xSize, int ySize, int seed, float* outputMinMax )
 {
-    StoreMinMax( outputMinMax, ToGen( node )->GenUniformGrid2D( noiseOut, xStart, yStart, xSize, ySize, frequency, seed ) );    
+    StoreMinMax( outputMinMax, ToGen( node )->GenUniformGrid2D( noiseOut, xStart, yStart, xSize, ySize, seed ) );    
 }
 
-void fnGenUniformGrid3D( const void* node, float* noiseOut, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float frequency, int seed, float* outputMinMax )
+void fnGenUniformGrid3D( const void* node, float* noiseOut, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int seed, float* outputMinMax )
 {
-    StoreMinMax( outputMinMax, ToGen( node )->GenUniformGrid3D( noiseOut, xStart, yStart, zStart, xSize, ySize, zSize, frequency, seed ) );    
+    StoreMinMax( outputMinMax, ToGen( node )->GenUniformGrid3D( noiseOut, xStart, yStart, zStart, xSize, ySize, zSize, seed ) );    
 }
 
-void fnGenUniformGrid4D( const void* node, float* noiseOut, int xStart, int yStart, int zStart, int wStart, int xSize, int ySize, int zSize, int wSize, float frequency, int seed, float* outputMinMax )
+void fnGenUniformGrid4D( const void* node, float* noiseOut, int xStart, int yStart, int zStart, int wStart, int xSize, int ySize, int zSize, int wSize, int seed, float* outputMinMax )
 {
-    StoreMinMax( outputMinMax, ToGen( node )->GenUniformGrid4D( noiseOut, xStart, yStart, zStart, wStart, xSize, ySize, zSize, wSize, frequency, seed ) );    
+    StoreMinMax( outputMinMax, ToGen( node )->GenUniformGrid4D( noiseOut, xStart, yStart, zStart, wStart, xSize, ySize, zSize, wSize, seed ) );    
 }
 
 void fnGenPositionArray2D( const void* node, float* noiseOut, int count, const float* xPosArray, const float* yPosArray, float xOffset, float yOffset, int seed, float* outputMinMax )
@@ -90,9 +90,9 @@ float fnGenSingle4D( const void* node, float x, float y, float z, float w, int s
     return ToGen( node )->GenSingle4D( x, y, z, w, seed );
 }
 
-void fnGenTileable2D( const void* node, float* noiseOut, int xSize, int ySize, float frequency, int seed, float* outputMinMax )
+void fnGenTileable2D( const void* node, float* noiseOut, int xSize, int ySize, int seed, float* outputMinMax )
 {
-    StoreMinMax( outputMinMax, ToGen( node )->GenTileable2D( noiseOut, xSize, ySize, frequency, seed ) );
+    StoreMinMax( outputMinMax, ToGen( node )->GenTileable2D( noiseOut, xSize, ySize, seed ) );
 }
 
 int fnGetMetadataCount()
diff --git a/tests/FastNoiseCpp11Include.cpp b/tests/FastNoiseCpp11Include.cpp
index 69d542e7..71192e20 100644
--- a/tests/FastNoiseCpp11Include.cpp
+++ b/tests/FastNoiseCpp11Include.cpp
@@ -16,7 +16,7 @@ int main()
 
     float noise[size * size];
 
-    node->GenUniformGrid2D( noise, 0, 0, size, size, 0.02f, 1337 );
+    node->GenUniformGrid2D( noise, 0, 0, size, size, 1337 );
 
     for( int i = 0; i < sizeof(noise) / sizeof(float); i++ )
     {
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 7b2eb067..21b66d25 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -144,7 +144,10 @@ void FastNoiseNodeEditor::Node::GeneratePreview( bool nodeTreeChanged, bool benc
     if( generator )
     {
         auto genRGB = FastNoise::New<FastNoise::ConvertRGBA8>( editor.mMaxFeatureSet );
-        genRGB->SetSource( generator );
+        auto scale = FastNoise::New<FastNoise::DomainScale>( editor.mMaxFeatureSet );
+        genRGB->SetSource( scale );
+        scale->SetSource( generator );
+        scale->SetScaling( 1 / editor.mNodeScale );
 
         FastNoise::SmartNode<FastNoise::ConvertRGBA8> l(nullptr);
         
@@ -482,7 +485,7 @@ void FastNoiseNodeEditor::SetupSettingsHandlers()
         ImVec2 gridOffset = ImNodes::EditorContextGetPanning();
         outBuf->appendf( "grid_offset=%f:%f\n", gridOffset.x, gridOffset.y );
 
-        outBuf->appendf( "frequency=%f\n", nodeEditor->mNodeFrequency );
+        outBuf->appendf( "scale=%f\n", nodeEditor->mNodeScale );
         outBuf->appendf( "seed=%d\n", nodeEditor->mNodeSeed );
         outBuf->appendf( "gen_type=%d\n", (int)nodeEditor->mNodeGenType );
     };
@@ -505,7 +508,7 @@ void FastNoiseNodeEditor::SetupSettingsHandlers()
             ImNodes::EditorContextResetPanning( imVec2 );
         }
 
-        sscanf( line, "frequency=%f", &nodeEditor->mNodeFrequency );
+        sscanf( line, "scale=%f", &nodeEditor->mNodeScale );
         sscanf( line, "seed=%d", &nodeEditor->mNodeSeed );
         sscanf( line, "gen_type=%d", (int*)&nodeEditor->mNodeGenType );
     };
@@ -631,7 +634,7 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
 
         edited |= ImGui::DragInt( "Seed", &mNodeSeed );
         ImGui::SameLine();
-        edited |= ImGui::DragFloat( "Frequency", &mNodeFrequency, 0.001f );    
+        edited |= ImGui::DragFloat( "Scale", &mNodeScale, 0.05f );    
         ImGui::SameLine();    
 
         if( ImGui::Button( "Retest Node Performance" ) )
@@ -1253,25 +1256,22 @@ FastNoise::OutputMinMax FastNoiseNodeEditor::GenerateNodePreviewNoise( FastNoise
     case NoiseTexture::GenType_2D:
         return gen->GenUniformGrid2D( noise,
             Node::NoiseSize / -2, Node::NoiseSize / -2,
-            Node::NoiseSize, Node::NoiseSize,
-            mNodeFrequency, mNodeSeed );
+            Node::NoiseSize, Node::NoiseSize, mNodeSeed );
 
     case NoiseTexture::GenType_2DTiled:
         return gen->GenTileable2D( noise,
-            Node::NoiseSize, Node::NoiseSize,
-            mNodeFrequency, mNodeSeed );
+            Node::NoiseSize, Node::NoiseSize, mNodeSeed );
 
     case NoiseTexture::GenType_3D:
         return gen->GenUniformGrid3D( noise,
             Node::NoiseSize / -2, Node::NoiseSize / -2, 0,
-            Node::NoiseSize, Node::NoiseSize, 1,
-            mNodeFrequency, mNodeSeed );
+            Node::NoiseSize, Node::NoiseSize, 1, mNodeSeed );
 
     case NoiseTexture::GenType_4D:
         return gen->GenUniformGrid4D( noise,
             Node::NoiseSize / -2, Node::NoiseSize / -2, 0, 0,
-            Node::NoiseSize, Node::NoiseSize, 1, 1,
-            mNodeFrequency, mNodeSeed );
+            Node::NoiseSize, Node::NoiseSize, 1, 1, mNodeSeed );
+
     case NoiseTexture::GenType_Count:
         break;
     }
diff --git a/tools/FastNoiseNodeEditor.h b/tools/FastNoiseNodeEditor.h
index 0c819850..6213b690 100644
--- a/tools/FastNoiseNodeEditor.h
+++ b/tools/FastNoiseNodeEditor.h
@@ -132,7 +132,7 @@ namespace Magnum
         int32_t mNodeBenchmarkIndex = 0;
         int32_t mNodeBenchmarkMax = 128;
 
-        float mNodeFrequency = 0.02f;
+        float mNodeScale = 0.4f;
         int mNodeSeed = 1337;
         NoiseTexture::GenType mNodeGenType = NoiseTexture::GenType_2D;
 
diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index 9433115a..9c90148b 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -1,12 +1,12 @@
 #include <algorithm>
-#include <thread>
 #include <cmath>
+#include <thread>
 
 #include <Corrade/Utility/Resource.h>
 #include <Magnum/Math/Color.h>
-#include <Magnum/Math/Matrix4.h>
 #include <Magnum/Math/Frustum.h>
 #include <Magnum/Math/Intersection.h>
+#include <Magnum/Math/Matrix4.h>
 #include <Magnum/Shaders/Implementation/CreateCompatibilityShader.h>
 
 #include "ImGuiExtra.h"
@@ -16,8 +16,8 @@ using namespace Magnum;
 
 MeshNoisePreview::MeshNoisePreview()
 {
-    mBuildData.frequency = 0.005f;
-    mBuildData.seed = 1338;
+    mBuildData.scale = 1.f;
+    mBuildData.seed = 1337;
     mBuildData.isoSurface = 0.0f;
     mBuildData.heightmapMultiplier = 100.0f;
     mBuildData.color = Color3( 1.0f );
@@ -39,7 +39,7 @@ MeshNoisePreview::~MeshNoisePreview()
 {
     mGenerateQueue.KillThreads();
 
-    for( auto& thread : mThreads )
+    for( auto& thread: mThreads )
     {
         thread.join();
     }
@@ -49,6 +49,9 @@ void MeshNoisePreview::ReGenerate( FastNoise::SmartNodeArg<> generator )
 {
     mLoadRange = 200.0f;
     mBuildData.generator = generator;
+    mBuildData.generatorScaled = FastNoise::New<FastNoise::DomainScale>( generator->GetActiveFeatureSet() );
+    mBuildData.generatorScaled->SetScaling( 1 / mBuildData.scale );
+    mBuildData.generatorScaled->SetSource( generator );
     mBuildData.pos = Vector3i( 0 );
 
     mMinMax = {};
@@ -71,8 +74,8 @@ void MeshNoisePreview::Draw( const Matrix4& transformation, const Matrix4& proje
 {
     if( ImGui::Checkbox( "Generate Mesh Preview", &mEnabled ) )
     {
-        ReGenerate( mBuildData.generator );    
-        ImGuiExtra::MarkSettingsDirty();    
+        ReGenerate( mBuildData.generator );
+        ImGuiExtra::MarkSettingsDirty();
     }
 
     if( !mBuildData.generator || !mEnabled )
@@ -91,7 +94,7 @@ void MeshNoisePreview::Draw( const Matrix4& transformation, const Matrix4& proje
     mMeshesCount = 0;
     uint32_t drawnTriCount = 0;
 
-    for( Chunk& chunk : mChunks )
+    for( Chunk& chunk: mChunks )
     {
         if( GL::Mesh* mesh = chunk.GetMesh() )
         {
@@ -100,7 +103,7 @@ void MeshNoisePreview::Draw( const Matrix4& transformation, const Matrix4& proje
             mTriCount += meshTriCount;
             mMeshesCount++;
 
-            Vector3 posf( chunk.GetPos());
+            Vector3 posf( chunk.GetPos() );
             Range3D bbox( posf, posf + Vector3( Chunk::SIZE + 1 ) );
 
             if( mBuildData.meshType == MeshType_Heightmap2D )
@@ -121,19 +124,19 @@ void MeshNoisePreview::Draw( const Matrix4& transformation, const Matrix4& proje
     bool edited = false;
     edited |= ImGui::Combo( "Mesh Type", reinterpret_cast<int*>( &mBuildData.meshType ), MeshTypeStrings );
     edited |= ImGuiExtra::ScrollCombo( reinterpret_cast<int*>( &mBuildData.meshType ), MeshType_Count );
-    
+
     if( ImGui::ColorEdit3( "Mesh Colour", mBuildData.color.data() ) )
-    {        
+    {
         mShader.SetColorTint( mBuildData.color );
         ImGuiExtra::MarkSettingsDirty();
     }
 
     edited |= ImGui::DragInt( "Seed", &mBuildData.seed );
-    edited |= ImGui::DragFloat( "Frequency", &mBuildData.frequency, 0.0005f, 0, 0, "%.4f" );
+    edited |= ImGui::DragFloat( "Scale", &mBuildData.scale, 0.05f, 0, 0, "%.4f" );
 
     if( mBuildData.meshType == MeshType_Heightmap2D )
     {
-        edited |= ImGui::DragFloat( "Heightmap Multiplier", &mBuildData.heightmapMultiplier, 0.5f );        
+        edited |= ImGui::DragFloat( "Heightmap Multiplier", &mBuildData.heightmapMultiplier, 0.5f );
     }
     else
     {
@@ -174,7 +177,7 @@ void MeshNoisePreview::Draw( const Matrix4& transformation, const Matrix4& proje
 
 float MeshNoisePreview::GetLoadRangeModifier()
 {
-    return std::min( 0.01f, (float)(1000 / std::pow( std::min( 1000.0f, mLoadRange ), 1.5 ) ) );
+    return std::min( 0.01f, (float)( 1000 / std::pow( std::min( 1000.0f, mLoadRange ), 1.5 ) ) );
 }
 
 void MeshNoisePreview::UpdateChunkQueues( const Vector3& position )
@@ -183,7 +186,7 @@ void MeshNoisePreview::UpdateChunkQueues( const Vector3& position )
 
     if( mTriCount > mTriLimit ) // Reduce load range if over tri limit
     {
-        mLoadRange = std::max( mLoadRange * (1 - GetLoadRangeModifier()), Chunk::SIZE * 1.5f );
+        mLoadRange = std::max( mLoadRange * ( 1 - GetLoadRangeModifier() ), Chunk::SIZE * 1.5f );
     }
 
     StartTimer();
@@ -191,7 +194,7 @@ void MeshNoisePreview::UpdateChunkQueues( const Vector3& position )
 
     size_t newChunks = 0;
     if( queueCount )
-    {        
+    {
         Chunk::MeshData meshData;
 
         while( GetTimerDurationMs() < 14 && mCompleteQueue.Pop( meshData ) )
@@ -199,18 +202,17 @@ void MeshNoisePreview::UpdateChunkQueues( const Vector3& position )
             mMinMax << meshData.minMax;
             mMinAirY = std::min( mMinAirY, meshData.minAirY );
             mMaxSolidY = std::max( mMaxSolidY, meshData.maxSolidY );
-            
+
             mChunks.emplace_back( meshData );
             newChunks++;
         }
-        mAvgNewChunks += (newChunks - mAvgNewChunks) * 0.01f;
+        mAvgNewChunks += ( newChunks - mAvgNewChunks ) * 0.01f;
     }
 
-    std::sort( mChunks.begin(), mChunks.end(), 
-        [chunkPos]( const Chunk& a, const Chunk& b )
-        {
-            return (chunkPos - a.GetPos()).dot() < (chunkPos - b.GetPos()).dot();
-        } );
+    std::sort( mChunks.begin(), mChunks.end(),
+               [chunkPos]( const Chunk& a, const Chunk& b ) {
+                   return ( chunkPos - a.GetPos() ).dot() < ( chunkPos - b.GetPos() ).dot();
+               } );
 
     // Unload further chunk if out of load range
     size_t deletedChunks = 0;
@@ -218,7 +220,7 @@ void MeshNoisePreview::UpdateChunkQueues( const Vector3& position )
     {
         Vector3i backChunkPos = mChunks.back().GetPos();
         float unloadRange = mLoadRange * 1.1f;
-        if( GetTimerDurationMs() < 15 && (chunkPos - backChunkPos).dot() > unloadRange * unloadRange )
+        if( GetTimerDurationMs() < 15 && ( chunkPos - backChunkPos ).dot() > unloadRange * unloadRange )
         {
             mRegisteredChunkPositions.erase( backChunkPos );
             mChunks.pop_back();
@@ -230,38 +232,37 @@ void MeshNoisePreview::UpdateChunkQueues( const Vector3& position )
         }
     }
 
-    //ImGui::Text( " Queued Chunks: %zu", queueCount );
-    //ImGui::Text( "    New Chunks: %zu (%0.1f)", newChunks, mAvgNewChunks );
-    //ImGui::Text( "Deleted Chunks: %zu", deletedChunks );
+    // ImGui::Text( " Queued Chunks: %zu", queueCount );
+    // ImGui::Text( "    New Chunks: %zu (%0.1f)", newChunks, mAvgNewChunks );
+    // ImGui::Text( "Deleted Chunks: %zu", deletedChunks );
 
     // Increase load range if queue is not full
-    if( (double)mTriCount < mTriLimit * 0.85 && (mRegisteredChunkPositions.size() - mChunks.size()) < mThreads.size() * mAvgNewChunks )
+    if( (double)mTriCount < mTriLimit * 0.85 && ( mRegisteredChunkPositions.size() - mChunks.size() ) < mThreads.size() * mAvgNewChunks )
     {
-        mLoadRange = std::min( mLoadRange * (1 + GetLoadRangeModifier()), 3000.0f );
+        mLoadRange = std::min( mLoadRange * ( 1 + GetLoadRangeModifier() ), 3000.0f );
     }
-
 }
 
 void MeshNoisePreview::UpdateChunksForPosition( Vector3 position )
 {
-    //StartTimer();
+    // StartTimer();
     int chunkRange = (int)ceilf( mLoadRange / Chunk::SIZE );
 
     position -= Vector3( Chunk::SIZE * 0.5f );
     Vector3i positionI = Vector3i( position );
 
-    Vector3i chunkCenter = (positionI / Chunk::SIZE) * Chunk::SIZE;
+    Vector3i chunkCenter = ( positionI / Chunk::SIZE ) * Chunk::SIZE;
 
     std::vector<Vector3i> chunkPositions;
     Vector3i chunkPos;
-    int loadRangeSq = (int)(mLoadRange * mLoadRange);
+    int loadRangeSq = (int)( mLoadRange * mLoadRange );
 
-    int staggerShift = std::min( 5, (int)((loadRangeSq * (int64_t)mLoadRange) / 1000000000) );
-    int staggerCount = (1 << staggerShift) - 1;
+    int staggerShift = std::min( 5, (int)( ( loadRangeSq * (int64_t)mLoadRange ) / 1000000000 ) );
+    int staggerCount = ( 1 << staggerShift ) - 1;
 
     for( int x = -chunkRange; x <= chunkRange; x++ )
     {
-        if( (x & staggerCount) != (mStaggerCheck & staggerCount) )
+        if( ( x & staggerCount ) != ( mStaggerCheck & staggerCount ) )
         {
             continue;
         }
@@ -299,10 +300,10 @@ void MeshNoisePreview::UpdateChunksForPosition( Vector3 position )
 
     std::sort( chunkPositions.begin(), chunkPositions.end(), [positionI]( const Vector3i& a, const Vector3i& b )
     {
-        return (positionI - a).dot() < (positionI - b).dot();
+        return ( positionI - a ).dot() < ( positionI - b ).dot();
     } );
 
-    for( const Vector3i& pos : chunkPositions )
+    for( const Vector3i& pos: chunkPositions )
     {
         mBuildData.pos = pos;
         mRegisteredChunkPositions.insert( pos );
@@ -313,7 +314,7 @@ void MeshNoisePreview::UpdateChunksForPosition( Vector3 position )
         }
     }
 
-    //ImGui::Text( "UpdateChunksForPosition(%d) Ms: %.2f", staggerShift, GetTimerDurationMs() );
+    // ImGui::Text( "UpdateChunksForPosition(%d) Ms: %.2f", staggerShift, GetTimerDurationMs() );
 }
 
 void MeshNoisePreview::GenerateLoopThread( GenerateQueue<Chunk::BuildData>& generateQueue, CompleteQueue<Chunk::MeshData>& completeQueue )
@@ -341,10 +342,10 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildMeshData( const
     thread_local static std::vector<float> densityValues( SIZE_GEN * SIZE_GEN * SIZE_GEN );
     thread_local static std::vector<VertexData> vertexData;
     thread_local static std::vector<uint32_t> indicies;
-    
+
     vertexData.clear();
     indicies.clear();
-    
+
     switch( buildData.meshType )
     {
     case MeshType_Voxel3D:
@@ -355,16 +356,16 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildMeshData( const
 
     case MeshType_Count:
         break;
-    }           
+    }
 
     return MeshData( buildData.pos, {}, vertexData, indicies );
 }
 
 MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildVoxel3DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies )
 {
-    FastNoise::OutputMinMax minMax = buildData.generator->GenUniformGrid3D( densityValues,
-                                                                            buildData.pos.x() - 1, buildData.pos.y() - 1, buildData.pos.z() - 1,
-                                                                            SIZE_GEN, SIZE_GEN, SIZE_GEN, buildData.frequency, buildData.seed );
+    FastNoise::OutputMinMax minMax = buildData.generatorScaled->GenUniformGrid3D( densityValues,
+                                                                                  buildData.pos.x() - 1, buildData.pos.y() - 1, buildData.pos.z() - 1,
+                                                                                  SIZE_GEN, SIZE_GEN, SIZE_GEN, buildData.seed );
     float minAir = INFINITY;
     float maxSolid = -INFINITY;
 
@@ -470,30 +471,30 @@ void MeshNoisePreview::Chunk::AddQuadAO( std::vector<VertexData>& verts, std::ve
     uint8_t sideA1 = density[facingIdx + offsetA] <= isoSurface;
     uint8_t sideB0 = density[facingIdx - offsetB] <= isoSurface;
     uint8_t sideB1 = density[facingIdx + offsetB] <= isoSurface;
-    
-    uint8_t corner00 = (sideA0 & sideB0) || density[facingIdx - offsetA - offsetB] <= isoSurface;
-    uint8_t corner01 = (sideA0 & sideB1) || density[facingIdx - offsetA + offsetB] <= isoSurface;
-    uint8_t corner10 = (sideA1 & sideB0) || density[facingIdx + offsetA - offsetB] <= isoSurface;
-    uint8_t corner11 = (sideA1 & sideB1) || density[facingIdx + offsetA + offsetB] <= isoSurface;
 
-    constexpr float aoAdjust = AO_STRENGTH / 3.0f; 
+    uint8_t corner00 = ( sideA0 & sideB0 ) || density[facingIdx - offsetA - offsetB] <= isoSurface;
+    uint8_t corner01 = ( sideA0 & sideB1 ) || density[facingIdx - offsetA + offsetB] <= isoSurface;
+    uint8_t corner10 = ( sideA1 & sideB0 ) || density[facingIdx + offsetA - offsetB] <= isoSurface;
+    uint8_t corner11 = ( sideA1 & sideB1 ) || density[facingIdx + offsetA + offsetB] <= isoSurface;
 
-    float ao00 = (float)(sideA0 + sideB0 + corner00) * aoAdjust;
-    float ao01 = (float)(sideA1 + sideB0 + corner10) * aoAdjust;
-    float ao10 = (float)(sideA0 + sideB1 + corner01) * aoAdjust;
-    float ao11 = (float)(sideA1 + sideB1 + corner11) * aoAdjust;
+    constexpr float aoAdjust = AO_STRENGTH / 3.0f;
 
-    float densityLightShift = 1 - (isoSurface - density[idx]) * 2;
+    float ao00 = (float)( sideA0 + sideB0 + corner00 ) * aoAdjust;
+    float ao01 = (float)( sideA1 + sideB0 + corner10 ) * aoAdjust;
+    float ao10 = (float)( sideA0 + sideB1 + corner01 ) * aoAdjust;
+    float ao11 = (float)( sideA1 + sideB1 + corner11 ) * aoAdjust;
+
+    float densityLightShift = 1 - ( isoSurface - density[idx] ) * 2;
     light *= densityLightShift * densityLightShift;
 
     uint32_t vertIdx = (uint32_t)verts.size();
-    verts.emplace_back( pos00, (1.0f - ao00) * light );
-    verts.emplace_back( pos01, (1.0f - ao01) * light );
-    verts.emplace_back( pos10, (1.0f - ao10) * light );
-    verts.emplace_back( pos11, (1.0f - ao11) * light );
+    verts.emplace_back( pos00, ( 1.0f - ao00 ) * light );
+    verts.emplace_back( pos01, ( 1.0f - ao01 ) * light );
+    verts.emplace_back( pos10, ( 1.0f - ao10 ) * light );
+    verts.emplace_back( pos11, ( 1.0f - ao11 ) * light );
 
     // Rotate tris to give best visuals for AO lighting
-    uint32_t triRotation = ( ao00 + ao11 > ao01 + ao10 ) * 2;    
+    uint32_t triRotation = ( ao00 + ao11 > ao01 + ao10 ) * 2;
     indicies.push_back( vertIdx );
     indicies.push_back( vertIdx + 3 - triRotation );
     indicies.push_back( vertIdx + 2 );
@@ -506,9 +507,9 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildHeightMap2DMesh(
 {
     constexpr uint32_t SIZE_GEN_HEIGHTMAP = SIZE + 1;
 
-    FastNoise::OutputMinMax minMax = buildData.generator->GenUniformGrid2D( densityValues,
-                                                                            buildData.pos.x(), buildData.pos.z(),
-                                                                            SIZE_GEN_HEIGHTMAP, SIZE_GEN_HEIGHTMAP, buildData.frequency, buildData.seed );
+    FastNoise::OutputMinMax minMax = buildData.generatorScaled->GenUniformGrid2D( densityValues,
+                                                                                  buildData.pos.x(), buildData.pos.z(),
+                                                                                  SIZE_GEN_HEIGHTMAP, SIZE_GEN_HEIGHTMAP, buildData.seed );
     constexpr int32_t STEP_X = 1;
     constexpr int32_t STEP_Y = SIZE_GEN_HEIGHTMAP;
 
@@ -527,12 +528,10 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildHeightMap2DMesh(
             Vector3 v00( xf, densityValues[noiseIdx] * buildData.heightmapMultiplier, yf );
             Vector3 v01( xf, densityValues[noiseIdx + STEP_Y] * buildData.heightmapMultiplier, yf + 1 );
             Vector3 v10( xf + 1, densityValues[noiseIdx + STEP_X] * buildData.heightmapMultiplier, yf );
-            Vector3 v11( xf + 1, densityValues[noiseIdx + STEP_X + STEP_Y] * buildData.heightmapMultiplier, yf + 1 );            
+            Vector3 v11( xf + 1, densityValues[noiseIdx + STEP_X + STEP_Y] * buildData.heightmapMultiplier, yf + 1 );
 
             // Normal for quad
-            float light = ( sunLight * (
-                Math::cross( v10 - v11, v00 - v11 ).normalized() +
-                Math::cross( v01 - v00, v11 - v00 ).normalized() ).normalized() ).dot();
+            float light = ( sunLight * ( Math::cross( v10 - v11, v00 - v11 ).normalized() + Math::cross( v01 - v00, v11 - v00 ).normalized() ).normalized() ).dot();
 
             uint32_t vertIdx = (uint32_t)vertexData.size();
             vertexData.emplace_back( v00, light );
@@ -541,7 +540,7 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildHeightMap2DMesh(
             vertexData.emplace_back( v11, light );
 
             // Slice quad along longest split
-            uint32_t triRotation = 2 * ( (v00 + v11).dot() < (v01 + v10).dot() );            
+            uint32_t triRotation = 2 * ( ( v00 + v11 ).dot() < ( v01 + v10 ).dot() );
             indicies.push_back( vertIdx );
             indicies.push_back( vertIdx + 3 - triRotation );
             indicies.push_back( vertIdx + 2 );
@@ -564,11 +563,11 @@ MeshNoisePreview::Chunk::Chunk( MeshData& meshData )
 
     if( !meshData.vertexData.isEmpty() )
     {
-        //https://doc.magnum.graphics/magnum/classMagnum_1_1GL_1_1Mesh.html
+        // https://doc.magnum.graphics/magnum/classMagnum_1_1GL_1_1Mesh.html
 
         mMesh = std::make_unique<GL::Mesh>( GL::MeshPrimitive::Triangles );
 
-        mMesh->addVertexBuffer( GL::Buffer( GL::Buffer::TargetHint::Array, meshData.vertexData ), 0, VertexLightShader::PositionLight{} );
+        mMesh->addVertexBuffer( GL::Buffer( GL::Buffer::TargetHint::Array, meshData.vertexData ), 0, VertexLightShader::PositionLight {} );
 
         if( meshData.indicies.isEmpty() )
         {
@@ -593,20 +592,20 @@ MeshNoisePreview::VertexLightShader::VertexLightShader()
 #else
     const GL::Version version = GL::Context::current().supportedVersion( { GL::Version::GLES300, GL::Version::GLES200 } );
 #endif
-    
+
     GL::Shader vert = CreateShader( version, GL::Shader::Type::Vertex );
     GL::Shader frag = CreateShader( version, GL::Shader::Type::Fragment );
-    
+
     CORRADE_INTERNAL_ASSERT_OUTPUT(
         vert.addSource( NodeEditorResources.getString( "VertexLight.vert" ) ).compile() );
-    CORRADE_INTERNAL_ASSERT_OUTPUT( 
+    CORRADE_INTERNAL_ASSERT_OUTPUT(
         frag.addSource( NodeEditorResources.getString( "VertexLight.frag" ) ).compile() );
 
     attachShader( vert );
     attachShader( frag );
 
     /* ES3 has this done in the shader directly */
-#if !defined(MAGNUM_TARGET_GLES) || defined(MAGNUM_TARGET_GLES2)
+#if !defined( MAGNUM_TARGET_GLES ) || defined( MAGNUM_TARGET_GLES2 )
 #ifndef MAGNUM_TARGET_GLES
     if( !GL::Context::current().isExtensionSupported<GL::Extensions::ARB::explicit_attrib_location>( version ) )
 #endif
@@ -679,7 +678,7 @@ void MeshNoisePreview::StartTimer()
 
 float MeshNoisePreview::GetTimerDurationMs()
 {
-    return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - mTimerStart).count() / 1e3f;
+    return std::chrono::duration_cast<std::chrono::microseconds>( std::chrono::high_resolution_clock::now() - mTimerStart ).count() / 1e3f;
 }
 
 void MeshNoisePreview::SetupSettingsHandlers()
@@ -693,7 +692,7 @@ void MeshNoisePreview::SetupSettingsHandlers()
         outBuf->appendf( "\n[%s][Settings]\n", handler->TypeName );
 
         outBuf->appendf( "tri_limit=%d\n", (int)meshNoisePreview->mTriLimit );
-        outBuf->appendf( "frequency=%f\n", meshNoisePreview->mBuildData.frequency );
+        outBuf->appendf( "scale=%f\n", meshNoisePreview->mBuildData.scale );
         outBuf->appendf( "iso_surface=%f\n", meshNoisePreview->mBuildData.isoSurface );
         outBuf->appendf( "heightmap_multiplier=%f\n", meshNoisePreview->mBuildData.heightmapMultiplier );
         outBuf->appendf( "seed=%d\n", meshNoisePreview->mBuildData.seed );
@@ -713,7 +712,7 @@ void MeshNoisePreview::SetupSettingsHandlers()
         auto* meshNoisePreview = (MeshNoisePreview*)handler->UserData;
 
         sscanf( line, "tri_limit=%d", &meshNoisePreview->mTriLimit );
-        sscanf( line, "frequency=%f", &meshNoisePreview->mBuildData.frequency );
+        sscanf( line, "scale=%f", &meshNoisePreview->mBuildData.scale );
         sscanf( line, "iso_surface=%f", &meshNoisePreview->mBuildData.isoSurface );
         sscanf( line, "heightmap_multiplier=%f", &meshNoisePreview->mBuildData.heightmapMultiplier );
         sscanf( line, "seed=%d", &meshNoisePreview->mBuildData.seed );
diff --git a/tools/MeshNoisePreview.h b/tools/MeshNoisePreview.h
index 4cb6fab4..0b8989b3 100644
--- a/tools/MeshNoisePreview.h
+++ b/tools/MeshNoisePreview.h
@@ -121,9 +121,10 @@ namespace Magnum
             struct BuildData
             {
                 FastNoise::SmartNode<const FastNoise::Generator> generator;
+                FastNoise::SmartNode<FastNoise::DomainScale> generatorScaled;
                 Vector3i pos;
                 Color3 color;
-                float frequency, isoSurface, heightmapMultiplier;
+                float scale, isoSurface, heightmapMultiplier;
                 int32_t seed;
                 MeshType meshType;
                 uint32_t genVersion;
diff --git a/tools/NoiseTexture.cpp b/tools/NoiseTexture.cpp
index 2c760995..47883e5b 100644
--- a/tools/NoiseTexture.cpp
+++ b/tools/NoiseTexture.cpp
@@ -23,7 +23,7 @@ using namespace Magnum;
 NoiseTexture::NoiseTexture()
 {
     mBuildData.iteration = 0;
-    mBuildData.frequency = 0.02f;
+    mBuildData.scale = 1.f;
     mBuildData.seed = 1337;
     mBuildData.size = { -1, -1 };
     mBuildData.offset = {};
@@ -73,6 +73,9 @@ void NoiseTexture::Draw()
     {
         //ImGui::Text( "Min: %0.6f Max: %0.6f", mMinMax.min, mMinMax.max );
 
+        ImGui::TextUnformatted( "Preview Settings: " );
+        ImGui::SameLine();
+
         ImGui::PushItemWidth( 82.0f );
         bool edited = false;
 
@@ -100,7 +103,7 @@ void NoiseTexture::Draw()
         edited |= ImGui::DragInt( "Seed", &mBuildData.seed );
         ImGui::SameLine();
 
-        edited |= ImGui::DragFloat( "Frequency", &mBuildData.frequency, 0.001f );
+        edited |= ImGui::DragFloat( "Scale", &mBuildData.scale, 0.05f );
         ImGui::SameLine();
 
         if( mBuildData.generator && ImGui::Button( "Export BMP" ) )
@@ -187,7 +190,7 @@ void NoiseTexture::DoExport()
 
             float relativeScale = (float)mExportBuildData.size.sum() / mBuildData.size.sum();
             
-            mExportBuildData.frequency /= relativeScale;
+            mExportBuildData.scale *= relativeScale;
             mExportBuildData.offset *= relativeScale;
 
             if( mExportThread.joinable() )
@@ -272,10 +275,16 @@ void NoiseTexture::DoExport()
                     }
 
                     file.close();
+
+                    Debug{} << "BMP Export Complete: " << filename.c_str();
                 }
             } );
         }
 
+        if( ImGui::Button( "Cancel" ) )
+        {
+            ImGui::CloseCurrentPopup();
+        }
         ImGui::PopItemWidth();
         ImGui::EndPopup();
     }
@@ -321,7 +330,10 @@ NoiseTexture::TextureData NoiseTexture::BuildTexture( const BuildData& buildData
     noiseData.resize( (size_t)buildData.size.x() * buildData.size.y() );
 
     auto gen = FastNoise::New<FastNoise::ConvertRGBA8>( buildData.generator->GetActiveFeatureSet() );
-    gen->SetSource( buildData.generator );
+    auto scale = FastNoise::New<FastNoise::DomainScale>( buildData.generator->GetActiveFeatureSet() );
+    gen->SetSource( scale );
+    scale->SetSource( buildData.generator );
+    scale->SetScaling( 1 / buildData.scale );
 
     FastNoise::OutputMinMax minMax;
 
@@ -330,28 +342,24 @@ NoiseTexture::TextureData NoiseTexture::BuildTexture( const BuildData& buildData
     case GenType_2D:
         minMax = gen->GenUniformGrid2D( noiseData.data(), 
             (int)buildData.offset.x(), (int)buildData.offset.y(),
-            buildData.size.x(), buildData.size.y(),
-            buildData.frequency, buildData.seed );
+            buildData.size.x(), buildData.size.y(), buildData.seed );
         break;
 
     case GenType_2DTiled:
         minMax = gen->GenTileable2D( noiseData.data(),
-            buildData.size.x(), buildData.size.y(),
-            buildData.frequency, buildData.seed );
+            buildData.size.x(), buildData.size.y(), buildData.seed );
         break;
 
     case GenType_3D:
         minMax = gen->GenUniformGrid3D( noiseData.data(),
             (int)buildData.offset.x(), (int)buildData.offset.y(), (int)buildData.offset.z(),
-            buildData.size.x(), buildData.size.y(), 1,
-            buildData.frequency, buildData.seed );
+            buildData.size.x(), buildData.size.y(), 1, buildData.seed );
         break;
 
     case GenType_4D:
         minMax = gen->GenUniformGrid4D( noiseData.data(),
             (int)buildData.offset.x(), (int)buildData.offset.y(), (int)buildData.offset.z(), (int)buildData.offset.w(),
-            buildData.size.x(), buildData.size.y(), 1, 1,
-            buildData.frequency, buildData.seed );
+            buildData.size.x(), buildData.size.y(), 1, 1, buildData.seed );
         break;
     case GenType_Count:
         break;
@@ -390,7 +398,7 @@ void NoiseTexture::SetupSettingsHandlers()
         auto* noiseTexture = (NoiseTexture*)handler->UserData;
         outBuf->appendf( "\n[%s][Settings]\n", handler->TypeName );        
 
-        outBuf->appendf( "frequency=%f\n", noiseTexture->mBuildData.frequency );
+        outBuf->appendf( "scale=%f\n", noiseTexture->mBuildData.scale );
         outBuf->appendf( "seed=%d\n", noiseTexture->mBuildData.seed );
         outBuf->appendf( "gen_type=%d\n", (int)noiseTexture->mBuildData.generationType );
         outBuf->appendf( "export_size=%d:%d\n", noiseTexture->mExportBuildData.size.x(), noiseTexture->mExportBuildData.size.y() );
@@ -406,7 +414,7 @@ void NoiseTexture::SetupSettingsHandlers()
     editorSettings.ReadLineFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, void* entry, const char* line ) {
         auto* noiseTexture = (NoiseTexture*)handler->UserData;
         
-        sscanf( line, "frequency=%f", &noiseTexture->mBuildData.frequency );
+        sscanf( line, "scale=%f", &noiseTexture->mBuildData.scale );
         sscanf( line, "seed=%d", &noiseTexture->mBuildData.seed );
         sscanf( line, "gen_type=%d", (int*)&noiseTexture->mBuildData.generationType );
         sscanf( line, "export_size=%d:%d", &noiseTexture->mExportBuildData.size.x() , &noiseTexture->mExportBuildData.size.y() );
diff --git a/tools/NoiseTexture.h b/tools/NoiseTexture.h
index 2d173c4a..8f5af873 100644
--- a/tools/NoiseTexture.h
+++ b/tools/NoiseTexture.h
@@ -45,7 +45,7 @@ namespace Magnum
             FastNoise::SmartNode<const FastNoise::Generator> generator;
             Vector2i size;
             Vector4 offset;
-            float frequency;
+            float scale;
             int32_t seed;
             uint64_t iteration;
             GenType generationType;          

From 6ca6d14ca6922a24620661a7870c8125cee6b1ab Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 28 Oct 2023 23:58:49 +0100
Subject: [PATCH 047/139] Update Corrade/Magnum

---
 tools/CMakeLists.txt       | 4 ++--
 tools/MeshNoisePreview.cpp | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 0b5a423c..3725079e 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,7 +1,7 @@
 CPMAddPackage(
     NAME corrade
     GITHUB_REPOSITORY mosra/corrade
-    GIT_TAG 83dba87f3c04031235da6362524e0ec475bb21e8
+    GIT_TAG dc51b04b86294b00db45156e5e6cfdb1a6462df9
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
@@ -28,7 +28,7 @@ CPMAddPackage(
 CPMAddPackage(
     NAME magnum
     GITHUB_REPOSITORY mosra/magnum
-    GIT_TAG c1239b66199926c610cd863e18a4d27a4cef67f1
+    GIT_TAG 68f6d75ee3f2aa4bf20ac3df706303e586cff323
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index 9c90148b..977913aa 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -7,11 +7,13 @@
 #include <Magnum/Math/Frustum.h>
 #include <Magnum/Math/Intersection.h>
 #include <Magnum/Math/Matrix4.h>
-#include <Magnum/Shaders/Implementation/CreateCompatibilityShader.h>
+#include <Magnum/GL/Context.h>
+#include <Magnum/GL/Extensions.h>
 
 #include "ImGuiExtra.h"
 #include "MeshNoisePreview.h"
 
+
 using namespace Magnum;
 
 MeshNoisePreview::MeshNoisePreview()

From e7d934b798909e2c8a9ed086c5957658489e7fc1 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 8 Nov 2023 00:11:01 +0000
Subject: [PATCH 048/139] Don't invert scale in node editor settings

---
 tools/FastNoiseNodeEditor.cpp | 2 +-
 tools/FastNoiseNodeEditor.h   | 2 +-
 tools/MeshNoisePreview.cpp    | 2 +-
 tools/NoiseTexture.cpp        | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 21b66d25..2c122be6 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -147,7 +147,7 @@ void FastNoiseNodeEditor::Node::GeneratePreview( bool nodeTreeChanged, bool benc
         auto scale = FastNoise::New<FastNoise::DomainScale>( editor.mMaxFeatureSet );
         genRGB->SetSource( scale );
         scale->SetSource( generator );
-        scale->SetScaling( 1 / editor.mNodeScale );
+        scale->SetScaling( editor.mNodeScale );
 
         FastNoise::SmartNode<FastNoise::ConvertRGBA8> l(nullptr);
         
diff --git a/tools/FastNoiseNodeEditor.h b/tools/FastNoiseNodeEditor.h
index 6213b690..4e47f07b 100644
--- a/tools/FastNoiseNodeEditor.h
+++ b/tools/FastNoiseNodeEditor.h
@@ -132,7 +132,7 @@ namespace Magnum
         int32_t mNodeBenchmarkIndex = 0;
         int32_t mNodeBenchmarkMax = 128;
 
-        float mNodeScale = 0.4f;
+        float mNodeScale = 2.5f;
         int mNodeSeed = 1337;
         NoiseTexture::GenType mNodeGenType = NoiseTexture::GenType_2D;
 
diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index 977913aa..a1190190 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -52,7 +52,7 @@ void MeshNoisePreview::ReGenerate( FastNoise::SmartNodeArg<> generator )
     mLoadRange = 200.0f;
     mBuildData.generator = generator;
     mBuildData.generatorScaled = FastNoise::New<FastNoise::DomainScale>( generator->GetActiveFeatureSet() );
-    mBuildData.generatorScaled->SetScaling( 1 / mBuildData.scale );
+    mBuildData.generatorScaled->SetScaling( mBuildData.scale );
     mBuildData.generatorScaled->SetSource( generator );
     mBuildData.pos = Vector3i( 0 );
 
diff --git a/tools/NoiseTexture.cpp b/tools/NoiseTexture.cpp
index 47883e5b..f97d2b00 100644
--- a/tools/NoiseTexture.cpp
+++ b/tools/NoiseTexture.cpp
@@ -190,7 +190,7 @@ void NoiseTexture::DoExport()
 
             float relativeScale = (float)mExportBuildData.size.sum() / mBuildData.size.sum();
             
-            mExportBuildData.scale *= relativeScale;
+            mExportBuildData.scale /= relativeScale;
             mExportBuildData.offset *= relativeScale;
 
             if( mExportThread.joinable() )
@@ -333,7 +333,7 @@ NoiseTexture::TextureData NoiseTexture::BuildTexture( const BuildData& buildData
     auto scale = FastNoise::New<FastNoise::DomainScale>( buildData.generator->GetActiveFeatureSet() );
     gen->SetSource( scale );
     scale->SetSource( buildData.generator );
-    scale->SetScaling( 1 / buildData.scale );
+    scale->SetScaling( buildData.scale );
 
     FastNoise::OutputMinMax minMax;
 

From 975ac144fb01eef5822d512f79fe31e3430d8f1b Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 8 Nov 2023 00:21:04 +0000
Subject: [PATCH 049/139] Don't delete node when pressing delete in text entry

---
 tools/FastNoiseNodeEditor.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 2c122be6..0479cece 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -762,9 +762,9 @@ void FastNoiseNodeEditor::UpdateSelected()
     std::vector<int> linksToDelete;
     int selectedLinkCount = ImNodes::NumSelectedLinks();
 
-    bool delKeyPressed =
+    bool delKeyPressed = !ImGui::GetIO().WantTextInput && (
         ImGui::IsKeyPressed( ImGui::GetKeyIndex( ImGuiKey_Delete ), false ) ||
-        ImGui::IsKeyPressed( ImGui::GetKeyIndex( ImGuiKey_Backspace ), false );
+        ImGui::IsKeyPressed( ImGui::GetKeyIndex( ImGuiKey_Backspace ), false ) );
 
     if( selectedLinkCount && delKeyPressed )
     {

From 15912735d3038b611fc4d210198f221c6e6f2709 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 8 Nov 2023 00:42:41 +0000
Subject: [PATCH 050/139] Add Abs node, convert remap node to hybrid inputs

---
 include/FastNoise/FastNoise_BuildList.inl  |  1 +
 include/FastNoise/Generators/Modifiers.h   | 75 +++++++++++++---------
 include/FastNoise/Generators/Modifiers.inl | 21 +++++-
 tools/DemoNodeTrees.inl                    |  2 +-
 4 files changed, 68 insertions(+), 31 deletions(-)

diff --git a/include/FastNoise/FastNoise_BuildList.inl b/include/FastNoise/FastNoise_BuildList.inl
index 040ddd42..63c38b46 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/include/FastNoise/FastNoise_BuildList.inl
@@ -127,3 +127,4 @@ FASTNOISE_REGISTER_NODE( AddDimension );
 FASTNOISE_REGISTER_NODE( RemoveDimension );
 FASTNOISE_REGISTER_NODE( GeneratorCache );
 FASTNOISE_REGISTER_NODE( SquareRoot );
+FASTNOISE_REGISTER_NODE( Abs );
diff --git a/include/FastNoise/Generators/Modifiers.h b/include/FastNoise/Generators/Modifiers.h
index 9018e61c..4cd773e2 100644
--- a/include/FastNoise/Generators/Modifiers.h
+++ b/include/FastNoise/Generators/Modifiers.h
@@ -164,14 +164,25 @@ namespace FastNoise
         const Metadata& GetMetadata() const override;
 
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
-        void SetRemap( float fromMin, float fromMax, float toMin, float toMax ) { mFromMin = fromMin; mFromMax = fromMax; mToMin = toMin; mToMax = toMax; }
+        
+        void SetFromMin( float value ) { mFromMin = value; }
+        void SetFromMin( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFromMin, gen ); }
+        
+        void SetFromMax( float value ) { mFromMax = value; }
+        void SetFromMax( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFromMax, gen ); }
+        
+        void SetToMin( float value ) { mToMin = value; }
+        void SetToMin( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mToMin, gen ); }
+        
+        void SetToMax( float value ) { mToMax = value; }
+        void SetToMax( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mToMax, gen ); }
 
     protected:
         GeneratorSource mSource;
-        float mFromMin = -1.0f;
-        float mFromMax = 1.0f;
-        float mToMin = 0.0f;
-        float mToMax = 1.0f;
+        HybridSource mFromMin = -1.0f;
+        HybridSource mFromMax = 1.0f;
+        HybridSource mToMin = 0.0f;
+        HybridSource mToMax = 1.0f;
 
         template<typename T>
         friend struct MetadataT;
@@ -187,30 +198,11 @@ namespace FastNoise
         {
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &Remap::SetSource );
-
-            this->AddVariable( "From Min", -1.0f,
-                []( Remap* p, float f )
-                {
-                    p->mFromMin = f;
-                } );
-
-            this->AddVariable( "From Max", 1.0f,
-                []( Remap* p, float f )
-                {
-                    p->mFromMax = f;
-                } );
-
-            this->AddVariable( "To Min", 0.0f,
-                []( Remap* p, float f )
-                {
-                    p->mToMin = f;
-                } );
-
-            this->AddVariable( "To Max", 1.0f,
-                []( Remap* p, float f )
-                {
-                    p->mToMax = f;
-                } );
+            
+            this->AddHybridSource( "From Min", -1.0f, &Remap::SetFromMin, &Remap::SetFromMin );
+            this->AddHybridSource( "From Max", 1.0f, &Remap::SetFromMax, &Remap::SetFromMax );
+            this->AddHybridSource( "To Min", 0.0f, &Remap::SetToMin, &Remap::SetToMin );
+            this->AddHybridSource( "To Max", 1.0f, &Remap::SetToMax, &Remap::SetToMax );            
         }
     };
 #endif
@@ -430,5 +422,30 @@ namespace FastNoise
         }
     };
 #endif
+
+    class Abs : public virtual Generator
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
+
+    protected:
+        GeneratorSource mSource;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Abs> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Modifiers" );
+            this->AddGeneratorSource( "Source", &Abs::SetSource );
+        }
+    };
+#endif
     
 }
diff --git a/include/FastNoise/Generators/Modifiers.inl b/include/FastNoise/Generators/Modifiers.inl
index e8ca0257..cb43c786 100644
--- a/include/FastNoise/Generators/Modifiers.inl
+++ b/include/FastNoise/Generators/Modifiers.inl
@@ -81,8 +81,13 @@ class FastSIMD::DispatchClass<FastNoise::Remap, SIMD> final : public virtual Fas
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
         float32v source = this->GetSourceValue( mSource, seed, pos... );
+
+        float32v fromMin = this->GetSourceValue( mFromMin, seed, pos... );
+        float32v fromMax = this->GetSourceValue( mFromMax, seed, pos... );
+        float32v toMin = this->GetSourceValue( mToMin, seed, pos... );
+        float32v toMax = this->GetSourceValue( mToMax, seed, pos... );
             
-        return float32v( mToMin ) + (( source - float32v( mFromMin ) ) / float32v( mFromMax - mFromMin ) * float32v( mToMax - mToMin ));
+        return toMin + ( ( source - fromMin ) / ( fromMax - fromMin ) * ( toMax - toMin ) );
     }
 };
 
@@ -278,3 +283,17 @@ class FastSIMD::DispatchClass<FastNoise::SquareRoot, SIMD> final : public virtua
         return FS::InvSqrt( FS::Max( FS::Abs( value ), float32v( FLT_MIN ) ) ) * value;
     }
 };
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Abs, SIMD> final : public virtual FastNoise::Abs, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+{
+    FASTNOISE_IMPL_GEN_T;
+
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const // Works for any number of position arguments (P...)
+    {
+        float32v value = this->GetSourceValue( mSource, seed, pos... ); // Evaluate the input node at the same seed and position
+        
+        return FS::Abs( value ); // Output is FS::Abs applied to the input value
+    }
+};
diff --git a/tools/DemoNodeTrees.inl b/tools/DemoNodeTrees.inl
index 1cbf2a33..31656cfa 100644
--- a/tools/DemoNodeTrees.inl
+++ b/tools/DemoNodeTrees.inl
@@ -3,5 +3,5 @@
 inline const char* gDemoNodeTrees[][2] =
 {
     { "Simple Terrain", "EgAC@EgQBE@ADIQhoADgAE@EgQAk@ACWQwBmZiY/@CD8BB@Ej8J1P@hCBCADMzMz8@E==" },
-    { "Cellular Caves", "EwAC@DDgQBE@BgQhsAASAAFwAB@BD@BCVEAw@BI@BD@BB@H/ARUA//8@DKVD@BpUM@AClQw@ERg@ACAvwAAgD89ChdAUrgeQAY@BCQwDhehQ/@BIEEAmpmZPg@D" },
+    { "Cellular Caves", "EwAC@DDgQBE@BgQhsAASAAFwAB@BD@BCVEAw@BI@BD@BB@H/ARUA//8@DKVD@BpUM@AClQw@ERgABg@BJD@BgL8@ACAPwA9ChdAAFK4HkAA4XoUPw@ACBBAJqZmT4@E==" },
 };

From 182201df2c9e5becbdaeb4397df7be653d7d5ce7 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 8 Nov 2023 23:22:40 +0000
Subject: [PATCH 051/139] Metadata node value UI drag speed setting

---
 include/FastNoise/Generators/BasicGenerators.h |  2 +-
 include/FastNoise/Generators/DomainWarp.h      |  2 +-
 include/FastNoise/Generators/Generator.h       | 15 ++++++++++-----
 include/FastNoise/Metadata.h                   |  6 +++++-
 tools/FastNoiseNodeEditor.cpp                  |  6 +++---
 tools/NoiseTexture.cpp                         |  5 +----
 6 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index f900d379..136a4ce2 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -23,7 +23,7 @@ namespace FastNoise
     {
         MetadataT()
         {
-            this->AddVariable( "Feature Scale", 100.0f, &ScalableGenerator::SetScale );
+            this->AddVariable( "Feature Scale", 100.0f, &ScalableGenerator::SetScale, 0.f, 0.f, 0.25f );
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/DomainWarp.h b/include/FastNoise/Generators/DomainWarp.h
index f86f76a3..49080ec4 100644
--- a/include/FastNoise/Generators/DomainWarp.h
+++ b/include/FastNoise/Generators/DomainWarp.h
@@ -23,7 +23,7 @@ namespace FastNoise
         {
             groups.push_back( "Domain Warp" );
             this->AddGeneratorSource( "Source", &DomainWarp::SetSource );
-            this->AddHybridSource( "Warp Amplitude", 50.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude );
+            this->AddHybridSource( "Warp Amplitude", 50.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude, 0.1f );
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 2acf196d..6cca2f04 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -197,12 +197,13 @@ namespace FastNoise
     {
     protected:
         template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
-        void AddVariable( NameDesc nameDesc, T defaultV, U&& func, T minV = 0, T maxV = 0 )
+        void AddVariable( NameDesc nameDesc, T defaultV, U&& func, T minV = 0, T maxV = 0, float uiDragSpeed = std::is_same_v<T, float> ? Metadata::kDefaultUiDragSpeedFloat : Metadata::kDefaultUiDragSpeedInt )
         {
             MemberVariable member;
             member.name = nameDesc.name;
             member.description = nameDesc.desc;
             member.valueDefault = defaultV;
+            member.valueUiDragSpeed = uiDragSpeed;
             member.valueMin = minV;
             member.valueMax = maxV;
 
@@ -222,12 +223,13 @@ namespace FastNoise
         }
 
         template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
-        void AddVariable( NameDesc nameDesc, T defaultV, void(U::* func)(T), T minV = 0, T maxV = 0 )
+        void AddVariable( NameDesc nameDesc, T defaultV, void ( U::*func )( T ), T minV = 0, T maxV = 0, float uiDragSpeed = std::is_same_v<T, float> ? Metadata::kDefaultUiDragSpeedFloat : Metadata::kDefaultUiDragSpeedInt )
         {
             MemberVariable member;
             member.name = nameDesc.name;
             member.description = nameDesc.desc;
             member.valueDefault = defaultV;
+            member.valueUiDragSpeed = uiDragSpeed;
             member.valueMin = minV;
             member.valueMax = maxV;
 
@@ -293,7 +295,7 @@ namespace FastNoise
         }
 
         template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
-        void AddPerDimensionVariable( NameDesc nameDesc, T defaultV, U&& func, T minV = 0, T maxV = 0 )
+        void AddPerDimensionVariable( NameDesc nameDesc, T defaultV, U&& func, T minV = 0, T maxV = 0, float uiDragSpeed = std::is_same_v<T, float> ? Metadata::kDefaultUiDragSpeedFloat : Metadata::kDefaultUiDragSpeedInt )
         {
             for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<T>::varArray ) / sizeof( *PerDimensionVariable<T>::varArray ); idx++ )
             {
@@ -301,6 +303,7 @@ namespace FastNoise
                 member.name = nameDesc.name;
                 member.description = nameDesc.desc;
                 member.valueDefault = defaultV;
+                member.valueUiDragSpeed = uiDragSpeed;
                 member.valueMin = minV;
                 member.valueMax = maxV;
 
@@ -378,12 +381,13 @@ namespace FastNoise
 
 
         template<typename T, typename U>
-        void AddHybridSource( NameDesc nameDesc, float defaultValue, void(U::* funcNode)(SmartNodeArg<T>), void(U::* funcValue)(float) )
+        void AddHybridSource( NameDesc nameDesc, float defaultValue, void ( U::*funcNode )( SmartNodeArg<T> ), void ( U::*funcValue )( float ), float uiDragSpeed = Metadata::kDefaultUiDragSpeedFloat )
         {
             MemberHybrid member;
             member.name = nameDesc.name;
             member.description = nameDesc.desc;
             member.valueDefault = defaultValue;
+            member.valueUiDragSpeed = uiDragSpeed;
 
             member.setNodeFunc = [funcNode]( Generator* g, SmartNodeArg<> s )
             {
@@ -413,7 +417,7 @@ namespace FastNoise
         }
 
         template<typename U>
-        void AddPerDimensionHybridSource( NameDesc nameDesc, float defaultV, U&& func )
+        void AddPerDimensionHybridSource( NameDesc nameDesc, float defaultV, U&& func, float uiDragSpeed = Metadata::kDefaultUiDragSpeedFloat )
         {
             using HybridSourceT = typename std::invoke_result_t<U, GetArg<U, 0>>::type::Type;
             using T = typename HybridSourceT::Type;
@@ -424,6 +428,7 @@ namespace FastNoise
                 member.name = nameDesc.name;
                 member.description = nameDesc.desc;
                 member.valueDefault = defaultV;
+                member.valueUiDragSpeed = uiDragSpeed;
                 member.dimensionIdx = idx;
 
                 member.setNodeFunc = [func, idx]( Generator* g, SmartNodeArg<> s )
diff --git a/include/FastNoise/Metadata.h b/include/FastNoise/Metadata.h
index 3e5006ae..2b1944b7 100644
--- a/include/FastNoise/Metadata.h
+++ b/include/FastNoise/Metadata.h
@@ -28,6 +28,9 @@ namespace FastNoise
     // Node name, member name+types, functions to set members
     struct FASTNOISE_API Metadata
     {
+        static constexpr float kDefaultUiDragSpeedFloat = 0.02f;
+        static constexpr float kDefaultUiDragSpeedInt = 0.2f;
+
         virtual ~Metadata() = default;
 
         /// <returns>Array containing metadata for every FastNoise node type</returns>
@@ -153,6 +156,7 @@ namespace FastNoise
 
             eType type;
             ValueUnion valueDefault, valueMin, valueMax;
+            float valueUiDragSpeed = 0;
             std::vector<const char*> enumNames;
 
             // Function to set value for given generator
@@ -171,7 +175,7 @@ namespace FastNoise
         // Either a constant float or node lookup
         struct MemberHybrid : Member
         {
-            float valueDefault = 0.0f;
+            float valueDefault = 0.0f, valueUiDragSpeed = 0.0f; // zero-initialised so a default-constructed MemberHybrid never holds indeterminate floats
 
             // Function to set value for given generator
             // Returns true if Generator is correct node class
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 0479cece..18b305d7 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -964,7 +964,7 @@ void FastNoiseNodeEditor::DoNodes()
 
             formatName = FastNoise::Metadata::FormatMetadataMemberName( nodeMetadata->memberHybrids[i] );
 
-            if( ImGui::DragFloat( formatName.c_str(), &nodeData->hybrids[i].second, 0.02f, 0, 0, floatFormat ) )
+            if( ImGui::DragFloat( formatName.c_str(), &nodeData->hybrids[i].second, nodeMetadata->memberHybrids[i].valueUiDragSpeed, 0, 0, floatFormat ) )
             {
                 node.second.GeneratePreview();
             }
@@ -989,7 +989,7 @@ void FastNoiseNodeEditor::DoNodes()
             {
             case FastNoise::Metadata::MemberVariable::EFloat:
             {
-                if( ImGui::DragFloat( formatName.c_str(), &nodeData->variables[i].f, 0.02f, nodeVar.valueMin.f, nodeVar.valueMax.f ) )
+                if( ImGui::DragFloat( formatName.c_str(), &nodeData->variables[i].f, nodeVar.valueUiDragSpeed, nodeVar.valueMin.f, nodeVar.valueMax.f ) )
                 {
                     node.second.GeneratePreview();
                 }
@@ -997,7 +997,7 @@ void FastNoiseNodeEditor::DoNodes()
             break;
             case FastNoise::Metadata::MemberVariable::EInt:
             {
-                if( ImGui::DragInt( formatName.c_str(), &nodeData->variables[i].i, 0.2f, nodeVar.valueMin.i, nodeVar.valueMax.i ) )
+                if( ImGui::DragInt( formatName.c_str(), &nodeData->variables[i].i, nodeVar.valueUiDragSpeed, nodeVar.valueMin.i, nodeVar.valueMax.i ) )
                 {
                     node.second.GeneratePreview();
                 }
diff --git a/tools/NoiseTexture.cpp b/tools/NoiseTexture.cpp
index f97d2b00..fe962823 100644
--- a/tools/NoiseTexture.cpp
+++ b/tools/NoiseTexture.cpp
@@ -72,10 +72,7 @@ void NoiseTexture::Draw()
     if( ImGui::Begin( "Texture Preview", nullptr, ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse ) )
     {
         //ImGui::Text( "Min: %0.6f Max: %0.6f", mMinMax.min, mMinMax.max );
-
-        ImGui::TextUnformatted( "Preview Settings: " );
-        ImGui::SameLine();
-
+        
         ImGui::PushItemWidth( 82.0f );
         bool edited = false;
 

From 28f70df8bc63131a32c852b1847088a1d18862f1 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 11 Nov 2023 00:28:36 +0000
Subject: [PATCH 052/139] PositionOutput rename Set to SetAxis

---
 include/FastNoise/Generators/BasicGenerators.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 136a4ce2..de5129f9 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -127,7 +127,7 @@ namespace FastNoise
         const Metadata& GetMetadata() const override;
 
         template<Dim D>
-        void Set( float multiplier, float offset = 0.0f ) { mMultiplier[(int)D] = multiplier; mOffset[(int)D] = offset; }
+        void SetAxis( float multiplier, float offset = 0.0f ) { mMultiplier[(int)D] = multiplier; mOffset[(int)D] = offset; }
 
     protected:
         PerDimensionVariable<float> mMultiplier = 0.0f;

From b5188a484ef935a0c8f82ec7b96a6525abe73808 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 11 Nov 2023 00:31:46 +0000
Subject: [PATCH 053/139] Fix GenPositionArray reading past the end of the
 input arrays

---
 include/FastNoise/Generators/Generator.inl | 69 +++++++++++++---------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 8366ff3f..5adce095 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -118,7 +118,7 @@ public:
 
         float32v gen = Gen( int32v( seed ), xPos, yPos );
 
-        return DoRemaining( noiseOut, totalValues, index, min, max, gen );
+        return StoreRemaining( noiseOut, totalValues, index, min, max, gen );
     }
 
     FastNoise::OutputMinMax GenUniformGrid3D( float* noiseOut, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int seed ) const final
@@ -170,7 +170,7 @@ public:
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
 
-        return DoRemaining( noiseOut, totalValues, index, min, max, gen );
+        return StoreRemaining( noiseOut, totalValues, index, min, max, gen );
     }
 
     FastNoise::OutputMinMax GenUniformGrid4D( float* noiseOut, int xStart, int yStart, int zStart, int wStart, int xSize, int ySize, int zSize, int wSize, int seed ) const final
@@ -229,7 +229,7 @@ public:
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
 
-        return DoRemaining( noiseOut, totalValues, index, min, max, gen );
+        return StoreRemaining( noiseOut, totalValues, index, min, max, gen );
     }
 
     FastNoise::OutputMinMax GenPositionArray2D( float* noiseOut, int count, const float* xPosArray, const float* yPosArray, float xOffset, float yOffset, int seed ) const final
@@ -253,12 +253,12 @@ public:
             index += int32v::ElementCount;
         }
 
-        float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
-        float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
+        float32v xPos = float32v( xOffset ) + LoadRemaining( xPosArray, count, index );
+        float32v yPos = float32v( yOffset ) + LoadRemaining( yPosArray, count, index );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos );
 
-        return DoRemaining( noiseOut, count, index, min, max, gen );
+        return StoreRemaining<true>( noiseOut, count, index, min, max, gen );
     }
 
     FastNoise::OutputMinMax GenPositionArray3D( float* noiseOut, int count, const float* xPosArray, const float* yPosArray, const float* zPosArray, float xOffset, float yOffset, float zOffset, int seed ) const final
@@ -283,13 +283,13 @@ public:
             index += int32v::ElementCount;
         }
 
-        float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
-        float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
-        float32v zPos = float32v( zOffset ) + FS::Load<float32v>( &zPosArray[index] );
+        float32v xPos = float32v( xOffset ) + LoadRemaining( xPosArray, count, index );
+        float32v yPos = float32v( yOffset ) + LoadRemaining( yPosArray, count, index );
+        float32v zPos = float32v( zOffset ) + LoadRemaining( zPosArray, count, index );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
 
-        return DoRemaining( noiseOut, count, index, min, max, gen );
+        return StoreRemaining<true>( noiseOut, count, index, min, max, gen );
     }
 
     FastNoise::OutputMinMax GenPositionArray4D( float* noiseOut, int count, const float* xPosArray, const float* yPosArray, const float* zPosArray, const float* wPosArray, float xOffset, float yOffset, float zOffset, float wOffset, int seed ) const final
@@ -315,14 +315,14 @@ public:
             index += int32v::ElementCount;
         }
 
-        float32v xPos = float32v( xOffset ) + FS::Load<float32v>( &xPosArray[index] );
-        float32v yPos = float32v( yOffset ) + FS::Load<float32v>( &yPosArray[index] );
-        float32v zPos = float32v( zOffset ) + FS::Load<float32v>( &zPosArray[index] );
-        float32v wPos = float32v( wOffset ) + FS::Load<float32v>( &wPosArray[index] );
+        float32v xPos = float32v( xOffset ) + LoadRemaining( xPosArray, count, index );
+        float32v yPos = float32v( yOffset ) + LoadRemaining( yPosArray, count, index );
+        float32v zPos = float32v( zOffset ) + LoadRemaining( zPosArray, count, index );
+        float32v wPos = float32v( wOffset ) + LoadRemaining( wPosArray, count, index );
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
 
-        return DoRemaining( noiseOut, count, index, min, max, gen );
+        return StoreRemaining<true>( noiseOut, count, index, min, max, gen );
     }
 
     float GenSingle2D( float x, float y, int seed ) const final
@@ -401,7 +401,7 @@ public:
 
         float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
 
-        return DoRemaining( noiseOut, totalValues, index, min, max, gen );
+        return StoreRemaining( noiseOut, totalValues, index, min, max, gen );
     }
 
 private:
@@ -416,21 +416,27 @@ private:
         }
     }
 
-    static FS_FORCEINLINE FastNoise::OutputMinMax DoRemaining( float* noiseOut, intptr_t totalValues, intptr_t index, float32v min, float32v max, float32v finalGen )
+    static FS_FORCEINLINE float32v LoadRemaining( const float* loadPtr, intptr_t totalValues, intptr_t index ) // Loads the last vector of positions without reading beyond loadPtr[totalValues - 1]
     {
-        FastNoise::OutputMinMax minMax;
-        intptr_t remaining = totalValues - index;
-
-        if( remaining == (intptr_t)int32v::ElementCount )
+        if( index == 0 ) // No full vector consumed yet; assumes totalValues <= ElementCount here (caller loop not visible - TODO confirm)
         {
-            FS::Store( &noiseOut[index], finalGen );
+            intptr_t remaining = totalValues - index;
 
-#if FASTNOISE_CALC_MIN_MAX
-            min = FS::Min( min, finalGen );
-            max = FS::Max( max, finalGen );
-#endif
+            float32v load = float32v( 0.0f ); // Zero-fill first: the memcpy below only writes 'remaining' lanes, the rest must not be indeterminate
+            std::memcpy( &load, loadPtr, remaining * sizeof( float ) );
+            return load;
         }
-        else
+
+        return FS::Load<float32v>( &loadPtr[totalValues - float32v::ElementCount] ); // Otherwise load the final ElementCount values, ending exactly at totalValues
+    }
+
+    template<bool LOADREMAINING = false>
+    static FS_FORCEINLINE FastNoise::OutputMinMax StoreRemaining( float* noiseOut, intptr_t totalValues, intptr_t index, float32v min, float32v max, float32v finalGen )
+    {
+        FastNoise::OutputMinMax minMax;
+        intptr_t remaining = totalValues - index;
+
+        if( LOADREMAINING ? index == 0 : remaining != (intptr_t)int32v::ElementCount )
         {
             std::memcpy( &noiseOut[index], &finalGen, remaining * sizeof( float ) );
 
@@ -440,6 +446,15 @@ private:
                 minMax << noiseOut[index];
             }
             while( ++index < totalValues );
+#endif
+        }
+        else
+        {
+            FS::Store( &noiseOut[totalValues - float32v::ElementCount], finalGen );
+
+#if FASTNOISE_CALC_MIN_MAX
+            min = FS::Min( min, finalGen );
+            max = FS::Max( max, finalGen );
 #endif
         }
 

From 37f2347314a86bafd653cc56beccc6fb75581fa7 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 25 Nov 2023 01:34:38 +0000
Subject: [PATCH 054/139] Small OpenSimplex2S optimisation

---
 include/FastNoise/Generators/Simplex.inl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index f4b1d60e..cab39e30 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -473,7 +473,7 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public vir
             float32v a0 = FS::Max( FS::MaskedAdd( flip0, a, p0 ), float32v( 0 ) );
             a0 *= a0; a0 *= a0;
             int32v h0 = HashPrimes( seed, FS::MaskedAdd( flip0, xrbp, int32v( Primes::X ) ), FS::MaskedAdd( flip0, yrbp, int32v( Primes::Y )), FS::MaskedAdd( flip0, zrbp, int32v( Primes::Z )));
-            float32v v0 = GetGradientDot( h0, FS::MaskedSub( flip0, xri, float32v( 1.0f ) ), FS::MaskedSub( flip0, yri, float32v( 1.0f ) ), FS::MaskedSub( flip0, zri, float32v( 1.0f ) ));
+            float32v v0 = GetGradientDot( h0, FS::MaskedDecrement( flip0, xri ), FS::MaskedDecrement( flip0, yri ), FS::MaskedDecrement( flip0, zri ) );
             value = FS::FMulAdd( a0, v0, value );
             a -= float32v( 0.5f );
 
@@ -482,7 +482,7 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public vir
             float32v a1 = FS::Max( FS::MaskedAdd( flip1, a + xri, p1 ), float32v( 0 ) );
             a1 *= a1; a1 *= a1;
             int32v h1 = HashPrimes( seed, FS::InvMaskedAdd( flip1, xrbp, int32v( Primes::X )), FS::MaskedAdd( flip1, yrbp, int32v( Primes::Y ) ), FS::MaskedAdd( flip1, zrbp, int32v( Primes::Z )));
-            float32v v1 = GetGradientDot( h1, FS::InvMaskedSub( flip1, xri, float32v( 1.0f )), FS::MaskedSub( flip1, yri, float32v( 1.0f ) ), FS::MaskedSub( flip1, zri, float32v( 1.0f ) ));
+            float32v v1 = GetGradientDot( h1, FS::InvMaskedSub( flip1, xri, float32v( 1.0f ) ), FS::MaskedDecrement( flip1, yri ), FS::MaskedDecrement( flip1, zri ) );
             value = FS::FMulAdd( a1, v1, value );
 
             float32v p2 = xri + float32v( -0.5f ) + ( zri - yri );
@@ -490,7 +490,7 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public vir
             float32v a2 = FS::Max( FS::MaskedAdd( flip2, a + yri, p2 ), float32v( 0 ) );
             a2 *= a2; a2 *= a2;
             int32v h2 = HashPrimes( seed, FS::MaskedAdd( flip2, xrbp, int32v( Primes::X )), FS::InvMaskedAdd( flip2, yrbp, int32v( Primes::Y )), FS::MaskedAdd( flip2, zrbp, int32v( Primes::Z )));
-            float32v v2 = GetGradientDot( h2, FS::MaskedSub( flip2, xri, float32v( 1.0f )), FS::InvMaskedSub( flip2, yri, float32v( 1.0f )), FS::MaskedSub( flip2, zri, float32v( 1.0f )));
+            float32v v2 = GetGradientDot( h2, FS::MaskedDecrement( flip2, xri ), FS::InvMaskedSub( flip2, yri, float32v( 1.0f ) ), FS::MaskedDecrement( flip2, zri ) );
             value = FS::FMulAdd( a2, v2, value );
 
             float32v p3 = xri + float32v( -0.5f ) - ( zri - yri );
@@ -498,7 +498,7 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public vir
             float32v a3 = FS::Max( FS::MaskedAdd( flip3, a + zri, p3 ), float32v( 0 ) );
             a3 *= a3; a3 *= a3;
             int32v h3 = HashPrimes( seed, FS::MaskedAdd( flip3, xrbp, int32v( Primes::X )), FS::MaskedAdd( flip3, yrbp, int32v( Primes::Y )), FS::InvMaskedAdd( flip3, zrbp, int32v( Primes::Z )));
-            float32v v3 = GetGradientDot( h3, FS::MaskedSub( flip3, xri, float32v( 1.0f )), FS::MaskedSub( flip3, yri, float32v( 1.0f )), FS::InvMaskedSub( flip3, zri, float32v( 1.0f )));
+            float32v v3 = GetGradientDot( h3, FS::MaskedDecrement( flip3, xri ), FS::MaskedDecrement( flip3, yri ), FS::InvMaskedSub( flip3, zri, float32v( 1.0f ) ) );
             value = FS::FMulAdd( a3, v3, value );
 
             if( i == 1 )

From 85f554381d632cf27b229fb71cd33e81627b2975 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 5 Dec 2023 23:36:23 +0000
Subject: [PATCH 055/139] File rejig

---
 include/FastNoise/FastNoise.h                 |   2 +-
 include/FastNoise/FastNoise_C.h               |   2 +-
 .../FastNoise/Generators/DomainWarpSimplex.h  |  19 ++
 .../Generators/DomainWarpSimplex.inl          | 177 ++++++++++++++++++
 include/FastNoise/Generators/Generator.h      |   4 +-
 include/FastNoise/Metadata.h                  |   2 +-
 .../{FastNoise_Config.h => Utility/Config.h}  |   2 +-
 .../{FastNoise_Export.h => Utility/Export.h}  |   0
 include/FastNoise/{ => Utility}/SmartNode.h   |   2 +-
 src/CMakeLists.txt                            |   4 +-
 .../FastNoise/FastSIMD_Build.inl              |  52 ++---
 src/FastNoise/Metadata.cpp                    |   2 +-
 src/FastNoise/SmartNode.cpp                   |   4 +-
 13 files changed, 238 insertions(+), 34 deletions(-)
 create mode 100644 include/FastNoise/Generators/DomainWarpSimplex.h
 create mode 100644 include/FastNoise/Generators/DomainWarpSimplex.inl
 rename include/FastNoise/{FastNoise_Config.h => Utility/Config.h} (96%)
 rename include/FastNoise/{FastNoise_Export.h => Utility/Export.h} (100%)
 rename include/FastNoise/{ => Utility}/SmartNode.h (99%)
 rename include/FastNoise/FastNoise_BuildList.inl => src/FastNoise/FastSIMD_Build.inl (70%)

diff --git a/include/FastNoise/FastNoise.h b/include/FastNoise/FastNoise.h
index 2163c02a..ef4c9b93 100644
--- a/include/FastNoise/FastNoise.h
+++ b/include/FastNoise/FastNoise.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "FastNoise_Config.h"
+#include "Utility/Config.h"
 
 // Node class definitions
 #include "Generators/BasicGenerators.h"
diff --git a/include/FastNoise/FastNoise_C.h b/include/FastNoise/FastNoise_C.h
index 13839a29..ce0f4a3c 100644
--- a/include/FastNoise/FastNoise_C.h
+++ b/include/FastNoise/FastNoise_C.h
@@ -1,7 +1,7 @@
 #ifndef FASTNOISE_C_H
 #define FASTNOISE_C_H
 
-#include "FastNoise_Export.h"
+#include "Utility/Export.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/include/FastNoise/Generators/DomainWarpSimplex.h b/include/FastNoise/Generators/DomainWarpSimplex.h
new file mode 100644
index 00000000..5c1f72f2
--- /dev/null
+++ b/include/FastNoise/Generators/DomainWarpSimplex.h
@@ -0,0 +1,19 @@
+#pragma once
+#include "Generator.h"
+#include "DomainWarp.h"
+
+namespace FastNoise
+{
+    class DomainWarpOpenSimplex : public virtual DomainWarp // Domain-warp node type; declares no members of its own beyond the metadata accessor
+    {
+    public:        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<DomainWarpOpenSimplex> : MetadataT<DomainWarp> // Adds no members of its own; member metadata comes from MetadataT<DomainWarp>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override; // Only node creation is specialised for this type
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/DomainWarpSimplex.inl b/include/FastNoise/Generators/DomainWarpSimplex.inl
new file mode 100644
index 00000000..8e6e3612
--- /dev/null
+++ b/include/FastNoise/Generators/DomainWarpSimplex.inl
@@ -0,0 +1,177 @@
+#include "DomainWarpSimplex.h"
+#include "Utils.inl"
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpOpenSimplex, SIMD> final : public virtual FastNoise::DomainWarpOpenSimplex, public FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD>
+{
+public:
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const // 2D warp: updates xOut/yOut in place and returns a value derived from the unscaled warp vector
+    {
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X ); // Cell corner coordinates pre-multiplied by the per-axis primes for hashing
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+
+        xs = InterpHermite( x - xs ); // xs/ys are reused as the Lerp weights, computed from the fractional position
+        ys = InterpHermite( y - ys );
+
+    #define GRADIENT_COORD( _x, _y )\
+        int32v hash##_x##_y = HashPrimesHB(seed, x##_x, y##_y );\
+        float32v x##_x##_y = FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) );\
+        float32v y##_x##_y = FS::Convert<float>( (hash##_x##_y >> 16) & int32v( 0xffff ) );
+
+        GRADIENT_COORD( 0, 0 ); // Per corner: low 16 hash bits become the x value, the next 16 bits the y value
+        GRADIENT_COORD( 1, 0 );
+        GRADIENT_COORD( 0, 1 );
+        GRADIENT_COORD( 1, 1 );
+
+    #undef GRADIENT_COORD
+
+        float32v normalise = float32v( 1.0f / (0xffff / 2.0f) ); // Scale that maps (value - midpoint) from the 0..0xffff range into -1..1
+
+        float32v xWarp = (Lerp( Lerp( x00, x10, xs ), Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+        float32v yWarp = (Lerp( Lerp( y00, y10, xs ), Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut ); // presumably xWarp * warpAmp + xOut - confirm FS::FMulAdd argument order
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, yWarp * yWarp );
+
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ); // i.e. sqrt( warpLengthSq ) if InvSqrt is 1/sqrt; NOTE(review): evaluates 0 * InvSqrt( 0 ) when both components are exactly zero - confirm result is not NaN
+    }
+            
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const // 3D warp: updates xOut/yOut/zOut in place and returns a value derived from the unscaled warp vector
+    {
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X ); // Cell corner coordinates pre-multiplied by the per-axis primes for hashing
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
+
+        xs = InterpHermite( x - xs ); // xs/ys/zs are reused as the Lerp weights, computed from the fractional position
+        ys = InterpHermite( y - ys );
+        zs = InterpHermite( z - zs );
+
+    #define GRADIENT_COORD( _x, _y, _z )\
+        int32v hash##_x##_y##_z = HashPrimesHB( seed, x##_x, y##_y, z##_z );\
+        float32v x##_x##_y##_z = FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) );\
+        float32v y##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
+        float32v z##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
+
+        GRADIENT_COORD( 0, 0, 0 ); // Per corner: three consecutive 10-bit hash fields become the x, y and z values
+        GRADIENT_COORD( 1, 0, 0 );
+        GRADIENT_COORD( 0, 1, 0 );
+        GRADIENT_COORD( 1, 1, 0 );
+        GRADIENT_COORD( 0, 0, 1 );
+        GRADIENT_COORD( 1, 0, 1 );
+        GRADIENT_COORD( 0, 1, 1 );
+        GRADIENT_COORD( 1, 1, 1 );
+
+    #undef GRADIENT_COORD
+
+        float32v x0z = Lerp( Lerp( x000, x100, xs ), Lerp( x010, x110, xs ), ys ); // Bilinear blend over the four corners with z index 0
+        float32v y0z = Lerp( Lerp( y000, y100, xs ), Lerp( y010, y110, xs ), ys );
+        float32v z0z = Lerp( Lerp( z000, z100, xs ), Lerp( z010, z110, xs ), ys );
+                   
+        float32v x1z = Lerp( Lerp( x001, x101, xs ), Lerp( x011, x111, xs ), ys ); // Same blend over the four corners with z index 1
+        float32v y1z = Lerp( Lerp( y001, y101, xs ), Lerp( y011, y111, xs ), ys );
+        float32v z1z = Lerp( Lerp( z001, z101, xs ), Lerp( z011, z111, xs ), ys );
+
+        float32v normalise = float32v( 1.0f / (0x3ff / 2.0f) ); // Scale that maps (value - midpoint) from the 0..0x3ff range into -1..1
+
+        float32v xWarp = (Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        float32v yWarp = (Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        float32v zWarp = (Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut ); // presumably xWarp * warpAmp + xOut - confirm FS::FMulAdd argument order
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
+
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, zWarp * zWarp ) );
+
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ); // i.e. sqrt( warpLengthSq ) if InvSqrt is 1/sqrt; NOTE(review): evaluates 0 * InvSqrt( 0 ) when all components are exactly zero - confirm result is not NaN
+    }
+            
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
+    {
+        float32v xs = FS::Floor( x );
+        float32v ys = FS::Floor( y );
+        float32v zs = FS::Floor( z );
+        float32v ws = FS::Floor( w );
+
+        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
+        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
+        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
+        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( Primes::W );
+        int32v x1 = x0 + int32v( Primes::X );
+        int32v y1 = y0 + int32v( Primes::Y );
+        int32v z1 = z0 + int32v( Primes::Z );
+        int32v w1 = w0 + int32v( Primes::W );
+
+        xs = InterpHermite( x - xs );
+        ys = InterpHermite( y - ys );
+        zs = InterpHermite( z - zs );
+        ws = InterpHermite( w - ws );
+
+    #define GRADIENT_COORD( _x, _y, _z, _w )\
+        int32v hash##_x##_y##_z##_w = HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
+        float32v x##_x##_y##_z##_w = FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) );\
+        float32v y##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
+        float32v z##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
+        float32v w##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
+
+        GRADIENT_COORD( 0, 0, 0, 0 );
+        GRADIENT_COORD( 1, 0, 0, 0 );
+        GRADIENT_COORD( 0, 1, 0, 0 );
+        GRADIENT_COORD( 1, 1, 0, 0 );
+        GRADIENT_COORD( 0, 0, 1, 0 );
+        GRADIENT_COORD( 1, 0, 1, 0 );
+        GRADIENT_COORD( 0, 1, 1, 0 );
+        GRADIENT_COORD( 1, 1, 1, 0 );
+        GRADIENT_COORD( 0, 0, 0, 1 );
+        GRADIENT_COORD( 1, 0, 0, 1 );
+        GRADIENT_COORD( 0, 1, 0, 1 );
+        GRADIENT_COORD( 1, 1, 0, 1 );
+        GRADIENT_COORD( 0, 0, 1, 1 );
+        GRADIENT_COORD( 1, 0, 1, 1 );
+        GRADIENT_COORD( 0, 1, 1, 1 );
+        GRADIENT_COORD( 1, 1, 1, 1 );
+
+    #undef GRADIENT_COORD
+
+        float32v x0w = Lerp( Lerp( Lerp( x0000, x1000, xs ), Lerp( x0100, x1100, xs ), ys ), Lerp( Lerp( x0010, x1010, xs ), Lerp( x0110, x1110, xs ), ys ), zs );
+        float32v y0w = Lerp( Lerp( Lerp( y0000, y1000, xs ), Lerp( y0100, y1100, xs ), ys ), Lerp( Lerp( y0010, y1010, xs ), Lerp( y0110, y1110, xs ), ys ), zs );
+        float32v z0w = Lerp( Lerp( Lerp( z0000, z1000, xs ), Lerp( z0100, z1100, xs ), ys ), Lerp( Lerp( z0010, z1010, xs ), Lerp( z0110, z1110, xs ), ys ), zs );
+        float32v w0w = Lerp( Lerp( Lerp( w0000, w1000, xs ), Lerp( w0100, w1100, xs ), ys ), Lerp( Lerp( w0010, w1010, xs ), Lerp( w0110, w1110, xs ), ys ), zs );
+
+        float32v x1w = Lerp( Lerp( Lerp( x0001, x1001, xs ), Lerp( x0101, x1101, xs ), ys ), Lerp( Lerp( x0011, x1011, xs ), Lerp( x0111, x1111, xs ), ys ), zs );
+        float32v y1w = Lerp( Lerp( Lerp( y0001, y1001, xs ), Lerp( y0101, y1101, xs ), ys ), Lerp( Lerp( y0011, y1011, xs ), Lerp( y0111, y1111, xs ), ys ), zs );
+        float32v z1w = Lerp( Lerp( Lerp( z0001, z1001, xs ), Lerp( z0101, z1101, xs ), ys ), Lerp( Lerp( z0011, z1011, xs ), Lerp( z0111, z1111, xs ), ys ), zs );
+        float32v w1w = Lerp( Lerp( Lerp( w0001, w1001, xs ), Lerp( w0101, w1101, xs ), ys ), Lerp( Lerp( w0011, w1011, xs ), Lerp( w0111, w1111, xs ), ys ), zs );                        
+
+        float32v normalise = float32v( 1.0f / (0xff / 2.0f) );
+
+        float32v xWarp = (Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v yWarp = (Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v zWarp = (Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        float32v wWarp = (Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+
+        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
+        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
+        wOut = FS::FMulAdd( wWarp, warpAmp, wOut );
+
+        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, FS::FMulAdd( zWarp, zWarp, wWarp * wWarp ) ) );
+
+        return warpLengthSq * FS::InvSqrt( warpLengthSq );
+    }
+};
+
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 6cca2f04..4cb9000d 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -8,10 +8,10 @@
 #include <tuple>
 #endif
 
-#include "FastNoise/FastNoise_Config.h"
+#include "FastNoise/Utility/Config.h"
 
 #if !defined( FASTNOISE_METADATA ) && defined( __INTELLISENSE__ )
-//#define FASTNOISE_METADATA
+#define FASTNOISE_METADATA
 #endif
 
 namespace FastNoise
diff --git a/include/FastNoise/Metadata.h b/include/FastNoise/Metadata.h
index 2b1944b7..b1f38a66 100644
--- a/include/FastNoise/Metadata.h
+++ b/include/FastNoise/Metadata.h
@@ -5,7 +5,7 @@
 #include <cstdint>
 #include <memory>
 
-#include "FastNoise_Config.h"
+#include "Utility/Config.h"
 
 #pragma warning( push )
 #pragma warning( disable : 4251 )
diff --git a/include/FastNoise/FastNoise_Config.h b/include/FastNoise/Utility/Config.h
similarity index 96%
rename from include/FastNoise/FastNoise_Config.h
rename to include/FastNoise/Utility/Config.h
index 8639c06c..bffd34ea 100644
--- a/include/FastNoise/FastNoise_Config.h
+++ b/include/FastNoise/Utility/Config.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "FastNoise_Export.h"
+#include "Export.h"
 #include <FastSIMD/DispatchClass.h>
 
 #define FASTNOISE_CALC_MIN_MAX true
diff --git a/include/FastNoise/FastNoise_Export.h b/include/FastNoise/Utility/Export.h
similarity index 100%
rename from include/FastNoise/FastNoise_Export.h
rename to include/FastNoise/Utility/Export.h
diff --git a/include/FastNoise/SmartNode.h b/include/FastNoise/Utility/SmartNode.h
similarity index 99%
rename from include/FastNoise/SmartNode.h
rename to include/FastNoise/Utility/SmartNode.h
index a27b9395..43544880 100644
--- a/include/FastNoise/SmartNode.h
+++ b/include/FastNoise/Utility/SmartNode.h
@@ -6,7 +6,7 @@
 #include <type_traits>
 #include <functional>
 
-#include "FastNoise_Config.h"
+#include "Config.h"
 
 namespace FastNoise
 {
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 030501e8..c247a4e8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -30,7 +30,7 @@ set(FastNoise_source
 
 source_group("FastNoise" FILES ${FastNoise_headers})
 source_group("FastNoise" FILES ${FastNoise_source})
-source_group("FastNoise\\Generators" FILES ${FastNoise_generators_headers})
+source_group("FastNoise/Generators" FILES ${FastNoise_generators_headers})
 
 add_library(FastNoise
     ${FastNoise_headers}
@@ -54,7 +54,7 @@ set_target_properties(FastNoise PROPERTIES
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
-fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "../include/FastNoise/FastNoise_BuildList.inl")
+fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "FastNoise/FastSIMD_Build.inl")
 
 target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
 
diff --git a/include/FastNoise/FastNoise_BuildList.inl b/src/FastNoise/FastSIMD_Build.inl
similarity index 70%
rename from include/FastNoise/FastNoise_BuildList.inl
rename to src/FastNoise/FastSIMD_Build.inl
index 63c38b46..cab7aa85 100644
--- a/include/FastNoise/FastNoise_BuildList.inl
+++ b/src/FastNoise/FastSIMD_Build.inl
@@ -7,69 +7,75 @@ static_assert( std::is_final_v<FastSIMD::DispatchClass<CLASS, FastSIMD::FeatureS
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Generator.h"
+#include <FastNoise/Generators/Generator.h>
 #else
-#include "Generators/Generator.inl"
+#include <FastNoise/Generators/Generator.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/BasicGenerators.h"
+#include <FastNoise/Generators/BasicGenerators.h>
 #else
-#include "Generators/BasicGenerators.inl"
+#include <FastNoise/Generators/BasicGenerators.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Value.h"
+#include <FastNoise/Generators/Value.h>
 #else
-#include "Generators/Value.inl"
+#include <FastNoise/Generators/Value.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Perlin.h"
+#include <FastNoise/Generators/Perlin.h>
 #else
-#include "Generators/Perlin.inl"
+#include <FastNoise/Generators/Perlin.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Simplex.h"
+#include <FastNoise/Generators/Simplex.h>
 #else
-#include "Generators/Simplex.inl"
+#include <FastNoise/Generators/Simplex.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Cellular.h"
+#include <FastNoise/Generators/Cellular.h>
 #else
-#include "Generators/Cellular.inl"
+#include <FastNoise/Generators/Cellular.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Fractal.h"
+#include <FastNoise/Generators/Fractal.h>
 #else
-#include "Generators/Fractal.inl"
+#include <FastNoise/Generators/Fractal.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/DomainWarp.h"
+#include <FastNoise/Generators/DomainWarp.h>
 #else
-#include "Generators/DomainWarp.inl"
+#include <FastNoise/Generators/DomainWarp.inl>
+
+#endif
+#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
+#include <FastNoise/Generators/DomainWarpSimplex.h>
+#else
+#include <FastNoise/Generators/DomainWarpSimplex.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/DomainWarpFractal.h"
+#include <FastNoise/Generators/DomainWarpFractal.h>
 #else
-#include "Generators/DomainWarpFractal.inl"
+#include <FastNoise/Generators/DomainWarpFractal.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Modifiers.h"
+#include <FastNoise/Generators/Modifiers.h>
 #else
-#include "Generators/Modifiers.inl"
+#include <FastNoise/Generators/Modifiers.inl>
 #endif
 
 #ifdef FASTSIMD_INCLUDE_HEADER_ONLY
-#include "Generators/Blends.h"
+#include <FastNoise/Generators/Blends.h>
 #else
-#include "Generators/Blends.inl"
+#include <FastNoise/Generators/Blends.inl>
 #endif
 
 // Nodes
@@ -128,3 +134,5 @@ FASTNOISE_REGISTER_NODE( RemoveDimension );
 FASTNOISE_REGISTER_NODE( GeneratorCache );
 FASTNOISE_REGISTER_NODE( SquareRoot );
 FASTNOISE_REGISTER_NODE( Abs );
+
+FASTNOISE_REGISTER_NODE( DomainWarpOpenSimplex );
\ No newline at end of file
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 888c8187..96dcc357 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -480,4 +480,4 @@ SmartNode<> FastNoise::MetadataT<CLASS>::CreateNode( FastSIMD::FeatureSet l ) co
 }
 
 #define FASTSIMD_INCLUDE_HEADER_ONLY
-#include "FastNoise/FastNoise_BuildList.inl"
\ No newline at end of file
+#include "FastSIMD_Build.inl"
\ No newline at end of file
diff --git a/src/FastNoise/SmartNode.cpp b/src/FastNoise/SmartNode.cpp
index aa9372a6..7fd89d2e 100644
--- a/src/FastNoise/SmartNode.cpp
+++ b/src/FastNoise/SmartNode.cpp
@@ -1,8 +1,8 @@
-#include <FastNoise/FastNoise_Config.h>
+#include <FastNoise/Utility/Config.h>
 
 #if !FASTNOISE_USE_SHARED_PTR
 
-#include <FastNoise/SmartNode.h>
+#include <FastNoise/Utility/SmartNode.h>
 
 #include <mutex>
 #include <atomic>

From 197b57846eb7dee288e56e428c0462dffe6876a9 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Fri, 22 Dec 2023 16:09:20 +0000
Subject: [PATCH 056/139] Fix InvSqrt causing nans

---
 include/FastNoise/Generators/Modifiers.inl | 4 +++-
 include/FastNoise/Generators/Utils.inl     | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/FastNoise/Generators/Modifiers.inl b/include/FastNoise/Generators/Modifiers.inl
index cb43c786..e95ccaee 100644
--- a/include/FastNoise/Generators/Modifiers.inl
+++ b/include/FastNoise/Generators/Modifiers.inl
@@ -280,7 +280,9 @@ class FastSIMD::DispatchClass<FastNoise::SquareRoot, SIMD> final : public virtua
     {
         float32v value = this->GetSourceValue( mSource, seed, pos... );
         
-        return FS::InvSqrt( FS::Max( FS::Abs( value ), float32v( FLT_MIN ) ) ) * value;
+        float32v invSqrt = FS::InvSqrt( FS::Abs( value ) );
+
+        return FS::Masked( invSqrt != float32v( INFINITY ), value * invSqrt );
     }
 };
 
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 0470b37d..345abd35 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -251,7 +251,9 @@ namespace FastNoise
                 float32v distSqr = dX * dX;
                 ((distSqr = FS::FMulAdd( d, d, distSqr )), ...);
 
-                return FS::InvSqrt( distSqr ) * distSqr;
+                float32v invSqrt = FS::InvSqrt( distSqr );
+
+                return FS::Masked( invSqrt != float32v( INFINITY ), distSqr * invSqrt );
             }
 
             case DistanceFunction::EuclideanSquared:

From cd48b8a37657f87828f5dfab1161a87bf22b64bb Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 9 Jan 2024 22:53:17 +0000
Subject: [PATCH 057/139] Use BitShiftRightZeroExtend in cellular hashes to
 avoid & op

---
 include/FastNoise/Generators/Cellular.inl | 24 +++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index daae9795..f3fc94f9 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -45,7 +45,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
             {
                 int32v hash = HashPrimesHB( seed, xc, yc );
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+                float32v yd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 16 ) ) - float32v( 0xffff / 2.0f );
 
                 float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, yd * yd ) );
                 xd = FS::FMulAdd( xd, invMag, xcf );
@@ -118,8 +118,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                 {
                     int32v hash = HashPrimesHB( seed, xc, yc, zc );
                     float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS::Convert<float>( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS::Convert<float>( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v yd = FS::Convert<float>( ( hash >> 11 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v zd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 22 ) ) - float32v( 0x3ff / 2.0f );
                 
                     float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
                     xd = FS::FMulAdd( xd, invMag, xcf );
@@ -205,7 +205,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                         float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v wd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 24 ) ) - float32v( 0xff / 2.0f );
 
                         float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
                         xd = FS::FMulAdd( xd, invMag, xcf );
@@ -281,7 +281,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
             {
                 int32v hash = HashPrimesHB( seed, xc, yc );
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+                float32v yd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 16 ) ) - float32v( 0xffff / 2.0f );
 
                 float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, yd * yd ) );
                 xd = FS::FMulAdd( xd, invMag, xcf );
@@ -339,8 +339,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                 {
                     int32v hash = HashPrimesHB( seed, xc, yc, zc );
                     float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v yd = FS::Convert<float>( (hash >> 11) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v zd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 22 ) ) - float32v( 0x3ff / 2.0f );
 
                     float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
                     xd = FS::FMulAdd( xd, invMag, xcf );
@@ -411,7 +411,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                         float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v wd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 24 ) ) - float32v( 0xff / 2.0f );
 
                         float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
                         xd = FS::FMulAdd( xd, invMag, xcf );
@@ -507,7 +507,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
             {
                 int32v hash = HashPrimesHB( seed, xc, yc );
                 float32v xd = FS::Convert<float>( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
-                float32v yd = FS::Convert<float>( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
+                float32v yd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 16 ) ) - float32v( 0xffff / 2.0f );
 
                 float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, yd * yd ) );
                 xd = FS::FMulAdd( xd, invMag, xcf );
@@ -563,8 +563,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                 {
                     int32v hash = HashPrimesHB( seed, xc, yc, zc );
                     float32v xd = FS::Convert<float>( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v yd = FS::Convert<float>( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
-                    float32v zd = FS::Convert<float>( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v yd = FS::Convert<float>( (hash >> 11) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
+                    float32v zd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 22 ) ) - float32v( 0x3ff / 2.0f );
 
                     float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
                     xd = FS::FMulAdd( xd, invMag, xcf );
@@ -634,7 +634,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                         float32v xd = FS::Convert<float>( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v yd = FS::Convert<float>( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                         float32v zd = FS::Convert<float>( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
-                        float32v wd = FS::Convert<float>( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
+                        float32v wd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 24 ) ) - float32v( 0xff / 2.0f );
 
                         float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
                         xd = FS::FMulAdd( xd, invMag, xcf );

From 55ef18793540f291504ec612e357fe6b59caddcd Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 4 Feb 2024 17:07:23 +0000
Subject: [PATCH 058/139] Add zooming support to the node graph editor

---
 tools/CMakeLists.txt          |  2 +-
 tools/FastNoiseNodeEditor.cpp | 30 +++++++++++++++++++++++-------
 tools/NodeEditorApp.cpp       |  1 +
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 3725079e..78fcc8e5 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -73,7 +73,7 @@ find_package(ImGui REQUIRED SourcesMiscCpp)
 CPMAddPackage(
     NAME imnodes
     GITHUB_REPOSITORY Auburn/imnodes
-    GIT_TAG 1aa48f4af2a4f9f1b9a6ed53fe858ed76646b233
+    GIT_TAG 9d89b3e98c91ba2b414c9ad1cdf7e9c48510c6f0
     GIT_SUBMODULES ".github"
     EXCLUDE_FROM_ALL YES
     OPTIONS
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 18b305d7..5f561a0e 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -661,7 +661,7 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
             }
 
             ImGuiExtra::MarkSettingsDirty();
-        }                
+        }
 
         ImNodes::BeginNodeEditor();
         
@@ -673,15 +673,27 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
 
         ImNodes::MiniMap( 0.2f, ImNodesMiniMapLocation_BottomLeft );
 
-#if 0
-        if( ImGui::IsWindowHovered() )
+        ImNodes::EndNodeEditor();
+
+        // Zoom
+        if( ImNodes::IsEditorHovered() && ImGui::GetIO().MouseWheel != 0 )
         {
-            auto zoom = ImNodes::EditorContextGetZoom() + ImGui::GetIO().MouseWheel * 0.1f;
+            float zoom = ImNodes::EditorContextGetZoom();
+            if( ImGui::GetIO().MouseWheel > 0 )
+            {
+                zoom *= 1.5f;
+                if( zoom > 0.9f )
+                {
+                    zoom = 1;
+                }
+            }
+            else
+            {
+                zoom /= 1.5f;
+                zoom = std::max( zoom, 0.2f );
+            }
             ImNodes::EditorContextSetZoom( zoom, ImGui::GetMousePos() );
         }
-#endif
-
-        ImNodes::EndNodeEditor();
 
         CheckLinks();
 
@@ -1115,6 +1127,10 @@ void FastNoiseNodeEditor::DoHelp()
         ImGui::SameLine( alignPx );
         ImGui::TextUnformatted( "Right mouse drag" );
 
+        ImGui::TextUnformatted( "Zoom graph" );
+        ImGui::SameLine( alignPx );
+        ImGui::TextUnformatted( "Mouse wheel" );
+
         ImGui::TextUnformatted( "Delete node/link" );
         ImGui::SameLine( alignPx );
         ImGui::TextUnformatted( "Backspace or Delete" );
diff --git a/tools/NodeEditorApp.cpp b/tools/NodeEditorApp.cpp
index 4a9fa558..b8d6884b 100644
--- a/tools/NodeEditorApp.cpp
+++ b/tools/NodeEditorApp.cpp
@@ -46,6 +46,7 @@ NodeEditorApp::NodeEditorApp( const Arguments& arguments ) :
     }
 
     ImGui::GetIO().IniFilename = "NodeEditor.ini";
+    ImGui::GetIO().ConfigDragClickToInputText = true;
     mImGuiIntegrationContext = ImGuiIntegration::Context( *mImGuiContext, size, windowSize(), framebufferSize() );
 
     GL::Renderer::enable( GL::Renderer::Feature::DepthTest );

From 93e2c04b23d4099f8e57ead468b7a03d7aa1944f Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 5 Feb 2024 00:22:47 +0000
Subject: [PATCH 059/139] Update node editor dependencies

---
 tools/CMakeLists.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 78fcc8e5..e87a5f3a 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,7 +1,7 @@
 CPMAddPackage(
     NAME corrade
     GITHUB_REPOSITORY mosra/corrade
-    GIT_TAG dc51b04b86294b00db45156e5e6cfdb1a6462df9
+    GIT_TAG dfbeae5c4a2ee429ecad3a37121aba3e3d389036
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
@@ -15,7 +15,7 @@ CPMAddPackage(
 CPMAddPackage(
     NAME GLFW
     GITHUB_REPOSITORY glfw/glfw
-    GIT_TAG 3.3.8
+    GIT_TAG 3.3.9
     EXCLUDE_FROM_ALL YES
     OPTIONS
         "BUILD_SHARED_LIBS OFF"
@@ -28,7 +28,7 @@ CPMAddPackage(
 CPMAddPackage(
     NAME magnum
     GITHUB_REPOSITORY mosra/magnum
-    GIT_TAG 68f6d75ee3f2aa4bf20ac3df706303e586cff323
+    GIT_TAG b1ba1f076d3e8b4295b1afac94e95ff8a846e619
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
@@ -45,7 +45,7 @@ CPMAddPackage(
 CPMAddPackage(
     NAME imgui
     GITHUB_REPOSITORY ocornut/imgui
-    GIT_TAG 0ea3b87bd63ecbf359585b7c235839146e84dedb
+    GIT_TAG v1.90.1-docking
     EXCLUDE_FROM_ALL YES
     DOWNLOAD_ONLY YES
 )
@@ -55,7 +55,7 @@ set(IMGUI_DIR ${imgui_SOURCE_DIR})
 CPMAddPackage(
     NAME magnum-integration
     GITHUB_REPOSITORY mosra/magnum-integration
-    GIT_TAG 1a66b05bd7db0a5484366054ddc678bebf79921e
+    GIT_TAG 05cbe5f85593b7d4252048df98f0bc3bb48b540d
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS

From 82f296dabad51a282a3a8ab603a5f34fa0291b55 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 5 Feb 2024 01:07:35 +0000
Subject: [PATCH 060/139] Update imnodes to fix gcc and msvc

---
 tools/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index e87a5f3a..33de909e 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -73,7 +73,7 @@ find_package(ImGui REQUIRED SourcesMiscCpp)
 CPMAddPackage(
     NAME imnodes
     GITHUB_REPOSITORY Auburn/imnodes
-    GIT_TAG 9d89b3e98c91ba2b414c9ad1cdf7e9c48510c6f0
+    GIT_TAG 32e2136a0f79ab96537088b8a757618a90bf8785
     GIT_SUBMODULES ".github"
     EXCLUDE_FROM_ALL YES
     OPTIONS

From c4881c0da6bfc63ab9e56fa31eee15eba62d925f Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 6 Feb 2024 00:19:03 +0000
Subject: [PATCH 061/139] Update ignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index a8cde1a5..79926da2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,5 @@
 /enc_temp_folder
 /cpm-cache
 /CMakeUserPresets.json
+
+external/

From 2bbf3826323d778e5dc9b526e3ffbeaa337fe786 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Tue, 13 Feb 2024 19:32:40 +0000
Subject: [PATCH 062/139] Editor IPC and detachable node graph window (#131)

* Remove the use of dynamic_cast in headers (#128) fixes #127

Allows linking with non-rtti projects

* Update ignore

* Update ignore

* Update node editor dependencies

* IPv6 multicast IPC

* Better UX, correct multicast setup, linux/macos support hopefully

* Switch to IPv4 multicast on loopback interface

* Use shared memory for IPC instead of multicast

* Add IPC Unix support, clean up code and move into its own file for easier copying to other projects

* Add error debug log on node graph detach

* Update main.yml

* Better IPC memory safety

* Fix IPC and detached node graph on MacOS

* Cleanup detached argument detection
---
 .github/workflows/main.yml               |   5 +-
 .gitignore                               |   2 +
 include/FastNoise/Generators/Generator.h |   2 +-
 tools/CMakeLists.txt                     |   2 +-
 tools/FastNoiseNodeEditor.cpp            | 272 ++++++++++++++++++++---
 tools/FastNoiseNodeEditor.h              |  20 +-
 tools/NodeEditorApp.cpp                  | 148 ++++++------
 tools/NodeEditorApp.h                    |  19 ++
 tools/SharedMemoryIpc.inl                | 117 ++++++++++
 9 files changed, 479 insertions(+), 108 deletions(-)
 create mode 100644 tools/SharedMemoryIpc.inl

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 672c0134..b2b460a3 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -3,10 +3,11 @@ name: CI
 # Controls when the action will run. Triggers the workflow on push or pull request
 # events but only for the master branch
 on:
+  workflow_dispatch:
   push:
-    branches: [master,NewFastSIMD]
-  pull_request:
     branches: [master]
+  pull_request:
+    branches: [master,NewFastSIMD]
   release:
     types: [published]
 
diff --git a/.gitignore b/.gitignore
index a8cde1a5..79926da2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,5 @@
 /enc_temp_folder
 /cpm-cache
 /CMakeUserPresets.json
+
+external/
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 4cb9000d..47b34cd9 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -149,7 +149,7 @@ namespace FastNoise
 
             assert( !gen.get() || GetActiveFeatureSet() == gen->GetActiveFeatureSet() ); // Ensure that all SIMD levels match
 
-            SetSourceSIMDPtr( dynamic_cast<const Generator*>( gen.get() ), &memberVariable.simdGeneratorPtr );
+            SetSourceSIMDPtr( static_cast<const Generator*>( gen.get() ), &memberVariable.simdGeneratorPtr );
             memberVariable.base = gen;
         }
 
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 33de909e..df3cc04d 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -73,7 +73,7 @@ find_package(ImGui REQUIRED SourcesMiscCpp)
 CPMAddPackage(
     NAME imnodes
     GITHUB_REPOSITORY Auburn/imnodes
-    GIT_TAG 32e2136a0f79ab96537088b8a757618a90bf8785
+    GIT_TAG 4ccaf656b09fd6b69bdac36f2532756760bd0aa3
     GIT_SUBMODULES ".github"
     EXCLUDE_FROM_ALL YES
     OPTIONS
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 5f561a0e..856364a1 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -1,6 +1,7 @@
 #include <sstream>
 #include <random>
 #include <cstdio>
+#include <atomic>
 
 #define IMGUI_DEFINE_MATH_OPERATORS
 #include <imgui.h>
@@ -15,9 +16,78 @@
 #include "ImGuiExtra.h"
 #include "FastNoiseNodeEditor.h"
 #include "DemoNodeTrees.inl"
+#include "NodeEditorApp.h"
 
 using namespace Magnum;
 
+#include "SharedMemoryIpc.inl"
+
+static constexpr const char* kNodeGraphSettingsFile = "NodeGraph.ini";
+
+void FastNoiseNodeEditor::OpenStandaloneNodeGraph()
+{
+#ifdef WIN32
+    std::string startArgs = "\"";
+    startArgs += mNodeEditorApp.GetExecutablePath();
+    startArgs += "\" -detached";
+
+    STARTUPINFOA si;
+    PROCESS_INFORMATION pi;
+
+    ZeroMemory( &si, sizeof( si ) );
+    si.cb = sizeof( si );
+    ZeroMemory( &pi, sizeof( pi ) );
+
+    // Create a job object
+    HANDLE hJob = CreateJobObject( NULL, NULL );
+    JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli = { 0 };
+
+    // Configure the job object to terminate processes when the handle is closed
+    jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;
+    SetInformationJobObject( hJob, JobObjectExtendedLimitInformation, &jeli, sizeof( jeli ) );
+
+    // Start the child process.
+    if( CreateProcessA( NULL, // No module name (use command line)
+                         (LPSTR)startArgs.data(), // Command line
+                         NULL, // Process handle not inheritable
+                         NULL, // Thread handle not inheritable
+                         FALSE, // Set handle inheritance to FALSE
+                         0, // No creation flags
+                         NULL, // Use parent's environment block
+                         NULL, // Use parent's starting directory
+                         &si, // Pointer to STARTUPINFO structure
+                         &pi ) ) // Pointer to PROCESS_INFORMATION structure
+    {
+        // Assign the child process to the job object
+        AssignProcessToJobObject( hJob, pi.hProcess );
+
+        // Close handles to the child process and primary thread
+        CloseHandle( pi.hProcess );
+        CloseHandle( pi.hThread );
+    }
+    else
+#else
+    pid_t pid = fork(); // Duplicate current process
+
+    if( pid == 0 )
+    {
+        // Child process
+        const char* executable = mNodeEditorApp.GetExecutablePath().data(); // Path to the current executable
+        execl( executable, executable, "-detached", (char*)NULL );
+        // If execl returns, it means it has failed
+        exit( EXIT_FAILURE ); // Ensure the child process exits if execl fails
+    }
+    if( pid < 0 )
+#endif
+    {
+        Debug {} << "Failed to launch standalone node graph process"
+#ifdef WIN32
+            << GetLastError()
+#endif
+        ;
+    }
+}
+
 static bool MatchingGroup( const std::vector<const char*>& a, const std::vector<const char*>& b )
 {
     std::string aString;
@@ -196,7 +266,7 @@ void FastNoiseNodeEditor::Node::GeneratePreview( bool nodeTreeChanged, bool benc
         }
 
         // Save nodes to ini
-        ImGuiExtra::MarkSettingsDirty();
+        editor.mSettingsDirty = true;
     }
 }
 
@@ -517,14 +587,18 @@ void FastNoiseNodeEditor::SetupSettingsHandlers()
     ImGuiExtra::AddOrReplaceSettingsHandler( nodeSettings );
 }
 
-FastNoiseNodeEditor::FastNoiseNodeEditor() :
+FastNoiseNodeEditor::FastNoiseNodeEditor( NodeEditorApp& nodeEditorApp ) :
+    mNodeEditorApp( nodeEditorApp ),
     mOverheadNode( *this, new FastNoise::NodeData( &FastNoise::Metadata::Get<FastNoise::Constant>() ), false )
 {
+    if( !mNodeEditorApp.IsDetachedNodeGraph() )
+    {
 #ifdef IMGUI_HAS_DOCK
-    ImGui::GetIO().ConfigFlags |= ImGuiConfigFlags_DockingEnable;
-    ImGui::GetIO().ConfigFlags |= ImGuiConfigFlags_ViewportsEnable;
+        ImGui::GetIO().ConfigFlags |= ImGuiConfigFlags_DockingEnable;
+        ImGui::GetIO().ConfigFlags |= ImGuiConfigFlags_ViewportsEnable;
 #endif
-    ImGui::GetIO().ConfigWindowsResizeFromEdges = true;
+        ImGui::GetIO().ConfigWindowsResizeFromEdges = true;
+    }
     ImGui::GetIO().ConfigFlags |= ImGuiConfigFlags_NavEnableSetMousePos;
     ImGui::GetIO().ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard;
 
@@ -539,8 +613,6 @@ FastNoiseNodeEditor::FastNoiseNodeEditor() :
 #ifndef NDEBUG
     mNodeBenchmarkMax = 1;
 #endif
-    
-    SetupSettingsHandlers();
 
     // Create Metadata context menu tree
     std::unordered_map<std::string, MetadataMenuGroup*> groupMap;
@@ -578,6 +650,17 @@ FastNoiseNodeEditor::FastNoiseNodeEditor() :
     }    
 }
 
+FastNoiseNodeEditor::~FastNoiseNodeEditor()
+{
+    // Go into node graph context and trigger save
+    ImGuiContext* currentContext = ImGui::GetCurrentContext();
+    ImGui::SetCurrentContext( ImNodes::GetNodeEditorImGuiContext() );
+    ImGui::SaveIniSettingsToDisk( kNodeGraphSettingsFile );
+    ImGui::SetCurrentContext( currentContext );
+
+    ImNodes::DestroyContext();
+}
+
 void FastNoiseNodeEditor::DoNodeBenchmarks()
 {
     // Benchmark overhead every frame to keep it accurate
@@ -610,18 +693,49 @@ void FastNoiseNodeEditor::DoNodeBenchmarks()
 
 void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& projection, const Vector3& cameraPosition )
 {
+#ifndef WIN32
+    static pid_t parentPid = getppid();
+
+    if( getppid() != parentPid ) 
+    {
+        mNodeEditorApp.exit();
+    }
+#endif
+
+    DoIpcPolling();
+
+    bool isDetachedNodeEditor = mNodeEditorApp.IsDetachedNodeGraph();
     const ImGuiViewport* viewport = ImGui::GetMainViewport();
-    ImGui::DockSpaceOverViewport( viewport, ImGuiDockNodeFlags_PassthruCentralNode ); 
 
-    std::string simdTxt = "Current Feature Set: ";
-    simdTxt += FastSIMD::GetFeatureSetString( mActualFeatureSet );
-    ImGui::TextUnformatted( simdTxt.c_str() );
+    ImGuiWindowFlags windowFlags = 0;
+    ImGuiWindow* nodeGraphWindow = ImGui::FindWindowByName( "Node Graph" );
+
+    if( isDetachedNodeEditor )
+    {
+        ImGui::SetNextWindowSize( viewport->WorkSize );
+        ImGui::SetNextWindowPos( ImVec2( 0, 0 ) );
+        windowFlags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings;
+    }
+    else if( nodeGraphWindow && nodeGraphWindow->Collapsed )
+    {
+        // Avoid saving over the window position when it is minimised from detach
+        windowFlags = ImGuiWindowFlags_NoSavedSettings;
+    }
+    else
+    {
+        ImGui::DockSpaceOverViewport( viewport, ImGuiDockNodeFlags_PassthruCentralNode );     
+        
+        std::string simdTxt = "Current Feature Set: ";
+        simdTxt += FastSIMD::GetFeatureSetString( mActualFeatureSet );
+        ImGui::TextUnformatted( simdTxt.c_str() );
+
+        ImGui::DragInt( "Node Benchmark Count", &mNodeBenchmarkMax, 8, 8, 64 * 1024 );
 
-    ImGui::DragInt( "Node Benchmark Count", &mNodeBenchmarkMax, 8, 8, 64 * 1024 );
+        ImGui::SetNextWindowSize( ImVec2( 963, 634 ), ImGuiCond_FirstUseEver );
+        ImGui::SetNextWindowPos( ImVec2( 8, 439 ), ImGuiCond_FirstUseEver );
+    }
 
-    ImGui::SetNextWindowSize( ImVec2( 963, 634 ), ImGuiCond_FirstUseEver );
-    ImGui::SetNextWindowPos( ImVec2( 8, 439 ), ImGuiCond_FirstUseEver );
-    if( ImGui::Begin( "Node Editor" ) )
+    if( ImGui::Begin( "Node Graph", nullptr, windowFlags ) )
     {
         UpdateSelected();
 
@@ -651,6 +765,19 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
             ImGui::EndTooltip();
         }
 
+        bool openStandalonenodeGraph = false;
+        if( !isDetachedNodeEditor )
+        {
+            ImGui::SameLine();
+            if( ImGui::Button( "Detach Node Graph" ) )
+            {
+                openStandalonenodeGraph = true;
+
+                ImGui::SetWindowCollapsed( true );
+                ImGui::GetCurrentWindow()->Pos = ImVec2( 0, 0 );
+            }
+        }
+
         ImGui::PopItemWidth();
         
         if( edited )
@@ -660,11 +787,34 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
                 node.second.GeneratePreview( false );
             }
 
-            ImGuiExtra::MarkSettingsDirty();
+            mSettingsDirty = true;
         }
 
         ImNodes::BeginNodeEditor();
-        
+
+        // Setup setting handles in zoom context
+        if( ImGui::GetFrameCount() == 1 )
+        {
+            SetupSettingsHandlers();
+            ImGui::LoadIniSettingsFromDisk( kNodeGraphSettingsFile );
+        }
+        if( mSettingsDirty )
+        {
+            ImGui::MarkIniSettingsDirty();
+            mSettingsDirty = false;
+        }
+        if( ImGui::GetIO().WantSaveIniSettings || openStandalonenodeGraph )
+        {
+            ImGui::SaveIniSettingsToDisk( kNodeGraphSettingsFile );
+            ImGui::GetIO().WantSaveIniSettings = false;
+        }
+
+        // Open this after saving settings
+        if( openStandalonenodeGraph )
+        {
+            OpenStandaloneNodeGraph();
+        }
+
         DoHelp();
 
         DoContextMenu();
@@ -702,9 +852,12 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
 
     DoNodeBenchmarks();
 
-    mNoiseTexture.Draw();
+    if( !isDetachedNodeEditor )
+    {
+        mNoiseTexture.Draw();
 
-    mMeshNoisePreview.Draw( transformation, projection, cameraPosition );
+        mMeshNoisePreview.Draw( transformation, projection, cameraPosition );
+    }
 }
 
 void FastNoiseNodeEditor::CheckLinks()
@@ -846,7 +999,7 @@ void FastNoiseNodeEditor::SetSIMDLevel( FastSIMD::FeatureSet lvl )
         node.second.GeneratePreview( false );
     }
 
-    ChangeSelectedNode( mSelectedNode );
+    SetPreviewGenerator( mCachedActiveEnt );
 }
 
 void FastNoiseNodeEditor::DoNodes()
@@ -1244,25 +1397,16 @@ void FastNoiseNodeEditor::DoContextMenu()
     ImGui::PopStyleVar();
 }
 
-FastNoise::SmartNode<> FastNoiseNodeEditor::GenerateSelectedPreview()
+std::string_view FastNoiseNodeEditor::GetSelectedEncodedNodeTree()
 {
     auto find = mNodes.find( mSelectedNode );
 
-    FastNoise::SmartNode<> generator;
-
     if( find != mNodes.end() )
     {
-        generator = FastNoise::NewFromEncodedNodeTree( find->second.serialised.c_str(), mMaxFeatureSet );
-
-        if( generator )
-        {
-            mActualFeatureSet = generator->GetActiveFeatureSet();
-        }
+        return find->second.serialised;
     }
 
-    mNoiseTexture.ReGenerate( generator );
-
-    return generator;
+    return { "" };
 }
 
 FastNoise::OutputMinMax FastNoiseNodeEditor::GenerateNodePreviewNoise( FastNoise::Generator* gen, float* noise )
@@ -1327,10 +1471,70 @@ void FastNoiseNodeEditor::ChangeSelectedNode( FastNoise::NodeData* newId )
 {
     mSelectedNode = newId;
 
-    FastNoise::SmartNode<> generator = GenerateSelectedPreview();
+    std::string_view encodedNodeTree = GetSelectedEncodedNodeTree();
+
+    if( !encodedNodeTree.empty() )
+    {
+        // Send updated node tree via IPC
+        unsigned char* sharedMemory = static_cast<unsigned char*>( mNodeEditorApp.GetIpcSharedMemory() );
+
+        if( encodedNodeTree.length() + 3 >= kSharedMemorySize )
+        {
+            Debug {} << "Encoded node tree too large to send via IPC " << encodedNodeTree.length();
+            sharedMemory = nullptr;
+        }
+
+        if( sharedMemory )
+        {
+            memcpy( sharedMemory + 2, encodedNodeTree.data(), encodedNodeTree.length() + 1 );
+            sharedMemory[1] = 0;
+
+            std::atomic_thread_fence( std::memory_order_acq_rel );
+            sharedMemory[0]++; // Increment counter to mark updated tree
+        }
+        else
+        {
+            SetPreviewGenerator( encodedNodeTree );
+        }
+    }
+}
+
+void FastNoiseNodeEditor::SetPreviewGenerator( std::string_view encodedNodeTree )
+{
+    auto SetActiveEnt = [this]( std::string_view encodedNodeTree )
+    {
+        if( GetSelectedEncodedNodeTree() != encodedNodeTree )
+        {
+            mSelectedNode = nullptr;
+        }
+
+        mCachedActiveEnt = encodedNodeTree;
+    };
+
+    FastNoise::SmartNode<> generator = FastNoise::NewFromEncodedNodeTree( encodedNodeTree.data(), mMaxFeatureSet );
 
     if( generator )
     {
-        mMeshNoisePreview.ReGenerate( generator );
+        mActualFeatureSet = generator->GetActiveFeatureSet();
+
+        if( !mNodeEditorApp.IsDetachedNodeGraph() )
+        {
+            mNoiseTexture.ReGenerate( generator );
+            mMeshNoisePreview.ReGenerate( generator );
+        }
+
+        if( !encodedNodeTree.empty() )
+        {
+            SetActiveEnt( encodedNodeTree );
+        }
+    }
+    else if( encodedNodeTree.empty() )
+    {
+        SetActiveEnt( encodedNodeTree );
+    }
+    else
+    {
+        Debug {} << "Invalid encoded node tree";
     }
 }
+
diff --git a/tools/FastNoiseNodeEditor.h b/tools/FastNoiseNodeEditor.h
index 4e47f07b..ed2db70e 100644
--- a/tools/FastNoiseNodeEditor.h
+++ b/tools/FastNoiseNodeEditor.h
@@ -18,12 +18,20 @@
 
 namespace Magnum
 {
+    class NodeEditorApp;
+
     class FastNoiseNodeEditor
     {
     public:
-        FastNoiseNodeEditor();
+        FastNoiseNodeEditor( NodeEditorApp& nodeEditorApp );
+        ~FastNoiseNodeEditor();
+
         void Draw( const Matrix4& transformation, const Matrix4& projection, const Vector3& cameraPosition );
         void SetSIMDLevel( FastSIMD::FeatureSet lvl );
+        void DoIpcPolling();
+
+        static void* SetupSharedMemoryIpc();
+        static void ReleaseSharedMemoryIpc();
 
     private:
         struct Node
@@ -100,14 +108,16 @@ namespace Magnum
 
         Node& AddNode( ImVec2 startPos, const FastNoise::Metadata* metadata, bool generatePreview = true );
         bool AddNodeFromEncodedString( const char* string, ImVec2 nodePos );
-        FastNoise::SmartNode<> GenerateSelectedPreview();
+        std::string_view GetSelectedEncodedNodeTree();
         FastNoise::OutputMinMax GenerateNodePreviewNoise( FastNoise::Generator* gen, float* noise );
         Node* FindNodeFromId( int id );
         int GetFreeNodeId();
+        void SetPreviewGenerator( std::string_view encodedNodeTree );
         void ChangeSelectedNode( FastNoise::NodeData* newId );
         void DeleteNode( FastNoise::NodeData* nodeData );
         void DoNodeBenchmarks();
         void SetupSettingsHandlers();
+        void OpenStandaloneNodeGraph();
 
         void CheckLinks();
         void DoHelp();
@@ -115,6 +125,8 @@ namespace Magnum
         void DoNodes();
         void UpdateSelected();
 
+        NodeEditorApp& mNodeEditorApp;
+
         std::unordered_map<FastNoise::NodeData*, Node> mNodes;
         FastNoise::NodeData* mDroppedLinkNode = nullptr;
         bool mDroppedLink = false;
@@ -123,10 +135,12 @@ namespace Magnum
         std::vector<std::unique_ptr<MetadataMenu>> mContextMetadata;
         std::string mImportNodeString;
         bool mImportNodeModal = false;
+        bool mSettingsDirty = false;
 
         MeshNoisePreview mMeshNoisePreview;
         NoiseTexture mNoiseTexture;
 
+        std::string mCachedActiveEnt;
         FastNoise::NodeData* mSelectedNode = nullptr;
         Node mOverheadNode;
         int32_t mNodeBenchmarkIndex = 0;
@@ -139,4 +153,4 @@ namespace Magnum
         FastSIMD::FeatureSet mMaxFeatureSet    = FastSIMD::FeatureSet::Max;
         FastSIMD::FeatureSet mActualFeatureSet = FastSIMD::FeatureSet::Invalid;
     };
-}
\ No newline at end of file
+}
diff --git a/tools/NodeEditorApp.cpp b/tools/NodeEditorApp.cpp
index b8d6884b..3bb9d323 100644
--- a/tools/NodeEditorApp.cpp
+++ b/tools/NodeEditorApp.cpp
@@ -21,17 +21,26 @@ void InitResources()
 #endif
 }
 
+static bool IsDetached( const NodeEditorApp::Arguments& arguments )
+{
+    return arguments.argc > 1 && std::string_view { arguments.argv[1] } == "-detached";
+}
+
 NodeEditorApp::NodeEditorApp( const Arguments& arguments ) :
     Platform::Application{ arguments,
-    Configuration{}
-    .setTitle( "FastNoise2 Node Editor" )
-    .setSize( Vector2i( 1280, 720 ) )
-    .setWindowFlags( Configuration::WindowFlag::Resizable | Configuration::WindowFlag::Maximized ),
-    GLConfiguration{}
-    .setSampleCount( 4 )
+        Configuration{}
+        .setTitle( IsDetached( arguments ) ? "FastNoise2 Node Graph" : "FastNoise2 Node Editor" )
+        .setSize( Vector2i( 1280, 720 ) )
+        .setWindowFlags( Configuration::WindowFlag::Resizable | ( IsDetached( arguments ) ? (Configuration::WindowFlag)0 : Configuration::WindowFlag::Maximized ) ),
+        GLConfiguration{}
+        .setSampleCount( 4 )
     },
+    mIsDetachedNodeGraph( IsDetached( arguments ) ),
+    mExecutablePath( arguments.argv[0] ),
+    mIpcSharedMemory( FastNoiseNodeEditor::SetupSharedMemoryIpc() ),
     mImGuiIntegrationContext{ NoCreate },
-    mImGuiContext{ ImGui::CreateContext() }
+    mImGuiContext{ ImGui::CreateContext() },
+    mNodeEditor( *this )
 {
     InitResources();
 
@@ -82,6 +91,8 @@ NodeEditorApp::~NodeEditorApp()
     // Avoid trying to save settings after node editor is already destroyed
     ImGui::SaveIniSettingsToDisk( ImGui::GetIO().IniFilename );
     ImGui::GetIO().IniFilename = nullptr;
+
+    FastNoiseNodeEditor::ReleaseSharedMemoryIpc();
 }
 
 void NodeEditorApp::drawEvent()
@@ -96,74 +107,77 @@ void NodeEditorApp::drawEvent()
     else if( !ImGui::GetIO().WantTextInput && isTextInputActive() )
         stopTextInput();
 
+    if( !mIsDetachedNodeGraph )
     {
-        if( ImGui::Button( "Reset State" ) )
         {
-            ImGui::ClearIniSettings();
-            mNodeEditor.~FastNoiseNodeEditor();
-            new( &mNodeEditor ) FastNoiseNodeEditor();
-            ImGui::SaveIniSettingsToDisk( ImGui::GetIO().IniFilename );
+            if( ImGui::Button( "Reset State" ) )
+            {
+                ImGui::ClearIniSettings();
+                mNodeEditor.~FastNoiseNodeEditor();
+                new( &mNodeEditor ) FastNoiseNodeEditor( *this );
+                ImGui::SaveIniSettingsToDisk( ImGui::GetIO().IniFilename );
+            }
+
+            if( ImGui::ColorEdit3( "Clear Color", mClearColor.data() ) )
+                GL::Renderer::setClearColor( mClearColor );
+
+            ImGui::Checkbox( "Backface Culling", &mBackFaceCulling );
+
+            ImGui::Text( "Application average %.3f ms/frame (%.1f FPS)",
+                         1000.0 / Double( ImGui::GetIO().Framerate ), Double( ImGui::GetIO().Framerate ) );
+
+            if( ImGui::Combo( "Max Feature Set", &mMaxFeatureSet, mFeatureSetNames.data(), (int)mFeatureSetSelection.size() ) ||
+                ImGuiExtra::ScrollCombo( &mMaxFeatureSet, (int)mFeatureSetSelection.size() ) )
+            {
+                FastSIMD::FeatureSet newLevel = mFeatureSetSelection[mMaxFeatureSet];
+                mNodeEditor.SetSIMDLevel( newLevel );
+            }
         }
 
-        if( ImGui::ColorEdit3( "Clear Color", mClearColor.data() ) )
-            GL::Renderer::setClearColor( mClearColor );
-
-        ImGui::Checkbox( "Backface Culling", &mBackFaceCulling );
-
-        ImGui::Text( "Application average %.3f ms/frame (%.1f FPS)",
-            1000.0 / Double( ImGui::GetIO().Framerate ), Double( ImGui::GetIO().Framerate ) );
-
-        if( ImGui::Combo( "Max Feature Set", &mMaxFeatureSet, mFeatureSetNames.data(), (int)mFeatureSetSelection.size() ) ||
-            ImGuiExtra::ScrollCombo( &mMaxFeatureSet, (int)mFeatureSetSelection.size() ) )
-        {   
-            FastSIMD::FeatureSet newLevel = mFeatureSetSelection[mMaxFeatureSet];
-            mNodeEditor.SetSIMDLevel( newLevel );
+        // Update camera pos
+        Vector3 cameraVelocity( 0 );
+        if( mKeyDown[Key_W] || mKeyDown[Key_Up] )
+        {
+            cameraVelocity.z() -= 1.0f;
+        }
+        if( mKeyDown[Key_S] || mKeyDown[Key_Down] )
+        {
+            cameraVelocity.z() += 1.0f;
+        }
+        if( mKeyDown[Key_A] || mKeyDown[Key_Left] )
+        {
+            cameraVelocity.x() -= 1.0f;
+        }
+        if( mKeyDown[Key_D] || mKeyDown[Key_Right] )
+        {
+            cameraVelocity.x() += 1.0f;
+        }
+        if( mKeyDown[Key_Q] || mKeyDown[Key_PgDn] )
+        {
+            cameraVelocity.y() -= 1.0f;
+        }
+        if( mKeyDown[Key_E] || mKeyDown[Key_PgUp] )
+        {
+            cameraVelocity.y() += 1.0f;
+        }
+        if( mKeyDown[Key_RShift] || mKeyDown[Key_LShift] )
+        {
+            cameraVelocity *= 4.0f;
         }
-    }
-
-    // Update camera pos
-    Vector3 cameraVelocity( 0 );
-    if( mKeyDown[Key_W] || mKeyDown[Key_Up] )
-    {
-        cameraVelocity.z() -= 1.0f;
-    }
-    if( mKeyDown[Key_S] || mKeyDown[Key_Down] )
-    {
-        cameraVelocity.z() += 1.0f;
-    }
-    if( mKeyDown[Key_A] || mKeyDown[Key_Left] )
-    {
-        cameraVelocity.x() -= 1.0f;
-    }
-    if( mKeyDown[Key_D] || mKeyDown[Key_Right] )
-    {
-        cameraVelocity.x() += 1.0f;
-    }
-    if( mKeyDown[Key_Q] || mKeyDown[Key_PgDn] )
-    {
-        cameraVelocity.y() -= 1.0f;
-    }
-    if( mKeyDown[Key_E] || mKeyDown[Key_PgUp] )
-    {
-        cameraVelocity.y() += 1.0f;
-    }
-    if( mKeyDown[Key_RShift] || mKeyDown[Key_LShift] )
-    {
-        cameraVelocity *= 4.0f;
-    }
 
-    cameraVelocity *= mFrameTime.previousFrameDuration() * 80.0f;
+        cameraVelocity *= mFrameTime.previousFrameDuration() * 80.0f;
 
-    if( !cameraVelocity.isZero() ) 
-    {
-        Matrix4 transform = mCameraObject.transformation();
-        transform.translation() += transform.rotation() * cameraVelocity;
-        mCameraObject.setTransformation( transform );
-    }
+        if( !cameraVelocity.isZero() )
+        {
+            Matrix4 transform = mCameraObject.transformation();
+            transform.translation() += transform.rotation() * cameraVelocity;
+            mCameraObject.setTransformation( transform );
+        }
 
-    if( mBackFaceCulling )
-    {
-        GL::Renderer::enable( GL::Renderer::Feature::FaceCulling );
+        if( mBackFaceCulling )
+        {
+            GL::Renderer::enable( GL::Renderer::Feature::FaceCulling );
+        }
     }
 
     mNodeEditor.Draw( mCamera.cameraMatrix(), mCamera.projectionMatrix(), mCameraObject.transformation().translation() );
diff --git a/tools/NodeEditorApp.h b/tools/NodeEditorApp.h
index 60ce3df0..be9839cf 100644
--- a/tools/NodeEditorApp.h
+++ b/tools/NodeEditorApp.h
@@ -18,6 +18,21 @@ namespace Magnum
         explicit NodeEditorApp( const Arguments& arguments );
         ~NodeEditorApp();
 
+        bool IsDetachedNodeGraph()
+        {
+            return mIsDetachedNodeGraph;
+        }
+
+        void* GetIpcSharedMemory()
+        {
+            return mIpcSharedMemory;
+        }
+
+        std::string_view GetExecutablePath()
+        {
+            return mExecutablePath;
+        }
+
     private:
         void drawEvent() override;
         void viewportEvent( ViewportEvent& event ) override;
@@ -33,6 +48,10 @@ namespace Magnum
         void UpdatePespectiveProjection();
         void HandleKeyEvent( KeyEvent::Key key, bool value );
 
+        bool mIsDetachedNodeGraph;
+        std::string mExecutablePath;
+        void* mIpcSharedMemory;
+
         SceneGraph::Object<SceneGraph::MatrixTransformation3D> mCameraObject;
         SceneGraph::Camera3D mCamera{ mCameraObject };
         Vector2 mLookAngle{ 0 };
diff --git a/tools/SharedMemoryIpc.inl b/tools/SharedMemoryIpc.inl
new file mode 100644
index 00000000..0f213f4f
--- /dev/null
+++ b/tools/SharedMemoryIpc.inl
@@ -0,0 +1,117 @@
+#ifdef _WIN32
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+#else
+#include <fcntl.h> // For O_* constants
+#include <sys/mman.h> // For shared memory
+#include <sys/stat.h> // For mode constants
+#include <unistd.h>
+#endif
+
+static constexpr const char* kSharedMemoryName = "/FastNoise2NodeEditor";
+static constexpr unsigned int kSharedMemorySize = 64 * 1024;
+
+// Setup shared memory for IPC selected node ENT updates
+void* FastNoiseNodeEditor::SetupSharedMemoryIpc()
+{
+#ifdef WIN32
+    // Create a shared memory file mapping
+    HANDLE hMapFile = CreateFileMapping(
+        INVALID_HANDLE_VALUE, // Use paging file - shared memory
+        NULL, // Default security attributes
+        PAGE_READWRITE, // Read/write access
+        0, // Maximum object size (high-order DWORD)
+        kSharedMemorySize, // Maximum object size (low-order DWORD)
+        kSharedMemoryName ); // Name of mapping object
+
+    if( hMapFile == NULL )
+    {
+        Debug {} << "Failed to create IPC shared memory object" << GetLastError();
+        return nullptr;
+    }
+
+    // Map a view of the file mapping into the address space of the current process
+    void* ptr = MapViewOfFile( hMapFile, // Handle to map object
+        FILE_MAP_ALL_ACCESS, // Read/write permission
+        0,
+        0,
+        kSharedMemorySize );
+
+    if( !ptr )
+    {
+        Debug {} << "Failed to map IPC shared memory" << GetLastError();
+    }
+    return ptr;
+
+#else
+    // Create the shared memory object
+    int shmFd = shm_open( kSharedMemoryName, O_CREAT | O_RDWR, 0666 );
+    if( shmFd == -1 )
+    {
+        Debug {} << "Failed to create IPC shared memory object";
+        return nullptr;
+    }
+
+    // Configure the size of the shared memory object
+    if( ftruncate( shmFd, kSharedMemorySize ) == -1 )
+    {
+        if( errno != EINVAL ) // If the error is not just because it's already the right size
+        {
+            Debug {} << "Failed to config IPC shared memory object";
+            return nullptr;
+        }
+    }
+
+    // Memory map the shared memory object
+    void* ptr = mmap( 0, kSharedMemorySize, PROT_READ | PROT_WRITE, MAP_SHARED, shmFd, 0 );
+    if( ptr == MAP_FAILED )
+    {
+        Debug {} << "Failed to map IPC shared memory object";
+        return nullptr;
+    }
+    return ptr;
+#endif
+}
+
+void FastNoiseNodeEditor::ReleaseSharedMemoryIpc()
+{
+#ifndef WIN32
+    shm_unlink( kSharedMemoryName );
+#endif
+}
+
+// Poll for changes in the shared memory space
+void FastNoiseNodeEditor::DoIpcPolling()
+{
+    const void* sharedMemory = mNodeEditorApp.GetIpcSharedMemory();
+
+    if( sharedMemory )
+    {
+        const unsigned char sharedCounter = *static_cast<const unsigned char*>( sharedMemory );
+        const unsigned char dataType = *( static_cast<const unsigned char*>( sharedMemory ) + 1 );
+
+        // Invalidate the counter to read initial stale data only if it's type 0
+        static int counter = ( dataType == 0 ) ? 0xFFFFFF : sharedCounter;
+
+        if( sharedCounter != counter )
+        {
+            counter = sharedCounter;
+
+            // Check type
+            switch( dataType )
+            {
+            default:
+                Debug {} << "Unknown IPC data type" << dataType;
+                break;
+            case 0: // Selected node ENT
+            {
+                std::string newEncodedNodeTree = static_cast<const char*>( sharedMemory ) + 2;
+
+                SetPreviewGenerator( newEncodedNodeTree );
+            }
+            break;
+            }
+        }
+    }
+}
\ No newline at end of file

From 4d6375c503ec6b93ed0f5d169bbbd505ec0de175 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 13 Feb 2024 21:06:53 +0000
Subject: [PATCH 063/139] Keep selected node when detaching node graph

---
 tools/FastNoiseNodeEditor.cpp | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 856364a1..5c74e5a0 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -558,6 +558,13 @@ void FastNoiseNodeEditor::SetupSettingsHandlers()
         outBuf->appendf( "scale=%f\n", nodeEditor->mNodeScale );
         outBuf->appendf( "seed=%d\n", nodeEditor->mNodeSeed );
         outBuf->appendf( "gen_type=%d\n", (int)nodeEditor->mNodeGenType );
+        
+        auto find = nodeEditor->mNodes.find( nodeEditor->mSelectedNode );
+
+        if( find != nodeEditor->mNodes.end() )
+        {
+            outBuf->appendf( "selected_node=%d\n", find->second.nodeId );
+        }
     };
     editorSettings.ReadOpenFn = []( ImGuiContext* ctx, ImGuiSettingsHandler* handler, const char* name ) -> void*
     {
@@ -581,10 +588,22 @@ void FastNoiseNodeEditor::SetupSettingsHandlers()
         sscanf( line, "scale=%f", &nodeEditor->mNodeScale );
         sscanf( line, "seed=%d", &nodeEditor->mNodeSeed );
         sscanf( line, "gen_type=%d", (int*)&nodeEditor->mNodeGenType );
+
+        if( nodeEditor->mNodeEditorApp.IsDetachedNodeGraph() )
+        {
+            int i;
+            if( sscanf( line, "selected_node=%d", &i ) == 1 )
+            {
+                if( Node* selectedNode = nodeEditor->FindNodeFromId( i ) )
+                {
+                    nodeEditor->mSelectedNode = selectedNode->data.get();
+                }
+            }
+        }
     };
 
-    ImGuiExtra::AddOrReplaceSettingsHandler( editorSettings );
     ImGuiExtra::AddOrReplaceSettingsHandler( nodeSettings );
+    ImGuiExtra::AddOrReplaceSettingsHandler( editorSettings );
 }
 
 FastNoiseNodeEditor::FastNoiseNodeEditor( NodeEditorApp& nodeEditorApp ) :

From 0049bd0398eded81b16b6dd5d2ff811f5a74633b Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 7 Mar 2024 20:30:50 +0000
Subject: [PATCH 064/139] Shared memory code tidy

---
 src/FastNoise/Metadata.cpp    | 7 +++++++
 tools/FastNoiseNodeEditor.cpp | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 96dcc357..1073aa66 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -430,6 +430,13 @@ std::string Metadata::FormatMetadataNodeName( const Metadata* metadata, bool rem
             }
         }
     }
+
+    // Fallback since empty strings cause imgui errors
+    if( string.empty() )
+    {
+        return metadata->name;
+    }
+
     return string;
 }
 
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 5c74e5a0..7b128975 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -40,7 +40,8 @@ void FastNoiseNodeEditor::OpenStandaloneNodeGraph()
 
     // Create a job object
     HANDLE hJob = CreateJobObject( NULL, NULL );
-    JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli = { 0 };
+    JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
+    ZeroMemory( &jeli, sizeof( jeli ) );
 
     // Configure the job object to terminate processes when the handle is closed
     jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;

From c31c80cf3ec27186b5844436d0fb26c1039eb38b Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 10 Mar 2024 23:40:58 +0000
Subject: [PATCH 065/139] Fix MSVC warnings

---
 tools/CMakeLists.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index df3cc04d..d190f016 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,7 +1,7 @@
 CPMAddPackage(
     NAME corrade
     GITHUB_REPOSITORY mosra/corrade
-    GIT_TAG dfbeae5c4a2ee429ecad3a37121aba3e3d389036
+    GIT_TAG 295bbba1f49887da060465f88b8501965f6acd7d
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
@@ -28,7 +28,7 @@ CPMAddPackage(
 CPMAddPackage(
     NAME magnum
     GITHUB_REPOSITORY mosra/magnum
-    GIT_TAG b1ba1f076d3e8b4295b1afac94e95ff8a846e619
+    GIT_TAG 7d0a8215d38284f7b7ae041cfbb19d410e5988a6
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
@@ -55,7 +55,7 @@ set(IMGUI_DIR ${imgui_SOURCE_DIR})
 CPMAddPackage(
     NAME magnum-integration
     GITHUB_REPOSITORY mosra/magnum-integration
-    GIT_TAG 05cbe5f85593b7d4252048df98f0bc3bb48b540d
+    GIT_TAG f01593fc94556bff23a848ac71187c56e034b6d9
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
@@ -142,6 +142,7 @@ endif()
 
 if (MSVC)
     target_compile_definitions(NodeEditor PRIVATE _CRT_SECURE_NO_WARNINGS=1)
+    target_compile_options(NodeEditor PRIVATE /wd4244)
 endif()
 
 set(install_targets ${install_targets} NodeEditor PARENT_SCOPE)

From b6fd9556003c2a19d4b968f9e84da56b34b100f6 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Mon, 11 Mar 2024 19:06:35 +0000
Subject: [PATCH 066/139] Initial DMC meshing

---
 tools/CMakeLists.txt          |   2 +
 tools/DmcTable.inl            | 332 +++++++++++++++++++++++++++++
 tools/FastNoiseNodeEditor.cpp |  22 +-
 tools/MeshNoisePreview.cpp    | 390 ++++++++++++++++++++++++++++++++--
 tools/MeshNoisePreview.h      |  23 +-
 5 files changed, 732 insertions(+), 37 deletions(-)
 create mode 100644 tools/DmcTable.inl

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index df3cc04d..b6176d7b 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -123,6 +123,8 @@ target_link_libraries(NodeEditor PRIVATE
     robin_hood
 )
 
+target_compile_features(NodeEditor PRIVATE cxx_std_20)
+
 # Windows HiDPI support
 if(CORRADE_TARGET_WINDOWS)
     target_sources(NodeEditor PRIVATE WindowsHiDPI.manifest)
diff --git a/tools/DmcTable.inl b/tools/DmcTable.inl
new file mode 100644
index 00000000..c5c24a47
--- /dev/null
+++ b/tools/DmcTable.inl
@@ -0,0 +1,332 @@
+namespace DMC
+{
+    enum EdgeCode : uint16_t // One bit flag per cell edge (edge numbering: see the "Cell Edges" diagram below)
+    {
+        EDGE0 = 1, // bit 0; each EDGEn is bit n, so sets of edges are formed by OR-ing flags together
+        EDGE1 = 1 << 1,
+        EDGE2 = 1 << 2,
+        EDGE3 = 1 << 3,
+        EDGE4 = 1 << 4,
+        EDGE5 = 1 << 5,
+        EDGE6 = 1 << 6,
+        EDGE7 = 1 << 7,
+        EDGE8 = 1 << 8,
+        EDGE9 = 1 << 9,
+        EDGE10 = 1 << 10,
+        EDGE11 = 1 << 11, // highest flag; all 12 edge bits fit in the uint16_t underlying type
+    };
+
+    //  Coordinate system
+    //
+    //       y
+    //       |
+    //       |
+    //       |
+    //       0-----x
+    //      /
+    //     /
+    //    z
+    //
+
+    // Cell Corners
+    // (Corners are voxels. Numbers correspond to Morton codes of corner coordinates)
+    //
+    //       2-------------------3
+    //      /|                  /|
+    //     / |                 / |
+    //    /  |                /  |
+    //   6-------------------7   |
+    //   |   |               |   |
+    //   |   |               |   |
+    //   |   |               |   |
+    //   |   |               |   |
+    //   |   0---------------|---1
+    //   |  /                |  /
+    //   | /                 | /
+    //   |/                  |/
+    //   4-------------------5
+    //
+
+
+    //         Cell Edges
+    //
+    //       o--------4----------o
+    //      /|                  /|
+    //     7 |                 5 |
+    //    /  |                /  |
+    //   o--------6----------o   |
+    //   |   8               |   9
+    //   |   |               |   |
+    //   |   |               |   |
+    //   11  |               10  |
+    //   |   o--------0------|---o
+    //   |  /                |  /
+    //   | 3                 | 1
+    //   |/                  |/
+    //   o--------2----------o
+    //
+
+    // Encodes the edge vertices for the 256 marching cubes cases.
+    // A marching cube case produces up to four faces and, thus, up to four
+    // dual points.
+
+    const uint16_t kDualPointsList[256][4] = {
+        { 0, 0, 0, 0 }, // 0
+        { EDGE0 | EDGE3 | EDGE8, 0, 0, 0 }, // 1
+        { EDGE0 | EDGE1 | EDGE9, 0, 0, 0 }, // 2
+        { EDGE1 | EDGE3 | EDGE8 | EDGE9, 0, 0, 0 }, // 3
+        { EDGE4 | EDGE7 | EDGE8, 0, 0, 0 }, // 4
+        { EDGE0 | EDGE3 | EDGE4 | EDGE7, 0, 0, 0 }, // 5
+        { EDGE0 | EDGE1 | EDGE9, EDGE4 | EDGE7 | EDGE8, 0, 0 }, // 6
+        { EDGE1 | EDGE3 | EDGE4 | EDGE7 | EDGE9, 0, 0, 0 }, // 7
+        { EDGE4 | EDGE5 | EDGE9, 0, 0, 0 }, // 8
+        { EDGE0 | EDGE3 | EDGE8, EDGE4 | EDGE5 | EDGE9, 0, 0 }, // 9
+        { EDGE0 | EDGE1 | EDGE4 | EDGE5, 0, 0, 0 }, // 10
+        { EDGE1 | EDGE3 | EDGE4 | EDGE5 | EDGE8, 0, 0, 0 }, // 11
+        { EDGE5 | EDGE7 | EDGE8 | EDGE9, 0, 0, 0 }, // 12
+        { EDGE0 | EDGE3 | EDGE5 | EDGE7 | EDGE9, 0, 0, 0 }, // 13
+        { EDGE0 | EDGE1 | EDGE5 | EDGE7 | EDGE8, 0, 0, 0 }, // 14
+        { EDGE1 | EDGE3 | EDGE5 | EDGE7, 0, 0, 0 }, // 15
+        { EDGE2 | EDGE3 | EDGE11, 0, 0, 0 }, // 16
+        { EDGE0 | EDGE2 | EDGE8 | EDGE11, 0, 0, 0 }, // 17
+        { EDGE0 | EDGE1 | EDGE9, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 18
+        { EDGE1 | EDGE2 | EDGE8 | EDGE9 | EDGE11, 0, 0, 0 }, // 19
+        { EDGE4 | EDGE7 | EDGE8, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 20
+        { EDGE0 | EDGE2 | EDGE4 | EDGE7 | EDGE11, 0, 0, 0 }, // 21
+        { EDGE0 | EDGE1 | EDGE9, EDGE4 | EDGE7 | EDGE8, EDGE2 | EDGE3 | EDGE11, 0 }, // 22
+        { EDGE1 | EDGE2 | EDGE4 | EDGE7 | EDGE9 | EDGE11, 0, 0, 0 }, // 23
+        { EDGE4 | EDGE5 | EDGE9, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 24
+        { EDGE0 | EDGE2 | EDGE8 | EDGE11, EDGE4 | EDGE5 | EDGE9, 0, 0 }, // 25
+        { EDGE0 | EDGE1 | EDGE4 | EDGE5, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 26
+        { EDGE1 | EDGE2 | EDGE4 | EDGE5 | EDGE8 | EDGE11, 0, 0, 0 }, // 27
+        { EDGE5 | EDGE7 | EDGE8 | EDGE9, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 28
+        { EDGE0 | EDGE2 | EDGE5 | EDGE7 | EDGE9 | EDGE11, 0, 0, 0 }, // 29
+        { EDGE0 | EDGE1 | EDGE5 | EDGE7 | EDGE8, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 30
+        { EDGE1 | EDGE2 | EDGE5 | EDGE7 | EDGE11, 0, 0, 0 }, // 31
+        { EDGE1 | EDGE2 | EDGE10, 0, 0, 0 }, // 32
+        { EDGE0 | EDGE3 | EDGE8, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 33
+        { EDGE0 | EDGE2 | EDGE9 | EDGE10, 0, 0, 0 }, // 34
+        { EDGE2 | EDGE3 | EDGE8 | EDGE9 | EDGE10, 0, 0, 0 }, // 35
+        { EDGE4 | EDGE7 | EDGE8, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 36
+        { EDGE0 | EDGE3 | EDGE4 | EDGE7, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 37
+        { EDGE0 | EDGE2 | EDGE9 | EDGE10, EDGE4 | EDGE7 | EDGE8, 0, 0 }, // 38
+        { EDGE2 | EDGE3 | EDGE4 | EDGE7 | EDGE9 | EDGE10, 0, 0, 0 }, // 39
+        { EDGE4 | EDGE5 | EDGE9, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 40
+        { EDGE0 | EDGE3 | EDGE8, EDGE4 | EDGE5 | EDGE9, EDGE1 | EDGE2 | EDGE10, 0 }, // 41
+        { EDGE0 | EDGE2 | EDGE4 | EDGE5 | EDGE10, 0, 0, 0 }, // 42
+        { EDGE2 | EDGE3 | EDGE4 | EDGE5 | EDGE8 | EDGE10, 0, 0, 0 }, // 43
+        { EDGE5 | EDGE7 | EDGE8 | EDGE9, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 44
+        { EDGE0 | EDGE3 | EDGE5 | EDGE7 | EDGE9, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 45
+        { EDGE0 | EDGE2 | EDGE5 | EDGE7 | EDGE8 | EDGE10, 0, 0, 0 }, // 46
+        { EDGE2 | EDGE3 | EDGE5 | EDGE7 | EDGE10, 0, 0, 0 }, // 47
+        { EDGE1 | EDGE3 | EDGE10 | EDGE11, 0, 0, 0 }, // 48
+        { EDGE0 | EDGE1 | EDGE8 | EDGE10 | EDGE11, 0, 0, 0 }, // 49
+        { EDGE0 | EDGE3 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 50
+        { EDGE8 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 51
+        { EDGE4 | EDGE7 | EDGE8, EDGE1 | EDGE3 | EDGE10 | EDGE11, 0, 0 }, // 52
+        { EDGE0 | EDGE1 | EDGE4 | EDGE7 | EDGE10 | EDGE11, 0, 0, 0 }, // 53
+        { EDGE0 | EDGE3 | EDGE9 | EDGE10 | EDGE11, EDGE4 | EDGE7 | EDGE8, 0, 0 }, // 54
+        { EDGE4 | EDGE7 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 55
+        { EDGE4 | EDGE5 | EDGE9, EDGE1 | EDGE3 | EDGE10 | EDGE11, 0, 0 }, // 56
+        { EDGE0 | EDGE1 | EDGE8 | EDGE10 | EDGE11, EDGE4 | EDGE5 | EDGE9, 0, 0 }, // 57
+        { EDGE0 | EDGE3 | EDGE4 | EDGE5 | EDGE10 | EDGE11, 0, 0, 0 }, // 58
+        { EDGE4 | EDGE5 | EDGE8 | EDGE10 | EDGE11, 0, 0, 0 }, // 59
+        { EDGE5 | EDGE7 | EDGE8 | EDGE9, EDGE1 | EDGE3 | EDGE10 | EDGE11, 0, 0 }, // 60
+        { EDGE0 | EDGE1 | EDGE5 | EDGE7 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 61
+        { EDGE0 | EDGE3 | EDGE5 | EDGE7 | EDGE8 | EDGE10 | EDGE11, 0, 0, 0 }, // 62
+        { EDGE5 | EDGE7 | EDGE10 | EDGE11, 0, 0, 0 }, // 63
+        { EDGE6 | EDGE7 | EDGE11, 0, 0, 0 }, // 64
+        { EDGE0 | EDGE3 | EDGE8, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 65
+        { EDGE0 | EDGE1 | EDGE9, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 66
+        { EDGE1 | EDGE3 | EDGE8 | EDGE9, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 67
+        { EDGE4 | EDGE6 | EDGE8 | EDGE11, 0, 0, 0 }, // 68
+        { EDGE0 | EDGE3 | EDGE4 | EDGE6 | EDGE11, 0, 0, 0 }, // 69
+        { EDGE0 | EDGE1 | EDGE9, EDGE4 | EDGE6 | EDGE8 | EDGE11, 0, 0 }, // 70
+        { EDGE1 | EDGE3 | EDGE4 | EDGE6 | EDGE9 | EDGE11, 0, 0, 0 }, // 71
+        { EDGE4 | EDGE5 | EDGE9, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 72
+        { EDGE0 | EDGE3 | EDGE8, EDGE4 | EDGE5 | EDGE9, EDGE6 | EDGE7 | EDGE11, 0 }, // 73
+        { EDGE0 | EDGE1 | EDGE4 | EDGE5, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 74
+        { EDGE1 | EDGE3 | EDGE4 | EDGE5 | EDGE8, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 75
+        { EDGE5 | EDGE6 | EDGE8 | EDGE9 | EDGE11, 0, 0, 0 }, // 76
+        { EDGE0 | EDGE3 | EDGE5 | EDGE6 | EDGE9 | EDGE11, 0, 0, 0 }, // 77
+        { EDGE0 | EDGE1 | EDGE5 | EDGE6 | EDGE8 | EDGE11, 0, 0, 0 }, // 78
+        { EDGE1 | EDGE3 | EDGE5 | EDGE6 | EDGE11, 0, 0, 0 }, // 79
+        { EDGE2 | EDGE3 | EDGE6 | EDGE7, 0, 0, 0 }, // 80
+        { EDGE0 | EDGE2 | EDGE6 | EDGE7 | EDGE8, 0, 0, 0 }, // 81
+        { EDGE0 | EDGE1 | EDGE9, EDGE2 | EDGE3 | EDGE6 | EDGE7, 0, 0 }, // 82
+        { EDGE1 | EDGE2 | EDGE6 | EDGE7 | EDGE8 | EDGE9, 0, 0, 0 }, // 83
+        { EDGE2 | EDGE3 | EDGE4 | EDGE6 | EDGE8, 0, 0, 0 }, // 84
+        { EDGE0 | EDGE2 | EDGE4 | EDGE6, 0, 0, 0 }, // 85
+        { EDGE0 | EDGE1 | EDGE9, EDGE2 | EDGE3 | EDGE4 | EDGE6 | EDGE8, 0, 0 }, // 86
+        { EDGE1 | EDGE2 | EDGE4 | EDGE6 | EDGE9, 0, 0, 0 }, // 87
+        { EDGE4 | EDGE5 | EDGE9, EDGE2 | EDGE3 | EDGE6 | EDGE7, 0, 0 }, // 88
+        { EDGE0 | EDGE2 | EDGE6 | EDGE7 | EDGE8, EDGE4 | EDGE5 | EDGE9, 0, 0 }, // 89
+        { EDGE0 | EDGE1 | EDGE4 | EDGE5, EDGE2 | EDGE3 | EDGE6 | EDGE7, 0, 0 }, // 90
+        { EDGE1 | EDGE2 | EDGE4 | EDGE5 | EDGE6 | EDGE7 | EDGE8, 0, 0, 0 }, // 91
+        { EDGE2 | EDGE3 | EDGE5 | EDGE6 | EDGE8 | EDGE9, 0, 0, 0 }, // 92
+        { EDGE0 | EDGE2 | EDGE5 | EDGE6 | EDGE9, 0, 0, 0 }, // 93
+        { EDGE0 | EDGE1 | EDGE2 | EDGE3 | EDGE5 | EDGE6 | EDGE8, 0, 0, 0 }, // 94
+        { EDGE1 | EDGE2 | EDGE5 | EDGE6, 0, 0, 0 }, // 95
+        { EDGE1 | EDGE2 | EDGE10, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 96
+        { EDGE0 | EDGE3 | EDGE8, EDGE1 | EDGE2 | EDGE10, EDGE6 | EDGE7 | EDGE11, 0 }, // 97
+        { EDGE0 | EDGE2 | EDGE9 | EDGE10, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 98
+        { EDGE2 | EDGE3 | EDGE8 | EDGE9 | EDGE10, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 99
+        { EDGE4 | EDGE6 | EDGE8 | EDGE11, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 100
+        { EDGE0 | EDGE3 | EDGE4 | EDGE6 | EDGE11, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 101
+        { EDGE0 | EDGE2 | EDGE9 | EDGE10, EDGE4 | EDGE6 | EDGE8 | EDGE11, 0, 0 }, // 102
+        { EDGE2 | EDGE3 | EDGE4 | EDGE6 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 103
+        { EDGE4 | EDGE5 | EDGE9, EDGE1 | EDGE2 | EDGE10, EDGE6 | EDGE7 | EDGE11, 0 }, // 104
+        { EDGE0 | EDGE3 | EDGE8, EDGE4 | EDGE5 | EDGE9, EDGE1 | EDGE2 | EDGE10, EDGE6 | EDGE7 | EDGE11 }, // 105
+        { EDGE0 | EDGE2 | EDGE4 | EDGE5 | EDGE10, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 106
+        { EDGE2 | EDGE3 | EDGE4 | EDGE5 | EDGE8 | EDGE10, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 107
+        { EDGE5 | EDGE6 | EDGE8 | EDGE9 | EDGE11, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 108
+        { EDGE0 | EDGE3 | EDGE5 | EDGE6 | EDGE9 | EDGE11, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 109
+        { EDGE0 | EDGE2 | EDGE5 | EDGE6 | EDGE8 | EDGE10 | EDGE11, 0, 0, 0 }, // 110
+        { EDGE2 | EDGE3 | EDGE5 | EDGE6 | EDGE10 | EDGE11, 0, 0, 0 }, // 111
+        { EDGE1 | EDGE3 | EDGE6 | EDGE7 | EDGE10, 0, 0, 0 }, // 112
+        { EDGE0 | EDGE1 | EDGE6 | EDGE7 | EDGE8 | EDGE10, 0, 0, 0 }, // 113
+        { EDGE0 | EDGE3 | EDGE6 | EDGE7 | EDGE9 | EDGE10, 0, 0, 0 }, // 114
+        { EDGE6 | EDGE7 | EDGE8 | EDGE9 | EDGE10, 0, 0, 0 }, // 115
+        { EDGE1 | EDGE3 | EDGE4 | EDGE6 | EDGE8 | EDGE10, 0, 0, 0 }, // 116
+        { EDGE0 | EDGE1 | EDGE4 | EDGE6 | EDGE10, 0, 0, 0 }, // 117
+        { EDGE0 | EDGE3 | EDGE4 | EDGE6 | EDGE8 | EDGE9 | EDGE10, 0, 0, 0 }, // 118
+        { EDGE4 | EDGE6 | EDGE9 | EDGE10, 0, 0, 0 }, // 119
+        { EDGE4 | EDGE5 | EDGE9, EDGE1 | EDGE3 | EDGE6 | EDGE7 | EDGE10, 0, 0 }, // 120
+        { EDGE0 | EDGE1 | EDGE6 | EDGE7 | EDGE8 | EDGE10, EDGE4 | EDGE5 | EDGE9, 0, 0 }, // 121
+        { EDGE0 | EDGE3 | EDGE4 | EDGE5 | EDGE6 | EDGE7 | EDGE10, 0, 0, 0 }, // 122
+        { EDGE4 | EDGE5 | EDGE6 | EDGE7 | EDGE8 | EDGE10, 0, 0, 0 }, // 123
+        { EDGE1 | EDGE3 | EDGE5 | EDGE6 | EDGE8 | EDGE9 | EDGE10, 0, 0, 0 }, // 124
+        { EDGE0 | EDGE1 | EDGE5 | EDGE6 | EDGE9 | EDGE10, 0, 0, 0 }, // 125
+        { EDGE0 | EDGE3 | EDGE8, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 126
+        { EDGE5 | EDGE6 | EDGE10, 0, 0, 0 }, // 127
+        { EDGE5 | EDGE6 | EDGE10, 0, 0, 0 }, // 128
+        { EDGE0 | EDGE3 | EDGE8, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 129
+        { EDGE0 | EDGE1 | EDGE9, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 130
+        { EDGE1 | EDGE3 | EDGE8 | EDGE9, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 131
+        { EDGE4 | EDGE7 | EDGE8, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 132
+        { EDGE0 | EDGE3 | EDGE4 | EDGE7, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 133
+        { EDGE0 | EDGE1 | EDGE9, EDGE4 | EDGE7 | EDGE8, EDGE5 | EDGE6 | EDGE10, 0 }, // 134
+        { EDGE1 | EDGE3 | EDGE4 | EDGE7 | EDGE9, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 135
+        { EDGE4 | EDGE6 | EDGE9 | EDGE10, 0, 0, 0 }, // 136
+        { EDGE0 | EDGE3 | EDGE8, EDGE4 | EDGE6 | EDGE9 | EDGE10, 0, 0 }, // 137
+        { EDGE0 | EDGE1 | EDGE4 | EDGE6 | EDGE10, 0, 0, 0 }, // 138
+        { EDGE1 | EDGE3 | EDGE4 | EDGE6 | EDGE8 | EDGE10, 0, 0, 0 }, // 139
+        { EDGE6 | EDGE7 | EDGE8 | EDGE9 | EDGE10, 0, 0, 0 }, // 140
+        { EDGE0 | EDGE3 | EDGE6 | EDGE7 | EDGE9 | EDGE10, 0, 0, 0 }, // 141
+        { EDGE0 | EDGE1 | EDGE6 | EDGE7 | EDGE8 | EDGE10, 0, 0, 0 }, // 142
+        { EDGE1 | EDGE3 | EDGE6 | EDGE7 | EDGE10, 0, 0, 0 }, // 143
+        { EDGE2 | EDGE3 | EDGE11, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 144
+        { EDGE0 | EDGE2 | EDGE8 | EDGE11, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 145
+        { EDGE0 | EDGE1 | EDGE9, EDGE2 | EDGE3 | EDGE11, EDGE5 | EDGE6 | EDGE10, 0 }, // 146
+        { EDGE1 | EDGE2 | EDGE8 | EDGE9 | EDGE11, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 147
+        { EDGE4 | EDGE7 | EDGE8, EDGE2 | EDGE3 | EDGE11, EDGE5 | EDGE6 | EDGE10, 0 }, // 148
+        { EDGE0 | EDGE2 | EDGE4 | EDGE7 | EDGE11, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 149
+        { EDGE0 | EDGE1 | EDGE9, EDGE4 | EDGE7 | EDGE8, EDGE2 | EDGE3 | EDGE11, EDGE5 | EDGE6 | EDGE10 }, // 150
+        { EDGE1 | EDGE2 | EDGE4 | EDGE7 | EDGE9 | EDGE11, EDGE5 | EDGE6 | EDGE10, 0, 0 }, // 151
+        { EDGE4 | EDGE6 | EDGE9 | EDGE10, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 152
+        { EDGE0 | EDGE2 | EDGE8 | EDGE11, EDGE4 | EDGE6 | EDGE9 | EDGE10, 0, 0 }, // 153
+        { EDGE0 | EDGE1 | EDGE4 | EDGE6 | EDGE10, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 154
+        { EDGE1 | EDGE2 | EDGE4 | EDGE6 | EDGE8 | EDGE10 | EDGE11, 0, 0, 0 }, // 155
+        { EDGE6 | EDGE7 | EDGE8 | EDGE9 | EDGE10, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 156
+        { EDGE0 | EDGE2 | EDGE6 | EDGE7 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 157
+        { EDGE0 | EDGE1 | EDGE6 | EDGE7 | EDGE8 | EDGE10, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 158
+        { EDGE1 | EDGE2 | EDGE6 | EDGE7 | EDGE10 | EDGE11, 0, 0, 0 }, // 159
+        { EDGE1 | EDGE2 | EDGE5 | EDGE6, 0, 0, 0 }, // 160
+        { EDGE0 | EDGE3 | EDGE8, EDGE1 | EDGE2 | EDGE5 | EDGE6, 0, 0 }, // 161
+        { EDGE0 | EDGE2 | EDGE5 | EDGE6 | EDGE9, 0, 0, 0 }, // 162
+        { EDGE2 | EDGE3 | EDGE5 | EDGE6 | EDGE8 | EDGE9, 0, 0, 0 }, // 163
+        { EDGE4 | EDGE7 | EDGE8, EDGE1 | EDGE2 | EDGE5 | EDGE6, 0, 0 }, // 164
+        { EDGE0 | EDGE3 | EDGE4 | EDGE7, EDGE1 | EDGE2 | EDGE5 | EDGE6, 0, 0 }, // 165
+        { EDGE0 | EDGE2 | EDGE5 | EDGE6 | EDGE9, EDGE4 | EDGE7 | EDGE8, 0, 0 }, // 166
+        { EDGE2 | EDGE3 | EDGE4 | EDGE5 | EDGE6 | EDGE7 | EDGE9, 0, 0, 0 }, // 167
+        { EDGE1 | EDGE2 | EDGE4 | EDGE6 | EDGE9, 0, 0, 0 }, // 168
+        { EDGE0 | EDGE3 | EDGE8, EDGE1 | EDGE2 | EDGE4 | EDGE6 | EDGE9, 0, 0 }, // 169
+        { EDGE0 | EDGE2 | EDGE4 | EDGE6, 0, 0, 0 }, // 170
+        { EDGE2 | EDGE3 | EDGE4 | EDGE6 | EDGE8, 0, 0, 0 }, // 171
+        { EDGE1 | EDGE2 | EDGE6 | EDGE7 | EDGE8 | EDGE9, 0, 0, 0 }, // 172
+        { EDGE0 | EDGE1 | EDGE2 | EDGE3 | EDGE6 | EDGE7 | EDGE9, 0, 0, 0 }, // 173
+        { EDGE0 | EDGE2 | EDGE6 | EDGE7 | EDGE8, 0, 0, 0 }, // 174
+        { EDGE2 | EDGE3 | EDGE6 | EDGE7, 0, 0, 0 }, // 175
+        { EDGE1 | EDGE3 | EDGE5 | EDGE6 | EDGE11, 0, 0, 0 }, // 176
+        { EDGE0 | EDGE1 | EDGE5 | EDGE6 | EDGE8 | EDGE11, 0, 0, 0 }, // 177
+        { EDGE0 | EDGE3 | EDGE5 | EDGE6 | EDGE9 | EDGE11, 0, 0, 0 }, // 178
+        { EDGE5 | EDGE6 | EDGE8 | EDGE9 | EDGE11, 0, 0, 0 }, // 179
+        { EDGE4 | EDGE7 | EDGE8, EDGE1 | EDGE3 | EDGE5 | EDGE6 | EDGE11, 0, 0 }, // 180
+        { EDGE0 | EDGE1 | EDGE4 | EDGE5 | EDGE6 | EDGE7 | EDGE11, 0, 0, 0 }, // 181
+        { EDGE0 | EDGE3 | EDGE5 | EDGE6 | EDGE9 | EDGE11, EDGE4 | EDGE7 | EDGE8, 0, 0 }, // 182
+        { EDGE4 | EDGE5 | EDGE6 | EDGE7 | EDGE9 | EDGE11, 0, 0, 0 }, // 183
+        { EDGE1 | EDGE3 | EDGE4 | EDGE6 | EDGE9 | EDGE11, 0, 0, 0 }, // 184
+        { EDGE0 | EDGE1 | EDGE4 | EDGE6 | EDGE8 | EDGE9 | EDGE11, 0, 0, 0 }, // 185
+        { EDGE0 | EDGE3 | EDGE4 | EDGE6 | EDGE11, 0, 0, 0 }, // 186
+        { EDGE4 | EDGE6 | EDGE8 | EDGE11, 0, 0, 0 }, // 187
+        { EDGE1 | EDGE3 | EDGE6 | EDGE7 | EDGE8 | EDGE9 | EDGE11, 0, 0, 0 }, // 188
+        { EDGE0 | EDGE1 | EDGE9, EDGE6 | EDGE7 | EDGE11, 0, 0 }, // 189
+        { EDGE0 | EDGE3 | EDGE6 | EDGE7 | EDGE8 | EDGE11, 0, 0, 0 }, // 190
+        { EDGE6 | EDGE7 | EDGE11, 0, 0, 0 }, // 191
+        { EDGE5 | EDGE7 | EDGE10 | EDGE11, 0, 0, 0 }, // 192
+        { EDGE0 | EDGE3 | EDGE8, EDGE5 | EDGE7 | EDGE10 | EDGE11, 0, 0 }, // 193
+        { EDGE0 | EDGE1 | EDGE9, EDGE5 | EDGE7 | EDGE10 | EDGE11, 0, 0 }, // 194
+        { EDGE1 | EDGE3 | EDGE8 | EDGE9, EDGE5 | EDGE7 | EDGE10 | EDGE11, 0, 0 }, // 195
+        { EDGE4 | EDGE5 | EDGE8 | EDGE10 | EDGE11, 0, 0, 0 }, // 196
+        { EDGE0 | EDGE3 | EDGE4 | EDGE5 | EDGE10 | EDGE11, 0, 0, 0 }, // 197
+        { EDGE0 | EDGE1 | EDGE9, EDGE4 | EDGE5 | EDGE8 | EDGE10 | EDGE11, 0, 0 }, // 198
+        { EDGE1 | EDGE3 | EDGE4 | EDGE5 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 199
+        { EDGE4 | EDGE7 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 200
+        { EDGE0 | EDGE3 | EDGE8, EDGE4 | EDGE7 | EDGE9 | EDGE10 | EDGE11, 0, 0 }, // 201
+        { EDGE0 | EDGE1 | EDGE4 | EDGE7 | EDGE10 | EDGE11, 0, 0, 0 }, // 202
+        { EDGE1 | EDGE3 | EDGE4 | EDGE7 | EDGE8 | EDGE10 | EDGE11, 0, 0, 0 }, // 203
+        { EDGE8 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 204
+        { EDGE0 | EDGE3 | EDGE9 | EDGE10 | EDGE11, 0, 0, 0 }, // 205
+        { EDGE0 | EDGE1 | EDGE8 | EDGE10 | EDGE11, 0, 0, 0 }, // 206
+        { EDGE1 | EDGE3 | EDGE10 | EDGE11, 0, 0, 0 }, // 207
+        { EDGE2 | EDGE3 | EDGE5 | EDGE7 | EDGE10, 0, 0, 0 }, // 208
+        { EDGE0 | EDGE2 | EDGE5 | EDGE7 | EDGE8 | EDGE10, 0, 0, 0 }, // 209
+        { EDGE0 | EDGE1 | EDGE9, EDGE2 | EDGE3 | EDGE5 | EDGE7 | EDGE10, 0, 0 }, // 210
+        { EDGE1 | EDGE2 | EDGE5 | EDGE7 | EDGE8 | EDGE9 | EDGE10, 0, 0, 0 }, // 211
+        { EDGE2 | EDGE3 | EDGE4 | EDGE5 | EDGE8 | EDGE10, 0, 0, 0 }, // 212
+        { EDGE0 | EDGE2 | EDGE4 | EDGE5 | EDGE10, 0, 0, 0 }, // 213
+        { EDGE0 | EDGE1 | EDGE9, EDGE2 | EDGE3 | EDGE4 | EDGE5 | EDGE8 | EDGE10, 0, 0 }, // 214
+        { EDGE1 | EDGE2 | EDGE4 | EDGE5 | EDGE9 | EDGE10, 0, 0, 0 }, // 215
+        { EDGE2 | EDGE3 | EDGE4 | EDGE7 | EDGE9 | EDGE10, 0, 0, 0 }, // 216
+        { EDGE0 | EDGE2 | EDGE4 | EDGE7 | EDGE8 | EDGE9 | EDGE10, 0, 0, 0 }, // 217
+        { EDGE0 | EDGE1 | EDGE2 | EDGE3 | EDGE4 | EDGE7 | EDGE10, 0, 0, 0 }, // 218
+        { EDGE4 | EDGE7 | EDGE8, EDGE1 | EDGE2 | EDGE10, 0, 0 }, // 219
+        { EDGE2 | EDGE3 | EDGE8 | EDGE9 | EDGE10, 0, 0, 0 }, // 220
+        { EDGE0 | EDGE2 | EDGE9 | EDGE10, 0, 0, 0 }, // 221
+        { EDGE0 | EDGE1 | EDGE2 | EDGE3 | EDGE8 | EDGE10, 0, 0, 0 }, // 222
+        { EDGE1 | EDGE2 | EDGE10, 0, 0, 0 }, // 223
+        { EDGE1 | EDGE2 | EDGE5 | EDGE7 | EDGE11, 0, 0, 0 }, // 224
+        { EDGE0 | EDGE3 | EDGE8, EDGE1 | EDGE2 | EDGE5 | EDGE7 | EDGE11, 0, 0 }, // 225
+        { EDGE0 | EDGE2 | EDGE5 | EDGE7 | EDGE9 | EDGE11, 0, 0, 0 }, // 226
+        { EDGE2 | EDGE3 | EDGE5 | EDGE7 | EDGE8 | EDGE9 | EDGE11, 0, 0, 0 }, // 227
+        { EDGE1 | EDGE2 | EDGE4 | EDGE5 | EDGE8 | EDGE11, 0, 0, 0 }, // 228
+        { EDGE0 | EDGE1 | EDGE2 | EDGE3 | EDGE4 | EDGE5 | EDGE11, 0, 0, 0 }, // 229
+        { EDGE0 | EDGE2 | EDGE4 | EDGE5 | EDGE8 | EDGE9 | EDGE11, 0, 0, 0 }, // 230
+        { EDGE4 | EDGE5 | EDGE9, EDGE2 | EDGE3 | EDGE11, 0, 0 }, // 231
+        { EDGE1 | EDGE2 | EDGE4 | EDGE7 | EDGE9 | EDGE11, 0, 0, 0 }, // 232
+        { EDGE0 | EDGE3 | EDGE8, EDGE1 | EDGE2 | EDGE4 | EDGE7 | EDGE9 | EDGE11, 0, 0 }, // 233
+        { EDGE0 | EDGE2 | EDGE4 | EDGE7 | EDGE11, 0, 0, 0 }, // 234
+        { EDGE2 | EDGE3 | EDGE4 | EDGE7 | EDGE8 | EDGE11, 0, 0, 0 }, // 235
+        { EDGE1 | EDGE2 | EDGE8 | EDGE9 | EDGE11, 0, 0, 0 }, // 236
+        { EDGE0 | EDGE1 | EDGE2 | EDGE3 | EDGE9 | EDGE11, 0, 0, 0 }, // 237
+        { EDGE0 | EDGE2 | EDGE8 | EDGE11, 0, 0, 0 }, // 238
+        { EDGE2 | EDGE3 | EDGE11, 0, 0, 0 }, // 239
+        { EDGE1 | EDGE3 | EDGE5 | EDGE7, 0, 0, 0 }, // 240
+        { EDGE0 | EDGE1 | EDGE5 | EDGE7 | EDGE8, 0, 0, 0 }, // 241
+        { EDGE0 | EDGE3 | EDGE5 | EDGE7 | EDGE9, 0, 0, 0 }, // 242
+        { EDGE5 | EDGE7 | EDGE8 | EDGE9, 0, 0, 0 }, // 243
+        { EDGE1 | EDGE3 | EDGE4 | EDGE5 | EDGE8, 0, 0, 0 }, // 244
+        { EDGE0 | EDGE1 | EDGE4 | EDGE5, 0, 0, 0 }, // 245
+        { EDGE0 | EDGE3 | EDGE4 | EDGE5 | EDGE8 | EDGE9, 0, 0, 0 }, // 246
+        { EDGE4 | EDGE5 | EDGE9, 0, 0, 0 }, // 247
+        { EDGE1 | EDGE3 | EDGE4 | EDGE7 | EDGE9, 0, 0, 0 }, // 248
+        { EDGE0 | EDGE1 | EDGE4 | EDGE7 | EDGE8 | EDGE9, 0, 0, 0 }, // 249
+        { EDGE0 | EDGE3 | EDGE4 | EDGE7, 0, 0, 0 }, // 250
+        { EDGE4 | EDGE7 | EDGE8, 0, 0, 0 }, // 251
+        { EDGE1 | EDGE3 | EDGE8 | EDGE9, 0, 0, 0 }, // 252
+        { EDGE0 | EDGE1 | EDGE9, 0, 0, 0 }, // 253
+        { EDGE0 | EDGE3 | EDGE8, 0, 0, 0 }, // 254
+        { 0, 0, 0, 0 } // 255
+    };
+
+} // namespace DMC
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 7b128975..72276cfa 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -91,21 +91,23 @@ void FastNoiseNodeEditor::OpenStandaloneNodeGraph()
 
 static bool MatchingGroup( const std::vector<const char*>& a, const std::vector<const char*>& b )
 {
-    std::string aString;
-    for( const char* c : a )
+    // Check if the sizes of the vectors are the same
+    if( a.size() != b.size() )
     {
-        aString.append( c );
-        aString.push_back( '\t' );
+        return false;
     }
 
-    std::string bString;
-    for( const char* c : b )
+    // Directly compare each corresponding pair of strings
+    for( size_t i = 0; i < a.size(); ++i )
     {
-        bString.append( c );
-        bString.push_back( '\t' );
+        if( std::string_view( a[i] ) != std::string_view( b[i] ) )
+        {
+            return false;
+        }
     }
 
-    return aString == bString;
+    // All pairs matched
+    return true;
 }
 
 template<typename T>
@@ -118,7 +120,7 @@ static bool MatchingMembers( const std::vector<T>& a, const std::vector<T>& b )
 
     for( size_t i = 0; i < a.size(); i++ )
     {
-        if( strcmp( a[i].name, b[i].name ) != 0 )
+        if( std::string_view( a[i].name ) != std::string_view( b[i].name ) )
         {
             return false;
         }
diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index a1190190..3440a877 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -1,6 +1,7 @@
 #include <algorithm>
 #include <cmath>
 #include <thread>
+#include <bit>
 
 #include <Corrade/Utility/Resource.h>
 #include <Magnum/Math/Color.h>
@@ -12,6 +13,7 @@
 
 #include "ImGuiExtra.h"
 #include "MeshNoisePreview.h"
+#include "DmcTable.inl"
 
 
 using namespace Magnum;
@@ -23,7 +25,7 @@ MeshNoisePreview::MeshNoisePreview()
     mBuildData.isoSurface = 0.0f;
     mBuildData.heightmapMultiplier = 100.0f;
     mBuildData.color = Color3( 1.0f );
-    mBuildData.meshType = MeshType_Voxel3D;
+    mBuildData.meshType = MeshType_Bloxel3D;
 
     uint32_t threadCount = std::max( 2u, std::thread::hardware_concurrency() );
 
@@ -341,7 +343,7 @@ void MeshNoisePreview::GenerateLoopThread( GenerateQueue<Chunk::BuildData>& gene
 
 MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildMeshData( const BuildData& buildData )
 {
-    thread_local static std::vector<float> densityValues( SIZE_GEN * SIZE_GEN * SIZE_GEN );
+    thread_local static std::vector<float> densityValues( SIZE_DENSITY * SIZE_DENSITY * SIZE_DENSITY );
     thread_local static std::vector<VertexData> vertexData;
     thread_local static std::vector<uint32_t> indicies;
 
@@ -350,8 +352,11 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildMeshData( const
 
     switch( buildData.meshType )
     {
-    case MeshType_Voxel3D:
-        return BuildVoxel3DMesh( buildData, densityValues.data(), vertexData, indicies );
+    case MeshType_Bloxel3D:
+        return BuildBloxel3DMesh( buildData, densityValues.data(), vertexData, indicies );
+
+    case MeshType_DualMarchingCubes3D:
+        return BuildDmc3DMesh( buildData, densityValues.data(), vertexData, indicies );
 
     case MeshType_Heightmap2D:
         return BuildHeightMap2DMesh( buildData, densityValues.data(), vertexData, indicies );
@@ -363,8 +368,9 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildMeshData( const
     return MeshData( buildData.pos, {}, vertexData, indicies );
 }
 
-MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildVoxel3DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies )
+MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildBloxel3DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies )
 {
+    static constexpr uint32_t SIZE_GEN = SIZE + 2;
     FastNoise::OutputMinMax minMax = buildData.generatorScaled->GenUniformGrid3D( densityValues,
                                                                                   buildData.pos.x() - 1, buildData.pos.y() - 1, buildData.pos.z() - 1,
                                                                                   SIZE_GEN, SIZE_GEN, SIZE_GEN, buildData.seed );
@@ -383,11 +389,11 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildVoxel3DMesh( con
     else
 #endif
     {
-        Vector3 light = LIGHT_DIR.normalized() * ( 1.0f - AMBIENT_LIGHT ) + Vector3( AMBIENT_LIGHT );
+        constexpr Vector3 SUN = LIGHT_DIR * ( 1.0f - AMBIENT_LIGHT ) + Vector3( 0.577f ) * AMBIENT_LIGHT;
 
-        float xLight = std::abs( light.x() );
-        float yLight = std::abs( light.y() );
-        float zLight = std::abs( light.z() );
+        float xLight = std::abs( SUN.x() );
+        float yLight = std::abs( SUN.y() );
+        float zLight = std::abs( SUN.z() );
 
         constexpr int32_t STEP_X = 1;
         constexpr int32_t STEP_Y = SIZE_GEN;
@@ -413,37 +419,37 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildVoxel3DMesh( con
 
                         if( densityValues[noiseIdx + STEP_X] > buildData.isoSurface ) // Right
                         {
-                            AddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_X, STEP_Y, STEP_Z, xLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_X, STEP_Y, STEP_Z, xLight,
                                        Vector3( xf + 1, yf, zf ), Vector3( xf + 1, yf + 1, zf ), Vector3( xf + 1, yf + 1, zf + 1 ), Vector3( xf + 1, yf, zf + 1 ) );
                         }
 
                         if( densityValues[noiseIdx - STEP_X] > buildData.isoSurface ) // Left
                         {
-                            AddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_X, -STEP_Y, STEP_Z, 1.0f - xLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_X, -STEP_Y, STEP_Z, 1.0f - xLight,
                                        Vector3( xf, yf + 1, zf ), Vector3( xf, yf, zf ), Vector3( xf, yf, zf + 1 ), Vector3( xf, yf + 1, zf + 1 ) );
                         }
 
                         if( densityValues[noiseIdx + STEP_Y] > buildData.isoSurface ) // Up
                         {
-                            AddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_Y, STEP_Z, STEP_X, yLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_Y, STEP_Z, STEP_X, yLight,
                                        Vector3( xf, yf + 1, zf ), Vector3( xf, yf + 1, zf + 1 ), Vector3( xf + 1, yf + 1, zf + 1 ), Vector3( xf + 1, yf + 1, zf ) );
                         }
 
                         if( densityValues[noiseIdx - STEP_Y] > buildData.isoSurface ) // Down
                         {
-                            AddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_Y, -STEP_Z, STEP_X, 1.0f - yLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_Y, -STEP_Z, STEP_X, 1.0f - yLight,
                                        Vector3( xf, yf, zf + 1 ), Vector3( xf, yf, zf ), Vector3( xf + 1, yf, zf ), Vector3( xf + 1, yf, zf + 1 ) );
                         }
 
                         if( densityValues[noiseIdx + STEP_Z] > buildData.isoSurface ) // Forward
                         {
-                            AddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_Z, STEP_X, STEP_Y, zLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_Z, STEP_X, STEP_Y, zLight,
                                        Vector3( xf, yf, zf + 1 ), Vector3( xf + 1, yf, zf + 1 ), Vector3( xf + 1, yf + 1, zf + 1 ), Vector3( xf, yf + 1, zf + 1 ) );
                         }
 
                         if( densityValues[noiseIdx - STEP_Z] > buildData.isoSurface ) // Back
                         {
-                            AddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_Z, -STEP_X, STEP_Y, 1.0f - zLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_Z, -STEP_X, STEP_Y, 1.0f - zLight,
                                        Vector3( xf + 1, yf, zf ), Vector3( xf, yf, zf ), Vector3( xf, yf + 1, zf ), Vector3( xf + 1, yf + 1, zf ) );
                         }
                     }
@@ -464,7 +470,7 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildVoxel3DMesh( con
     return MeshData( buildData.pos, minMax, vertexData, indicies, minAir, maxSolid );
 }
 
-void MeshNoisePreview::Chunk::AddQuadAO( std::vector<VertexData>& verts, std::vector<uint32_t>& indicies, const float* density, float isoSurface,
+void MeshNoisePreview::Chunk::BloxelAddQuadAO( std::vector<VertexData>& verts, std::vector<uint32_t>& indicies, const float* density, float isoSurface,
                                          int32_t idx, int32_t facingOffset, int32_t offsetA, int32_t offsetB, float light, Vector3 pos00, Vector3 pos01, Vector3 pos11, Vector3 pos10 )
 {
     int32_t facingIdx = idx + facingOffset;
@@ -505,15 +511,361 @@ void MeshNoisePreview::Chunk::AddQuadAO( std::vector<VertexData>& verts, std::ve
     indicies.push_back( vertIdx + 1 );
 }
 
+MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildDmc3DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies )
+{
+    static constexpr uint32_t SIZE_GEN = SIZE + 4;
+
+    FastNoise::OutputMinMax minMax = buildData.generatorScaled->GenUniformGrid3D( densityValues,
+                                                                                  buildData.pos.x() - 2, buildData.pos.y() - 2, buildData.pos.z() - 2,
+                                                                                  SIZE_GEN, SIZE_GEN, SIZE_GEN, buildData.seed );
+    float minAir = INFINITY;
+    float maxSolid = -INFINITY;
+
+#if FASTNOISE_CALC_MIN_MAX
+    if( minMax.min > buildData.isoSurface )
+    {
+        minAir = (float)buildData.pos.y();
+    }
+    else if( minMax.max < buildData.isoSurface )
+    {
+        maxSolid = (float)buildData.pos.y() - 1.0f + SIZE;
+    }
+    else
+#endif
+    {
+        constexpr Vector3 VEC_X = Vector3( 1, 0, 0 );
+        constexpr Vector3 VEC_Y = Vector3( 0, 1, 0 );
+        constexpr Vector3 VEC_Z = Vector3( 0, 0, 1 );
+
+        constexpr uint32_t STEP_X = 1;
+        constexpr uint32_t STEP_Y = SIZE_GEN;
+        constexpr uint32_t STEP_Z = SIZE_GEN * SIZE_GEN;
+
+        robin_hood::unordered_flat_map<uint64_t, uint32_t> vertIndexMap;
+
+        Vector3 cellOffset( NoInit );
+        uint32_t cellIndex = (STEP_X + STEP_Y + STEP_Z) * 2;
+
+        for( uint32_t z = 0; z < SIZE; z++ )
+        {
+            cellOffset.z() = (float)( buildData.pos.z() + (int32_t)z );
+
+            for( uint32_t y = 0; y < SIZE; y++ )
+            {
+                cellOffset.y() = (float)( buildData.pos.y() + (int32_t)y );
+
+                for( uint32_t x = 0; x < SIZE; x++ )
+                {
+                    cellOffset.x() = (float)( buildData.pos.x() + (int32_t)x );
+
+                    const float density = densityValues[cellIndex];
+
+                    // construct quad for x edge
+                    {
+                        const float densityX = densityValues[cellIndex + STEP_X];
+
+                        if( density <= buildData.isoSurface )
+                        {
+                            maxSolid = std::max( cellOffset.y(), maxSolid );
+                        }
+                        else
+                        {
+                            minAir = std::min( cellOffset.y(), minAir );
+                        }
+
+                        // is edge intersected?
+                        if( ( density <= buildData.isoSurface ) ^ ( densityX <= buildData.isoSurface ) )
+                        {
+                            // generate quad
+                            const uint32_t quadVertIndicies[] = {
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex, DMC::EDGE0, cellOffset, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_Z, DMC::EDGE2, cellOffset - VEC_Z, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_Y, DMC::EDGE4, cellOffset - VEC_Y, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_Y - STEP_Z, DMC::EDGE6, cellOffset - (VEC_Y + VEC_Z), densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                            };
+
+                            // Slice quad along shortest diagonal
+                            uint8_t triRotation = 2 * ( ( vertexData[quadVertIndicies[0]].posLight.rgb() - vertexData[quadVertIndicies[3]].posLight.rgb() ).dot() > 
+                                ( vertexData[quadVertIndicies[1]].posLight.rgb() - vertexData[quadVertIndicies[2]].posLight.rgb() ).dot() );
+
+                            // Flip tris if backfacing
+                            uint8_t triFlip = 2 * ( density < densityX );
+
+                            indicies.emplace_back( quadVertIndicies[triFlip] );
+                            indicies.emplace_back( quadVertIndicies[3 - triRotation] );
+                            indicies.emplace_back( quadVertIndicies[2 - triFlip] );
+                            indicies.emplace_back( quadVertIndicies[3 - triFlip] );
+                            indicies.emplace_back( quadVertIndicies[triRotation] );
+                            indicies.emplace_back( quadVertIndicies[1 + triFlip] );
+                        }
+                    }
+
+                    // construct quad for y edge
+                    {
+                        const float densityY = densityValues[cellIndex + STEP_Y];
+
+                        // is edge intersected?
+                        if( ( density <= buildData.isoSurface ) ^ ( densityY <= buildData.isoSurface ) )
+                        {
+                            // generate quad
+                            const uint32_t quadVertIndicies[] = {
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex, DMC::EDGE8, cellOffset, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_X, DMC::EDGE9, cellOffset - VEC_X, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_Z, DMC::EDGE11, cellOffset - VEC_Z, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_X - STEP_Z, DMC::EDGE10, cellOffset - (VEC_X + VEC_Z), densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                            };
+
+                            // Slice quad along shortest diagonal
+                            uint8_t triRotation = 2 * ( ( vertexData[quadVertIndicies[0]].posLight.rgb() - vertexData[quadVertIndicies[3]].posLight.rgb() ).dot() > 
+                                ( vertexData[quadVertIndicies[1]].posLight.rgb() - vertexData[quadVertIndicies[2]].posLight.rgb() ).dot() );
+
+                            // Flip tris if backfacing
+                            uint8_t triFlip = 2 * (density < densityY);
+
+                            indicies.emplace_back( quadVertIndicies[triFlip] );
+                            indicies.emplace_back( quadVertIndicies[3 - triRotation] );
+                            indicies.emplace_back( quadVertIndicies[2 - triFlip] );
+                            indicies.emplace_back( quadVertIndicies[3 - triFlip] );
+                            indicies.emplace_back( quadVertIndicies[triRotation] );
+                            indicies.emplace_back( quadVertIndicies[1 + triFlip] );
+                        }
+                    }
+
+                    // construct quad for z edge
+                    {
+                        const float densityZ = densityValues[cellIndex + STEP_Z];
+
+                        // is edge intersected?
+                        if( ( density <= buildData.isoSurface ) ^ ( densityZ <= buildData.isoSurface ) )
+                        {
+                            // generate quad
+                            const uint32_t quadVertIndicies[] = {
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex, DMC::EDGE3, cellOffset, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_Y, DMC::EDGE7, cellOffset - VEC_Y, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_X, DMC::EDGE1, cellOffset - VEC_X, densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                                DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_X - STEP_Y, DMC::EDGE5, cellOffset - (VEC_X + VEC_Y), densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
+                            };
+
+                            // Slice quad along shortest diagonal
+                            uint8_t triRotation = 2 * ( ( vertexData[quadVertIndicies[0]].posLight.rgb() - vertexData[quadVertIndicies[3]].posLight.rgb() ).dot() > 
+                                ( vertexData[quadVertIndicies[1]].posLight.rgb() - vertexData[quadVertIndicies[2]].posLight.rgb() ).dot() );
+
+                            // Flip tris if backfacing
+                            uint8_t triFlip = 2 * ( density < densityZ );
+
+                            indicies.emplace_back( quadVertIndicies[triFlip] );
+                            indicies.emplace_back( quadVertIndicies[3 - triRotation] );
+                            indicies.emplace_back( quadVertIndicies[2 - triFlip] );
+                            indicies.emplace_back( quadVertIndicies[3 - triFlip] );
+                            indicies.emplace_back( quadVertIndicies[triRotation] );
+                            indicies.emplace_back( quadVertIndicies[1 + triFlip] );
+                        }
+                    }
+
+                    cellIndex += STEP_X;
+                }
+                cellIndex += STEP_X * ( SIZE_GEN - SIZE );
+            }
+            cellIndex += STEP_Y * ( SIZE_GEN - SIZE );
+        }
+    }
+
+    return MeshData( buildData.pos, minMax, vertexData, indicies, minAir, maxSolid );
+}
+
+template<uint32_t STEP_X, uint32_t STEP_Y, uint32_t STEP_Z, uint32_t SIZE_GEN>
+uint32_t MeshNoisePreview::Chunk::DmcGetVertIndex( uint32_t cellIndex, uint16_t edge, Vector3 vertOffset, const float* densityArray, float isoSurface,
+                                                   std::vector<VertexData>& vertexData, robin_hood::unordered_flat_map<uint64_t, uint32_t>& vertIndexMap )
+{
+    uint32_t cellCode = 0;
+    if( densityArray[cellIndex] > isoSurface )
+        cellCode |= 1;
+    if( densityArray[cellIndex + STEP_X] > isoSurface )
+        cellCode |= 2;
+    if( densityArray[cellIndex + STEP_Y] > isoSurface )
+        cellCode |= 4;
+    if( densityArray[cellIndex + STEP_X + STEP_Y] > isoSurface )
+        cellCode |= 8;
+    if( densityArray[cellIndex + STEP_Z] > isoSurface )
+        cellCode |= 16;
+    if( densityArray[cellIndex + STEP_X + STEP_Z] > isoSurface )
+        cellCode |= 32;
+    if( densityArray[cellIndex + STEP_Y + STEP_Z] > isoSurface )
+        cellCode |= 64;
+    if( densityArray[cellIndex + STEP_X + STEP_Y + STEP_Z] > isoSurface )
+        cellCode |= 128;
+    
+    uint16_t pointCode = 0;
+    for( int i = 0; i < 4; ++i )
+    {
+        if( DMC::kDualPointsList[cellCode][i] & edge )
+        {
+            pointCode = DMC::kDualPointsList[cellCode][i];
+            break;
+        }
+    }
+
+    uint64_t lookup = (uint64_t)cellIndex << 12 | (uint64_t)pointCode;
+    uint32_t vertIndex = (uint32_t)vertexData.size();
+    auto find = vertIndexMap.try_emplace( lookup, vertIndex );
+
+    if( !find.second )
+    {
+        return find.first->second;
+    }
+
+    // compute the dual point as the mean of the face vertices belonging to the
+    // original marching cubes face
+    Vector3 vert( Math::ZeroInit );
+
+    // sum edge intersection vertices using the point code
+    if( pointCode & DMC::EDGE0 )
+    {
+        vert.x() += ( isoSurface - densityArray[cellIndex] ) / ( densityArray[cellIndex + STEP_X] - densityArray[cellIndex] );
+    }
+
+    if( pointCode & DMC::EDGE1 )
+    {
+        vert.x() += 1.0f;
+        vert.z() += ( isoSurface - densityArray[cellIndex + STEP_X] ) / ( densityArray[cellIndex + STEP_X + STEP_Z] - densityArray[cellIndex + STEP_X] );
+    }
+
+    if( pointCode & DMC::EDGE2 )
+    {
+        vert.x() += ( isoSurface - densityArray[cellIndex + STEP_Z] ) / ( densityArray[cellIndex + STEP_X + STEP_Z] - densityArray[cellIndex + STEP_Z] );
+        vert.z() += 1.0f;
+    }
+
+    if( pointCode & DMC::EDGE3 )
+    {
+        vert.z() += ( isoSurface - densityArray[cellIndex] ) / ( densityArray[cellIndex + STEP_Z] - densityArray[cellIndex] );
+    }
+
+    if( pointCode & DMC::EDGE4 )
+    {
+        vert.x() += ( isoSurface - densityArray[cellIndex + STEP_Y] ) / ( densityArray[cellIndex + STEP_X + STEP_Y] - densityArray[cellIndex + STEP_Y] );
+        vert.y() += 1.0f;
+    }
+
+    if( pointCode & DMC::EDGE5 )
+    {
+        vert.x() += 1.0f;
+        vert.y() += 1.0f;
+        vert.z() += ( isoSurface - densityArray[cellIndex + STEP_X + STEP_Y] ) / ( densityArray[cellIndex + STEP_X + STEP_Y + STEP_Z] - densityArray[cellIndex + STEP_X + STEP_Y] );
+    }
+
+    if( pointCode & DMC::EDGE6 )
+    {
+        vert.x() += ( isoSurface - densityArray[cellIndex + STEP_Y + STEP_Z] ) / ( densityArray[cellIndex + STEP_X + STEP_Y + STEP_Z] - densityArray[cellIndex + STEP_Y + STEP_Z] );
+        vert.y() += 1.0f;
+        vert.z() += 1.0f;
+    }
+
+    if( pointCode & DMC::EDGE7 )
+    {
+        vert.y() += 1.0f;
+        vert.z() += ( isoSurface - densityArray[cellIndex + STEP_Y] ) / ( densityArray[cellIndex + STEP_Y + STEP_Z] - densityArray[cellIndex + STEP_Y] );
+    }
+
+    if( pointCode & DMC::EDGE8 )
+    {
+        vert.y() += ( isoSurface - densityArray[cellIndex] ) / ( densityArray[cellIndex + STEP_Y] - densityArray[cellIndex] );
+    }
+
+    if( pointCode & DMC::EDGE9 )
+    {
+        vert.x() += 1.0f;
+        vert.y() += ( isoSurface - densityArray[cellIndex + STEP_X] ) / ( densityArray[cellIndex + STEP_X + STEP_Y] - densityArray[cellIndex + STEP_X] );
+    }
+
+    if( pointCode & DMC::EDGE10 )
+    {
+        vert.x() += 1.0f;
+        vert.y() += ( isoSurface - densityArray[cellIndex + STEP_X + STEP_Z] ) / ( densityArray[cellIndex + STEP_X + STEP_Y + STEP_Z] - densityArray[cellIndex + STEP_X + STEP_Z] );
+        vert.z() += 1.0f;
+    }
+
+    if( pointCode & DMC::EDGE11 )
+    {
+        vert.y() += ( isoSurface - densityArray[cellIndex + STEP_Z] ) / ( densityArray[cellIndex + STEP_Y + STEP_Z] - densityArray[cellIndex + STEP_Z] );
+        vert.z() += 1.0f;
+    }
+
+    vert /= (float)std::popcount( pointCode );
+
+    Vector3i derivStep( -Math::round( vert ) );
+
+    uint32_t derivIndex = cellIndex + ( STEP_X & derivStep.x() ) + ( STEP_Y & derivStep.y() ) + ( STEP_Z & derivStep.z() );
+
+    Vector3 derivDelta = vert + Vector3( 0.5f );
+    derivDelta -= Math::floor( derivDelta );
+        
+    Vector3 derivative;
+
+    for( int32_t z = -1; z < 1; z++ )
+    {
+        float contribZ = std::abs( z + vert.z() );
+
+        for( int32_t y = -1; y < 1; y++ )
+        {
+            float contribY = std::abs( y + vert.y() );
+
+            for( int32_t x = -1; x < 1; x++ )
+            {
+                float contribX = std::abs( x + vert.x() );
+
+                if( x )
+                {
+                    derivative.x() += contribY * contribZ *
+                        ImLerp( densityArray[derivIndex - STEP_X] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_X], derivDelta.x() );
+                }
+                if( y )
+                {
+                    derivative.y() += contribX * contribZ *
+                        ImLerp( densityArray[derivIndex - STEP_Y] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_Y], derivDelta.y() );
+                }
+                if( z )
+                {
+                    derivative.x() += contribX * contribY *
+                        ImLerp( densityArray[derivIndex - STEP_Z] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_Z], derivDelta.z() );
+                }
+
+                derivIndex += STEP_X;
+            }
+
+            derivIndex += STEP_Y - STEP_X * 2;
+        }
+
+        derivIndex += STEP_Z - STEP_Y * 2;
+    }
+        
+    /*derivIndex = cellIndex + ( STEP_X & derivStep.x() ) + ( STEP_Y & derivStep.y() ) + ( STEP_Z & derivStep.z() );
+    derivative = Vector3 {
+        ImLerp( densityArray[derivIndex - STEP_X] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_X], derivDelta.x() ),
+        ImLerp( densityArray[derivIndex - STEP_Y] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_Y], derivDelta.y() ),
+        ImLerp( densityArray[derivIndex - STEP_Z] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_Z], derivDelta.z() ),
+    };*/
+
+    constexpr Vector3 SUN = -Vector3( 0.45f, 1.f, 0.6f );//    LIGHT_DIR * ( 1.0f - AMBIENT_LIGHT ) + Vector3( AMBIENT_LIGHT );
+
+    float light = ( SUN.normalized() * derivative.normalized() ).sum() * 0.5 + 0.5f;
+
+    assert( light <= 1 );
+
+    vertexData.emplace_back( vert + vertOffset, light );//( uint32_t ) robin_hood::hash_bytes( &vert, sizeof( float ) * 3 ) / (float)UINT32_MAX );
+
+    return vertIndex;
+}
+
 MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildHeightMap2DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies )
 {
-    constexpr uint32_t SIZE_GEN_HEIGHTMAP = SIZE + 1;
+    static constexpr uint32_t SIZE_GEN = SIZE + 2;
 
     FastNoise::OutputMinMax minMax = buildData.generatorScaled->GenUniformGrid2D( densityValues,
                                                                                   buildData.pos.x(), buildData.pos.z(),
-                                                                                  SIZE_GEN_HEIGHTMAP, SIZE_GEN_HEIGHTMAP, buildData.seed );
+                                                                                  SIZE_GEN, SIZE_GEN, buildData.seed );
     constexpr int32_t STEP_X = 1;
-    constexpr int32_t STEP_Y = SIZE_GEN_HEIGHTMAP;
+    constexpr int32_t STEP_Y = SIZE_GEN;
 
     Vector3 sunLight = LIGHT_DIR.normalized() * ( 1.0f - AMBIENT_LIGHT ) + Vector3( AMBIENT_LIGHT );
 
diff --git a/tools/MeshNoisePreview.h b/tools/MeshNoisePreview.h
index 0b8989b3..f493961c 100644
--- a/tools/MeshNoisePreview.h
+++ b/tools/MeshNoisePreview.h
@@ -33,13 +33,15 @@ namespace Magnum
     private:
         enum MeshType
         {
-            MeshType_Voxel3D,
+            MeshType_Bloxel3D,
+            MeshType_DualMarchingCubes3D,
             MeshType_Heightmap2D,
             MeshType_Count
         };
 
         inline static const char* MeshTypeStrings =
-            "Voxel 3D\0"
+            "Bloxel 3D\0"
+            "Dual Marching Cubes 3D\0"
             "Heightmap 2D\0";
 
         class VertexLightShader : public GL::AbstractShaderProgram
@@ -131,8 +133,9 @@ namespace Magnum
             };
 
             static MeshData BuildMeshData( const BuildData& buildData );
-            static MeshNoisePreview::Chunk::MeshData BuildVoxel3DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies );
-            static MeshNoisePreview::Chunk::MeshData BuildHeightMap2DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies );
+            static MeshData BuildBloxel3DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies );
+            static MeshData BuildDmc3DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies );
+            static MeshData BuildHeightMap2DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies );
 
             Chunk( MeshData& meshData );
 
@@ -140,15 +143,19 @@ namespace Magnum
             Vector3i GetPos() const { return mPos; }
 
             static constexpr uint32_t SIZE          = 128;
-            static constexpr Vector3  LIGHT_DIR     = { 3, 4, 2 };
+            static constexpr Vector3  LIGHT_DIR     = { 0.557f, 0.743f, 0.371f }; // normalised
             static constexpr float    AMBIENT_LIGHT = 0.3f;
             static constexpr float    AO_STRENGTH   = 0.6f;
 
         private:
-            static void AddQuadAO( std::vector<VertexData>& verts, std::vector<uint32_t>& indicies, const float* density, float isoSurface,
-                                   int32_t idx, int32_t facingIdx, int32_t offsetA, int32_t offsetB, float light, Vector3 pos00, Vector3 pos01, Vector3 pos11, Vector3 pos10 );
+            static void BloxelAddQuadAO( std::vector<VertexData>& verts, std::vector<uint32_t>& indicies, const float* density, float isoSurface,
+                                         int32_t idx, int32_t facingIdx, int32_t offsetA, int32_t offsetB, float light, Vector3 pos00, Vector3 pos01, Vector3 pos11, Vector3 pos10 );
 
-            static constexpr uint32_t SIZE_GEN = SIZE + 2;
+            template<uint32_t STEP_X, uint32_t STEP_Y, uint32_t STEP_Z, uint32_t SIZE_GEN>
+            static uint32_t DmcGetVertIndex( uint32_t cellIndex, uint16_t edge, Vector3 vertOffset, const float* densityArray, float isoSurface,
+                std::vector<VertexData>& vertexData, robin_hood::unordered_flat_map<uint64_t, uint32_t>& vertIndexMap );
+
+            static constexpr uint32_t SIZE_DENSITY = SIZE + 4;
 
             Vector3i mPos;
             std::unique_ptr<GL::Mesh> mMesh;

From 6e99c0a38e7d88e6d42b7b6e00ffd59fa6341b4c Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Tue, 12 Mar 2024 21:18:56 +0000
Subject: [PATCH 067/139] Fix derivative calculations for dmc mesh normals

---
 tools/MeshNoisePreview.cpp | 55 +++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index 3440a877..f5673d08 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -584,9 +584,9 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildDmc3DMesh( const
                                 DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_Y - STEP_Z, DMC::EDGE6, cellOffset - (VEC_Y + VEC_Z), densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
                             };
 
-                            // Slice quad along shortest diagonal
-                            uint8_t triRotation = 2 * ( ( vertexData[quadVertIndicies[0]].posLight.rgb() - vertexData[quadVertIndicies[3]].posLight.rgb() ).dot() > 
-                                ( vertexData[quadVertIndicies[1]].posLight.rgb() - vertexData[quadVertIndicies[2]].posLight.rgb() ).dot() );
+                            // Slice quad for best vertex lighting
+                            uint8_t triRotation = 2 * ( std::abs( vertexData[quadVertIndicies[0]].posLight.w() - vertexData[quadVertIndicies[3]].posLight.w() ) >
+                                std::abs( vertexData[quadVertIndicies[1]].posLight.w() - vertexData[quadVertIndicies[2]].posLight.w() ) );
 
                             // Flip tris if backfacing
                             uint8_t triFlip = 2 * ( density < densityX );
@@ -615,9 +615,9 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildDmc3DMesh( const
                                 DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_X - STEP_Z, DMC::EDGE10, cellOffset - (VEC_X + VEC_Z), densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
                             };
 
-                            // Slice quad along shortest diagonal
-                            uint8_t triRotation = 2 * ( ( vertexData[quadVertIndicies[0]].posLight.rgb() - vertexData[quadVertIndicies[3]].posLight.rgb() ).dot() > 
-                                ( vertexData[quadVertIndicies[1]].posLight.rgb() - vertexData[quadVertIndicies[2]].posLight.rgb() ).dot() );
+                            // Slice quad for best vertex lighting
+                            uint8_t triRotation = 2 * ( std::abs( vertexData[quadVertIndicies[0]].posLight.w() - vertexData[quadVertIndicies[3]].posLight.w() ) >
+                                std::abs( vertexData[quadVertIndicies[1]].posLight.w() - vertexData[quadVertIndicies[2]].posLight.w() ) );
 
                             // Flip tris if backfacing
                             uint8_t triFlip = 2 * (density < densityY);
@@ -646,9 +646,9 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildDmc3DMesh( const
                                 DmcGetVertIndex<STEP_X, STEP_Y, STEP_Z, SIZE_GEN>( cellIndex - STEP_X - STEP_Y, DMC::EDGE5, cellOffset - (VEC_X + VEC_Y), densityValues, buildData.isoSurface, vertexData, vertIndexMap ),
                             };
 
-                            // Slice quad along shortest diagonal
-                            uint8_t triRotation = 2 * ( ( vertexData[quadVertIndicies[0]].posLight.rgb() - vertexData[quadVertIndicies[3]].posLight.rgb() ).dot() > 
-                                ( vertexData[quadVertIndicies[1]].posLight.rgb() - vertexData[quadVertIndicies[2]].posLight.rgb() ).dot() );
+                            // Slice quad for best vertex lighting
+                            uint8_t triRotation = 2 * ( std::abs( vertexData[quadVertIndicies[0]].posLight.w() - vertexData[quadVertIndicies[3]].posLight.w() ) >
+                                std::abs( vertexData[quadVertIndicies[1]].posLight.w() - vertexData[quadVertIndicies[2]].posLight.w() ) );
 
                             // Flip tris if backfacing
                             uint8_t triFlip = 2 * ( density < densityZ );
@@ -792,10 +792,10 @@ uint32_t MeshNoisePreview::Chunk::DmcGetVertIndex( uint32_t cellIndex, uint16_t
     }
 
     vert /= (float)std::popcount( pointCode );
-
-    Vector3i derivStep( -Math::round( vert ) );
-
-    uint32_t derivIndex = cellIndex + ( STEP_X & derivStep.x() ) + ( STEP_Y & derivStep.y() ) + ( STEP_Z & derivStep.z() );
+    
+    uint32_t derivOffsetX = STEP_X & (int)std::lroundf( -vert.x() );
+    uint32_t derivOffsetY = STEP_Y & (int)std::lroundf( -vert.y() );
+    uint32_t derivOffsetZ = STEP_Z & (int)std::lroundf( -vert.z() );
 
     Vector3 derivDelta = vert + Vector3( 0.5f );
     derivDelta -= Math::floor( derivDelta );
@@ -816,39 +816,38 @@ uint32_t MeshNoisePreview::Chunk::DmcGetVertIndex( uint32_t cellIndex, uint16_t
 
                 if( x )
                 {
+                    uint32_t derivIndex = cellIndex + derivOffsetX;
                     derivative.x() += contribY * contribZ *
-                        ImLerp( densityArray[derivIndex - STEP_X] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_X], derivDelta.x() );
+                        ImLerp( densityArray[derivIndex - STEP_X] - densityArray[derivIndex],
+                                densityArray[derivIndex] - densityArray[derivIndex + STEP_X], derivDelta.x() );
                 }
                 if( y )
                 {
+                    uint32_t derivIndex = cellIndex + derivOffsetY;
                     derivative.y() += contribX * contribZ *
-                        ImLerp( densityArray[derivIndex - STEP_Y] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_Y], derivDelta.y() );
+                        ImLerp( densityArray[derivIndex - STEP_Y] - densityArray[derivIndex],
+                                densityArray[derivIndex] - densityArray[derivIndex + STEP_Y], derivDelta.y() );
                 }
                 if( z )
                 {
-                    derivative.x() += contribX * contribY *
-                        ImLerp( densityArray[derivIndex - STEP_Z] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_Z], derivDelta.z() );
+                    uint32_t derivIndex = cellIndex + derivOffsetZ;
+                    derivative.z() += contribX * contribY *
+                        ImLerp( densityArray[derivIndex - STEP_Z] - densityArray[derivIndex],
+                                densityArray[derivIndex] - densityArray[derivIndex + STEP_Z], derivDelta.z() );
                 }
 
-                derivIndex += STEP_X;
+                cellIndex += STEP_X;
             }
 
-            derivIndex += STEP_Y - STEP_X * 2;
+            cellIndex += STEP_Y - STEP_X * 2;
         }
 
-        derivIndex += STEP_Z - STEP_Y * 2;
+        cellIndex += STEP_Z - STEP_Y * 2;
     }
-        
-    /*derivIndex = cellIndex + ( STEP_X & derivStep.x() ) + ( STEP_Y & derivStep.y() ) + ( STEP_Z & derivStep.z() );
-    derivative = Vector3 {
-        ImLerp( densityArray[derivIndex - STEP_X] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_X], derivDelta.x() ),
-        ImLerp( densityArray[derivIndex - STEP_Y] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_Y], derivDelta.y() ),
-        ImLerp( densityArray[derivIndex - STEP_Z] - densityArray[derivIndex], densityArray[derivIndex] - densityArray[derivIndex + STEP_Z], derivDelta.z() ),
-    };*/
 
     constexpr Vector3 SUN = -Vector3( 0.45f, 1.f, 0.6f );//    LIGHT_DIR * ( 1.0f - AMBIENT_LIGHT ) + Vector3( AMBIENT_LIGHT );
 
-    float light = ( SUN.normalized() * derivative.normalized() ).sum() * 0.5 + 0.5f;
+    float light = ( SUN.normalized() * derivative.normalized() ).sum() * 0.5f + 0.5f;
 
     assert( light <= 1 );
 

From 306f9bc2049b3febdc6d052ee2cc05aff091a21e Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 12 Mar 2024 21:32:20 +0000
Subject: [PATCH 068/139] Fix last few MSVC warnings

---
 tools/CMakeLists.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index d190f016..211a4ef4 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -28,7 +28,7 @@ CPMAddPackage(
 CPMAddPackage(
     NAME magnum
     GITHUB_REPOSITORY mosra/magnum
-    GIT_TAG 7d0a8215d38284f7b7ae041cfbb19d410e5988a6
+    GIT_TAG b1419017650c83538d8fe4681de6f0bca524cf49
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
@@ -73,7 +73,7 @@ find_package(ImGui REQUIRED SourcesMiscCpp)
 CPMAddPackage(
     NAME imnodes
     GITHUB_REPOSITORY Auburn/imnodes
-    GIT_TAG 4ccaf656b09fd6b69bdac36f2532756760bd0aa3
+    GIT_TAG 26b70c528d48beeb839035f3da71550f8b0adfa7
     GIT_SUBMODULES ".github"
     EXCLUDE_FROM_ALL YES
     OPTIONS
@@ -142,7 +142,6 @@ endif()
 
 if (MSVC)
     target_compile_definitions(NodeEditor PRIVATE _CRT_SECURE_NO_WARNINGS=1)
-    target_compile_options(NodeEditor PRIVATE /wd4244)
 endif()
 
 set(install_targets ${install_targets} NodeEditor PARENT_SCOPE)

From e8a2675a1ed4cb4b5ab6232bb4e9be6bb35b5d4c Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 12 Mar 2024 23:55:38 +0000
Subject: [PATCH 069/139] Updated mesh lighting

---
 tools/MeshNoisePreview.cpp | 50 ++++++++++++++++++++++----------------
 tools/MeshNoisePreview.h   |  8 +++---
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index f5673d08..0fdbc515 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -18,6 +18,17 @@
 
 using namespace Magnum;
 
+static constexpr float SqrtNewtonRaphson( float x, float curr, float prev )
+{
+    return curr == prev ? curr : SqrtNewtonRaphson( x, 0.5f * ( curr + x / curr ), curr );
+}
+
+static constexpr Vector3 NormaliseConstExpr( const Vector3& vec )
+{
+    float lenSqr = vec.x() * vec.x() + vec.y() * vec.y() + vec.z() * vec.z();
+    return vec / SqrtNewtonRaphson( lenSqr, lenSqr, 0 );
+}
+
 MeshNoisePreview::MeshNoisePreview()
 {
     mBuildData.scale = 1.f;
@@ -389,11 +400,7 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildBloxel3DMesh( co
     else
 #endif
     {
-        constexpr Vector3 SUN = LIGHT_DIR * ( 1.0f - AMBIENT_LIGHT ) + Vector3( 0.577f ) * AMBIENT_LIGHT;
-
-        float xLight = std::abs( SUN.x() );
-        float yLight = std::abs( SUN.y() );
-        float zLight = std::abs( SUN.z() );
+        constexpr Vector3 SUN = LIGHT_DIR * ( 1.0f - AMBIENT_LIGHT );
 
         constexpr int32_t STEP_X = 1;
         constexpr int32_t STEP_Y = SIZE_GEN;
@@ -419,37 +426,37 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildBloxel3DMesh( co
 
                         if( densityValues[noiseIdx + STEP_X] > buildData.isoSurface ) // Right
                         {
-                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_X, STEP_Y, STEP_Z, xLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_X, STEP_Y, STEP_Z, SUN.x() + AMBIENT_LIGHT,
                                        Vector3( xf + 1, yf, zf ), Vector3( xf + 1, yf + 1, zf ), Vector3( xf + 1, yf + 1, zf + 1 ), Vector3( xf + 1, yf, zf + 1 ) );
                         }
 
                         if( densityValues[noiseIdx - STEP_X] > buildData.isoSurface ) // Left
                         {
-                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_X, -STEP_Y, STEP_Z, 1.0f - xLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_X, -STEP_Y, STEP_Z, 1.0f - SUN.x(),
                                        Vector3( xf, yf + 1, zf ), Vector3( xf, yf, zf ), Vector3( xf, yf, zf + 1 ), Vector3( xf, yf + 1, zf + 1 ) );
                         }
 
                         if( densityValues[noiseIdx + STEP_Y] > buildData.isoSurface ) // Up
                         {
-                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_Y, STEP_Z, STEP_X, yLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_Y, STEP_Z, STEP_X, SUN.y() + AMBIENT_LIGHT,
                                        Vector3( xf, yf + 1, zf ), Vector3( xf, yf + 1, zf + 1 ), Vector3( xf + 1, yf + 1, zf + 1 ), Vector3( xf + 1, yf + 1, zf ) );
                         }
 
                         if( densityValues[noiseIdx - STEP_Y] > buildData.isoSurface ) // Down
                         {
-                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_Y, -STEP_Z, STEP_X, 1.0f - yLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_Y, -STEP_Z, STEP_X, 1.0f - SUN.y(),
                                        Vector3( xf, yf, zf + 1 ), Vector3( xf, yf, zf ), Vector3( xf + 1, yf, zf ), Vector3( xf + 1, yf, zf + 1 ) );
                         }
 
                         if( densityValues[noiseIdx + STEP_Z] > buildData.isoSurface ) // Forward
                         {
-                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_Z, STEP_X, STEP_Y, zLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, STEP_Z, STEP_X, STEP_Y, SUN.z() + AMBIENT_LIGHT,
                                        Vector3( xf, yf, zf + 1 ), Vector3( xf + 1, yf, zf + 1 ), Vector3( xf + 1, yf + 1, zf + 1 ), Vector3( xf, yf + 1, zf + 1 ) );
                         }
 
                         if( densityValues[noiseIdx - STEP_Z] > buildData.isoSurface ) // Back
                         {
-                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_Z, -STEP_X, STEP_Y, 1.0f - zLight,
+                            BloxelAddQuadAO( vertexData, indicies, densityValues, buildData.isoSurface, noiseIdx, -STEP_Z, -STEP_X, STEP_Y, 1.0f - SUN.z(),
                                        Vector3( xf + 1, yf, zf ), Vector3( xf, yf, zf ), Vector3( xf, yf + 1, zf ), Vector3( xf + 1, yf + 1, zf ) );
                         }
                     }
@@ -492,8 +499,10 @@ void MeshNoisePreview::Chunk::BloxelAddQuadAO( std::vector<VertexData>& verts, s
     float ao10 = (float)( sideA0 + sideB1 + corner01 ) * aoAdjust;
     float ao11 = (float)( sideA1 + sideB1 + corner11 ) * aoAdjust;
 
-    float densityLightShift = 1 - ( isoSurface - density[idx] ) * 2;
-    light *= densityLightShift * densityLightShift;
+    float densityLightShift = ( isoSurface - density[idx] ) * 4;
+    light -= densityLightShift;
+    light *= std::abs( light );
+    light = std::max( AMBIENT_LIGHT, light );
 
     uint32_t vertIdx = (uint32_t)verts.size();
     verts.emplace_back( pos00, ( 1.0f - ao00 ) * light );
@@ -792,7 +801,9 @@ uint32_t MeshNoisePreview::Chunk::DmcGetVertIndex( uint32_t cellIndex, uint16_t
     }
 
     vert /= (float)std::popcount( pointCode );
-    
+
+    // Calculate analytical derivative 
+
     uint32_t derivOffsetX = STEP_X & (int)std::lroundf( -vert.x() );
     uint32_t derivOffsetY = STEP_Y & (int)std::lroundf( -vert.y() );
     uint32_t derivOffsetZ = STEP_Z & (int)std::lroundf( -vert.z() );
@@ -845,9 +856,8 @@ uint32_t MeshNoisePreview::Chunk::DmcGetVertIndex( uint32_t cellIndex, uint16_t
         cellIndex += STEP_Z - STEP_Y * 2;
     }
 
-    constexpr Vector3 SUN = -Vector3( 0.45f, 1.f, 0.6f );//    LIGHT_DIR * ( 1.0f - AMBIENT_LIGHT ) + Vector3( AMBIENT_LIGHT );
-
-    float light = ( SUN.normalized() * derivative.normalized() ).sum() * 0.5f + 0.5f;
+    float light = ( NormaliseConstExpr( -LIGHT_DIR ) * derivative.normalized() ).sum() * ( 0.5f - AMBIENT_LIGHT * 0.5f ) + ( 0.5f + AMBIENT_LIGHT * 0.5f );
+    light *= light;
 
     assert( light <= 1 );
 
@@ -858,7 +868,7 @@ uint32_t MeshNoisePreview::Chunk::DmcGetVertIndex( uint32_t cellIndex, uint16_t
 
 MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildHeightMap2DMesh( const BuildData& buildData, float* densityValues, std::vector<VertexData>& vertexData, std::vector<uint32_t>& indicies )
 {
-    static constexpr uint32_t SIZE_GEN = SIZE + 2;
+    static constexpr uint32_t SIZE_GEN = SIZE + 1;
 
     FastNoise::OutputMinMax minMax = buildData.generatorScaled->GenUniformGrid2D( densityValues,
                                                                                   buildData.pos.x(), buildData.pos.z(),
@@ -866,8 +876,6 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildHeightMap2DMesh(
     constexpr int32_t STEP_X = 1;
     constexpr int32_t STEP_Y = SIZE_GEN;
 
-    Vector3 sunLight = LIGHT_DIR.normalized() * ( 1.0f - AMBIENT_LIGHT ) + Vector3( AMBIENT_LIGHT );
-
     int32_t noiseIdx = 0;
 
     for( uint32_t y = 0; y < SIZE; y++ )
@@ -884,7 +892,7 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildHeightMap2DMesh(
             Vector3 v11( xf + 1, densityValues[noiseIdx + STEP_X + STEP_Y] * buildData.heightmapMultiplier, yf + 1 );
 
             // Normal for quad
-            float light = ( sunLight * ( Math::cross( v10 - v11, v00 - v11 ).normalized() + Math::cross( v01 - v00, v11 - v00 ).normalized() ).normalized() ).dot();
+            float light = ( LIGHT_DIR * ( Math::cross( v10 - v11, v00 - v11 ).normalized() + Math::cross( v01 - v00, v11 - v00 ).normalized() ).normalized() ).dot();
 
             uint32_t vertIdx = (uint32_t)vertexData.size();
             vertexData.emplace_back( v00, light );
diff --git a/tools/MeshNoisePreview.h b/tools/MeshNoisePreview.h
index f493961c..a9bc125f 100644
--- a/tools/MeshNoisePreview.h
+++ b/tools/MeshNoisePreview.h
@@ -142,10 +142,10 @@ namespace Magnum
             GL::Mesh* GetMesh() { return mMesh.get(); }
             Vector3i GetPos() const { return mPos; }
 
-            static constexpr uint32_t SIZE          = 128;
-            static constexpr Vector3  LIGHT_DIR     = { 0.557f, 0.743f, 0.371f }; // normalised
-            static constexpr float    AMBIENT_LIGHT = 0.3f;
-            static constexpr float    AO_STRENGTH   = 0.6f;
+            static constexpr uint32_t SIZE           = 128;
+            static constexpr Vector3  LIGHT_DIR      = { 0.6f, 1.f, 0.4f };
+            static constexpr float    AMBIENT_LIGHT  = 0.3f;
+            static constexpr float    AO_STRENGTH    = 0.9f;
 
         private:
             static void BloxelAddQuadAO( std::vector<VertexData>& verts, std::vector<uint32_t>& indicies, const float* density, float isoSurface,

From 3de2b2940c98080e8e2c504041f196a47ca0f3d1 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 14 Mar 2024 19:55:16 +0000
Subject: [PATCH 070/139] Default to DMC meshing

---
 tools/MeshNoisePreview.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index 0fdbc515..e838dee8 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -36,7 +36,7 @@ MeshNoisePreview::MeshNoisePreview()
     mBuildData.isoSurface = 0.0f;
     mBuildData.heightmapMultiplier = 100.0f;
     mBuildData.color = Color3( 1.0f );
-    mBuildData.meshType = MeshType_Bloxel3D;
+    mBuildData.meshType = MeshType_DualMarchingCubes3D;
 
     uint32_t threadCount = std::max( 2u, std::thread::hardware_concurrency() );
 

From 222a1702bff0ab9a6365279b0b7ecc3da5ea3eb4 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 14 Mar 2024 21:25:53 +0000
Subject: [PATCH 071/139] More accurate min/max Y with DMC meshing

---
 tools/MeshNoisePreview.cpp | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index e838dee8..7f8dd710 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -573,15 +573,6 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildDmc3DMesh( const
                     {
                         const float densityX = densityValues[cellIndex + STEP_X];
 
-                        if( density <= buildData.isoSurface )
-                        {
-                            maxSolid = std::max( cellOffset.y(), maxSolid );
-                        }
-                        else
-                        {
-                            minAir = std::min( cellOffset.y(), minAir );
-                        }
-
                         // is edge intersected?
                         if( ( density <= buildData.isoSurface ) ^ ( densityX <= buildData.isoSurface ) )
                         {
@@ -637,6 +628,25 @@ MeshNoisePreview::Chunk::MeshData MeshNoisePreview::Chunk::BuildDmc3DMesh( const
                             indicies.emplace_back( quadVertIndicies[3 - triFlip] );
                             indicies.emplace_back( quadVertIndicies[triRotation] );
                             indicies.emplace_back( quadVertIndicies[1 + triFlip] );
+                            
+                            if( density <= buildData.isoSurface )
+                            {
+                                maxSolid = std::max( { maxSolid, 
+                                    vertexData[quadVertIndicies[0]].posLight.y(),
+                                    vertexData[quadVertIndicies[1]].posLight.y(),
+                                    vertexData[quadVertIndicies[2]].posLight.y(),
+                                    vertexData[quadVertIndicies[3]].posLight.y()
+                                } );
+                            }
+                            else
+                            {
+                                minAir = std::min( { minAir,
+                                    vertexData[quadVertIndicies[0]].posLight.y(),
+                                    vertexData[quadVertIndicies[1]].posLight.y(),
+                                    vertexData[quadVertIndicies[2]].posLight.y(),
+                                    vertexData[quadVertIndicies[3]].posLight.y()
+                                } );                                
+                            }
                         }
                     }
 

From 32a1013cdb82c66ead9fe9109ec00ce4e3b69a6b Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 28 Mar 2024 07:31:30 +0000
Subject: [PATCH 072/139] Basic generators metadata descriptions

---
 .../FastNoise/Generators/BasicGenerators.h    | 32 +++++++++++++++----
 include/FastNoise/Generators/Generator.h      |  1 +
 tools/FastNoiseNodeEditor.cpp                 |  2 +-
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index de5129f9..3cb08202 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -23,7 +23,7 @@ namespace FastNoise
     {
         MetadataT()
         {
-            this->AddVariable( "Feature Scale", 100.0f, &ScalableGenerator::SetScale, 0.f, 0.f, 0.25f );
+            this->AddVariable( { "Feature Scale", "Effectively `1.0 / frequency`" }, 100.0f, &ScalableGenerator::SetScale, 0.f, 0.f, 0.25f );
         }
     };
 #endif
@@ -48,7 +48,7 @@ namespace FastNoise
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
-            this->AddVariable( "Value", 1.0f, &Constant::SetValue );
+            this->AddVariable( { "Value", "Constant output" }, 1.0f, &Constant::SetValue );
         }
     };
 #endif
@@ -68,6 +68,10 @@ namespace FastNoise
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
+            
+            description = 
+                "White noise generator\n"
+                "Outputs between -1.0 and 1.0";
         }
     };
 #endif
@@ -96,8 +100,11 @@ namespace FastNoise
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
-            this->AddHybridSource( "High", 1.0f, &Checkerboard::SetHigh, &Checkerboard::SetHigh );
-            this->AddHybridSource( "Low", -1.0f, &Checkerboard::SetLow, &Checkerboard::SetLow );
+            this->AddHybridSource( { "High", "Output for \"White\"" }, 1.0f, &Checkerboard::SetHigh, &Checkerboard::SetHigh );
+            this->AddHybridSource( { "Low", "Output for \"Black\"" }, -1.0f, &Checkerboard::SetLow, &Checkerboard::SetLow );
+
+            description =
+                "Outputs checkerboard pattern";
         }
     };
 #endif
@@ -117,6 +124,9 @@ namespace FastNoise
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
+
+            description =
+                "Outputs between -1.0 and 1.0";
         }
     };
 #endif
@@ -146,8 +156,13 @@ namespace FastNoise
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
-            this->AddPerDimensionVariable( "Multiplier", 0.0f, []( PositionOutput* p ) { return std::ref( p->mMultiplier ); } );
-            this->AddPerDimensionVariable( "Offset", 0.0f, []( PositionOutput* p ) { return std::ref( p->mOffset ); } );
+            this->AddPerDimensionVariable( { "Multiplier", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mMultiplier ); } );
+            this->AddPerDimensionVariable( { "Offset", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mOffset ); } );
+
+            description =
+                "Takes the input position and does the following per dimension\n"
+                "(input + offset) * multiplier\n";
+                "The output is the sum of all results";
         }
     };
 #endif
@@ -182,7 +197,10 @@ namespace FastNoise
         {
             groups.push_back( "Basic Generators" );
             this->AddVariableEnum( "Distance Function", DistanceFunction::Euclidean, &DistanceToPoint::SetDistanceFunction, kDistanceFunction_Strings );
-            this->AddPerDimensionVariable( "Point", 0.0f, []( DistanceToPoint* p ) { return std::ref( p->mPoint ); } );
+            this->AddPerDimensionVariable( { "Point", "Point in current domain space" }, 0.0f, []( DistanceToPoint* p ) { return std::ref( p->mPoint ); } );
+
+            description =
+                "Outputs calculated distance between point and input position";
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 47b34cd9..c59fb861 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -16,6 +16,7 @@
 
 namespace FastNoise
 {
+    // Dimension
     enum class Dim
     {
         X, Y, Z, W,
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index 72276cfa..e9bad670 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -1508,7 +1508,7 @@ void FastNoiseNodeEditor::ChangeSelectedNode( FastNoise::NodeData* newId )
 
         if( sharedMemory )
         {
-            memcpy( sharedMemory + 2, encodedNodeTree.data(), encodedNodeTree.length() + 1 );
+            std::memcpy( sharedMemory + 2, encodedNodeTree.data(), encodedNodeTree.length() + 1 );
             sharedMemory[1] = 0;
 
             std::atomic_thread_fence( std::memory_order_acq_rel );

From c96c63616035f385285d0305da15564f2559d7ed Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Fri, 5 Apr 2024 00:13:40 +0100
Subject: [PATCH 073/139] More customisable Fade node, blend node descriptions

---
 include/FastNoise/Generators/Blends.h   | 52 ++++++++++++++++++++++---
 include/FastNoise/Generators/Blends.inl | 27 ++++++++++++-
 src/FastNoise/Metadata.cpp              |  3 ++
 3 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/include/FastNoise/Generators/Blends.h b/include/FastNoise/Generators/Blends.h
index c22c2ad0..a5c1d2de 100644
--- a/include/FastNoise/Generators/Blends.h
+++ b/include/FastNoise/Generators/Blends.h
@@ -159,6 +159,8 @@ namespace FastNoise
             groups.push_back( "Blends" );
             this->AddHybridSource( "Value", 2.0f, &PowFloat::SetValue, &PowFloat::SetValue );
             this->AddHybridSource( "Pow", 2.0f, &PowFloat::SetPow, &PowFloat::SetPow );
+
+            description = "Equivalent to std::powf( value, pow )";
         }
     };
 #endif
@@ -186,6 +188,8 @@ namespace FastNoise
             groups.push_back( "Blends" );
             this->AddGeneratorSource( "Value", &PowInt::SetValue );
             this->AddVariable( "Pow", 2, &PowInt::SetPow, 2, INT_MAX );
+
+            description = "Faster than PowFloat node but only for int powers";
         }
     };
 #endif
@@ -210,6 +214,12 @@ namespace FastNoise
         MetadataT()
         {
             this->AddHybridSource( "Smoothness", 0.1f, &MinSmooth::SetSmoothness, &MinSmooth::SetSmoothness );
+
+            description = 
+                "Quadratic Smooth Minimum\n"
+                "Smoothes the transition between the 2 inputs\n"
+                "For explanation see:\n"
+                "https://iquilezles.org/articles/smin/";
         }
     };
 #endif
@@ -234,23 +244,47 @@ namespace FastNoise
         MetadataT()
         {
             this->AddHybridSource( "Smoothness", 0.1f, &MaxSmooth::SetSmoothness, &MaxSmooth::SetSmoothness );
+
+            description =
+                "Quadratic Smooth Maximum\n"
+                "Smoothes the transition between the 2 inputs\n"
+                "For explanation see:\n"
+                "https://iquilezles.org/articles/smin/";
         }
     };
 #endif
 
     class Fade : public virtual Generator
     {
-    public:        const Metadata& GetMetadata() const override;
+    public:
+        enum class Interpolation // Selects the easing applied to the fade value before A and B are blended
+        {
+            Linear, // Fade value is used as is
+            Hermite, // Fade value is passed through InterpHermite
+            Quintic, // Fade value is passed through InterpQuintic
+        };
+        const Metadata& GetMetadata() const override;
         void SetA( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mA, gen ); }
         void SetB( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mB, gen ); }
 
         void SetFade( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFade, gen ); }
         void SetFade( float value ) { mFade = value; }
 
+        void SetFadeMin( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFadeMin, gen ); }
+        void SetFadeMin( float value ) { mFadeMin = value; }
+
+        void SetFadeMax( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFadeMax, gen ); }
+        void SetFadeMax( float value ) { mFadeMax = value; }
+
+        void SetInterpolation( Interpolation interpolation ) { mInterpolation = interpolation; }
+
     protected:
         GeneratorSource mA;
         GeneratorSource mB;
-        HybridSource mFade = 0.5f;
+        HybridSource mFade = 0;
+        HybridSource mFadeMin = -1.f;
+        HybridSource mFadeMax = 1.f;
+        Interpolation mInterpolation = Interpolation::Linear;
     };
 
 #ifdef FASTNOISE_METADATA
@@ -262,9 +296,17 @@ namespace FastNoise
         MetadataT()
         {
             groups.push_back( "Blends" );
-            this->AddGeneratorSource( "A", &Fade::SetA );
-            this->AddGeneratorSource( "B", &Fade::SetB );
-            this->AddHybridSource( "Fade", 0.5f, &Fade::SetFade, &Fade::SetFade );
+            this->AddGeneratorSource( { "A", "From" }, &Fade::SetA );
+            this->AddGeneratorSource( { "B", "To" }, &Fade::SetB );
+            this->AddHybridSource( "Fade", 0, &Fade::SetFade, &Fade::SetFade );
+            this->AddHybridSource( "Fade Min", -1.f, &Fade::SetFadeMin, &Fade::SetFadeMin );
+            this->AddHybridSource( "Fade Max", 1.f, &Fade::SetFadeMax, &Fade::SetFadeMax );
+            this->AddVariableEnum( { "Interpolation", "Easing function" }, Fade::Interpolation::Linear, &Fade::SetInterpolation, "Linear", "Hermite", "Quintic" );            
+
+            description =
+                "Output fades between inputs A and B\n"
+                "Fade Min = 100% A\n"
+                "Fade Max = 100% B";
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Blends.inl b/include/FastNoise/Generators/Blends.inl
index 6efae578..7d4c0af9 100644
--- a/include/FastNoise/Generators/Blends.inl
+++ b/include/FastNoise/Generators/Blends.inl
@@ -154,9 +154,32 @@ class FastSIMD::DispatchClass<FastNoise::Fade, SIMD> final : public virtual Fast
     template<typename... P> 
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        float32v fade = FS::Abs( this->GetSourceValue( mFade, seed, pos... ) );
+        float32v fade = this->GetSourceValue( mFade, seed, pos... );
+        float32v fadeMin = this->GetSourceValue( mFadeMin, seed, pos... );
+        float32v fadeMax = this->GetSourceValue( mFadeMax, seed, pos... );
 
-        return FS::FMulAdd( this->GetSourceValue( mA, seed, pos... ), float32v( 1 ) - fade, this->GetSourceValue( mB, seed, pos... ) * fade );
+        float32v fadeRange = fadeMax - fadeMin;
+
+        fade = ( fade - fadeMin ) / fadeRange;
+
+        fade = FS::Max( float32v( 0 ), FS::Min( float32v( 1 ), fade ) );
+
+        switch( mInterpolation )
+        {
+        case Interpolation::Linear:
+            break;
+        case Interpolation::Hermite:
+            fade = InterpHermite( fade );
+            break;
+        case Interpolation::Quintic:
+            fade = InterpQuintic( fade );
+            break;
+        }
+
+        // Protect against nan from 0 range div
+        fade = FS::Select( fadeRange == float32v( 0 ), float32v( 0.5f ), fade );
+        
+        return Lerp( this->GetSourceValue( mA, seed, pos... ), this->GetSourceValue( mB, seed, pos... ), fade );
     }
 };
 
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 1073aa66..5c385c0b 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -462,6 +462,9 @@ std::unique_ptr<const MetadataT<T>> CreateMetadataInstance( const char* classNam
 {
     auto* newMetadata = new MetadataT<T>;
     newMetadata->name = className;
+
+    // Node must be in a group or it is not selectable in the UI
+    assert( !newMetadata->groups.empty() ); 
     return std::unique_ptr<const MetadataT<T>>( newMetadata );
 }
 

From f345b60f81a5e3912c6411db5cb92c9ef10567d0 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 6 Apr 2024 13:54:06 +0100
Subject: [PATCH 074/139] x86 zeroupper on public function exit

---
 .../FastNoise/Generators/BasicGenerators.h    |  2 +-
 include/FastNoise/Generators/Generator.inl    | 21 +++++++++++++++++++
 src/CMakeLists.txt                            |  8 +++----
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 3cb08202..9799a44b 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -161,7 +161,7 @@ namespace FastNoise
 
             description =
                 "Takes the input position and does the following per dimension\n"
-                "(input + offset) * multiplier\n";
+                "(input + offset) * multiplier\n"
                 "The output is the sum of all results";
         }
     };
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 5adce095..b57539b1 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -78,6 +78,7 @@ public:
 
     FastNoise::OutputMinMax GenUniformGrid2D( float* noiseOut, int xStart, int yStart, int xSize, int ySize, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         float32v min( INFINITY );
         float32v max( -INFINITY );
 
@@ -123,6 +124,7 @@ public:
 
     FastNoise::OutputMinMax GenUniformGrid3D( float* noiseOut, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         float32v min( INFINITY );
         float32v max( -INFINITY );
 
@@ -175,6 +177,7 @@ public:
 
     FastNoise::OutputMinMax GenUniformGrid4D( float* noiseOut, int xStart, int yStart, int zStart, int wStart, int xSize, int ySize, int zSize, int wSize, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         float32v min( INFINITY );
         float32v max( -INFINITY );
 
@@ -234,6 +237,7 @@ public:
 
     FastNoise::OutputMinMax GenPositionArray2D( float* noiseOut, int count, const float* xPosArray, const float* yPosArray, float xOffset, float yOffset, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         float32v min( INFINITY );
         float32v max( -INFINITY );
 
@@ -263,6 +267,7 @@ public:
 
     FastNoise::OutputMinMax GenPositionArray3D( float* noiseOut, int count, const float* xPosArray, const float* yPosArray, const float* zPosArray, float xOffset, float yOffset, float zOffset, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         float32v min( INFINITY );
         float32v max( -INFINITY );
 
@@ -294,6 +299,7 @@ public:
 
     FastNoise::OutputMinMax GenPositionArray4D( float* noiseOut, int count, const float* xPosArray, const float* yPosArray, const float* zPosArray, const float* wPosArray, float xOffset, float yOffset, float zOffset, float wOffset, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         float32v min( INFINITY );
         float32v max( -INFINITY );
 
@@ -327,21 +333,25 @@ public:
 
     float GenSingle2D( float x, float y, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         return FS::Extract0( Gen( int32v( seed ), float32v( x ), float32v( y ) ) );
     }
 
     float GenSingle3D( float x, float y, float z, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         return FS::Extract0( Gen( int32v( seed ), float32v( x ), float32v( y ), float32v( z ) ) );
     }
 
     float GenSingle4D( float x, float y, float z, float w, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         return FS::Extract0( Gen( int32v( seed ), float32v( x ), float32v( y ), float32v( z ), float32v( w ) ) );
     }
 
     FastNoise::OutputMinMax GenTileable2D( float* noiseOut, int xSize, int ySize, int seed ) const final
     {
+        ScopeExitx86ZeroUpper zeroUpper;
         float32v min( INFINITY );
         float32v max( -INFINITY );
 
@@ -405,6 +415,17 @@ public:
     }
 
 private:
+    struct ScopeExitx86ZeroUpper
+    {
+        ~ScopeExitx86ZeroUpper()
+        {
+            if constexpr( SIMD & FeatureFlag::AVX )
+            {
+                _mm256_zeroupper();
+            }
+        }
+    };
+
     template<bool INITIAL>
     static FS_FORCEINLINE void AxisReset( int32v& aIdx, int32v& bIdx, int32v aMax, int32v aSize, size_t aStep )
     {
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c247a4e8..eca6014e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -69,15 +69,15 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
     
 elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
     if(MSVC)
-        target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast -mllvm -x86-use-vzeroupper=0)
+        target_compile_options(FastSIMD_FastNoise PRIVATE /GS- /fp:fast -mllvm -x86-use-vzeroupper=0)
     else()
-        target_compile_options(FastSIMD_FastNoise PRIVATE -ffast-math -fno-stack-protector)        
+        target_compile_options(FastSIMD_FastNoise PRIVATE -ffast-math -fno-stack-protector -mllvm -x86-use-vzeroupper=0)        
     endif()
 
     if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-        target_compile_options(FastSIMD_FastNoise PRIVATE -mno-vzeroupper)
+        target_compile_options(FastSIMD_FastNoise PRIVATE -fno-stack-protector -mno-vzeroupper)
     else()
-        target_compile_options(FastSIMD_FastNoise PRIVATE -mllvm -x86-use-vzeroupper=0)        
+        target_compile_options(FastSIMD_FastNoise PRIVATE -fno-stack-protector -mllvm -x86-use-vzeroupper=0)        
     endif()
 
 endif()

From 86f9cc8a54e87c9e0472deaf3c5d79e1aa392f4e Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 6 Apr 2024 14:57:32 +0100
Subject: [PATCH 075/139] Correct no zero upper args

---
 include/FastNoise/Generators/Generator.inl | 4 ++--
 src/CMakeLists.txt                         | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index b57539b1..7e8edc8d 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -417,11 +417,11 @@ public:
 private:
     struct ScopeExitx86ZeroUpper
     {
-        ~ScopeExitx86ZeroUpper()
+        FS_FORCEINLINE ~ScopeExitx86ZeroUpper()
         {
             if constexpr( SIMD & FeatureFlag::AVX )
             {
-                _mm256_zeroupper();
+                FS_BIND_INTRINSIC( _mm256_zeroupper )();
             }
         }
     };
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index eca6014e..1112aaa6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -69,15 +69,15 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
     
 elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
     if(MSVC)
-        target_compile_options(FastSIMD_FastNoise PRIVATE /GS- /fp:fast -mllvm -x86-use-vzeroupper=0)
+        target_compile_options(FastSIMD_FastNoise PRIVATE /GS- /fp:fast)
     else()
-        target_compile_options(FastSIMD_FastNoise PRIVATE -ffast-math -fno-stack-protector -mllvm -x86-use-vzeroupper=0)        
+        target_compile_options(FastSIMD_FastNoise PRIVATE -ffast-math -fno-stack-protector)        
     endif()
 
     if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-        target_compile_options(FastSIMD_FastNoise PRIVATE -fno-stack-protector -mno-vzeroupper)
+        target_compile_options(FastSIMD_FastNoise PRIVATE -mno-vzeroupper)
     else()
-        target_compile_options(FastSIMD_FastNoise PRIVATE -fno-stack-protector -mllvm -x86-use-vzeroupper=0)        
+        target_compile_options(FastSIMD_FastNoise PRIVATE -mllvm -x86-use-vzeroupper=0)        
     endif()
 
 endif()

From 11e368c3f98362dd96c965d78b783e8e42ada69f Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 6 Apr 2024 15:37:25 +0100
Subject: [PATCH 076/139] Better bounding for coord value lookup

---
 include/FastNoise/Generators/Utils.inl | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 345abd35..20cdf40c 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -219,9 +219,17 @@ namespace FastNoise
     {
         int32v hash = seed;
         hash ^= (primedPos ^ ...);
-        
+
+        int32v zeroCase = hash >> 8;
         hash *= hash * int32v( 0x27d4eb2d );
-        return FS::Convert<float>( hash ) * float32v( 1.0f / (float)-INT_MIN );
+
+        int32v floatBits = hash & int32v( 0x7FFFFF ); //fp32 fractional bits
+        floatBits |= int32v( 0x3F800000 ); // fp32 1.0
+        float32v f32 = FS::Cast<float>( floatBits ) - float32v( 0.9999999f ); // bring range to 0.0000001 - 1.0
+
+        float32v sign = FS::Cast<float>( hash & int32v( -2147483648 ) );
+
+        return FS::InvMasked( zeroCase == int32v( 0x7FFFFF ), f32 | sign );
     }
      
     FS_FORCEINLINE static float32v Lerp( float32v a, float32v b, float32v t )

From a52ec75252217ee11226f4b8df3f10ab97860174 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 6 Apr 2024 22:12:48 +0100
Subject: [PATCH 077/139] Revert to faster coord value function

---
 include/FastNoise/Generators/Utils.inl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 20cdf40c..ef86929c 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -220,6 +220,11 @@ namespace FastNoise
         int32v hash = seed;
         hash ^= (primedPos ^ ...);
 
+#if 1
+        hash *= hash * int32v( 0x27d4eb2d );
+        return FS::Convert<float>( hash ) * float32v( -1.0f / (float)INT_MIN );
+
+#else // More accurate bounding but slower
         int32v zeroCase = hash >> 8;
         hash *= hash * int32v( 0x27d4eb2d );
 
@@ -230,6 +235,7 @@ namespace FastNoise
         float32v sign = FS::Cast<float>( hash & int32v( -2147483648 ) );
 
         return FS::InvMasked( zeroCase == int32v( 0x7FFFFF ), f32 | sign );
+#endif
     }
      
     FS_FORCEINLINE static float32v Lerp( float32v a, float32v b, float32v t )

From 28f27cf369cc354c6aecb07aeb363269b1fed5bc Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 13 Apr 2024 21:46:00 +0100
Subject: [PATCH 078/139] Smart node const correctness, remove shared_ptr
 config

---
 include/FastNoise/FastNoise.h         |  4 ----
 include/FastNoise/Utility/Config.h    | 14 +-------------
 include/FastNoise/Utility/SmartNode.h | 27 +++++++++++++++++++++++----
 src/FastNoise/Metadata.cpp            |  8 +-------
 src/FastNoise/SmartNode.cpp           |  5 -----
 tests/FastNoiseCpp11Include.cpp       |  2 --
 6 files changed, 25 insertions(+), 35 deletions(-)

diff --git a/include/FastNoise/FastNoise.h b/include/FastNoise/FastNoise.h
index ef4c9b93..2cd60451 100644
--- a/include/FastNoise/FastNoise.h
+++ b/include/FastNoise/FastNoise.h
@@ -30,11 +30,7 @@ namespace FastNoise
         static_assert( std::is_base_of<Generator, T>::value, "This function should only be used for FastNoise node classes, for example FastNoise::Simplex" );
         static_assert( std::is_member_function_pointer<decltype(&T::GetMetadata)>::value, "Cannot create abstract node class, use a derived class, for example: Fractal -> FractalFBm" );
 
-#if FASTNOISE_USE_SHARED_PTR
-        return SmartNode<T>( FastSIMD::NewDispatchClass<T>( maxSimdLevel ) );
-#else
         return SmartNode<T>( FastSIMD::NewDispatchClass<T>( maxFeatureSet, &SmartNodeManager::Allocate ) );
-#endif
     }
 
     /// <summary>
diff --git a/include/FastNoise/Utility/Config.h b/include/FastNoise/Utility/Config.h
index bffd34ea..c77f9c01 100644
--- a/include/FastNoise/Utility/Config.h
+++ b/include/FastNoise/Utility/Config.h
@@ -3,11 +3,6 @@
 #include <FastSIMD/DispatchClass.h>
 
 #define FASTNOISE_CALC_MIN_MAX true
-#define FASTNOISE_USE_SHARED_PTR false
-
-#if FASTNOISE_USE_SHARED_PTR
-#include <memory>
-#endif
 
 namespace FastNoise
 {    
@@ -17,13 +12,8 @@ namespace FastNoise
     template<typename T>
     struct MetadataT;
 
-#if FASTNOISE_USE_SHARED_PTR
-    template<typename T = Generator>
-    using SmartNode = std::shared_ptr<T>;
-#else
     template<typename T = Generator>
     class SmartNode;
-#endif
 
     template<typename T = Generator>
     using SmartNodeArg = const SmartNode<const T>&;
@@ -32,6 +22,4 @@ namespace FastNoise
     SmartNode<T> New( FastSIMD::FeatureSet maxFeatureSet = FastSIMD::FeatureSet::Max );
 } // namespace FastNoise
 
-#if !FASTNOISE_USE_SHARED_PTR
-#include "SmartNode.h"
-#endif
\ No newline at end of file
+#include "SmartNode.h"
\ No newline at end of file
diff --git a/include/FastNoise/Utility/SmartNode.h b/include/FastNoise/Utility/SmartNode.h
index 43544880..58f785a8 100644
--- a/include/FastNoise/Utility/SmartNode.h
+++ b/include/FastNoise/Utility/SmartNode.h
@@ -43,7 +43,7 @@ namespace FastNoise
         template<typename U>
         static SmartNode DynamicCast( const SmartNode<U>& node )
         {
-            if( T* dynamicCast = dynamic_cast<T*>( node.get() ) )
+            if( T* dynamicCast = dynamic_cast<T*>( node.mPtr ) )
             {
                 return FastNoise::SmartNode<T>( dynamicCast );
             }
@@ -150,13 +150,25 @@ namespace FastNoise
             return lhs.get() != rhs.get();
         }
 
-        T& operator*() const noexcept
+        const T& operator*() const noexcept
         {
             assert( mPtr->ReferencesFetchAdd() );
             return *mPtr;
         }
 
-        T* operator->() const noexcept
+        T& operator*() noexcept
+        {
+            assert( mPtr->ReferencesFetchAdd() );
+            return *mPtr;
+        }
+
+        const T* operator->() const noexcept
+        {
+            assert( mPtr->ReferencesFetchAdd() );
+            return mPtr;
+        }
+
+        T* operator->() noexcept
         {
             assert( mPtr->ReferencesFetchAdd() );
             return mPtr;
@@ -167,7 +179,12 @@ namespace FastNoise
             return mPtr != nullptr;
         }
 
-        T* get() const noexcept
+        const T* get() const noexcept
+        {
+            return mPtr;
+        }
+
+        T* get() noexcept
         {
             return mPtr;
         }
@@ -207,6 +224,8 @@ namespace FastNoise
         template<typename U>
         friend class SmartNode;
 
+        friend T;
+
         explicit SmartNode( T* ptr ) :
             mPtr( ptr )
         {
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 5c385c0b..58f17074 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -468,12 +468,6 @@ std::unique_ptr<const MetadataT<T>> CreateMetadataInstance( const char* classNam
     return std::unique_ptr<const MetadataT<T>>( newMetadata );
 }
 
-#if FASTNOISE_USE_SHARED_PTR
-#define FASTNOISE_GET_MEMORY_ALLOCATOR()
-#else
-#define FASTNOISE_GET_MEMORY_ALLOCATOR() , &SmartNodeManager::Allocate
-#endif
-
 #define FASTNOISE_REGISTER_NODE( CLASS ) \
 static const std::unique_ptr<const FastNoise::MetadataT<CLASS>> g ## CLASS ## Metadata = CreateMetadataInstance<CLASS>( #CLASS );\
 template<> FASTNOISE_API const FastNoise::Metadata& FastNoise::Impl::GetMetadata<CLASS>()\
@@ -486,7 +480,7 @@ const FastNoise::Metadata& CLASS::GetMetadata() const\
 }\
 SmartNode<> FastNoise::MetadataT<CLASS>::CreateNode( FastSIMD::FeatureSet l ) const\
 {\
-    return SmartNode<>( FastSIMD::NewDispatchClass<CLASS>( l FASTNOISE_GET_MEMORY_ALLOCATOR() ) );\
+    return SmartNode<>( FastSIMD::NewDispatchClass<CLASS>( l, &SmartNodeManager::Allocate ) );\
 }
 
 #define FASTSIMD_INCLUDE_HEADER_ONLY
diff --git a/src/FastNoise/SmartNode.cpp b/src/FastNoise/SmartNode.cpp
index 7fd89d2e..5d4cc01d 100644
--- a/src/FastNoise/SmartNode.cpp
+++ b/src/FastNoise/SmartNode.cpp
@@ -1,7 +1,4 @@
 #include <FastNoise/Utility/Config.h>
-
-#if !FASTNOISE_USE_SHARED_PTR
-
 #include <FastNoise/Utility/SmartNode.h>
 
 #include <mutex>
@@ -250,5 +247,3 @@ namespace FastNoise
         gMemoryAllocator.Free( ptr );        
     }
 } // namespace FastNoise
-
-#endif
\ No newline at end of file
diff --git a/tests/FastNoiseCpp11Include.cpp b/tests/FastNoiseCpp11Include.cpp
index 71192e20..556d1473 100644
--- a/tests/FastNoiseCpp11Include.cpp
+++ b/tests/FastNoiseCpp11Include.cpp
@@ -26,7 +26,6 @@ int main()
     std::cout << std::endl;
 
     // SmartNode down cast example
-#if !FASTNOISE_USE_SHARED_PTR
     {
         // New Checkerboard node stored in base SmartNode type
         FastNoise::SmartNode<> base = FastNoise::New<FastNoise::Checkerboard>();
@@ -45,5 +44,4 @@ int main()
 
         std::cout << ( simplex ? "valid" : "nullptr" ) << std::endl;
     }
-#endif
 }
\ No newline at end of file

From d2aecea2b30f992af2f1963f998d33e4e45f559b Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 21 Apr 2024 20:14:21 +0100
Subject: [PATCH 079/139] Add native output bounding for various generator
 noise types

---
 .../FastNoise/Generators/BasicGenerators.h    | 61 +++++++++-----
 .../FastNoise/Generators/BasicGenerators.inl  | 22 +++--
 include/FastNoise/Generators/Blends.h         |  2 +-
 include/FastNoise/Generators/Cellular.h       | 28 +++----
 include/FastNoise/Generators/Cellular.inl     | 83 +++++++++----------
 include/FastNoise/Generators/Generator.h      |  4 +-
 include/FastNoise/Generators/Perlin.h         |  4 +-
 include/FastNoise/Generators/Perlin.inl       | 23 +++--
 include/FastNoise/Generators/Simplex.h        | 12 +--
 include/FastNoise/Generators/Simplex.inl      | 38 ++++++---
 include/FastNoise/Generators/Utils.inl        | 41 ++++-----
 include/FastNoise/Generators/Value.h          |  4 +-
 include/FastNoise/Generators/Value.inl        | 17 ++--
 13 files changed, 192 insertions(+), 147 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 9799a44b..40d30dce 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -28,6 +28,38 @@ namespace FastNoise
     };
 #endif
 
+    template<typename PARENT>
+    class VariableRange : public virtual PARENT
+    {
+    public:
+        void SetOutputMin( float value )
+        {
+            mRangeScale += mRangeMin - value;
+            mRangeMin = value;
+        }
+
+        void SetOutputMax( float value )
+        {
+            mRangeScale = ( value - mRangeMin );
+        }
+
+    protected:
+        float mRangeMin = -1;
+        float mRangeScale = 2;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<typename PARENT>
+    struct MetadataT<VariableRange<PARENT>> : MetadataT<PARENT>
+    {
+        MetadataT()
+        {
+            this->AddVariable( { "Output Min", "Minimum bound of output range" }, -1.0f, &VariableRange<PARENT>::SetOutputMin );
+            this->AddVariable( { "Output Max", "Maximum bound of output range" }, 1.0f, &VariableRange<PARENT>::SetOutputMax );
+        }
+    };
+#endif
+
     class Constant : public virtual Generator
     {
     public:
@@ -53,7 +85,7 @@ namespace FastNoise
     };
 #endif
 
-    class White : public virtual Generator
+    class White : public virtual VariableRange<Generator>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -61,7 +93,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<White> : MetadataT<Generator>
+    struct MetadataT<White> : MetadataT<VariableRange<Generator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
@@ -70,46 +102,33 @@ namespace FastNoise
             groups.push_back( "Basic Generators" );
             
             description = 
-                "White noise generator\n"
-                "Outputs between -1.0 and 1.0";
+                "White noise generator";
         }
     };
 #endif
 
-    class Checkerboard : public virtual ScalableGenerator
+    class Checkerboard : public virtual VariableRange<ScalableGenerator>
     {
     public:
         const Metadata& GetMetadata() const override;
-        
-        void SetHigh( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mHigh, gen ); }
-        void SetHigh( float value ) { mHigh = value; }
-        void SetLow( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLow, gen ); }
-        void SetLow( float value ) { mLow = value; }
-
-    protected:
-        HybridSource mHigh = 1.0f;
-        HybridSource mLow = -1.0f;
     };
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Checkerboard> : MetadataT<ScalableGenerator>
+    struct MetadataT<Checkerboard> : MetadataT<VariableRange<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
-            this->AddHybridSource( { "High", "Output for \"White\"" }, 1.0f, &Checkerboard::SetHigh, &Checkerboard::SetHigh );
-            this->AddHybridSource( { "Low", "Output for \"Black\"" }, -1.0f, &Checkerboard::SetLow, &Checkerboard::SetLow );
-
             description =
                 "Outputs checkerboard pattern";
         }
     };
 #endif
 
-    class SineWave : public virtual ScalableGenerator
+    class SineWave : public virtual VariableRange<ScalableGenerator>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -117,7 +136,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<SineWave> : MetadataT<ScalableGenerator>
+    struct MetadataT<SineWave> : MetadataT<VariableRange<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
@@ -126,7 +145,7 @@ namespace FastNoise
             groups.push_back( "Basic Generators" );
 
             description =
-                "Outputs between -1.0 and 1.0";
+                "Outputs sine wave";
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index 4119f6cf..604d768b 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -13,6 +13,16 @@ protected:
     }
 };
 
+template<FastSIMD::FeatureSet SIMD, typename PARENT>
+class FastSIMD::DispatchClass<FastNoise::VariableRange<PARENT>, SIMD> : public virtual FastNoise::VariableRange<PARENT>, public FastSIMD::DispatchClass<PARENT, SIMD>
+{
+protected:
+    FS_FORCEINLINE float32v ScaleOutput( float32v value, float nativeMin, float nativeMax ) const
+    {
+        return FS::FMulAdd( float32v( 1.0f / ( nativeMax - nativeMin ) ) * float32v( this->mRangeScale ), value - float32v( nativeMin ), float32v( this->mRangeMin ) );
+    }
+};
+
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::Constant, SIMD> final : public virtual FastNoise::Constant, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
@@ -26,7 +36,7 @@ class FastSIMD::DispatchClass<FastNoise::Constant, SIMD> final : public virtual
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::White, SIMD> final : public virtual FastNoise::White, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::White, SIMD> final : public virtual FastNoise::White, public FastSIMD::DispatchClass<FastNoise::VariableRange<Generator>, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -36,12 +46,12 @@ class FastSIMD::DispatchClass<FastNoise::White, SIMD> final : public virtual Fas
         size_t idx = 0;
         ((pos = FS::Cast<float>( (FS::Cast<int32_t>( pos ) ^ (FS::Cast<int32_t>( pos ) >> 16)) * int32v( Primes::Lookup[idx++] ) )), ...);
 
-        return GetValueCoord( seed, FS::Cast<int32_t>( pos )... );
+        return this->ScaleOutput( GetValueCoord( seed, FS::Cast<int32_t>( pos )... ), -kValueBounds, kValueBounds );
     }
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> final : public virtual FastNoise::Checkerboard, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> final : public virtual FastNoise::Checkerboard, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -52,12 +62,12 @@ class FastSIMD::DispatchClass<FastNoise::Checkerboard, SIMD> final : public virt
 
         int32v value = (FS::Convert<int32_t>( pos ) ^ ...);
 
-        return float32v( 1.0f ) ^ FS::Cast<float>( value << 31 );
+        return this->ScaleOutput( FS::Cast<float>( (value & int32v( 1 )) << 30 ), 0, 2 );
     }
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> final : public virtual FastNoise::SineWave, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> final : public virtual FastNoise::SineWave, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
     FASTNOISE_IMPL_GEN_T;
 
@@ -66,7 +76,7 @@ class FastSIMD::DispatchClass<FastNoise::SineWave, SIMD> final : public virtual
     {
         this->ScalePositions( pos... );
 
-        return (FS::Sin( pos ) * ...);
+        return this->ScaleOutput( (FS::Sin( pos ) * ...), -1, 1 );
     }
 };
 
diff --git a/include/FastNoise/Generators/Blends.h b/include/FastNoise/Generators/Blends.h
index a5c1d2de..189b6099 100644
--- a/include/FastNoise/Generators/Blends.h
+++ b/include/FastNoise/Generators/Blends.h
@@ -187,7 +187,7 @@ namespace FastNoise
         {
             groups.push_back( "Blends" );
             this->AddGeneratorSource( "Value", &PowInt::SetValue );
-            this->AddVariable( "Pow", 2, &PowInt::SetPow, 2, INT_MAX );
+            this->AddVariable( "Pow", 2, &PowInt::SetPow, 2 );
 
             description = "Faster than PowFloat node but only for int powers";
         }
diff --git a/include/FastNoise/Generators/Cellular.h b/include/FastNoise/Generators/Cellular.h
index 5fad191e..246e2c98 100644
--- a/include/FastNoise/Generators/Cellular.h
+++ b/include/FastNoise/Generators/Cellular.h
@@ -5,7 +5,8 @@
 
 namespace FastNoise
 {
-    class Cellular : public virtual ScalableGenerator
+    template<typename PARENT = VariableRange<ScalableGenerator>>
+    class Cellular : public virtual PARENT
     {
     public:
         void SetJitterModifier( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mJitterModifier, gen ); }
@@ -18,20 +19,20 @@ namespace FastNoise
     };
 
 #ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Cellular> : MetadataT<ScalableGenerator>
+    template<typename PARENT>
+    struct MetadataT<Cellular<PARENT>> : MetadataT<PARENT>
     {
         MetadataT()
         {
-            groups.push_back( "Coherent Noise" );
-            this->AddHybridSource( { "Jitter Modifier", "Above 1.0 will cause grid artifacts" }, 1.0f, &Cellular::SetJitterModifier, &Cellular::SetJitterModifier );
+            this->groups.push_back( "Coherent Noise" );
+            this->AddHybridSource( { "Jitter Modifier", "Above 1.0 will cause grid artifacts" }, 1.0f, &Cellular<PARENT>::SetJitterModifier, &Cellular<PARENT>::SetJitterModifier );
             this->AddVariableEnum( { "Distance Function", "How distance to closest cells is calculated\nHybrid is EuclideanSquared + Manhattan" },
-                DistanceFunction::EuclideanSquared, &Cellular::SetDistanceFunction, kDistanceFunction_Strings );
+                DistanceFunction::EuclideanSquared, &Cellular<PARENT>::SetDistanceFunction, kDistanceFunction_Strings );
         }
     };
 #endif
 
-    class CellularValue : public virtual Cellular
+    class CellularValue : public virtual Cellular<>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -46,7 +47,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<CellularValue> : MetadataT<Cellular>
+    struct MetadataT<CellularValue> : MetadataT<Cellular<>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
@@ -56,13 +57,12 @@ namespace FastNoise
 
             description = 
                 "Returns value of Nth closest cell\n"
-                "Value is generated using white noise\n"
-                "Output is bounded -1 : 1";
+                "Value is generated using white noise";
         }
     };
 #endif
 
-    class CellularDistance : public virtual Cellular
+    class CellularDistance : public virtual Cellular<>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -90,7 +90,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<CellularDistance> : MetadataT<Cellular>
+    struct MetadataT<CellularDistance> : MetadataT<Cellular<>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
@@ -108,7 +108,7 @@ namespace FastNoise
     };
 #endif
 
-    class CellularLookup : public virtual Cellular
+    class CellularLookup : public virtual Cellular<ScalableGenerator>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -121,7 +121,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<CellularLookup> : MetadataT<Cellular>
+    struct MetadataT<CellularLookup> : MetadataT<Cellular<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index f3fc94f9..e7bc34bb 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -4,26 +4,25 @@
 #include "Cellular.h"
 #include "Utils.inl"
 
-template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Cellular, SIMD> : public virtual FastNoise::Cellular, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+template<FastSIMD::FeatureSet SIMD, typename PARENT>
+class FastSIMD::DispatchClass<FastNoise::Cellular<PARENT>, SIMD> : public virtual FastNoise::Cellular<PARENT>, public FastSIMD::DispatchClass<PARENT, SIMD>
 {
 protected:
-    const float kJitter2D = 0.437016f;
-    const float kJitter3D = 0.396144f;
-    const float kJitter4D = 0.366025f;
-    const float kJitterIdx23 = 0.190983f;
+    static constexpr float kJitter2D = 0.437016f;
+    static constexpr float kJitter3D = 0.396144f;
+    static constexpr float kJitter4D = 0.366025f;
+    static constexpr float kJitterIdx23 = 0.190983f;
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public virtual FastNoise::CellularValue, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public virtual FastNoise::CellularValue, public FastSIMD::DispatchClass<FastNoise::Cellular<>, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
         float32v jitter = float32v( this->kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
-        std::array<float32v, kMaxDistanceCount> value;
+        std::array<int32v, kMaxDistanceCount> valueHash;
         std::array<float32v, kMaxDistanceCount> distance;
         
-        value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
 
         this->ScalePositions( x, y );
@@ -51,7 +50,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                 xd = FS::FMulAdd( xd, invMag, xcf );
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
-                float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+                int32v newCellValueHash = hash;
                 float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd );
 
                 for( int i = 0; ; i++ )
@@ -59,10 +58,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                     mask32v closer = newDistance < distance[i];
 
                     float32v localDistance = distance[i];
-                    float32v localCellValue = value[i];
+                    int32v localCellValueHash = valueHash[i];
 
                     distance[i] = FS::Select( closer, newDistance, distance[i] );
-                    value[i] = FS::Select( closer, newCellValue, value[i] );
+                    valueHash[i] = FS::Select( closer, newCellValueHash, valueHash[i] );
 
                     if( i > mValueIndex )
                     {
@@ -70,7 +69,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                     }
 
                     newDistance = FS::Select( closer, localDistance, newDistance );
-                    newCellValue = FS::Select( closer, localCellValue, newCellValue );
+                    newCellValueHash = FS::Select( closer, localCellValueHash, newCellValueHash );
                 }
 
                 ycf += float32v( 1 );
@@ -80,16 +79,15 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
             xc += int32v( Primes::X );
         }
 
-        return value[mValueIndex];
+        return this->ScaleOutput( FS::Convert<float>( valueHash[mValueIndex] ), -kValueBounds, kValueBounds );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
         float32v jitter = float32v( this->kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
-        std::array<float32v, kMaxDistanceCount> value;
+        std::array<int32v, kMaxDistanceCount> valueHash;
         std::array<float32v, kMaxDistanceCount> distance;
         
-        value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
 
         this->ScalePositions( x, y, z );
@@ -125,8 +123,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                     xd = FS::FMulAdd( xd, invMag, xcf );
                     yd = FS::FMulAdd( yd, invMag, ycf );
                     zd = FS::FMulAdd( zd, invMag, zcf );
-                
-                    float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+
+                    int32v newCellValueHash = hash;
                     float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd );
                 
                     for( int i = 0; ; i++ )
@@ -134,10 +132,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                         mask32v closer = newDistance < distance[i];
 
                         float32v localDistance = distance[i];
-                        float32v localCellValue = value[i];
+                        int32v localCellValueHash = valueHash[i];
 
                         distance[i] = FS::Select( closer, newDistance, distance[i] );
-                        value[i] = FS::Select( closer, newCellValue, value[i] );
+                        valueHash[i] = FS::Select( closer, newCellValueHash, valueHash[i] );
 
                         if( i > mValueIndex )
                         {
@@ -145,7 +143,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                         }
 
                         newDistance = FS::Select( closer, localDistance, newDistance );
-                        newCellValue = FS::Select( closer, localCellValue, newCellValue );
+                        newCellValueHash = FS::Select( closer, localCellValueHash, newCellValueHash );
                     }
             
                     zcf += float32v( 1 );
@@ -157,17 +155,16 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
             xcf += float32v( 1 );
             xc += int32v( Primes::X );
         }
-    
-        return value[mValueIndex];
+
+        return this->ScaleOutput( FS::Convert<float>( valueHash[mValueIndex] ), -kValueBounds, kValueBounds );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z , float32v w ) const
     {
         float32v jitter = float32v( this->kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
-        std::array<float32v, kMaxDistanceCount> value;
+        std::array<int32v, kMaxDistanceCount> valueHash;
         std::array<float32v, kMaxDistanceCount> distance;
         
-        value.fill( float32v( INFINITY ) );
         distance.fill( float32v( INFINITY ) );
 
         this->ScalePositions( x, y, z, w );
@@ -213,7 +210,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                         zd = FS::FMulAdd( zd, invMag, zcf );
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
-                        float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS::Convert<float>( hash );
+                        int32v newCellValueHash = hash;
                         float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd, wd );
 
                         for( int i = 0; ; i++ )
@@ -221,10 +218,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                             mask32v closer = newDistance < distance[i];
 
                             float32v localDistance = distance[i];
-                            float32v localCellValue = value[i];
+                            int32v localCellValueHash = valueHash[i];
 
                             distance[i] = FS::Select( closer, newDistance, distance[i] );
-                            value[i] = FS::Select( closer, newCellValue, value[i] );
+                            valueHash[i] = FS::Select( closer, newCellValueHash, valueHash[i] );
 
                             if( i > mValueIndex )
                             {
@@ -232,7 +229,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                             }
 
                             newDistance = FS::Select( closer, localDistance, newDistance );
-                            newCellValue = FS::Select( closer, localCellValue, newCellValue );
+                            newCellValueHash = FS::Select( closer, localCellValueHash, newCellValueHash );
                         }
 
                         wcf += float32v( 1 );
@@ -247,13 +244,13 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
             xcf += float32v( 1 );
             xc += int32v( Primes::X );
         }
-    
-        return value[mValueIndex];
+
+        return this->ScaleOutput( FS::Convert<float>( valueHash[mValueIndex] ), -kValueBounds, kValueBounds );
     }
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public virtual FastNoise::CellularDistance, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public virtual FastNoise::CellularDistance, public FastSIMD::DispatchClass<FastNoise::Cellular<>, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
@@ -303,7 +300,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
             xc += int32v( Primes::X );
         }
 
-        return GetReturn( distance );
+        return GetReturn( distance, 1 + this->kJitter2D );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
@@ -366,7 +363,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
             xc += int32v( Primes::X );
         }
 
-        return GetReturn( distance );
+        return GetReturn( distance, 1 + this->kJitter3D );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
@@ -441,10 +438,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
             xc += int32v( Primes::X );
         }
 
-        return GetReturn( distance );
+        return GetReturn( distance, 1 + this->kJitter4D );
     }
 
-    FS_FORCEINLINE float32v GetReturn( std::array<float32v, kMaxDistanceCount>& distance ) const
+    FS_FORCEINLINE float32v GetReturn( std::array<float32v, kMaxDistanceCount>& distance, float maxDist ) const
     {
         if( mDistanceFunction == FastNoise::DistanceFunction::Euclidean )
         {
@@ -452,35 +449,37 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
             distance[mDistanceIndex1] *= FS::InvSqrt( distance[mDistanceIndex1] );
         }
 
+        maxDist *= maxDist;
+
         switch( mReturnType )
         {
         default:
         case ReturnType::Index0:
         {
-            return distance[mDistanceIndex0];
+            return this->ScaleOutput( distance[mDistanceIndex0], 0, maxDist );
         }
         case ReturnType::Index0Add1:
         {
-            return distance[mDistanceIndex0] + distance[mDistanceIndex1];
+            return this->ScaleOutput( distance[mDistanceIndex0] + distance[mDistanceIndex1], 0, maxDist * 2 );
         }
         case ReturnType::Index0Sub1:
         {
-            return distance[mDistanceIndex0] - distance[mDistanceIndex1];
+            return this->ScaleOutput( FS::Abs( distance[mDistanceIndex0] - distance[mDistanceIndex1] ), 0, maxDist );
         }
         case ReturnType::Index0Mul1:
         {
-            return distance[mDistanceIndex0] * distance[mDistanceIndex1];
+            return this->ScaleOutput( distance[mDistanceIndex0] * distance[mDistanceIndex1], 0, maxDist * maxDist );
         }
         case ReturnType::Index0Div1:
         {
-            return distance[mDistanceIndex0] * FS::Reciprocal( distance[mDistanceIndex1] );
+            return this->ScaleOutput( distance[mDistanceIndex0] * FS::Reciprocal( distance[mDistanceIndex1] ), 0, maxDist );
         }
         }
     }
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public virtual FastNoise::CellularLookup, public FastSIMD::DispatchClass<FastNoise::Cellular, SIMD>
+class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public virtual FastNoise::CellularLookup, public FastSIMD::DispatchClass<FastNoise::Cellular<ScalableGenerator>, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index c59fb861..cafe297e 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -223,8 +223,8 @@ namespace FastNoise
             memberVariables.push_back( member );
         }
 
-        template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
-        void AddVariable( NameDesc nameDesc, T defaultV, void ( U::*func )( T ), T minV = 0, T maxV = 0, float uiDragSpeed = std::is_same_v<T, float> ? Metadata::kDefaultUiDragSpeedFloat : Metadata::kDefaultUiDragSpeedInt )
+        template<typename T, typename U, typename V, typename = std::enable_if_t<!std::is_enum_v<T>>>
+        void AddVariable( NameDesc nameDesc, T defaultV, V ( U::*func )( T ), T minV = 0, T maxV = 0, float uiDragSpeed = std::is_same_v<T, float> ? Metadata::kDefaultUiDragSpeedFloat : Metadata::kDefaultUiDragSpeedInt )
         {
             MemberVariable member;
             member.name = nameDesc.name;
diff --git a/include/FastNoise/Generators/Perlin.h b/include/FastNoise/Generators/Perlin.h
index 8096537a..cf4832e5 100644
--- a/include/FastNoise/Generators/Perlin.h
+++ b/include/FastNoise/Generators/Perlin.h
@@ -3,7 +3,7 @@
 
 namespace FastNoise
 {
-    class Perlin : public virtual ScalableGenerator
+    class Perlin : public virtual VariableRange<ScalableGenerator>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -11,7 +11,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Perlin> : MetadataT<ScalableGenerator>
+    struct MetadataT<Perlin> : MetadataT<VariableRange<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
diff --git a/include/FastNoise/Generators/Perlin.inl b/include/FastNoise/Generators/Perlin.inl
index b9a3190d..6f19b475 100644
--- a/include/FastNoise/Generators/Perlin.inl
+++ b/include/FastNoise/Generators/Perlin.inl
@@ -2,7 +2,7 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual FastNoise::Perlin, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
         this->ScalePositions( x, y );
@@ -23,9 +23,12 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
         xs = InterpQuintic( xs );
         ys = InterpQuintic( ys );
 
-        return float32v( 0.579106986522674560546875f ) * Lerp(
+        constexpr float kBounding = 0.579106986522674560546875f;
+
+        return this->ScaleOutput( Lerp(
             Lerp( GetGradientDot( HashPrimes( seed, x0, y0 ), xf0, yf0 ), GetGradientDot( HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1 ), xf0, yf1 ), GetGradientDot( HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys );
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1 ), xf0, yf1 ), GetGradientDot( HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys ),
+            -1 / kBounding, 1 / kBounding );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
@@ -54,12 +57,15 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
         ys = InterpQuintic( ys );
         zs = InterpQuintic( zs );
 
-        return float32v( 0.964921414852142333984375f ) * Lerp( Lerp(
+        constexpr float kBounding = 0.964921414852142333984375f;
+
+        return this->ScaleOutput( Lerp( Lerp(
             Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), GetGradientDot( HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
             Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), GetGradientDot( HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
             Lerp( 
             Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), GetGradientDot( HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),    
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs );
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs ),
+            -1 / kBounding, 1 / kBounding );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
@@ -94,7 +100,9 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
         zs = InterpQuintic( zs );
         ws = InterpQuintic( ws );
 
-        return float32v( 0.964921414852142333984375f ) * Lerp( Lerp( Lerp(
+        constexpr float kBounding = 0.964921414852142333984375f;
+
+        return this->ScaleOutput( Lerp( Lerp( Lerp(
             Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), GetGradientDot( HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
             Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), GetGradientDot( HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
             Lerp(                                                                                                                                                     
@@ -105,6 +113,7 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
             Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
             Lerp(                                                                                                                                                     
             Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), GetGradientDot( HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),    
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws );
+            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws ),
+            -1 / kBounding, 1 / kBounding );
     }
 };
diff --git a/include/FastNoise/Generators/Simplex.h b/include/FastNoise/Generators/Simplex.h
index 00fd17e0..cbed8109 100644
--- a/include/FastNoise/Generators/Simplex.h
+++ b/include/FastNoise/Generators/Simplex.h
@@ -3,7 +3,7 @@
 
 namespace FastNoise
 {
-    class Simplex : public virtual ScalableGenerator
+    class Simplex : public virtual VariableRange<ScalableGenerator>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -11,7 +11,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Simplex> : MetadataT<ScalableGenerator>
+    struct MetadataT<Simplex> : MetadataT<VariableRange<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
@@ -26,7 +26,7 @@ namespace FastNoise
     };
 #endif
 
-    class OpenSimplex2 : public virtual ScalableGenerator
+    class OpenSimplex2 : public virtual VariableRange<ScalableGenerator>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -34,7 +34,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<OpenSimplex2> : MetadataT<ScalableGenerator>
+    struct MetadataT<OpenSimplex2> : MetadataT<VariableRange<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
@@ -49,7 +49,7 @@ namespace FastNoise
     };
 #endif
 
-    class OpenSimplex2S : public virtual ScalableGenerator
+    class OpenSimplex2S : public virtual VariableRange<ScalableGenerator>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -57,7 +57,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<OpenSimplex2S> : MetadataT<ScalableGenerator>
+    struct MetadataT<OpenSimplex2S> : MetadataT<VariableRange<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index cab39e30..3209d60c 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -2,7 +2,7 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
@@ -48,7 +48,10 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         float32v n1 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::InvMaskedAdd( i1, j, int32v( Primes::Y ) ) ), x1, y1 );
         float32v n2 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ) ), x2, y2 );
 
-        return float32v( 38.283687591552734375f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) );
+        constexpr float kBounding = 38.283687591552734375f;
+
+        return this->ScaleOutput( FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) ),
+            -1 / kBounding, 1 / kBounding );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
@@ -120,8 +123,11 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         float32v n1 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::MaskedAdd( j1, j, int32v( Primes::Y ) ), FS::MaskedAdd( k1, k, int32v( Primes::Z ) ) ), x1, y1, z1 );
         float32v n2 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i2, i, int32v( Primes::X ) ), FS::MaskedAdd( j2, j, int32v( Primes::Y ) ), FS::InvMaskedAdd( k2, k, int32v( Primes::Z ) ) ), x2, y2, z2 );
         float32v n3 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ), k + int32v( Primes::Z ) ), x3, y3, z3 );
+                
+        constexpr float kBounding = 32.69428253173828125f;
 
-        return float32v( 32.69428253173828125f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, n3 * t3 ) ) );
+        return this->ScaleOutput( FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, n3 * t3 ) ) ),
+            -1 / kBounding, 1 / kBounding );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
@@ -255,12 +261,15 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             FS::MaskedAdd( l3, l, int32v( Primes::W ) ) ), x3, y3, z3, w3 );
         float32v n4 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ), k + int32v( Primes::Z ), l + int32v( Primes::W ) ), x4, y4, z4, w4 );
 
-        return float32v( 27.f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, FS::FMulAdd( n3, t3, n4 * t4 ) ) ) );
+        constexpr float kBounding = 27.f;
+
+        return this->ScaleOutput( FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, FS::FMulAdd( n3, t3, n4 * t4 ) ) ) ),
+            -1 / kBounding, 1 / kBounding );
     }
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
@@ -305,7 +314,10 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virt
         float32v n1 = GetGradientDotFancy( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::InvMaskedAdd( i1, j, int32v( Primes::Y ) ) ), x1, y1 );
         float32v n2 = GetGradientDotFancy( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ) ), x2, y2 );
 
-        return float32v( 49.918426513671875f ) * FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) );
+        constexpr float kBounding = 49.918426513671875f;
+
+        return this->ScaleOutput( FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) ),
+            -1 / kBounding, 1 / kBounding );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
@@ -371,12 +383,14 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virt
             seed = ~seed;
         }
 
-        return float32v( 32.69428253173828125f ) * val;
+        constexpr float kBounding = 32.69428253173828125f;
+
+        return this->ScaleOutput( val, -1 / kBounding, 1 / kBounding );
     }
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public virtual FastNoise::OpenSimplex2S, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public virtual FastNoise::OpenSimplex2S, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
@@ -440,8 +454,10 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public vir
         float32v a3 = FS::Max( FS::FNMulAdd( xi3, xi3, FS::FNMulAdd( yi3, yi3, float32v( 2.0f / 3.0f ) ) ), float32v( 0 ) );
         a3 *= a3; a3 *= a3;
         value = FS::FMulAdd( a3, v3, value );
+                
+        constexpr float kBounding = 9.28993664146183f;
 
-        return float32v( 9.28993664146183f ) * value;
+        return this->ScaleOutput( value, -1 / kBounding, 1 / kBounding );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
@@ -520,8 +536,10 @@ class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public vir
 
             seed = ~seed;
         }
+                
+        constexpr float kBounding = 144.736422163332608f;
 
-        return float32v( 144.736422163332608f ) * value;
+        return this->ScaleOutput( value, -1 / kBounding, 1 / kBounding );
     }
 };
 
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index ef86929c..5e647866 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -13,8 +13,9 @@ namespace FastNoise
         static constexpr int Lookup[] = { X,Y,Z,W };
     }
 
-    static constexpr float ROOT2 = 1.4142135623730950488f;
-    static constexpr float ROOT3 = 1.7320508075688772935f;
+    static constexpr float kValueBounds = 2147483648.f;
+    static constexpr float kRoot2 = 1.4142135623730950488f;
+    static constexpr float kRoot3 = 1.7320508075688772935f;
 
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
@@ -23,15 +24,15 @@ namespace FastNoise
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( kRoot3, kRoot3, 2, 2, 1, -1, 0, 0, -kRoot3, -kRoot3, -2, -2, -1, 1, 0, 0 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, -1, 0, 0, kRoot3, kRoot3, 2, 2, -1, 1, 0, 0, -kRoot3, -kRoot3, -2, -2 ) );
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
         else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( kRoot3, kRoot3, 2, 2, 1, -1, 0, 0 ), index );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, -1, 0, 0, kRoot3, kRoot3, 2, 2 ), index );
 
             // Bit-8 = Flip sign of a + b 
             return FS::FMulAdd( gX, fX, fY * gY ) ^ FS::Cast<float>( ( index >> 3 ) << 31 );
@@ -66,7 +67,7 @@ namespace FastNoise
             // Bit-2 = Mul a by 2 or Root3
             mask32v bit2 = ( index & int32v( 2 ) ) == int32v( 0 );
 
-            a *= FS::Select( bit2, float32v( 2 ), float32v( ROOT3 ) );
+            a *= FS::Select( bit2, float32v( 2 ), float32v( kRoot3 ) );
             // b zero value if a mul 2
             float32v c = FS::MaskedAdd( bit2, a, b );
 
@@ -83,15 +84,15 @@ namespace FastNoise
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1 + kRoot2, -1 - kRoot2, 1 + kRoot2, -1 - kRoot2, 1, -1, 1, -1, 1 + kRoot2, -1 - kRoot2, 1 + kRoot2, -1 - kRoot2, 1, -1, 1, -1 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, 1, -1, -1, 1 + kRoot2, 1 + kRoot2, -1 - kRoot2, -1 - kRoot2, 1, 1, -1, -1, 1 + kRoot2, 1 + kRoot2, -1 - kRoot2, -1 - kRoot2 ) );
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
         else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1 + kRoot2, -1 - kRoot2, 1 + kRoot2, -1 - kRoot2, 1, -1, 1, -1 ), hash );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, 1, -1, -1, 1 + kRoot2, 1 + kRoot2, -1 - kRoot2, -1 - kRoot2 ), hash );
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
@@ -114,7 +115,7 @@ namespace FastNoise
             float32v a = FS::Select( bit4Mask, fY, fX );
             float32v b = FS::Select( bit4Mask, fX, fY );
 
-            return FS::FMulAdd( float32v( 1.0f + ROOT2 ), a, b );
+            return FS::FMulAdd( float32v( 1.0f + kRoot2 ), a, b );
         }
     }
     
@@ -220,22 +221,8 @@ namespace FastNoise
         int32v hash = seed;
         hash ^= (primedPos ^ ...);
 
-#if 1
         hash *= hash * int32v( 0x27d4eb2d );
-        return FS::Convert<float>( hash ) * float32v( -1.0f / (float)INT_MIN );
-
-#else // More accurate bounding but slower
-        int32v zeroCase = hash >> 8;
-        hash *= hash * int32v( 0x27d4eb2d );
-
-        int32v floatBits = hash & int32v( 0x7FFFFF ); //fp32 fractional bits
-        floatBits |= int32v( 0x3F800000 ); // fp32 1.0
-        float32v f32 = FS::Cast<float>( floatBits ) - float32v( 0.9999999f ); // bring range to 0.0000001 - 1.0
-
-        float32v sign = FS::Cast<float>( hash & int32v( -2147483648 ) );
-
-        return FS::InvMasked( zeroCase == int32v( 0x7FFFFF ), f32 | sign );
-#endif
+        return FS::Convert<float>( hash );
     }
      
     FS_FORCEINLINE static float32v Lerp( float32v a, float32v b, float32v t )
diff --git a/include/FastNoise/Generators/Value.h b/include/FastNoise/Generators/Value.h
index 87b55603..2392f2f3 100644
--- a/include/FastNoise/Generators/Value.h
+++ b/include/FastNoise/Generators/Value.h
@@ -3,14 +3,14 @@
 
 namespace FastNoise
 {
-    class Value : public virtual ScalableGenerator
+    class Value : public virtual VariableRange<ScalableGenerator>
     {
     public:        const Metadata& GetMetadata() const override;
     };
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<Value> : MetadataT<ScalableGenerator>
+    struct MetadataT<Value> : MetadataT<VariableRange<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
diff --git a/include/FastNoise/Generators/Value.inl b/include/FastNoise/Generators/Value.inl
index 0d162b54..3792186b 100644
--- a/include/FastNoise/Generators/Value.inl
+++ b/include/FastNoise/Generators/Value.inl
@@ -2,7 +2,7 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::ScalableGenerator, SIMD>
+class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual FastNoise::Value, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
@@ -19,9 +19,10 @@ class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual Fas
         xs = InterpHermite( x - xs );
         ys = InterpHermite( y - ys );
 
-        return Lerp(
+        return this->ScaleOutput( Lerp(
             Lerp( GetValueCoord( seed, x0, y0 ), GetValueCoord( seed, x1, y0 ), xs ),
-            Lerp( GetValueCoord( seed, x0, y1 ), GetValueCoord( seed, x1, y1 ), xs ), ys );
+            Lerp( GetValueCoord( seed, x0, y1 ), GetValueCoord( seed, x1, y1 ), xs ), ys ),
+            -kValueBounds, kValueBounds );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
@@ -43,12 +44,13 @@ class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual Fas
         ys = InterpHermite( y - ys );
         zs = InterpHermite( z - zs );
 
-        return Lerp( Lerp(
+        return this->ScaleOutput( Lerp( Lerp(
             Lerp( GetValueCoord( seed, x0, y0, z0 ), GetValueCoord( seed, x1, y0, z0 ), xs ),
             Lerp( GetValueCoord( seed, x0, y1, z0 ), GetValueCoord( seed, x1, y1, z0 ), xs ), ys ),
             Lerp(                                                                                
             Lerp( GetValueCoord( seed, x0, y0, z1 ), GetValueCoord( seed, x1, y0, z1 ), xs ),    
-            Lerp( GetValueCoord( seed, x0, y1, z1 ), GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs );
+            Lerp( GetValueCoord( seed, x0, y1, z1 ), GetValueCoord( seed, x1, y1, z1 ), xs ), ys ), zs ),
+            -kValueBounds, kValueBounds );
     }
 
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
@@ -74,7 +76,7 @@ class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual Fas
         zs = InterpHermite( z - zs );
         ws = InterpHermite( w - ws );
 
-        return Lerp( Lerp( Lerp(
+        return this->ScaleOutput( Lerp( Lerp( Lerp(
             Lerp( GetValueCoord( seed, x0, y0, z0, w0 ), GetValueCoord( seed, x1, y0, z0, w0 ), xs ),
             Lerp( GetValueCoord( seed, x0, y1, z0, w0 ), GetValueCoord( seed, x1, y1, z0, w0 ), xs ), ys ),
             Lerp( 
@@ -85,6 +87,7 @@ class FastSIMD::DispatchClass<FastNoise::Value, SIMD> final : public virtual Fas
             Lerp( GetValueCoord( seed, x0, y1, z0, w1 ), GetValueCoord( seed, x1, y1, z0, w1 ), xs ), ys ),
             Lerp( 
             Lerp( GetValueCoord( seed, x0, y0, z1, w1 ), GetValueCoord( seed, x1, y0, z1, w1 ), xs ),    
-            Lerp( GetValueCoord( seed, x0, y1, z1, w1 ), GetValueCoord( seed, x1, y1, z1, w1 ), xs ), ys ), zs ), ws );
+            Lerp( GetValueCoord( seed, x0, y1, z1, w1 ), GetValueCoord( seed, x1, y1, z1, w1 ), xs ), ys ), zs ), ws ),
+            -kValueBounds, kValueBounds );
     }
 };

From 641027491dbb7002de4d2d767e5ac260019b237a Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 21 Apr 2024 20:24:31 +0100
Subject: [PATCH 080/139] Fix warnings and clamp NaN light values

---
 tools/MeshNoisePreview.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index 7f8dd710..ab52e3c3 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -230,7 +230,7 @@ void MeshNoisePreview::UpdateChunkQueues( const Vector3& position )
                } );
 
     // Unload further chunk if out of load range
-    size_t deletedChunks = 0;
+    //size_t deletedChunks = 0;
     while( !mChunks.empty() )
     {
         Vector3i backChunkPos = mChunks.back().GetPos();
@@ -239,7 +239,7 @@ void MeshNoisePreview::UpdateChunkQueues( const Vector3& position )
         {
             mRegisteredChunkPositions.erase( backChunkPos );
             mChunks.pop_back();
-            deletedChunks++;
+            //deletedChunks++;
         }
         else
         {
@@ -869,9 +869,10 @@ uint32_t MeshNoisePreview::Chunk::DmcGetVertIndex( uint32_t cellIndex, uint16_t
     float light = ( NormaliseConstExpr( -LIGHT_DIR ) * derivative.normalized() ).sum() * ( 0.5f - AMBIENT_LIGHT * 0.5f ) + ( 0.5f + AMBIENT_LIGHT * 0.5f );
     light *= light;
 
-    assert( light <= 1 );
+    // Clamp to 1; also catches NaNs, since std::min returns its first argument (1.0f) when light is NaN
+    light = std::min( 1.0f, light );
 
-    vertexData.emplace_back( vert + vertOffset, light );//( uint32_t ) robin_hood::hash_bytes( &vert, sizeof( float ) * 3 ) / (float)UINT32_MAX );
+    vertexData.emplace_back( vert + vertOffset, light );
 
     return vertIndex;
 }

From 48f240733d5410373026ed0dcce213c8037c00bf Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 21 Apr 2024 20:24:50 +0100
Subject: [PATCH 081/139] Reduce metadata vector sizes

---
 src/FastNoise/Metadata.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 58f17074..606905b7 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -461,7 +461,11 @@ template<typename T>
 std::unique_ptr<const MetadataT<T>> CreateMetadataInstance( const char* className )
 {
     auto* newMetadata = new MetadataT<T>;
-    newMetadata->name = className;
+    newMetadata->name = className; 
+    newMetadata->memberVariables.shrink_to_fit();
+    newMetadata->memberNodeLookups.shrink_to_fit();
+    newMetadata->memberHybrids.shrink_to_fit();
+    newMetadata->groups.shrink_to_fit();
 
     // Node must be in a group or it is not selectable in the UI
     assert( !newMetadata->groups.empty() ); 

From f4bdc32c6f701b839854329a86299362b80c64f4 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 22 Apr 2024 23:38:50 +0100
Subject: [PATCH 082/139] Update demo node trees for new nodes

---
 tools/DemoNodeTrees.inl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/DemoNodeTrees.inl b/tools/DemoNodeTrees.inl
index 31656cfa..154175f0 100644
--- a/tools/DemoNodeTrees.inl
+++ b/tools/DemoNodeTrees.inl
@@ -2,6 +2,6 @@
 
 inline const char* gDemoNodeTrees[][2] =
 {
-    { "Simple Terrain", "EgAC@EgQBE@ADIQhoADgAE@EgQAk@ACWQwBmZiY/@CD8BB@Ej8J1P@hCBCADMzMz8@E==" },
-    { "Cellular Caves", "EwAC@DDgQBE@BgQhsAASAAFwAB@BD@BCVEAw@BI@BD@BB@H/ARUA//8@DKVD@BpUM@AClQw@ERgABg@BJD@BgL8@ACAPwA9ChdAAFK4HkAA4XoUPw@ACBBAJqZmT4@E==" },
+    { "Simple Terrain", "EgAC@EgQBE@ADIQhoADgAE@EgQAg@ACWQwAAgL8AAIA/AGZmJj8@CPwEE@DCPwnU8@hIEIAMzMzPw@D" },
+    { "Cellular Caves", "EwAC@DDgQBE@BgQiAAFQAM@BJUTNzMw9@AgwAM@BC@Cw@BI@BBBg@AD5CH4XrPoXrUT8@AClQw@AKVD@BpUM@EH//wEAAOxROD4@BgQQCamZk+AM3MTD8=" },
 };

From deee87091d0e584ca53a1e6290f5ab3e63637d28 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 22 Apr 2024 23:44:00 +0100
Subject: [PATCH 083/139] Better node editor slider drag speed

---
 include/FastNoise/Generators/BasicGenerators.h | 4 ++--
 include/FastNoise/Generators/Modifiers.h       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 40d30dce..cd0c13c4 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -175,8 +175,8 @@ namespace FastNoise
         MetadataT()
         {
             groups.push_back( "Basic Generators" );
-            this->AddPerDimensionVariable( { "Multiplier", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mMultiplier ); } );
-            this->AddPerDimensionVariable( { "Offset", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mOffset ); } );
+            this->AddPerDimensionVariable( { "Multiplier", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mMultiplier ); }, 0.f, 0.f, 0.001f );
+            this->AddPerDimensionVariable( { "Offset", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mOffset ); }, 0.f, 0.f, 0.25f );
 
             description =
                 "Takes the input position and does the following per dimension\n"
diff --git a/include/FastNoise/Generators/Modifiers.h b/include/FastNoise/Generators/Modifiers.h
index 4cd773e2..f8f8179e 100644
--- a/include/FastNoise/Generators/Modifiers.h
+++ b/include/FastNoise/Generators/Modifiers.h
@@ -62,7 +62,7 @@ namespace FastNoise
         {
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &DomainOffset::SetSource );
-            this->AddPerDimensionHybridSource( "Offset", 0.0f, []( DomainOffset* p ) { return std::ref( p->mOffset ); } );
+            this->AddPerDimensionHybridSource( "Offset", 0.0f, []( DomainOffset* p ) { return std::ref( p->mOffset ); }, 0.25f );
         }
     };
 #endif

From e58e7324e17c0cd9f67b13fa356fe507f1f212ed Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 28 Apr 2024 00:36:16 +0100
Subject: [PATCH 084/139] Update FastSIMD for WASM support and Relaxed setting.
 Support targeting Emscripten with the Node Editor

---
 .gitignore                                    |   2 +
 CMakeLists.txt                                |   2 +
 cmake/CPM.cmake                               | 430 ++++++++++++++----
 src/CMakeLists.txt                            |  22 +-
 tools/CMakeLists.txt                          |  57 ++-
 tools/FastNoiseNodeEditor.cpp                 |   8 +-
 tools/MeshNoisePreview.cpp                    |   4 +-
 tools/NodeEditorApp.cpp                       |  36 +-
 tools/NodeEditorApp.h                         |  12 +-
 tools/NoiseTexture.cpp                        |   6 +-
 tools/SharedMemoryIpc.inl                     |   8 +-
 .../emscripten_enable_shared_array_buffer.js  |  75 +++
 tools/emscripten_pre.js                       |  11 +
 tools/emscripten_shell.html                   |  76 ++++
 14 files changed, 630 insertions(+), 119 deletions(-)
 create mode 100644 tools/emscripten_enable_shared_array_buffer.js
 create mode 100644 tools/emscripten_pre.js
 create mode 100644 tools/emscripten_shell.html

diff --git a/.gitignore b/.gitignore
index 79926da2..dc29054b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,7 +31,9 @@
 *.out
 *.app
 /.vs*
+/.idea
 /out
+/cmake-build*
 /build
 /enc_temp_folder
 /cpm-cache
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2111ebb7..a3b68704 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,6 +19,8 @@ endif()
 option(FASTNOISE2_TOOLS "Build \"Node Editor\" executable" ${FASTNOISE2_STANDALONE_PROJECT})
 option(FASTNOISE2_TESTS "Build tests" OFF)
 
+option(FASTNOISE2_STRICT_FP "Enable strict floating point calculations to ensure output from different SIMD feature sets match EXACTLY" OFF)
+
 if(MSVC)
     #setup pdb target location
     set(pdb_output_dir "${CMAKE_CURRENT_BINARY_DIR}/pdb-files")
diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake
index 9ae66399..1b4cfcca 100644
--- a/cmake/CPM.cmake
+++ b/cmake/CPM.cmake
@@ -5,7 +5,7 @@
 # MIT License
 # -----------
 #[[
-  Copyright (c) 2021 Lars Melchior and additional contributors
+  Copyright (c) 2019-2023 Lars Melchior and contributors
 
   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
@@ -28,10 +28,25 @@
 
 cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
 
-set(CURRENT_CPM_VERSION 0.32.2)
+# Initialize logging prefix
+if(NOT CPM_INDENT)
+  set(CPM_INDENT
+      "CPM:"
+      CACHE INTERNAL ""
+  )
+endif()
+
+if(NOT COMMAND cpm_message)
+  function(cpm_message)
+    message(${ARGV})
+  endfunction()
+endif()
 
+set(CURRENT_CPM_VERSION 0.39.0)
+
+get_filename_component(CPM_CURRENT_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
 if(CPM_DIRECTORY)
-  if(NOT CPM_DIRECTORY STREQUAL CMAKE_CURRENT_LIST_DIR)
+  if(NOT CPM_DIRECTORY STREQUAL CPM_CURRENT_DIRECTORY)
     if(CPM_VERSION VERSION_LESS CURRENT_CPM_VERSION)
       message(
         AUTHOR_WARNING
@@ -57,8 +72,42 @@ See https://github.com/cpm-cmake/CPM.cmake for more information."
   endif()
 endif()
 
+if(CURRENT_CPM_VERSION MATCHES "development-version")
+  message(
+    WARNING "${CPM_INDENT} Your project is using an unstable development version of CPM.cmake. \
+Please update to a recent release if possible. \
+See https://github.com/cpm-cmake/CPM.cmake for details."
+  )
+endif()
+
 set_property(GLOBAL PROPERTY CPM_INITIALIZED true)
 
+macro(cpm_set_policies)
+  # the policy allows us to change options without caching
+  cmake_policy(SET CMP0077 NEW)
+  set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
+
+  # the policy allows us to change set(CACHE) without caching
+  if(POLICY CMP0126)
+    cmake_policy(SET CMP0126 NEW)
+    set(CMAKE_POLICY_DEFAULT_CMP0126 NEW)
+  endif()
+
+  # The policy uses the download time for timestamp, instead of the timestamp in the archive. This
+  # allows for proper rebuilds when a project's URL changes
+  if(POLICY CMP0135)
+    cmake_policy(SET CMP0135 NEW)
+    set(CMAKE_POLICY_DEFAULT_CMP0135 NEW)
+  endif()
+
+  # treat relative git repository paths as being relative to the parent project's remote
+  if(POLICY CMP0150)
+    cmake_policy(SET CMP0150 NEW)
+    set(CMAKE_POLICY_DEFAULT_CMP0150 NEW)
+  endif()
+endmacro()
+cpm_set_policies()
+
 option(CPM_USE_LOCAL_PACKAGES "Always try to use `find_package` to get dependencies"
        $ENV{CPM_USE_LOCAL_PACKAGES}
 )
@@ -76,13 +125,17 @@ option(CPM_INCLUDE_ALL_IN_PACKAGE_LOCK
        "Add all packages added through CPM.cmake to the package lock"
        $ENV{CPM_INCLUDE_ALL_IN_PACKAGE_LOCK}
 )
+option(CPM_USE_NAMED_CACHE_DIRECTORIES
+       "Use additional directory of package name in cache on the most nested level."
+       $ENV{CPM_USE_NAMED_CACHE_DIRECTORIES}
+)
 
 set(CPM_VERSION
     ${CURRENT_CPM_VERSION}
     CACHE INTERNAL ""
 )
 set(CPM_DIRECTORY
-    ${CMAKE_CURRENT_LIST_DIR}
+    ${CPM_CURRENT_DIRECTORY}
     CACHE INTERNAL ""
 )
 set(CPM_FILE
@@ -191,19 +244,14 @@ function(cpm_package_name_and_ver_from_url url outName outVer)
   endif()
 endfunction()
 
-# Initialize logging prefix
-if(NOT CPM_INDENT)
-  set(CPM_INDENT
-      "CPM:"
-      CACHE INTERNAL ""
-  )
-endif()
-
 function(cpm_find_package NAME VERSION)
   string(REPLACE " " ";" EXTRA_ARGS "${ARGN}")
   find_package(${NAME} ${VERSION} ${EXTRA_ARGS} QUIET)
   if(${CPM_ARGS_NAME}_FOUND)
-    message(STATUS "${CPM_INDENT} using local package ${CPM_ARGS_NAME}@${VERSION}")
+    if(DEFINED ${CPM_ARGS_NAME}_VERSION)
+      set(VERSION ${${CPM_ARGS_NAME}_VERSION})
+    endif()
+    cpm_message(STATUS "${CPM_INDENT} Using local package ${CPM_ARGS_NAME}@${VERSION}")
     CPMRegisterPackage(${CPM_ARGS_NAME} "${VERSION}")
     set(CPM_PACKAGE_FOUND
         YES
@@ -223,7 +271,7 @@ function(cpm_create_module_file Name)
   if(NOT CPM_DONT_UPDATE_MODULE_PATH)
     # erase any previous modules
     file(WRITE ${CPM_MODULE_PATH}/Find${Name}.cmake
-         "include(${CPM_FILE})\n${ARGN}\nset(${Name}_FOUND TRUE)"
+         "include(\"${CPM_FILE}\")\n${ARGN}\nset(${Name}_FOUND TRUE)"
     )
   endif()
 endfunction()
@@ -240,14 +288,14 @@ function(CPMFindPackage)
     endif()
   endif()
 
-  if(CPM_DOWNLOAD_ALL)
-    CPMAddPackage(${ARGN})
-    cpm_export_variables(${CPM_ARGS_NAME})
-    return()
+  set(downloadPackage ${CPM_DOWNLOAD_ALL})
+  if(DEFINED CPM_DOWNLOAD_${CPM_ARGS_NAME})
+    set(downloadPackage ${CPM_DOWNLOAD_${CPM_ARGS_NAME}})
+  elseif(DEFINED ENV{CPM_DOWNLOAD_${CPM_ARGS_NAME}})
+    set(downloadPackage $ENV{CPM_DOWNLOAD_${CPM_ARGS_NAME}})
   endif()
-
-  cpm_check_if_package_already_added(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}")
-  if(CPM_PACKAGE_ALREADY_ADDED)
+  if(downloadPackage)
+    CPMAddPackage(${ARGN})
     cpm_export_variables(${CPM_ARGS_NAME})
     return()
   endif()
@@ -268,7 +316,7 @@ function(cpm_check_if_package_already_added CPM_ARGS_NAME CPM_ARGS_VERSION)
     if("${CPM_PACKAGE_VERSION}" VERSION_LESS "${CPM_ARGS_VERSION}")
       message(
         WARNING
-          "${CPM_INDENT} requires a newer version of ${CPM_ARGS_NAME} (${CPM_ARGS_VERSION}) than currently included (${CPM_PACKAGE_VERSION})."
+          "${CPM_INDENT} Requires a newer version of ${CPM_ARGS_NAME} (${CPM_ARGS_VERSION}) than currently included (${CPM_PACKAGE_VERSION})."
       )
     endif()
     cpm_get_fetch_properties(${CPM_ARGS_NAME})
@@ -325,11 +373,11 @@ function(cpm_parse_add_package_single_arg arg outArgs)
       set(packageType "git")
     else()
       # Give up
-      message(FATAL_ERROR "CPM: Can't determine package type of '${arg}'")
+      message(FATAL_ERROR "${CPM_INDENT} Can't determine package type of '${arg}'")
     endif()
   endif()
 
-  # For all packages we interpret @... as version. Only replace the last occurence. Thus URIs
+  # For all packages we interpret @... as version. Only replace the last occurrence. Thus URIs
   # containing '@' can be used
   string(REGEX REPLACE "@([^@]+)$" ";VERSION;\\1" out "${out}")
 
@@ -345,7 +393,7 @@ function(cpm_parse_add_package_single_arg arg outArgs)
   else()
     # We should never get here. This is an assertion and hitting it means there's a bug in the code
     # above. A packageType was set, but not handled by this if-else.
-    message(FATAL_ERROR "CPM: Unsupported package type '${packageType}' of '${arg}'")
+    message(FATAL_ERROR "${CPM_INDENT} Unsupported package type '${packageType}' of '${arg}'")
   endif()
 
   set(${outArgs}
@@ -354,14 +402,119 @@ function(cpm_parse_add_package_single_arg arg outArgs)
   )
 endfunction()
 
+# Report via `isClean` whether the git checkout at `repoPath` is unmodified relative to `gitTag`
+function(cpm_check_git_working_dir_is_clean repoPath gitTag isClean)
+
+  find_package(Git REQUIRED)
+
+  if(NOT GIT_EXECUTABLE)
+    # No git executable available to inspect the repo, so assume the directory is clean
+    set(${isClean}
+        TRUE
+        PARENT_SCOPE
+    )
+    return()
+  endif()
+
+  # check for uncommitted changes (any `git status --porcelain` output marks the repo as dirty)
+  execute_process(
+    COMMAND ${GIT_EXECUTABLE} status --porcelain
+    RESULT_VARIABLE resultGitStatus
+    OUTPUT_VARIABLE repoStatus
+    OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET
+    WORKING_DIRECTORY ${repoPath}
+  )
+  if(resultGitStatus)
+    # git status itself returned a failure code; not supposed to happen, assume clean anyway
+    message(WARNING "${CPM_INDENT} Calling git status on folder ${repoPath} failed")
+    set(${isClean}
+        TRUE
+        PARENT_SCOPE
+    )
+    return()
+  endif()
+
+  if(NOT "${repoStatus}" STREQUAL "")
+    set(${isClean}
+        FALSE
+        PARENT_SCOPE
+    )
+    return()
+  endif()
+
+  # check for committed changes: a non-zero `git diff --exit-code` result means a diff from gitTag
+  execute_process(
+    COMMAND ${GIT_EXECUTABLE} diff -s --exit-code ${gitTag}
+    RESULT_VARIABLE resultGitDiff
+    OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_QUIET
+    WORKING_DIRECTORY ${repoPath}
+  )
+
+  if(${resultGitDiff} EQUAL 0)
+    set(${isClean}
+        TRUE
+        PARENT_SCOPE
+    )
+  else()
+    set(${isClean}
+        FALSE
+        PARENT_SCOPE
+    )
+  endif()
+
+endfunction()
+
+# method to overwrite internal FetchContent properties, to allow using CPM.cmake to overload
+# FetchContent calls. As these are internal cmake properties, this method should be used carefully
+# and may need modification in future CMake versions. Source:
+# https://github.com/Kitware/CMake/blob/dc3d0b5a0a7d26d43d6cfeb511e224533b5d188f/Modules/FetchContent.cmake#L1152
+function(cpm_override_fetchcontent contentName)
+  cmake_parse_arguments(PARSE_ARGV 1 arg "" "SOURCE_DIR;BINARY_DIR" "")
+  if(NOT "${arg_UNPARSED_ARGUMENTS}" STREQUAL "")
+    message(FATAL_ERROR "${CPM_INDENT} Unsupported arguments: ${arg_UNPARSED_ARGUMENTS}")
+  endif()
+
+  string(TOLOWER ${contentName} contentNameLower)
+  set(prefix "_FetchContent_${contentNameLower}")
+
+  set(propertyName "${prefix}_sourceDir")
+  define_property(
+    GLOBAL
+    PROPERTY ${propertyName}
+    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
+  )
+  set_property(GLOBAL PROPERTY ${propertyName} "${arg_SOURCE_DIR}")
+
+  set(propertyName "${prefix}_binaryDir")
+  define_property(
+    GLOBAL
+    PROPERTY ${propertyName}
+    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
+  )
+  set_property(GLOBAL PROPERTY ${propertyName} "${arg_BINARY_DIR}")
+
+  set(propertyName "${prefix}_populated")
+  define_property(
+    GLOBAL
+    PROPERTY ${propertyName}
+    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
+  )
+  set_property(GLOBAL PROPERTY ${propertyName} TRUE)
+endfunction()
+
 # Download and add a package from source
 function(CPMAddPackage)
+  cpm_set_policies()
+
   list(LENGTH ARGN argnLength)
   if(argnLength EQUAL 1)
     cpm_parse_add_package_single_arg("${ARGN}" ARGN)
 
-    # The shorthand syntax implies EXCLUDE_FROM_ALL
-    set(ARGN "${ARGN};EXCLUDE_FROM_ALL;YES")
+    # The shorthand syntax implies EXCLUDE_FROM_ALL and SYSTEM
+    set(ARGN "${ARGN};EXCLUDE_FROM_ALL;YES;SYSTEM;YES;")
   endif()
 
   set(oneValueArgs
@@ -375,15 +528,16 @@ function(CPMAddPackage)
       BITBUCKET_REPOSITORY
       GIT_REPOSITORY
       SOURCE_DIR
-      DOWNLOAD_COMMAND
       FIND_PACKAGE_ARGUMENTS
       NO_CACHE
+      SYSTEM
       GIT_SHALLOW
       EXCLUDE_FROM_ALL
       SOURCE_SUBDIR
+      CUSTOM_CACHE_KEY
   )
 
-  set(multiValueArgs URL OPTIONS)
+  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND)
 
   cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
 
@@ -454,7 +608,7 @@ function(CPMAddPackage)
   if(NOT DEFINED CPM_ARGS_NAME)
     message(
       FATAL_ERROR
-        "CPM: 'NAME' was not provided and couldn't be automatically inferred for package added with arguments: '${ARGN}'"
+        "${CPM_INDENT} 'NAME' was not provided and couldn't be automatically inferred for package added with arguments: '${ARGN}'"
     )
   endif()
 
@@ -473,8 +627,10 @@ function(CPMAddPackage)
       NAME "${CPM_ARGS_NAME}"
       SOURCE_DIR "${PACKAGE_SOURCE}"
       EXCLUDE_FROM_ALL "${CPM_ARGS_EXCLUDE_FROM_ALL}"
+      SYSTEM "${CPM_ARGS_SYSTEM}"
       OPTIONS "${CPM_ARGS_OPTIONS}"
       SOURCE_SUBDIR "${CPM_ARGS_SOURCE_SUBDIR}"
+      DOWNLOAD_ONLY "${DOWNLOAD_ONLY}"
       FORCE True
     )
     cpm_export_variables(${CPM_ARGS_NAME})
@@ -492,19 +648,21 @@ function(CPMAddPackage)
     return()
   endif()
 
-  if(CPM_USE_LOCAL_PACKAGES OR CPM_LOCAL_PACKAGES_ONLY)
-    cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS})
+  if(NOT CPM_ARGS_FORCE)
+    if(CPM_USE_LOCAL_PACKAGES OR CPM_LOCAL_PACKAGES_ONLY)
+      cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS})
 
-    if(CPM_PACKAGE_FOUND)
-      cpm_export_variables(${CPM_ARGS_NAME})
-      return()
-    endif()
+      if(CPM_PACKAGE_FOUND)
+        cpm_export_variables(${CPM_ARGS_NAME})
+        return()
+      endif()
 
-    if(CPM_LOCAL_PACKAGES_ONLY)
-      message(
-        SEND_ERROR
-          "CPM: ${CPM_ARGS_NAME} not found via find_package(${CPM_ARGS_NAME} ${CPM_ARGS_VERSION})"
-      )
+      if(CPM_LOCAL_PACKAGES_ONLY)
+        message(
+          SEND_ERROR
+            "${CPM_INDENT} ${CPM_ARGS_NAME} not found via find_package(${CPM_ARGS_NAME} ${CPM_ARGS_VERSION})"
+        )
+      endif()
     endif()
   endif()
 
@@ -529,28 +687,82 @@ function(CPMAddPackage)
     list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS DOWNLOAD_COMMAND ${CPM_ARGS_DOWNLOAD_COMMAND})
   elseif(DEFINED CPM_ARGS_SOURCE_DIR)
     list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${CPM_ARGS_SOURCE_DIR})
+    if(NOT IS_ABSOLUTE ${CPM_ARGS_SOURCE_DIR})
+      # Expand `CPM_ARGS_SOURCE_DIR` relative path. This is important because EXISTS doesn't work
+      # for relative paths.
+      get_filename_component(
+        source_directory ${CPM_ARGS_SOURCE_DIR} REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}
+      )
+    else()
+      set(source_directory ${CPM_ARGS_SOURCE_DIR})
+    endif()
+    if(NOT EXISTS ${source_directory})
+      string(TOLOWER ${CPM_ARGS_NAME} lower_case_name)
+      # remove timestamps so CMake will re-download the dependency
+      file(REMOVE_RECURSE "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild")
+    endif()
   elseif(CPM_SOURCE_CACHE AND NOT CPM_ARGS_NO_CACHE)
     string(TOLOWER ${CPM_ARGS_NAME} lower_case_name)
     set(origin_parameters ${CPM_ARGS_UNPARSED_ARGUMENTS})
     list(SORT origin_parameters)
-    string(SHA1 origin_hash "${origin_parameters}")
-    set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
+    if(CPM_ARGS_CUSTOM_CACHE_KEY)
+      # Application set a custom unique directory name
+      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY})
+    elseif(CPM_USE_NAMED_CACHE_DIRECTORIES)
+      string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG")
+      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME})
+    else()
+      string(SHA1 origin_hash "${origin_parameters}")
+      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
+    endif()
     # Expand `download_directory` relative path. This is important because EXISTS doesn't work for
     # relative paths.
     get_filename_component(download_directory ${download_directory} ABSOLUTE)
     list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${download_directory})
+
+    if(CPM_SOURCE_CACHE)
+      file(LOCK ${download_directory}/../cmake.lock)
+    endif()
+
     if(EXISTS ${download_directory})
-      # avoid FetchContent modules to improve performance
-      set(${CPM_ARGS_NAME}_BINARY_DIR ${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build)
-      set(${CPM_ARGS_NAME}_ADDED YES)
-      set(${CPM_ARGS_NAME}_SOURCE_DIR ${download_directory})
+      if(CPM_SOURCE_CACHE)
+        file(LOCK ${download_directory}/../cmake.lock RELEASE)
+      endif()
+
+      cpm_store_fetch_properties(
+        ${CPM_ARGS_NAME} "${download_directory}"
+        "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
+      )
+      cpm_get_fetch_properties("${CPM_ARGS_NAME}")
+
+      if(DEFINED CPM_ARGS_GIT_TAG AND NOT (PATCH_COMMAND IN_LIST CPM_ARGS_UNPARSED_ARGUMENTS))
+        # warn if cache has been changed since checkout
+        cpm_check_git_working_dir_is_clean(${download_directory} ${CPM_ARGS_GIT_TAG} IS_CLEAN)
+        if(NOT ${IS_CLEAN})
+          message(
+            WARNING "${CPM_INDENT} Cache for ${CPM_ARGS_NAME} (${download_directory}) is dirty"
+          )
+        endif()
+      endif()
+
       cpm_add_subdirectory(
-        "${CPM_ARGS_NAME}" "${DOWNLOAD_ONLY}"
-        "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}" "${${CPM_ARGS_NAME}_BINARY_DIR}"
-        "${CPM_ARGS_EXCLUDE_FROM_ALL}" "${CPM_ARGS_OPTIONS}"
+        "${CPM_ARGS_NAME}"
+        "${DOWNLOAD_ONLY}"
+        "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
+        "${${CPM_ARGS_NAME}_BINARY_DIR}"
+        "${CPM_ARGS_EXCLUDE_FROM_ALL}"
+        "${CPM_ARGS_SYSTEM}"
+        "${CPM_ARGS_OPTIONS}"
       )
-      set(CPM_SKIP_FETCH TRUE)
       set(PACKAGE_INFO "${PACKAGE_INFO} at ${download_directory}")
+
+      # As the source dir is already cached/populated, we override the call to FetchContent.
+      set(CPM_SKIP_FETCH TRUE)
+      cpm_override_fetchcontent(
+        "${lower_case_name}" SOURCE_DIR "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
+        BINARY_DIR "${${CPM_ARGS_NAME}_BINARY_DIR}"
+      )
+
     else()
       # Enable shallow clone when GIT_TAG is not a commit hash. Our guess may not be accurate, but
       # it should guarantee no commit hash get mis-detected.
@@ -567,7 +779,7 @@ function(CPMAddPackage)
     endif()
   endif()
 
-  cpm_create_module_file(${CPM_ARGS_NAME} "CPMAddPackage(${ARGN})")
+  cpm_create_module_file(${CPM_ARGS_NAME} "CPMAddPackage(\"${ARGN}\")")
 
   if(CPM_PACKAGE_LOCK_ENABLED)
     if((CPM_ARGS_VERSION AND NOT CPM_ARGS_SOURCE_DIR) OR CPM_INCLUDE_ALL_IN_PACKAGE_LOCK)
@@ -579,20 +791,29 @@ function(CPMAddPackage)
     endif()
   endif()
 
-  message(
-    STATUS "${CPM_INDENT} adding package ${CPM_ARGS_NAME}@${CPM_ARGS_VERSION} (${PACKAGE_INFO})"
+  cpm_message(
+    STATUS "${CPM_INDENT} Adding package ${CPM_ARGS_NAME}@${CPM_ARGS_VERSION} (${PACKAGE_INFO})"
   )
 
   if(NOT CPM_SKIP_FETCH)
     cpm_declare_fetch(
       "${CPM_ARGS_NAME}" "${CPM_ARGS_VERSION}" "${PACKAGE_INFO}" "${CPM_ARGS_UNPARSED_ARGUMENTS}"
     )
-    cpm_fetch_package("${CPM_ARGS_NAME}")
-    cpm_add_subdirectory(
-      "${CPM_ARGS_NAME}" "${DOWNLOAD_ONLY}"
-      "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}" "${${CPM_ARGS_NAME}_BINARY_DIR}"
-      "${CPM_ARGS_EXCLUDE_FROM_ALL}" "${CPM_ARGS_OPTIONS}"
-    )
+    cpm_fetch_package("${CPM_ARGS_NAME}" populated)
+    if(CPM_SOURCE_CACHE AND download_directory)
+      file(LOCK ${download_directory}/../cmake.lock RELEASE)
+    endif()
+    if(${populated})
+      cpm_add_subdirectory(
+        "${CPM_ARGS_NAME}"
+        "${DOWNLOAD_ONLY}"
+        "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
+        "${${CPM_ARGS_NAME}_BINARY_DIR}"
+        "${CPM_ARGS_EXCLUDE_FROM_ALL}"
+        "${CPM_ARGS_SYSTEM}"
+        "${CPM_ARGS_OPTIONS}"
+      )
+    endif()
     cpm_get_fetch_properties("${CPM_ARGS_NAME}")
   endif()
 
@@ -605,7 +826,7 @@ macro(CPMGetPackage Name)
   if(DEFINED "CPM_DECLARATION_${Name}")
     CPMAddPackage(NAME ${Name})
   else()
-    message(SEND_ERROR "Cannot retrieve package ${Name}: no declaration available")
+    message(SEND_ERROR "${CPM_INDENT} Cannot retrieve package ${Name}: no declaration available")
   endif()
 endmacro()
 
@@ -623,10 +844,14 @@ macro(cpm_export_variables name)
       "${${name}_ADDED}"
       PARENT_SCOPE
   )
+  set(CPM_LAST_PACKAGE_NAME
+      "${name}"
+      PARENT_SCOPE
+  )
 endmacro()
 
 # declares a package, so that any call to CPMAddPackage for the package name will use these
-# arguments instead. Previous declarations will not be overriden.
+# arguments instead. Previous declarations will not be overridden.
 macro(CPMDeclarePackage Name)
   if(NOT DEFINED "CPM_DECLARATION_${Name}")
     set("CPM_DECLARATION_${Name}" "${ARGN}")
@@ -649,7 +874,7 @@ function(cpm_add_comment_to_package_lock Name)
   endif()
 endfunction()
 
-# includes the package lock file if it exists and creates a target `cpm-write-package-lock` to
+# includes the package lock file if it exists and creates a target `cpm-update-package-lock` to
 # update it
 macro(CPMUsePackageLock file)
   if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
@@ -691,7 +916,7 @@ endfunction()
 # declares a package in FetchContent_Declare
 function(cpm_declare_fetch PACKAGE VERSION INFO)
   if(${CPM_DRY_RUN})
-    message(STATUS "${CPM_INDENT} package not declared (dry run)")
+    cpm_message(STATUS "${CPM_INDENT} Package not declared (dry run)")
     return()
   endif()
 
@@ -703,18 +928,32 @@ function(cpm_get_fetch_properties PACKAGE)
   if(${CPM_DRY_RUN})
     return()
   endif()
-  FetchContent_GetProperties(${PACKAGE})
-  string(TOLOWER ${PACKAGE} lpackage)
+
   set(${PACKAGE}_SOURCE_DIR
-      "${${lpackage}_SOURCE_DIR}"
+      "${CPM_PACKAGE_${PACKAGE}_SOURCE_DIR}"
       PARENT_SCOPE
   )
   set(${PACKAGE}_BINARY_DIR
-      "${${lpackage}_BINARY_DIR}"
+      "${CPM_PACKAGE_${PACKAGE}_BINARY_DIR}"
       PARENT_SCOPE
   )
 endfunction()
 
+function(cpm_store_fetch_properties PACKAGE source_dir binary_dir)
+  if(${CPM_DRY_RUN})
+    return()
+  endif()
+
+  set(CPM_PACKAGE_${PACKAGE}_SOURCE_DIR
+      "${source_dir}"
+      CACHE INTERNAL ""
+  )
+  set(CPM_PACKAGE_${PACKAGE}_BINARY_DIR
+      "${binary_dir}"
+      CACHE INTERNAL ""
+  )
+endfunction()
+
 # adds a package as a subdirectory if viable, according to provided options
 function(
   cpm_add_subdirectory
@@ -723,22 +962,23 @@ function(
   SOURCE_DIR
   BINARY_DIR
   EXCLUDE
+  SYSTEM
   OPTIONS
 )
+
   if(NOT DOWNLOAD_ONLY AND EXISTS ${SOURCE_DIR}/CMakeLists.txt)
+    set(addSubdirectoryExtraArgs "")
     if(EXCLUDE)
-      set(addSubdirectoryExtraArgs EXCLUDE_FROM_ALL)
-    else()
-      set(addSubdirectoryExtraArgs "")
+      list(APPEND addSubdirectoryExtraArgs EXCLUDE_FROM_ALL)
+    endif()
+    if("${SYSTEM}" AND "${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.25")
+      # https://cmake.org/cmake/help/latest/prop_dir/SYSTEM.html#prop_dir:SYSTEM
+      list(APPEND addSubdirectoryExtraArgs SYSTEM)
     endif()
     if(OPTIONS)
-      # the policy allows us to change options without caching
-      cmake_policy(SET CMP0077 NEW)
-      set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
-
       foreach(OPTION ${OPTIONS})
-        cpm_parse_option(${OPTION})
-        set(${OPTION_KEY} ${OPTION_VALUE})
+        cpm_parse_option("${OPTION}")
+        set(${OPTION_KEY} "${OPTION_VALUE}")
       endforeach()
     endif()
     set(CPM_OLD_INDENT "${CPM_INDENT}")
@@ -750,19 +990,32 @@ endfunction()
 
 # downloads a previously declared package via FetchContent and exports the variables
 # `${PACKAGE}_SOURCE_DIR` and `${PACKAGE}_BINARY_DIR` to the parent scope
-function(cpm_fetch_package PACKAGE)
+function(cpm_fetch_package PACKAGE populated)
+  set(${populated}
+      FALSE
+      PARENT_SCOPE
+  )
   if(${CPM_DRY_RUN})
-    message(STATUS "${CPM_INDENT} package ${PACKAGE} not fetched (dry run)")
+    cpm_message(STATUS "${CPM_INDENT} Package ${PACKAGE} not fetched (dry run)")
     return()
   endif()
 
   FetchContent_GetProperties(${PACKAGE})
 
+  string(TOLOWER "${PACKAGE}" lower_case_name)
+
   if(NOT ${lower_case_name}_POPULATED)
     FetchContent_Populate(${PACKAGE})
+    set(${populated}
+        TRUE
+        PARENT_SCOPE
+    )
   endif()
 
-  string(TOLOWER "${PACKAGE}" lower_case_name)
+  cpm_store_fetch_properties(
+    ${CPM_ARGS_NAME} ${${lower_case_name}_SOURCE_DIR} ${${lower_case_name}_BINARY_DIR}
+  )
+
   set(${PACKAGE}_SOURCE_DIR
       ${${lower_case_name}_SOURCE_DIR}
       PARENT_SCOPE
@@ -775,15 +1028,15 @@ endfunction()
 
 # splits a package option
 function(cpm_parse_option OPTION)
-  string(REGEX MATCH "^[^ ]+" OPTION_KEY ${OPTION})
-  string(LENGTH ${OPTION} OPTION_LENGTH)
-  string(LENGTH ${OPTION_KEY} OPTION_KEY_LENGTH)
+  string(REGEX MATCH "^[^ ]+" OPTION_KEY "${OPTION}")
+  string(LENGTH "${OPTION}" OPTION_LENGTH)
+  string(LENGTH "${OPTION_KEY}" OPTION_KEY_LENGTH)
   if(OPTION_KEY_LENGTH STREQUAL OPTION_LENGTH)
     # no value for key provided, assume user wants to set option to "ON"
     set(OPTION_VALUE "ON")
   else()
     math(EXPR OPTION_KEY_LENGTH "${OPTION_KEY_LENGTH}+1")
-    string(SUBSTRING ${OPTION} "${OPTION_KEY_LENGTH}" "-1" OPTION_VALUE)
+    string(SUBSTRING "${OPTION}" "${OPTION_KEY_LENGTH}" "-1" OPTION_VALUE)
   endif()
   set(OPTION_KEY
       "${OPTION_KEY}"
@@ -813,7 +1066,7 @@ function(cpm_get_version_from_git_tag GIT_TAG RESULT)
   endif()
 endfunction()
 
-# guesses if the git tag is a commit hash or an actual tag or a branch nane.
+# guesses if the git tag is a commit hash or an actual tag or a branch name.
 function(cpm_is_git_tag_commit_hash GIT_TAG RESULT)
   string(LENGTH "${GIT_TAG}" length)
   # full hash has 40 characters, and short hash has at least 7 characters.
@@ -846,14 +1099,17 @@ function(cpm_prettify_package_arguments OUT_VAR IS_IN_COMMENT)
       DOWNLOAD_ONLY
       GITHUB_REPOSITORY
       GITLAB_REPOSITORY
+      BITBUCKET_REPOSITORY
       GIT_REPOSITORY
       SOURCE_DIR
-      DOWNLOAD_COMMAND
       FIND_PACKAGE_ARGUMENTS
       NO_CACHE
+      SYSTEM
       GIT_SHALLOW
+      EXCLUDE_FROM_ALL
+      SOURCE_SUBDIR
   )
-  set(multiValueArgs OPTIONS)
+  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND)
   cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
 
   foreach(oneArgName ${oneValueArgs})
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1112aaa6..e13b7520 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,7 +2,7 @@
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 5bcddffbefa144138f572fe32e4e1d50a1d32deb
+    GIT_TAG 504e54fe5ec580e933ede0fb70a11bbc25dff714
 )
 
 set(install_targets ${install_targets}
@@ -54,7 +54,11 @@ set_target_properties(FastNoise PROPERTIES
     DEBUG_POSTFIX D
     COMPILE_PDB_NAME_DEBUG FastNoiseD)
 
-fastsimd_create_dispatch_library(FastSIMD_FastNoise SOURCES "FastNoise/FastSIMD_Build.inl")
+if(NOT FASTNOISE2_STRICT_FP)
+    set(FASTSIMD_RELAXED RELAXED)
+endif()
+
+fastsimd_create_dispatch_library(FastSIMD_FastNoise ${FASTSIMD_RELAXED} SOURCES "FastNoise/FastSIMD_Build.inl")
 
 target_include_directories(FastSIMD_FastNoise PRIVATE "../include/")
 
@@ -65,13 +69,13 @@ endif()
 target_link_libraries(FastNoise PUBLIC FastSIMD FastSIMD_FastNoise)
 
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
-    target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /fp:fast /wd4251 /d2vzeroupper-)
+    target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /wd4251 /d2vzeroupper-)
     
 elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
     if(MSVC)
-        target_compile_options(FastSIMD_FastNoise PRIVATE /GS- /fp:fast)
+        target_compile_options(FastSIMD_FastNoise PRIVATE /GS-)
     else()
-        target_compile_options(FastSIMD_FastNoise PRIVATE -ffast-math -fno-stack-protector)        
+        target_compile_options(FastSIMD_FastNoise PRIVATE -fno-stack-protector)        
     endif()
 
     if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
@@ -79,6 +83,12 @@ elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}"
     else()
         target_compile_options(FastSIMD_FastNoise PRIVATE -mllvm -x86-use-vzeroupper=0)        
     endif()
-
 endif()
 
+if(NOT FASTNOISE2_STRICT_FP)
+    if(MSVC)
+        target_compile_options(FastSIMD_FastNoise PRIVATE /fp:fast)
+    else()
+        target_compile_options(FastSIMD_FastNoise PRIVATE -ffast-math)   
+    endif()
+endif()
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 91c4ce3c..7dd7ad95 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -12,34 +12,43 @@ CPMAddPackage(
         "CORRADE_WITH_TESTSUITE OFF"
 )
 
-CPMAddPackage(
-    NAME GLFW
-    GITHUB_REPOSITORY glfw/glfw
-    GIT_TAG 3.3.9
-    EXCLUDE_FROM_ALL YES
-    OPTIONS
-        "BUILD_SHARED_LIBS OFF"
-        "GLFW_INSTALL OFF"
-        "GLFW_BUILD_TESTS OFF"
-        "GLFW_BUILD_EXAMPLES OFF"
-        "GLFW_BUILD_DOCS OFF"
-)
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    set(NODE_EDITOR_APP_TYPE_CAPS "EMSCRIPTEN")   
+    set(NODE_EDITOR_APP_TYPE "Emscripten")   
+else()
+    set(NODE_EDITOR_APP_TYPE_CAPS "GLFW")   
+    set(NODE_EDITOR_APP_TYPE "Glfw")   
+
+    CPMAddPackage(
+        NAME GLFW
+        GITHUB_REPOSITORY glfw/glfw
+        GIT_TAG 3.3.9
+        EXCLUDE_FROM_ALL YES
+        OPTIONS
+            "BUILD_SHARED_LIBS OFF"
+            "GLFW_INSTALL OFF"
+            "GLFW_BUILD_TESTS OFF"
+            "GLFW_BUILD_EXAMPLES OFF"
+            "GLFW_BUILD_DOCS OFF"
+    )
+endif()
 
 CPMAddPackage(
     NAME magnum
     GITHUB_REPOSITORY mosra/magnum
-    GIT_TAG b1419017650c83538d8fe4681de6f0bca524cf49
+    GIT_TAG c9a884938c606b7d4555da6d278d1f3e09588c3e
     GIT_SUBMODULES "src"
     EXCLUDE_FROM_ALL YES
     OPTIONS
         "MAGNUM_BUILD_STATIC ON"
         "MAGNUM_BUILD_PLUGINS_STATIC ON"
         "MAGNUM_BUILD_STATIC_UNIQUE_GLOBALS OFF"
-        "MAGNUM_WITH_GLFWAPPLICATION ON"
+        "MAGNUM_WITH_${NODE_EDITOR_APP_TYPE_CAPS}APPLICATION ON"
         "MAGNUM_WITH_MESHTOOLS OFF"
         "MAGNUM_WITH_TRADE OFF"
         "MAGNUM_WITH_TEXT OFF"
         "MAGNUM_WITH_TEXTURETOOLS OFF"
+        "MAGNUM_TARGET_GLES2 OFF"
 )
     
 CPMAddPackage(
@@ -66,7 +75,7 @@ CPMAddPackage(
 # Use modules from magnum-integration since it has everything we need
 set(CMAKE_MODULE_PATH "${magnum-integration_SOURCE_DIR}/modules" ${CMAKE_MODULE_PATH})
 
-find_package(Magnum REQUIRED GL GlfwApplication)
+find_package(Magnum REQUIRED GL ${NODE_EDITOR_APP_TYPE}Application)
 find_package(MagnumIntegration REQUIRED ImGui)
 find_package(ImGui REQUIRED SourcesMiscCpp)
   
@@ -130,7 +139,23 @@ if(CORRADE_TARGET_WINDOWS)
     target_sources(NodeEditor PRIVATE WindowsHiDPI.manifest)
 endif()
 
-if (UNIX)
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    set(CMAKE_EXECUTABLE_SUFFIX ".html")
+    target_compile_options(NodeEditor PRIVATE -pthread -msimd128)
+    target_link_options(NodeEditor PRIVATE 
+        "-sPTHREAD_POOL_SIZE=Math.max(2,navigator.hardwareConcurrency)+3-navigator.hardwareConcurrency/4"
+        -pthread -sALLOW_MEMORY_GROWTH=1 -lidbfs.js -s FORCE_FILESYSTEM
+        --shell-file "${CMAKE_CURRENT_SOURCE_DIR}/emscripten_shell.html" 
+        --pre-js "${CMAKE_CURRENT_SOURCE_DIR}/emscripten_pre.js"
+        -Wl,-u,_emscripten_run_callback_on_thread
+    )
+    add_custom_command(TARGET NodeEditor POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        "${CMAKE_CURRENT_SOURCE_DIR}/emscripten_enable_shared_array_buffer.js"
+        $<TARGET_FILE_DIR:NodeEditor>
+    )
+
+elseif (UNIX)
     target_link_options(NodeEditor PRIVATE -pthread)
 
     if(APPLE)
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index e9bad670..a2451bd4 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -22,7 +22,7 @@ using namespace Magnum;
 
 #include "SharedMemoryIpc.inl"
 
-static constexpr const char* kNodeGraphSettingsFile = "NodeGraph.ini";
+static constexpr const char* kNodeGraphSettingsFile = FILESYSTEM_ROOT "NodeGraph.ini";
 
 void FastNoiseNodeEditor::OpenStandaloneNodeGraph()
 {
@@ -67,7 +67,7 @@ void FastNoiseNodeEditor::OpenStandaloneNodeGraph()
         CloseHandle( pi.hThread );
     }
     else
-#else
+#elif !defined( __EMSCRIPTEN__ )
     pid_t pid = fork(); // Duplicate current process
 
     if( pid == 0 )
@@ -678,6 +678,7 @@ FastNoiseNodeEditor::~FastNoiseNodeEditor()
     ImGuiContext* currentContext = ImGui::GetCurrentContext();
     ImGui::SetCurrentContext( ImNodes::GetNodeEditorImGuiContext() );
     ImGui::SaveIniSettingsToDisk( kNodeGraphSettingsFile );
+    NodeEditorApp::SyncFileSystem();
     ImGui::SetCurrentContext( currentContext );
 
     ImNodes::DestroyContext();
@@ -715,7 +716,7 @@ void FastNoiseNodeEditor::DoNodeBenchmarks()
 
 void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& projection, const Vector3& cameraPosition )
 {
-#ifndef WIN32
+#if !defined( WIN32 ) && !defined( __EMSCRIPTEN__ )
     static pid_t parentPid = getppid();
 
     if( getppid() != parentPid ) 
@@ -829,6 +830,7 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
         {
             ImGui::SaveIniSettingsToDisk( kNodeGraphSettingsFile );
             ImGui::GetIO().WantSaveIniSettings = false;
+            NodeEditorApp::SyncFileSystem();
         }
 
         // Open this after saving settings
diff --git a/tools/MeshNoisePreview.cpp b/tools/MeshNoisePreview.cpp
index ab52e3c3..ce44f4c8 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/MeshNoisePreview.cpp
@@ -47,6 +47,8 @@ MeshNoisePreview::MeshNoisePreview()
         mThreads.emplace_back( GenerateLoopThread, std::ref( mGenerateQueue ), std::ref( mCompleteQueue ) );
     }
 
+    Debug{} << "Mesh generator thread count: " << mThreads.size();
+
     SetupSettingsHandlers();
 }
 
@@ -172,7 +174,7 @@ void MeshNoisePreview::Draw( const Matrix4& transformation, const Matrix4& proje
     }
 
     ImGui::Text( "Triangle Count: %0.1fM (%0.1fM)", mTriCount / 1000000.0f, drawnTriCount / 3000000.0f );
-    ImGui::Text( "Voxel Count: %0.1fM", ( mChunks.size() * Chunk::SIZE * Chunk::SIZE * Chunk::SIZE ) / 1000000.0 );
+    ImGui::Text( "Voxel Count: %0.1fM", mChunks.size() * ( Chunk::SIZE * Chunk::SIZE * Chunk::SIZE / 1000000.0f ) );
     ImGui::Text( "Loaded Chunks: %zu (%d)", mChunks.size(), mMeshesCount );
 
     size_t generateCount = mGenerateQueue.Count();
diff --git a/tools/NodeEditorApp.cpp b/tools/NodeEditorApp.cpp
index 3bb9d323..a78d346f 100644
--- a/tools/NodeEditorApp.cpp
+++ b/tools/NodeEditorApp.cpp
@@ -8,12 +8,18 @@
 #include <Magnum/GL/DefaultFramebuffer.h>
 #include <Magnum/GL/Renderer.h>
 
+#ifdef __EMSCRIPTEN__
+#include <emscripten.h>
+#endif
+
 #include "NodeEditorApp.h"
 #include "ImGuiExtra.h"
 #include "FastSIMD/FastSIMD_FastNoise_config.h"
 
 using namespace Magnum;
 
+static constexpr const char* kAppSettingsFile = FILESYSTEM_ROOT "NodeEditor.ini";
+
 void InitResources()
 {
 #ifdef MAGNUM_BUILD_STATIC
@@ -30,10 +36,14 @@ NodeEditorApp::NodeEditorApp( const Arguments& arguments ) :
     Platform::Application{ arguments,
         Configuration{}
         .setTitle( IsDetached( arguments ) ? "FastNoise2 Node Graph" : "FastNoise2 Node Editor" )
+#ifdef __EMSCRIPTEN__
+        .setWindowFlags( Configuration::WindowFlag::Resizable )
+#else
         .setSize( Vector2i( 1280, 720 ) )
         .setWindowFlags( Configuration::WindowFlag::Resizable | ( IsDetached( arguments ) ? (Configuration::WindowFlag)0 : Configuration::WindowFlag::Maximized ) ),
         GLConfiguration{}
         .setSampleCount( 4 )
+#endif
     },
     mIsDetachedNodeGraph( IsDetached( arguments ) ),
     mExecutablePath( arguments.argv[0] ),
@@ -54,13 +64,18 @@ NodeEditorApp::NodeEditorApp( const Arguments& arguments ) :
         ImGui::GetIO().Fonts->AddFontFromMemoryTTF( const_cast<char*>( font.data() ), (int)font.size(), 14.0f * framebufferSize().x() / size.x(), &fontConfig );
     }
 
-    ImGui::GetIO().IniFilename = "NodeEditor.ini";
+    // We manually save so we can sync the filesystem on emscripten
+    ImGui::GetIO().IniFilename = nullptr;
+    ImGui::LoadIniSettingsFromDisk( kAppSettingsFile );
+
     ImGui::GetIO().ConfigDragClickToInputText = true;
     mImGuiIntegrationContext = ImGuiIntegration::Context( *mImGuiContext, size, windowSize(), framebufferSize() );
 
     GL::Renderer::enable( GL::Renderer::Feature::DepthTest );
 
+#ifndef __EMSCRIPTEN__
     setSwapInterval( 1 );
+#endif
 
     mFrameTime.start();
 
@@ -95,12 +110,31 @@ NodeEditorApp::~NodeEditorApp()
     FastNoiseNodeEditor::ReleaseSharedMemoryIpc();
 }
 
+void NodeEditorApp::SyncFileSystem()
+{
+#ifdef __EMSCRIPTEN__
+    // Don't forget to sync to make sure you store it to IndexedDB
+    EM_ASM(
+        FS.syncfs( false, function( err ) {
+            if (err) {
+                console.warn("Error saving:", err);
+            } } ); );
+#endif
+}
+
 void NodeEditorApp::drawEvent()
 {
     GL::defaultFramebuffer.clear( GL::FramebufferClear::Color | GL::FramebufferClear::Depth );
 
     mImGuiIntegrationContext.newFrame();
 
+    if( ImGui::GetIO().WantSaveIniSettings )
+    {
+        ImGui::SaveIniSettingsToDisk( kAppSettingsFile );
+        ImGui::GetIO().WantSaveIniSettings = false;
+        SyncFileSystem();
+    }
+
     /* Enable text input, if needed */
     if( ImGui::GetIO().WantTextInput && !isTextInputActive() )
         startTextInput();
diff --git a/tools/NodeEditorApp.h b/tools/NodeEditorApp.h
index be9839cf..f5be0746 100644
--- a/tools/NodeEditorApp.h
+++ b/tools/NodeEditorApp.h
@@ -1,8 +1,16 @@
 #pragma once
 
 #include <array>
-#include <Magnum/Math/Color.h>
+
+#ifdef __EMSCRIPTEN__
+#define FILESYSTEM_ROOT "/fastnoise2/"
+#include <Magnum/Platform/EmscriptenApplication.h>
+#else
+#define FILESYSTEM_ROOT
 #include <Magnum/Platform/GlfwApplication.h>
+#endif
+
+#include <Magnum/Math/Color.h>
 #include <Magnum/ImGuiIntegration/Context.h>
 #include <Magnum/SceneGraph/Object.h>
 #include <Magnum/SceneGraph/Camera.h>
@@ -33,6 +41,8 @@ namespace Magnum
             return mExecutablePath;
         }
 
+        static void SyncFileSystem();
+
     private:
         void drawEvent() override;
         void viewportEvent( ViewportEvent& event ) override;
diff --git a/tools/NoiseTexture.cpp b/tools/NoiseTexture.cpp
index fe962823..dded0ad4 100644
--- a/tools/NoiseTexture.cpp
+++ b/tools/NoiseTexture.cpp
@@ -36,6 +36,8 @@ NoiseTexture::NoiseTexture()
         mThreads.emplace_back( GenerateLoopThread, std::ref( mGenerateQueue ), std::ref( mCompleteQueue ) );
     }
 
+    Debug{} << "Texture generator thread count: " << mThreads.size();
+
     SetupSettingsHandlers();
 }
 
@@ -61,7 +63,7 @@ void NoiseTexture::Draw()
         if( mCurrentIteration < texData.iteration )
         {
             mCurrentIteration = texData.iteration;
-            ImageView2D noiseImage( PixelFormat::RGBA8Srgb, texData.size, texData.textureData );
+            ImageView2D noiseImage( PixelFormat::RGBA8Unorm, texData.size, texData.textureData );
             SetPreviewTexture( noiseImage );
         }
         texData.Free();
@@ -131,7 +133,7 @@ void NoiseTexture::Draw()
         ImGui::PushStyleColor( ImGuiCol_Button, 0 );
         ImGui::PushStyleColor( ImGuiCol_ButtonActive, 0 );
         ImGui::PushStyleColor( ImGuiCol_ButtonHovered, 0 );
-        ImGuiIntegration::imageButton( mNoiseTexture, Vector2( mNoiseTexture.imageSize( 0 ) ), {{},Vector2{1}}, 0 );
+        ImGuiIntegration::imageButton( mNoiseTexture, Vector2( mBuildData.size ), { {}, Vector2 { 1 } }, 0 );
         ImGui::PopStyleColor( 3 );
 
         if( ImGui::IsItemHovered() )
diff --git a/tools/SharedMemoryIpc.inl b/tools/SharedMemoryIpc.inl
index 0f213f4f..380c0d5b 100644
--- a/tools/SharedMemoryIpc.inl
+++ b/tools/SharedMemoryIpc.inl
@@ -1,3 +1,4 @@
+#ifndef __EMSCRIPTEN__
 #ifdef _WIN32
 #define NOMINMAX
 #define WIN32_LEAN_AND_MEAN
@@ -8,6 +9,7 @@
 #include <sys/stat.h> // For mode constants
 #include <unistd.h>
 #endif
+#endif
 
 static constexpr const char* kSharedMemoryName = "/FastNoise2NodeEditor";
 static constexpr unsigned int kSharedMemorySize = 64 * 1024;
@@ -15,7 +17,9 @@ static constexpr unsigned int kSharedMemorySize = 64 * 1024;
 // Setup shared memory for IPC selected node ENT updates
 void* FastNoiseNodeEditor::SetupSharedMemoryIpc()
 {
-#ifdef WIN32
+#ifdef __EMSCRIPTEN__
+    return nullptr;
+#elif defined( WIN32 )
     // Create a shared memory file mapping
     HANDLE hMapFile = CreateFileMapping(
         INVALID_HANDLE_VALUE, // Use paging file - shared memory
@@ -76,7 +80,7 @@ void* FastNoiseNodeEditor::SetupSharedMemoryIpc()
 
 void FastNoiseNodeEditor::ReleaseSharedMemoryIpc()
 {
-#ifndef WIN32
+#if !defined( WIN32 ) && !defined( __EMSCRIPTEN__ )
     shm_unlink( kSharedMemoryName );
 #endif
 }
diff --git a/tools/emscripten_enable_shared_array_buffer.js b/tools/emscripten_enable_shared_array_buffer.js
new file mode 100644
index 00000000..83bf5d94
--- /dev/null
+++ b/tools/emscripten_enable_shared_array_buffer.js
@@ -0,0 +1,75 @@
+// NOTE: This file creates a service worker that cross-origin-isolates the page (read more here: https://web.dev/coop-coep/) which allows us to use wasm threads.
+// Normally you would set the COOP and COEP headers on the server to do this, but Github Pages doesn't allow this, so this is a hack to do that.
+
+/* Edited version of: coi-serviceworker v0.1.6 - Guido Zuidhof, licensed under MIT */
+// From here: https://github.com/gzuidhof/coi-serviceworker
+if(typeof window === 'undefined') {
+  self.addEventListener("install", () => self.skipWaiting());
+  self.addEventListener("activate", e => e.waitUntil(self.clients.claim()));
+
+  async function handleFetch(request) {
+    if(request.cache === "only-if-cached" && request.mode !== "same-origin") {
+      return;
+    }
+    
+    if(request.mode === "no-cors") { // We need to set `credentials` to "omit" for no-cors requests, per this comment: https://bugs.chromium.org/p/chromium/issues/detail?id=1309901#c7
+      request = new Request(request.url, {
+        cache: request.cache,
+        credentials: "omit",
+        headers: request.headers,
+        integrity: request.integrity,
+        destination: request.destination,
+        keepalive: request.keepalive,
+        method: request.method,
+        mode: request.mode,
+        redirect: request.redirect,
+        referrer: request.referrer,
+        referrerPolicy: request.referrerPolicy,
+        signal: request.signal,
+      });
+    }
+    
+    let r = await fetch(request).catch(e => console.error(e));
+    
+    if(r.status === 0) {
+      return r;
+    }
+
+    const headers = new Headers(r.headers);
+    headers.set("Cross-Origin-Embedder-Policy", "require-corp"); // or: credentialless
+    headers.set("Cross-Origin-Opener-Policy", "same-origin");
+    
+    return new Response(r.body, { status: r.status, statusText: r.statusText, headers });
+  }
+
+  self.addEventListener("fetch", function(e) {
+    e.respondWith(handleFetch(e.request)); // respondWith must be executed synchonously (but can be passed a Promise)
+  });
+  
+} else {
+  (async function() {
+    if(window.crossOriginIsolated !== false) return;
+
+    let registration = await navigator.serviceWorker.register(window.document.currentScript.src).catch(e => console.error("COOP/COEP Service Worker failed to register:", e));
+    if(registration) {
+      console.log("COOP/COEP Service Worker registered", registration.scope);
+
+      registration.addEventListener("updatefound", () => {
+        console.log("Reloading page to make use of updated COOP/COEP Service Worker.");
+        window.location.reload();
+      });
+
+      // If the registration is active, but it's not controlling the page
+      if(registration.active && !navigator.serviceWorker.controller) {
+        console.log("Reloading page to make use of COOP/COEP Service Worker.");
+        window.location.reload();
+      }
+    }
+  })();
+}
+
+// Code to deregister:
+// let registrations = await navigator.serviceWorker.getRegistrations();
+// for(let registration of registrations) {
+//   await registration.unregister();
+// }
diff --git a/tools/emscripten_pre.js b/tools/emscripten_pre.js
new file mode 100644
index 00000000..f01af27c
--- /dev/null
+++ b/tools/emscripten_pre.js
@@ -0,0 +1,11 @@
+(Module["preRun"] = Module["preRun"] || []).push(function () {
+    addRunDependency('syncfs')
+
+    FS.mkdir('/fastnoise2')
+    FS.mount(IDBFS, {}, '/fastnoise2')
+    FS.syncfs(true, function (err) {
+        if (err) throw err
+        removeRunDependency('syncfs')
+        console.log("FS Synced")
+    })
+});
\ No newline at end of file
diff --git a/tools/emscripten_shell.html b/tools/emscripten_shell.html
new file mode 100644
index 00000000..14282b11
--- /dev/null
+++ b/tools/emscripten_shell.html
@@ -0,0 +1,76 @@
+<!doctype html>
+<html lang="en-us">
+
+  <!-- NOTE: THIS FILE BASED ON: imgui/examples/libs/emscripten/shell_minimal.html -->
+
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, minimum-scale=1, user-scalable=no"/>
+
+    <title>FastNoise 2 Node Editor</title>
+
+    <style>
+        body { margin: 0; background-color: black }
+        /* FIXME: with our GLFW example this block seems to break resizing and io.DisplaySize gets stuck */
+        .emscripten {
+            position: absolute;
+            top:  0px;
+            left: 0px;
+            margin: 0px;
+            border: 0;
+            width:  100%;
+            height: 100%;
+            overflow: hidden;
+            display: block;
+            image-rendering: optimizeSpeed;
+            image-rendering: -moz-crisp-edges;
+            image-rendering: -o-crisp-edges;
+            image-rendering: -webkit-optimize-contrast;
+            image-rendering: optimize-contrast;
+            image-rendering: crisp-edges;
+            image-rendering: pixelated;
+            -ms-interpolation-mode: nearest-neighbor;
+        }
+    </style>
+  </head>
+
+  <body>
+    <canvas class="emscripten" id="canvas" oncontextmenu="event.preventDefault()"></canvas>
+
+    <script type='text/javascript'>
+      var Module = {
+        preRun: [],
+        postRun: [],
+        print: (function() {
+            return function(text) {
+                text = Array.prototype.slice.call(arguments).join(' ');
+                console.log(text);
+            };
+        })(),
+        printErr: function(text) {
+            text = Array.prototype.slice.call(arguments).join(' ');
+            console.error(text);
+        },
+        canvas: (function() {
+            var canvas = document.getElementById('canvas');
+            //canvas.addEventListener("webglcontextlost", function(e) { alert('FIXME: WebGL context lost, please reload the page'); e.preventDefault(); }, false);
+            return canvas;
+        })(),
+        setStatus: function(text) {
+            console.log("status: " + text);
+        },
+        monitorRunDependencies: function(left) {
+            // no run dependencies to log
+        }
+      };
+      window.onerror = function() {
+        console.log("onerror: " + event);
+      };
+    </script>
+    <script src="emscripten_enable_shared_array_buffer.js"></script>
+
+    {{{ SCRIPT }}}
+
+  </body>
+
+</html>

From e65569b12cfd12e837a95b248ed229fa7a26999a Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 28 Apr 2024 00:41:13 +0100
Subject: [PATCH 085/139] Split Mac ARM and x86 CI builds

---
 .github/workflows/main.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b2b460a3..8abca9f7 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -35,9 +35,12 @@ jobs:
           - os: ubuntu-latest
             name: Linux64-Clang
             cmake_options: -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
-          - os: macos-latest
+          - os: macos-13
             name: MacOS64-Clang
             cmake_options: -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+          - os: macos-latest
+            name: MacOSARM64-Clang
+            cmake_options: -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
             
     steps:                
     - name: 'Install OpenGL & xorg'

From f9498a449f74e6ab5c22aa0d93ab729850e00495 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 28 Apr 2024 13:06:32 +0100
Subject: [PATCH 086/139] Fix BMP export not working on emscripten

---
 tools/NodeEditorApp.cpp |  4 +++-
 tools/NoiseTexture.cpp  | 42 ++++++++++++++++++++++++++++++++++++++---
 tools/NoiseTexture.h    |  1 +
 3 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/tools/NodeEditorApp.cpp b/tools/NodeEditorApp.cpp
index a78d346f..380904f1 100644
--- a/tools/NodeEditorApp.cpp
+++ b/tools/NodeEditorApp.cpp
@@ -118,7 +118,9 @@ void NodeEditorApp::SyncFileSystem()
         FS.syncfs( false, function( err ) {
             if (err) {
                 console.warn("Error saving:", err);
-            } } ); );
+            }
+        } );
+    );
 #endif
 }
 
diff --git a/tools/NoiseTexture.cpp b/tools/NoiseTexture.cpp
index dded0ad4..4ec4e64a 100644
--- a/tools/NoiseTexture.cpp
+++ b/tools/NoiseTexture.cpp
@@ -1,6 +1,12 @@
 #include <cstdio>
-#include <fstream>
+
+#ifdef __EMSCRIPTEN__
+#include <emscripten.h>
+#include <sstream>
+#else
 #include <filesystem>
+#include <fstream>
+#endif
 
 #define IMGUI_DEFINE_MATH_OPERATORS
 #include <imgui.h>
@@ -105,13 +111,15 @@ void NoiseTexture::Draw()
         edited |= ImGui::DragFloat( "Scale", &mBuildData.scale, 0.05f );
         ImGui::SameLine();
 
-        if( mBuildData.generator && ImGui::Button( "Export BMP" ) )
+        ImGui::BeginDisabled( mIsExporting );
+        if( mBuildData.generator && ImGui::Button( mIsExporting ? "Exporting..." : "Export BMP" ) )
         {
             auto size = mExportBuildData.size;
             mExportBuildData = mBuildData;
             mExportBuildData.size = size;
             ImGui::OpenPopup( "Export BMP" );
         }
+        ImGui::EndDisabled();
 
         ImGui::PopItemWidth();
 
@@ -196,14 +204,20 @@ void NoiseTexture::DoExport()
             {
                 mExportThread.join();
             }
-            mExportThread = std::thread([buildData = mExportBuildData]()
+            mIsExporting.store( true, std::memory_order::relaxed );
+
+            mExportThread = std::thread([buildData = mExportBuildData, this]()
             {
+                Debug{} << "BMP Export Started";
                 auto data = BuildTexture( buildData );
 
                 const char* nodeName = buildData.generator->GetMetadata().name;
                 std::string filename = nodeName;
                 filename += ".bmp";
 
+#ifdef __EMSCRIPTEN__
+                std::stringstream file;
+#else
                 // Iterate through file names if filename exists
                 for( int i = 1; i < 1024; i++ )
                 {
@@ -218,6 +232,7 @@ void NoiseTexture::DoExport()
                 std::ofstream file( filename.c_str(), std::ofstream::binary | std::ofstream::out | std::ofstream::trunc );
 
                 if( file.is_open() )
+#endif
                 {
                     struct BmpHeader
                     {
@@ -273,9 +288,30 @@ void NoiseTexture::DoExport()
                         }
                     }
 
+#ifdef __EMSCRIPTEN__
+                    std::string_view fileString = file.view();
+
+                    MAIN_THREAD_EM_ASM( (
+                        // Create a temporary ArrayBuffer and copy the contents of the shared buffer
+                        // into it.
+                        const tempBuffer = new ArrayBuffer( $2 );
+                        const tempView = new Uint8Array( tempBuffer );
+
+                        let sharedView = new Uint8Array( Module["HEAPU8"].buffer, $1, $2 );
+                        tempView.set( sharedView );
+
+                        /// Offer a buffer in memory as a file to download, specifying download filename and mime type
+                        var a = document.createElement( 'a' );
+                        a.download = UTF8ToString( $0 );
+                        a.href = URL.createObjectURL( new Blob( [tempView], {type: 'image/bmp'} ) );
+                        a.click();
+                        ), filename.c_str(), fileString.data(), fileString.length() );
+#else
                     file.close();
+#endif
 
                     Debug{} << "BMP Export Complete: " << filename.c_str();
+                    mIsExporting = false;
                 }
             } );
         }
diff --git a/tools/NoiseTexture.h b/tools/NoiseTexture.h
index 8f5af873..704358da 100644
--- a/tools/NoiseTexture.h
+++ b/tools/NoiseTexture.h
@@ -96,6 +96,7 @@ namespace Magnum
         BuildData mExportBuildData;
         FastNoise::OutputMinMax mMinMax;
 
+        std::atomic_bool mIsExporting = false;
         std::thread mExportThread;
         std::vector<std::thread> mThreads;
         GenerateQueue<BuildData> mGenerateQueue;

From f8fd6f85f989688e2fc347dfaa07503575cb1fa6 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 28 Apr 2024 13:16:46 +0100
Subject: [PATCH 087/139] TEST hybrid range inputs for ScalableGenerator

---
 include/FastNoise/Generators/BasicGenerators.h   | 9 +++++----
 include/FastNoise/Generators/BasicGenerators.inl | 5 ++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index cd0c13c4..07137e9b 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -34,18 +34,19 @@ namespace FastNoise
     public:
         void SetOutputMin( float value )
         {
-            mRangeScale += mRangeMin - value;
+            //mRangeScale += mRangeMin - value;
             mRangeMin = value;
         }
 
         void SetOutputMax( float value )
         {
-            mRangeScale = ( value - mRangeMin );
+            mRangeScale = value;
+            //mRangeScale = ( value - mRangeMin );
         }
 
     protected:
-        float mRangeMin = -1;
-        float mRangeScale = 2;
+        HybridSource mRangeMin = -1;
+        HybridSource mRangeScale = 2;
     };
 
 #ifdef FASTNOISE_METADATA
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index 604d768b..ff0b8664 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -19,7 +19,10 @@ class FastSIMD::DispatchClass<FastNoise::VariableRange<PARENT>, SIMD> : public v
 protected:
     FS_FORCEINLINE float32v ScaleOutput( float32v value, float nativeMin, float nativeMax ) const
     {
-        return FS::FMulAdd( float32v( 1.0f / ( nativeMax - nativeMin ) ) * float32v( this->mRangeScale ), value - float32v( nativeMin ), float32v( this->mRangeMin ) );
+        float32v rangeMin = this->GetSourceValue( this->mRangeMin, int32v(), float32v(), float32v() );
+        float32v rangeScale = this->GetSourceValue( this->mRangeScale, int32v(), float32v(), float32v() ) - rangeMin;
+
+        return FS::FMulAdd( float32v( 1.0f / ( nativeMax - nativeMin ) ) * rangeScale, value - float32v( nativeMin ), rangeMin );
     }
 };
 

From fe0fd4368ddce44c76e60b74e7ebde9c553a2d24 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 4 May 2024 20:29:54 +0100
Subject: [PATCH 088/139] Add Minkowski distance function

---
 .../FastNoise/Generators/BasicGenerators.h    | 25 +++++++++++---
 .../FastNoise/Generators/BasicGenerators.inl  |  4 +--
 include/FastNoise/Generators/Cellular.h       | 11 +++++--
 include/FastNoise/Generators/Cellular.inl     | 18 +++++-----
 include/FastNoise/Generators/Generator.h      |  8 +++--
 include/FastNoise/Generators/Generator.inl    |  6 ++--
 include/FastNoise/Generators/Utils.inl        | 33 +++++++++++--------
 7 files changed, 69 insertions(+), 36 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 07137e9b..f9ed01ee 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -195,13 +195,28 @@ namespace FastNoise
         void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
         void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
 
+        void SetMinkowskiP( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mMinkowskiP, gen ); }
+        void SetMinkowskiP( float value ) { mMinkowskiP = value; }
+
+        void SetPoint( float x, float y, float z = 0, float w = 0 )
+        {
+            mPoint[0] = x;
+            mPoint[1] = y;
+            mPoint[2] = z;
+            mPoint[3] = w;
+        }
+
         template<Dim D>
-        void SetScale( float value ) { mPoint[(int)D] = value; }
+        void SetPoint( float value ) { mPoint[(int)D] = value; }
+
+        template<Dim D>
+        void SetPoint( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPoint[(int)D], gen ); }
 
     protected:
         GeneratorSource mSource;
+        HybridSource mMinkowskiP = 0.5f;
         DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
-        PerDimensionVariable<float> mPoint = 0.0f;
+        PerDimensionVariable<HybridSource> mPoint = 0.0f;
 
         template<typename T>
         friend struct MetadataT;
@@ -217,10 +232,12 @@ namespace FastNoise
         {
             groups.push_back( "Basic Generators" );
             this->AddVariableEnum( "Distance Function", DistanceFunction::Euclidean, &DistanceToPoint::SetDistanceFunction, kDistanceFunction_Strings );
-            this->AddPerDimensionVariable( { "Point", "Point in current domain space" }, 0.0f, []( DistanceToPoint* p ) { return std::ref( p->mPoint ); } );
+            this->AddPerDimensionHybridSource( { "Point", "Point in current domain space" }, 0.0f, []( DistanceToPoint* p ) { return std::ref( p->mPoint ); } );
+
+            this->AddHybridSource( { "Minkowski P", "Only affects Minkowski distance function\n1 = Manhattan\n2 = Euclidean" }, 0.5f, &DistanceToPoint::SetMinkowskiP, &DistanceToPoint::SetMinkowskiP );
 
             description =
-                "Outputs calculated distance between point and input position";
+                "Outputs distance between point and input position";
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index ff0b8664..8df29db8 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -109,7 +109,7 @@ class FastSIMD::DispatchClass<FastNoise::DistanceToPoint, SIMD> final : public v
     {
         size_t pointIdx = 0;
 
-        ((pos -= float32v( mPoint[pointIdx++] )), ...);
-        return CalcDistance( mDistanceFunction, pos... );
+        ((pos -= this->GetSourceValue( mPoint[pointIdx++], seed, pos... ) ), ...);
+        return CalcDistance( mDistanceFunction, mMinkowskiP, seed, pos... );
     }
 };
diff --git a/include/FastNoise/Generators/Cellular.h b/include/FastNoise/Generators/Cellular.h
index 246e2c98..83b853a5 100644
--- a/include/FastNoise/Generators/Cellular.h
+++ b/include/FastNoise/Generators/Cellular.h
@@ -9,11 +9,16 @@ namespace FastNoise
     class Cellular : public virtual PARENT
     {
     public:
+        void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
+
+        void SetMinkowskiP( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mMinkowskiP, gen ); }
+        void SetMinkowskiP( float value ) { mMinkowskiP = value; }
+
         void SetJitterModifier( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mJitterModifier, gen ); }
         void SetJitterModifier( float value ) { mJitterModifier = value; }
-        void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
 
     protected:
+        HybridSource mMinkowskiP = 0.5f;
         HybridSource mJitterModifier = 1.0f;
         DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
     };
@@ -25,9 +30,11 @@ namespace FastNoise
         MetadataT()
         {
             this->groups.push_back( "Coherent Noise" );
-            this->AddHybridSource( { "Jitter Modifier", "Above 1.0 will cause grid artifacts" }, 1.0f, &Cellular<PARENT>::SetJitterModifier, &Cellular<PARENT>::SetJitterModifier );
             this->AddVariableEnum( { "Distance Function", "How distance to closest cells is calculated\nHybrid is EuclideanSquared + Manhattan" },
                 DistanceFunction::EuclideanSquared, &Cellular<PARENT>::SetDistanceFunction, kDistanceFunction_Strings );
+            this->AddHybridSource( { "Minkowski P", "Only affects Minkowski distance function\n1 = Manhattan\n2 = Euclidean" }, 0.5f, &Cellular<PARENT>::SetMinkowskiP, &Cellular<PARENT>::SetMinkowskiP );
+
+            this->AddHybridSource( { "Jitter Modifier", "Above 1.0 will cause grid artifacts" }, 1.0f, &Cellular<PARENT>::SetJitterModifier, &Cellular<PARENT>::SetJitterModifier );
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index e7bc34bb..dc708ffb 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -51,7 +51,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
                 int32v newCellValueHash = hash;
-                float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd );
 
                 for( int i = 0; ; i++ )
                 {
@@ -125,7 +125,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                     zd = FS::FMulAdd( zd, invMag, zcf );
 
                     int32v newCellValueHash = hash;
-                    float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd );
                 
                     for( int i = 0; ; i++ )
                     {
@@ -211,7 +211,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularValue, SIMD> final : public vir
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
                         int32v newCellValueHash = hash;
-                        float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd, wd );
 
                         for( int i = 0; ; i++ )
                         {
@@ -284,7 +284,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                 xd = FS::FMulAdd( xd, invMag, xcf );
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
-                float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd );
 
                 for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                 {
@@ -344,7 +344,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                     yd = FS::FMulAdd( yd, invMag, ycf );
                     zd = FS::FMulAdd( zd, invMag, zcf );
 
-                    float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd );
 
                     for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                     {
@@ -416,7 +416,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                         zd = FS::FMulAdd( zd, invMag, zcf );
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
-                        float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd, wd );
 
                         for( int i = kMaxDistanceCount - 1; i > 0; i-- )
                         {
@@ -512,7 +512,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                 xd = FS::FMulAdd( xd, invMag, xcf );
                 yd = FS::FMulAdd( yd, invMag, ycf );
 
-                float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd );
+                float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd );
 
                 mask32v closer = newDistance < distance;
                 distance = FS::Min( newDistance, distance );
@@ -570,7 +570,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                     yd = FS::FMulAdd( yd, invMag, ycf );
                     zd = FS::FMulAdd( zd, invMag, zcf );
 
-                    float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd );
+                    float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd );
 
                     mask32v closer = newDistance < distance;
                     distance = FS::Min( newDistance, distance );
@@ -641,7 +641,7 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                         zd = FS::FMulAdd( zd, invMag, zcf );
                         wd = FS::FMulAdd( wd, invMag, wcf );
 
-                        float32v newDistance = CalcDistance<false>( mDistanceFunction, xd, yd, zd, wd );
+                        float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd, wd );
 
                         mask32v closer = newDistance < distance;
                         distance = FS::Min( newDistance, distance );
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index cafe297e..8016934e 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -10,7 +10,7 @@
 
 #include "FastNoise/Utility/Config.h"
 
-#if !defined( FASTNOISE_METADATA ) && defined( __INTELLISENSE__ )
+#if !defined( FASTNOISE_METADATA ) && ( defined( __INTELLISENSE__ ) || defined( __CLION_IDE__ ) )
 #define FASTNOISE_METADATA
 #endif
 
@@ -35,6 +35,7 @@ namespace FastNoise
         Manhattan,
         Hybrid,
         MaxAxis,
+        Minkowski,
     };
 
     constexpr static const char* kDistanceFunction_Strings[] =
@@ -44,6 +45,7 @@ namespace FastNoise
         "Manhattan",
         "Hybrid",
         "Max Axis",
+        "Minkowski",
     };
 
     struct OutputMinMax
@@ -87,7 +89,7 @@ namespace FastNoise
     {
         float constant;
 
-        HybridSourceT( float f = 0.0f )
+        constexpr HybridSourceT( float f = 0.0f )
         {
             constant = f;
         }
@@ -173,7 +175,7 @@ namespace FastNoise
         T varArray[(int)Dim::Count];
 
         template<typename U = T>
-        PerDimensionVariable( U value = 0 )
+        constexpr PerDimensionVariable( U value = 0 )
         {
             for( T& element : varArray )
             {
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index 7e8edc8d..adc32bc4 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -46,7 +46,7 @@ public:
     }
 
     template<typename T, typename... POS>
-    FS_FORCEINLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::HybridSourceT<T>& memberVariable, int32v seed, POS... pos ) const
+    static FS_FORCEINLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::HybridSourceT<T>& memberVariable, int32v seed, POS... pos )
     {
         if( memberVariable.simdGeneratorPtr )
         {
@@ -58,7 +58,7 @@ public:
     }
 
     template<typename T, typename... POS>
-    FS_FORCEINLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::GeneratorSourceT<T>& memberVariable, int32v seed, POS... pos ) const
+    static FS_FORCEINLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::GeneratorSourceT<T>& memberVariable, int32v seed, POS... pos )
     {
         assert( memberVariable.simdGeneratorPtr );
         auto simdGen = reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr );
@@ -67,7 +67,7 @@ public:
     }
 
     template<typename T>
-    FS_FORCEINLINE const DispatchClass<T, SIMD>* GetSourceSIMD( const FastNoise::GeneratorSourceT<T>& memberVariable ) const
+    static FS_FORCEINLINE const DispatchClass<T, SIMD>* GetSourceSIMD( const FastNoise::GeneratorSourceT<T>& memberVariable )
     {
         assert( memberVariable.simdGeneratorPtr );
         auto simdGen = reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr );
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 5e647866..e706e757 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -2,7 +2,7 @@
 #include <climits>
 
 namespace FastNoise
-{    
+{
     namespace Primes
     {
         static constexpr int X = 501125321;
@@ -240,8 +240,8 @@ namespace FastNoise
         return t * t * t * FS::FMulAdd( t, FS::FMulAdd( t, float32v( 6 ), float32v( -15 )), float32v( 10 ) );
     }
 
-    template<bool DO_SQRT = true, typename... P>
-    FS_FORCEINLINE static float32v CalcDistance( DistanceFunction distFunc, float32v dX, P... d )
+    template<bool DO_SQRT = true, FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault(), typename... P>
+    FS_FORCEINLINE static float32v CalcDistance( DistanceFunction distFunc, const HybridSource& minkowskiP, int32v seed, float32v pX, P... pos )
     {
         switch( distFunc )
         {
@@ -249,8 +249,8 @@ namespace FastNoise
             case DistanceFunction::Euclidean:
             if constexpr( DO_SQRT )
             {
-                float32v distSqr = dX * dX;
-                ((distSqr = FS::FMulAdd( d, d, distSqr )), ...);
+                float32v distSqr = pX * pX;
+                ((distSqr = FS::FMulAdd( pos, pos, distSqr )), ...);
 
                 float32v invSqrt = FS::InvSqrt( distSqr );
 
@@ -259,35 +259,42 @@ namespace FastNoise
 
             case DistanceFunction::EuclideanSquared:
             {
-                float32v distSqr = dX * dX;
-                ((distSqr = FS::FMulAdd( d, d, distSqr )), ...);
+                float32v distSqr = pX * pX;
+                ((distSqr = FS::FMulAdd( pos, pos, distSqr )), ...);
 
                 return distSqr;
             }
 
             case DistanceFunction::Manhattan:
             {
-                float32v dist = FS::Abs( dX );
-                dist += (FS::Abs( d ) + ...);
+                float32v dist = FS::Abs( pX );
+                dist += (FS::Abs( pos ) + ...);
 
                 return dist;
             }
 
             case DistanceFunction::Hybrid:
             {
-                float32v both = FS::FMulAdd( dX, dX, FS::Abs( dX ) );
-                ((both += FS::FMulAdd( d, d, FS::Abs( d ) )), ...);
+                float32v both = FS::FMulAdd( pX, pX, FS::Abs( pX ) );
+                ((both += FS::FMulAdd( pos, pos, FS::Abs( pos ) )), ...);
 
                 return both;
             }
 
             case DistanceFunction::MaxAxis:
             {
-                float32v max = FS::Abs( dX );
-                ((max = FS::Max( FS::Abs(d), max )), ...);
+                float32v max = FS::Abs( pX );
+                ((max = FS::Max( FS::Abs( pos ), max )), ...);
 
                 return max;
             }
+
+            case DistanceFunction::Minkowski:
+            {
+                float32v minkowski = FastSIMD::DispatchClass<Generator, SIMD>::GetSourceValue( minkowskiP, seed, pX, pos... );
+
+                return FS::Pow( FS::Pow( FS::Abs( pX ), minkowski) + (FS::Pow( FS::Abs( pos ), minkowski) + ...), FS::Reciprocal( minkowski ) );
+            }
         }
     }    
 }

From 9df09c8c6751287f4243a9f5e10ac3d3b5f6835f Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 4 May 2024 20:48:31 +0100
Subject: [PATCH 089/139] MinkowskiP defaults to 1.5

---
 include/FastNoise/Generators/BasicGenerators.h | 4 ++--
 include/FastNoise/Generators/Cellular.h        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index f9ed01ee..f9846b38 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -214,7 +214,7 @@ namespace FastNoise
 
     protected:
         GeneratorSource mSource;
-        HybridSource mMinkowskiP = 0.5f;
+        HybridSource mMinkowskiP = 1.5f;
         DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
         PerDimensionVariable<HybridSource> mPoint = 0.0f;
 
@@ -234,7 +234,7 @@ namespace FastNoise
             this->AddVariableEnum( "Distance Function", DistanceFunction::Euclidean, &DistanceToPoint::SetDistanceFunction, kDistanceFunction_Strings );
             this->AddPerDimensionHybridSource( { "Point", "Point in current domain space" }, 0.0f, []( DistanceToPoint* p ) { return std::ref( p->mPoint ); } );
 
-            this->AddHybridSource( { "Minkowski P", "Only affects Minkowski distance function\n1 = Manhattan\n2 = Euclidean" }, 0.5f, &DistanceToPoint::SetMinkowskiP, &DistanceToPoint::SetMinkowskiP );
+            this->AddHybridSource( { "Minkowski P", "Only affects Minkowski distance function\n1 = Manhattan\n2 = Euclidean" }, 1.5f, &DistanceToPoint::SetMinkowskiP, &DistanceToPoint::SetMinkowskiP );
 
             description =
                 "Outputs distance between point and input position";
diff --git a/include/FastNoise/Generators/Cellular.h b/include/FastNoise/Generators/Cellular.h
index 83b853a5..153de1d7 100644
--- a/include/FastNoise/Generators/Cellular.h
+++ b/include/FastNoise/Generators/Cellular.h
@@ -18,7 +18,7 @@ namespace FastNoise
         void SetJitterModifier( float value ) { mJitterModifier = value; }
 
     protected:
-        HybridSource mMinkowskiP = 0.5f;
+        HybridSource mMinkowskiP = 1.5f;
         HybridSource mJitterModifier = 1.0f;
         DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
     };
@@ -32,7 +32,7 @@ namespace FastNoise
             this->groups.push_back( "Coherent Noise" );
             this->AddVariableEnum( { "Distance Function", "How distance to closest cells is calculated\nHybrid is EuclideanSquared + Manhattan" },
                 DistanceFunction::EuclideanSquared, &Cellular<PARENT>::SetDistanceFunction, kDistanceFunction_Strings );
-            this->AddHybridSource( { "Minkowski P", "Only affects Minkowski distance function\n1 = Manhattan\n2 = Euclidean" }, 0.5f, &Cellular<PARENT>::SetMinkowskiP, &Cellular<PARENT>::SetMinkowskiP );
+            this->AddHybridSource( { "Minkowski P", "Only affects Minkowski distance function\n1 = Manhattan\n2 = Euclidean" }, 1.5f, &Cellular<PARENT>::SetMinkowskiP, &Cellular<PARENT>::SetMinkowskiP );
 
             this->AddHybridSource( { "Jitter Modifier", "Above 1.0 will cause grid artifacts" }, 1.0f, &Cellular<PARENT>::SetJitterModifier, &Cellular<PARENT>::SetJitterModifier );
         }

From 8797057840e2ee1e158a069a769724e2f0bb202a Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 5 May 2024 08:48:49 +0100
Subject: [PATCH 090/139] Revert "TEST hybrid range inputs for
 ScalableGenerator"

This reverts commit f8fd6f85f989688e2fc347dfaa07503575cb1fa6.
---
 include/FastNoise/Generators/BasicGenerators.h   | 9 ++++-----
 include/FastNoise/Generators/BasicGenerators.inl | 5 +----
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index f9846b38..994bb3c7 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -34,19 +34,18 @@ namespace FastNoise
     public:
         void SetOutputMin( float value )
         {
-            //mRangeScale += mRangeMin - value;
+            mRangeScale += mRangeMin - value;
             mRangeMin = value;
         }
 
         void SetOutputMax( float value )
         {
-            mRangeScale = value;
-            //mRangeScale = ( value - mRangeMin );
+            mRangeScale = ( value - mRangeMin );
         }
 
     protected:
-        HybridSource mRangeMin = -1;
-        HybridSource mRangeScale = 2;
+        float mRangeMin = -1;
+        float mRangeScale = 2;
     };
 
 #ifdef FASTNOISE_METADATA
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index 8df29db8..98cede2d 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -19,10 +19,7 @@ class FastSIMD::DispatchClass<FastNoise::VariableRange<PARENT>, SIMD> : public v
 protected:
     FS_FORCEINLINE float32v ScaleOutput( float32v value, float nativeMin, float nativeMax ) const
     {
-        float32v rangeMin = this->GetSourceValue( this->mRangeMin, int32v(), float32v(), float32v() );
-        float32v rangeScale = this->GetSourceValue( this->mRangeScale, int32v(), float32v(), float32v() ) - rangeMin;
-
-        return FS::FMulAdd( float32v( 1.0f / ( nativeMax - nativeMin ) ) * rangeScale, value - float32v( nativeMin ), rangeMin );
+        return FS::FMulAdd( float32v( 1.0f / ( nativeMax - nativeMin ) ) * float32v( this->mRangeScale ), value - float32v( nativeMin ), float32v( this->mRangeMin ) );
     }
 };
 

From a1d5337d108347caaabe663978809cfd37ee210c Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 7 May 2024 21:32:50 +0100
Subject: [PATCH 091/139] Add ability for custom node name formatting

---
 include/FastNoise/Metadata.h |  1 +
 src/FastNoise/Metadata.cpp   | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/include/FastNoise/Metadata.h b/include/FastNoise/Metadata.h
index b1f38a66..f2d68a5f 100644
--- a/include/FastNoise/Metadata.h
+++ b/include/FastNoise/Metadata.h
@@ -190,6 +190,7 @@ namespace FastNoise
         uint16_t id;
         const char* name = "";
         const char* description = "";
+        const char* formattedName = nullptr;
         std::vector<const char*> groups;
 
         std::vector<MemberVariable>   memberVariables;
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 606905b7..d37d2323 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -410,12 +410,21 @@ NodeData* Metadata::DeserialiseNodeData( const char* serialisedBase64NodeData, s
 
 std::string Metadata::FormatMetadataNodeName( const Metadata* metadata, bool removeGroups )
 {
-    std::string string = metadata->name;
-    for( size_t i = 1; i < string.size(); i++ )
+    std::string string;
+
+    if( metadata->formattedName )
+    {
+        string = metadata->formattedName;
+    }
+    else
     {
-        if( ( isdigit( string[i] ) || isupper( string[i] ) ) && islower( string[i - 1] ) )
+        string = metadata->name;
+        for( size_t i = 1; i < string.size(); i++ )
         {
-            string.insert( i++, 1, ' ' );
+            if( ( isdigit( string[i] ) || isupper( string[i] ) ) && islower( string[i - 1] ) )
+            {
+                string.insert( i++, 1, ' ' );
+            }
         }
     }
 

From 4d557e89430605555b65e68ed00a93d72fd42a5d Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 9 May 2024 22:31:23 +0100
Subject: [PATCH 092/139] Use custom vector type in metadata to avoid dll
 linker warnings due to stl types in the API

---
 include/FastNoise/Generators/Generator.h |   7 +-
 include/FastNoise/Metadata.h             | 208 ++++++++++++++---------
 src/FastNoise/Metadata.cpp               |  92 +++++++---
 tools/FastNoiseNodeEditor.cpp            |  57 +++----
 4 files changed, 216 insertions(+), 148 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 8016934e..2958e60f 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -259,7 +259,7 @@ namespace FastNoise
             member.description = nameDesc.desc;
             member.type = MemberVariable::EEnum;
             member.valueDefault = (int)defaultV;
-            member.enumNames = { enumNames... };
+            ( member.enumNames.push_back( enumNames ), ... );
 
             member.setFunc = [func]( Generator* g, MemberVariable::ValueUnion v )
             {
@@ -282,7 +282,10 @@ namespace FastNoise
             member.description = nameDesc.desc;
             member.type = MemberVariable::EEnum;
             member.valueDefault = (int)defaultV;
-            member.enumNames = { enumNames, enumNames + ENUM_NAMES };
+            for( const char* enumName : enumNames )
+            {
+                member.enumNames.push_back( enumName );
+            }
 
             member.setFunc = [func]( Generator* g, MemberVariable::ValueUnion v )
             {
diff --git a/include/FastNoise/Metadata.h b/include/FastNoise/Metadata.h
index f2d68a5f..bbb218bf 100644
--- a/include/FastNoise/Metadata.h
+++ b/include/FastNoise/Metadata.h
@@ -7,9 +7,6 @@
 
 #include "Utility/Config.h"
 
-#pragma warning( push )
-#pragma warning( disable : 4251 )
-
 namespace FastNoise
 {
     class Generator;
@@ -28,58 +25,30 @@ namespace FastNoise
     // Node name, member name+types, functions to set members
     struct FASTNOISE_API Metadata
     {
-        static constexpr float kDefaultUiDragSpeedFloat = 0.02f;
-        static constexpr float kDefaultUiDragSpeedInt = 0.2f;
-
-        virtual ~Metadata() = default;
-
-        /// <returns>Array containing metadata for every FastNoise node type</returns>
-        static const std::vector<const Metadata*>& GetAll()
-        {
-            return sAllMetadata;
-        }
-
-        /// <returns>Metadata for given Metadata::id</returns>
-        static const Metadata* GetFromId( uint16_t nodeId )
-        {
-            // Metadata not loaded yet
-            // Don't try to create nodes from metadata during static initialisation
-            // Metadata is loaded using static variable and static variable init is done in a random order
-            assert( sAllMetadata.size() );
-
-            if( nodeId < sAllMetadata.size() )
-            {
-                return sAllMetadata[nodeId];
-            }
-
-            return nullptr;
-        }
-
-        /// <returns>Metadata for given node class</returns>
         template<typename T>
-        static const Metadata& Get()
+        class FASTNOISE_API Vector
         {
-            static_assert( std::is_base_of<Generator, T>::value, "This function should only be used for FastNoise node classes, for example FastNoise::Simplex" );
-            static_assert( std::is_member_function_pointer<decltype(&T::GetMetadata)>::value, "Cannot get Metadata for abstract node class, use a derived class, for example: Fractal -> FractalFBm" );
-
-            return Impl::GetMetadata<T>();
-        }
-
-        /// <summary>
-        /// Serialise node data and any source node datas (recursive)
-        /// </summary>
-        /// <param name="nodeData">Root node data</param>
-        /// <param name="fixUp">Remove dependency loops and invalid node types</param>
-        /// <returns>Empty string on error</returns>
-        static std::string SerialiseNodeData( NodeData* nodeData, bool fixUp = false );
-
-        /// <summary>
-        /// Deserialise a string created from SerialiseNodeData to a node data tree
-        /// </summary>
-        /// <param name="serialisedBase64NodeData">Encoded string to deserialise</param>
-        /// <param name="nodeDataOut">Storage for new node data</param>
-        /// <returns>Root node</returns>
-        static NodeData* DeserialiseNodeData( const char* serialisedBase64NodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut );
+        public:
+            using const_iterator = const T*;
+            // template solves dll linking when not inlining
+            template<typename = T> const_iterator begin() const { return data() + mStart; }
+            template<typename = T> const_iterator end() const { return data() + mEnd; }
+            template<typename = T> size_t size() const { return mEnd - mStart; }
+            template<typename = T> const T& operator []( size_t i ) const { return begin()[i]; }
+
+        private:
+            template<typename>
+            friend struct MetadataT;
+            friend struct Metadata;
+            friend class Generator;
+            using index_type = uint8_t;
+
+            T* data() const;
+            void push_back( const T& value );
+
+            index_type mStart = (index_type)-1;
+            index_type mEnd = (index_type)-1;
+        };
 
         struct NameDesc
         {
@@ -94,25 +63,9 @@ namespace FastNoise
         {
             const char* name = "";
             const char* description = "";
-            int dimensionIdx = -1;            
+            int dimensionIdx = -1;
         };
 
-        /// <summary>
-        /// Add spaces to node names: DomainScale -> Domain Scale
-        /// </summary>
-        /// <param name="metadata">FastNoise node metadata</param>
-        /// <param name="removeGroups">Removes metadata groups from name: FractalFBm -> FBm</param>
-        /// <returns>string with formatted name</returns>
-        static std::string FormatMetadataNodeName( const Metadata* metadata, bool removeGroups = false );
-
-        /// <summary>
-        /// Adds dimension prefix to member varibles that per-dimension:
-        /// DomainAxisScale::Scale -> X Scale
-        /// </summary>
-        /// <param name="member">FastNoise node metadata member</param>
-        /// <returns>string with formatted name</returns>
-        static std::string FormatMetadataMemberName( const Member& member );
-
         // float, int or enum value
         struct MemberVariable : Member
         {
@@ -157,7 +110,7 @@ namespace FastNoise
             eType type;
             ValueUnion valueDefault, valueMin, valueMax;
             float valueUiDragSpeed = 0;
-            std::vector<const char*> enumNames;
+            Vector<const char*> enumNames;
 
             // Function to set value for given generator
             // Returns true if Generator is correct node class
@@ -187,15 +140,73 @@ namespace FastNoise
             std::function<bool( Generator*, SmartNodeArg<> )> setNodeFunc;
         };
 
-        uint16_t id;
-        const char* name = "";
-        const char* description = "";
-        const char* formattedName = nullptr;
-        std::vector<const char*> groups;
+        static std::pair<int32_t, const char*> DebugCheckVectorStorageSize( int i );
 
-        std::vector<MemberVariable>   memberVariables;
-        std::vector<MemberNodeLookup> memberNodeLookups;
-        std::vector<MemberHybrid>     memberHybrids;
+        virtual ~Metadata() = default;
+
+        /// <returns>Array containing metadata for every FastNoise node type</returns>
+        static const Vector<const Metadata*>& GetAll()
+        {
+            return sAllMetadata;
+        }
+
+        /// <returns>Metadata for given Metadata::id</returns>
+        static const Metadata* GetFromId( uint16_t nodeId )
+        {
+            // Metadata not loaded yet
+            // Don't try to create nodes from metadata during static initialisation
+            // Metadata is loaded using static variable and static variable init is done in a random order
+            assert( sAllMetadata.size() );
+
+            if( nodeId < sAllMetadata.size() )
+            {
+                return sAllMetadata[nodeId];
+            }
+
+            return nullptr;
+        }
+
+        /// <returns>Metadata for given node class</returns>
+        template<typename T>
+        static const Metadata& Get()
+        {
+            static_assert( std::is_base_of<Generator, T>::value, "This function should only be used for FastNoise node classes, for example FastNoise::Simplex" );
+            static_assert( std::is_member_function_pointer<decltype(&T::GetMetadata)>::value, "Cannot get Metadata for abstract node class, use a derived class, for example: Fractal -> FractalFBm" );
+
+            return Impl::GetMetadata<T>();
+        }
+
+        /// <summary>
+        /// Serialise node data and any source node datas (recursive)
+        /// </summary>
+        /// <param name="nodeData">Root node data</param>
+        /// <param name="fixUp">Remove dependency loops and invalid node types</param>
+        /// <returns>Empty string on error</returns>
+        static std::string SerialiseNodeData( NodeData* nodeData, bool fixUp = false );
+
+        /// <summary>
+        /// Deserialise a string created from SerialiseNodeData to a node data tree
+        /// </summary>
+        /// <param name="serialisedBase64NodeData">Encoded string to deserialise</param>
+        /// <param name="nodeDataOut">Storage for new node data</param>
+        /// <returns>Root node</returns>
+        static NodeData* DeserialiseNodeData( const char* serialisedBase64NodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut );
+
+        /// <summary>
+        /// Add spaces to node names: DomainScale -> Domain Scale
+        /// </summary>
+        /// <param name="metadata">FastNoise node metadata</param>
+        /// <param name="removeGroups">Removes metadata groups from name: FractalFBm -> FBm</param>
+        /// <returns>string with formatted name</returns>
+        static std::string FormatMetadataNodeName( const Metadata* metadata, bool removeGroups = false );
+
+        /// <summary>
+        /// Adds dimension prefix to member variables that are per-dimension:
+        /// DomainAxisScale::Scale -> X Scale
+        /// </summary>
+        /// <param name="member">FastNoise node metadata member</param>
+        /// <returns>string with formatted name</returns>
+        static std::string FormatMetadataMemberName( const Member& member );
 
         /// <summary>
         /// Create new instance of a FastNoise node from metadata
@@ -208,28 +219,57 @@ namespace FastNoise
         /// <returns>SmartNode<T> is guaranteed not nullptr</returns>
         virtual SmartNode<> CreateNode( FastSIMD::FeatureSet maxFeatureSet = FastSIMD::FeatureSet::Max ) const = 0;
 
+        uint16_t id;
+        Vector<MemberVariable>   memberVariables;
+        Vector<MemberNodeLookup> memberNodeLookups;
+        Vector<MemberHybrid>     memberHybrids;
+        Vector<const char*>      groups;
+
+        const char* name = "";
+        const char* description = "";
+        const char* formattedName = nullptr;
+
     protected:
         Metadata()
         {
             id = AddMetadata( this );
         }
 
+        static constexpr float kDefaultUiDragSpeedFloat = 0.02f;
+        static constexpr float kDefaultUiDragSpeedInt = 0.2f;
+
     private:
         static uint16_t AddMetadata( const Metadata* newMetadata )
         {
-            sAllMetadata.emplace_back( newMetadata );
+            sAllMetadata.push_back( newMetadata );
 
             return (uint16_t)sAllMetadata.size() - 1;
         }
 
-        static std::vector<const Metadata*> sAllMetadata;
+        static Vector<const Metadata*> sAllMetadata;
     };
 
     // Stores data to create an instance of a FastNoise node
     // Node type, member values
-    struct FASTNOISE_API NodeData
+    struct NodeData
     {
-        NodeData( const Metadata* metadata );
+        NodeData( const Metadata* data )
+        {
+            if( ( metadata = data ) )
+            {
+                for( const Metadata::MemberVariable& value: metadata->memberVariables )
+                {
+                    variables.push_back( value.valueDefault );
+                }
+
+                nodeLookups.assign( metadata->memberNodeLookups.size(), nullptr );
+
+                for( const Metadata::MemberHybrid& value: metadata->memberHybrids )
+                {
+                    hybrids.emplace_back( nullptr, value.valueDefault );
+                }
+            }
+        }
 
         const Metadata* metadata;
         std::vector<Metadata::MemberVariable::ValueUnion> variables;
@@ -245,5 +285,3 @@ namespace FastNoise
         }
     };
 }
-
-#pragma warning( pop )
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index d37d2323..0d285edf 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -13,30 +13,70 @@
 
 using namespace FastNoise;
 
-std::vector<const Metadata*> Metadata::sAllMetadata;
+Metadata::Vector<const Metadata*> Metadata::sAllMetadata;
 
-NodeData::NodeData( const Metadata* data )
-{
-    metadata = data;
+template<typename T>
+constexpr static size_t gMetadataVectorSize = SIZE_MAX;
+
+// Setting these values avoids needless vector resizing and oversizing on startup
+// Sadly there is no way to automate this as they fill up as part of static init
+template<>
+constexpr size_t gMetadataVectorSize<const Metadata*> = 45;
+template<>
+constexpr size_t gMetadataVectorSize<const char*> = 83;
+template<>
+constexpr size_t gMetadataVectorSize<Metadata::MemberVariable> = 75;
+template<>
+constexpr size_t gMetadataVectorSize<Metadata::MemberNodeLookup> = 30;
+template<>
+constexpr size_t gMetadataVectorSize<Metadata::MemberHybrid> = 50;
 
-    if( metadata )
+template<typename T>
+static std::vector<T>& GetVectorStorage()
+{
+    static std::vector<T> v = []()
     {
-        for( const auto& value : metadata->memberVariables )
-        {
-            variables.push_back( value.valueDefault );
-        }
+        std::vector<T> vec;
+        vec.reserve( gMetadataVectorSize<T> );
+        return vec;
+    }();
+    return v;
+}
 
-        for( const auto& value : metadata->memberNodeLookups )
-        {
-            (void)value;
-            nodeLookups.push_back( nullptr );
-        }
+template<typename T>
+static int32_t DebugCheckType()
+{
+    return ( GetVectorStorage<T>().size() == gMetadataVectorSize<T> ? -1 : 1 ) * (int32_t)GetVectorStorage<T>().size();
+}
 
-        for( const auto& value : metadata->memberHybrids )
-        {
-            hybrids.emplace_back( nullptr, value.valueDefault );
-        }
+std::pair<int32_t, const char*> Metadata::DebugCheckVectorStorageSize( int i )
+{
+    switch( i )
+    {
+    case 0: return { DebugCheckType<const Metadata*>(),  "const Metadata*" };
+    case 1: return { DebugCheckType<const char*>(),      "const char*" };
+    case 2: return { DebugCheckType<MemberVariable>(),   "MemberVariable" };
+    case 3: return { DebugCheckType<MemberNodeLookup>(), "MemberNodeLookup" };
+    case 4: return { DebugCheckType<MemberHybrid>(),     "MemberHybrid" };
     }
+    return { 0, nullptr };
+}
+
+template<typename T>
+T* Metadata::Vector<T>::data() const
+{
+    return GetVectorStorage<T>().data();
+}
+
+template<typename T>
+void Metadata::Vector<T>::push_back( const T& value )
+{
+    std::vector<T>& vec = GetVectorStorage<T>();
+    vec.push_back( value );
+    assert( vec.size() <= (index_type)-1 );
+
+    mEnd = (index_type)vec.size() - 1;
+    mStart = std::min( mStart, mEnd++ );
 }
 
 template<typename T>
@@ -55,9 +95,9 @@ bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint
     const Metadata* metadata = nodeData->metadata;
 
     if( !metadata ||
-        nodeData->variables.size() != metadata->memberVariables.size()   ||
-        nodeData->nodeLookups.size()     != metadata->memberNodeLookups.size() ||
-        nodeData->hybrids.size()   != metadata->memberHybrids.size()     )
+        nodeData->variables.size() != metadata->memberVariables.size() ||
+        nodeData->nodeLookups.size() != metadata->memberNodeLookups.size() ||
+        nodeData->hybrids.size() != metadata->memberHybrids.size() )
     {
         assert( 0 ); // Member size mismatch with metadata
         return false;
@@ -168,7 +208,7 @@ bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint
 
     referenceIds.emplace( nodeData, (uint16_t)referenceIds.size() );
 
-    return true; 
+    return true;
 }
 
 std::string Metadata::SerialiseNodeData( NodeData* nodeData, bool fixUp )
@@ -470,14 +510,10 @@ template<typename T>
 std::unique_ptr<const MetadataT<T>> CreateMetadataInstance( const char* className )
 {
     auto* newMetadata = new MetadataT<T>;
-    newMetadata->name = className; 
-    newMetadata->memberVariables.shrink_to_fit();
-    newMetadata->memberNodeLookups.shrink_to_fit();
-    newMetadata->memberHybrids.shrink_to_fit();
-    newMetadata->groups.shrink_to_fit();
+    newMetadata->name = className;
 
     // Node must be in a group or it is not selectable in the UI
-    assert( !newMetadata->groups.empty() ); 
+    assert( newMetadata->groups.size() );
     return std::unique_ptr<const MetadataT<T>>( newMetadata );
 }
 
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/FastNoiseNodeEditor.cpp
index a2451bd4..6a1a13ec 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/FastNoiseNodeEditor.cpp
@@ -89,43 +89,21 @@ void FastNoiseNodeEditor::OpenStandaloneNodeGraph()
     }
 }
 
-static bool MatchingGroup( const std::vector<const char*>& a, const std::vector<const char*>& b )
+static bool MatchingGroup( const FastNoise::Metadata::Vector<const char*>& a, const FastNoise::Metadata::Vector<const char*>& b )
 {
-    // Check if the sizes of the vectors are the same
-    if( a.size() != b.size() )
+    return std::ranges::equal( a, b, []( auto& x, auto& y )
     {
-        return false;
-    }
-
-    // Directly compare each corresponding pair of strings
-    for( size_t i = 0; i < a.size(); ++i )
-    {
-        if( std::string_view( a[i] ) != std::string_view( b[i] ) )
-        {
-            return false;
-        }
-    }
-
-    // All pairs matched
-    return true;
+        return std::strcmp( x, y ) == 0;
+    } );
 }
 
 template<typename T>
-static bool MatchingMembers( const std::vector<T>& a, const std::vector<T>& b )
+static bool MatchingMembers( const FastNoise::Metadata::Vector<T>& a, const FastNoise::Metadata::Vector<T>& b )
 {
-    if( a.size() != b.size() )
-    {
-        return false;
-    }
-
-    for( size_t i = 0; i < a.size(); i++ )
+    return std::ranges::equal( a, b, []( auto& x, auto& y )
     {
-        if( std::string_view( a[i].name ) != std::string_view( b[i].name ) )
-        {
-            return false;
-        }
-    }
-    return true;
+        return std::strcmp( x.name, y.name ) == 0;
+    } );
 }
 
 static std::string TimeWithUnits( int64_t time, int significantDigits = 3 )
@@ -669,7 +647,20 @@ FastNoiseNodeEditor::FastNoiseNodeEditor( NodeEditorApp& nodeEditorApp ) :
 
         metaDataGroup->items.emplace_back( mContextMetadata.emplace_back( new MetadataMenuItem( metadata ) ).get() );
         std::sort( metaDataGroup->items.begin(), metaDataGroup->items.end(), menuSort );
-    }    
+    }
+
+    int debugMetadataVectorCheckIdx = 0;
+    std::pair<int32_t, const char*> state;
+    do
+    {
+        state = FastNoise::Metadata::DebugCheckVectorStorageSize( debugMetadataVectorCheckIdx++ );
+        if( state.first > 0 )
+        {
+            Error{} << "Non-optimal metadata vector, in FastNoise Metadata.cpp adjust gMetadataVectorSize " << state.second << " to: " << state.first;
+        }
+
+    } while( state.second );
+
 }
 
 FastNoiseNodeEditor::~FastNoiseNodeEditor()
@@ -1194,7 +1185,7 @@ void FastNoiseNodeEditor::DoNodes()
             break;
             case FastNoise::Metadata::MemberVariable::EEnum:
             {
-                if( ImGui::Combo( formatName.c_str(), &nodeData->variables[i].i, nodeVar.enumNames.data(), (int)nodeVar.enumNames.size() ) ||
+                if( ImGui::Combo( formatName.c_str(), &nodeData->variables[i].i, nodeVar.enumNames.begin(), (int)nodeVar.enumNames.size() ) ||
                     ImGuiExtra::ScrollCombo( &nodeData->variables[i].i, (int)nodeVar.enumNames.size() ) )
                 {
                     node.second.GeneratePreview();
@@ -1405,7 +1396,7 @@ void FastNoiseNodeEditor::DoContextMenu()
 
         auto newMetadata = mContextMetadata.front()->DrawUI( []( const FastNoise::Metadata* metadata )
         {
-            return !metadata->memberNodeLookups.empty() || !metadata->memberHybrids.empty();
+            return metadata->memberNodeLookups.size() || metadata->memberHybrids.size();
         } );
 
         if( newMetadata )

From 000bffa08ff79ecfd688d649044a5c093dddc379 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 9 May 2024 22:47:58 +0100
Subject: [PATCH 093/139] Fix benchmark build

---
 tests/FastNoiseBenchmark.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index 73c2c0ab..6a96b54a 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -139,7 +139,7 @@ int main( int argc, char** argv )
         {
             const char* groupName = "Misc";
 
-            if( !metadata->groups.empty() )
+            if( metadata->groups.size() )
             {
                 groupName = metadata->groups[metadata->groups.size() - 1];
             }

From 53cfcd752a0bb3e2fb9ba66340e74e2d7182c6c6 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 9 May 2024 23:10:25 +0100
Subject: [PATCH 094/139] Explicitly instantiate the metadata vectors to avoid
 linker issues

---
 include/FastNoise/Metadata.h | 10 +++++-----
 src/FastNoise/Metadata.cpp   |  6 ++++++
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/include/FastNoise/Metadata.h b/include/FastNoise/Metadata.h
index bbb218bf..55f35a2d 100644
--- a/include/FastNoise/Metadata.h
+++ b/include/FastNoise/Metadata.h
@@ -30,11 +30,11 @@ namespace FastNoise
         {
         public:
             using const_iterator = const T*;
-            // template solves dll linking when not inlining
-            template<typename = T> const_iterator begin() const { return data() + mStart; }
-            template<typename = T> const_iterator end() const { return data() + mEnd; }
-            template<typename = T> size_t size() const { return mEnd - mStart; }
-            template<typename = T> const T& operator []( size_t i ) const { return begin()[i]; }
+
+            const_iterator begin() const { return data() + mStart; }
+            const_iterator end() const { return data() + mEnd; }
+            size_t size() const { return mEnd - mStart; }
+            const T& operator []( size_t i ) const { return begin()[i]; }
 
         private:
             template<typename>
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 0d285edf..adb97fee 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -79,6 +79,12 @@ void Metadata::Vector<T>::push_back( const T& value )
     mStart = std::min( mStart, mEnd++ );
 }
 
+template class Metadata::Vector<const Metadata*>;
+template class Metadata::Vector<const char*>;
+template class Metadata::Vector<Metadata::MemberVariable>;
+template class Metadata::Vector<Metadata::MemberNodeLookup>;
+template class Metadata::Vector<Metadata::MemberHybrid>;
+
 template<typename T>
 void AddToDataStream( std::vector<uint8_t>& dataStream, T value )
 {

From ccd1d1092ff81ba0451c1a5fe9d34e5ea145c6d0 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Fri, 10 May 2024 15:47:17 +0100
Subject: [PATCH 095/139] Convert nodeid to uint8

---
 include/FastNoise/Metadata.h  | 10 ++++++----
 src/FastNoise/FastNoise_C.cpp | 28 ++++++++++++++--------------
 src/FastNoise/Metadata.cpp    | 10 +++++-----
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/include/FastNoise/Metadata.h b/include/FastNoise/Metadata.h
index 55f35a2d..7b992251 100644
--- a/include/FastNoise/Metadata.h
+++ b/include/FastNoise/Metadata.h
@@ -140,6 +140,8 @@ namespace FastNoise
             std::function<bool( Generator*, SmartNodeArg<> )> setNodeFunc;
         };
 
+        using node_id = uint8_t;
+
         static std::pair<int32_t, const char*> DebugCheckVectorStorageSize( int i );
 
         virtual ~Metadata() = default;
@@ -151,7 +153,7 @@ namespace FastNoise
         }
 
         /// <returns>Metadata for given Metadata::id</returns>
-        static const Metadata* GetFromId( uint16_t nodeId )
+        static const Metadata* GetFromId( node_id nodeId )
         {
             // Metadata not loaded yet
             // Don't try to create nodes from metadata during static initialisation
@@ -219,7 +221,7 @@ namespace FastNoise
         /// <returns>SmartNode<T> is guaranteed not nullptr</returns>
         virtual SmartNode<> CreateNode( FastSIMD::FeatureSet maxFeatureSet = FastSIMD::FeatureSet::Max ) const = 0;
 
-        uint16_t id;
+        node_id id;
         Vector<MemberVariable>   memberVariables;
         Vector<MemberNodeLookup> memberNodeLookups;
         Vector<MemberHybrid>     memberHybrids;
@@ -239,11 +241,11 @@ namespace FastNoise
         static constexpr float kDefaultUiDragSpeedInt = 0.2f;
 
     private:
-        static uint16_t AddMetadata( const Metadata* newMetadata )
+        static node_id AddMetadata( const Metadata* newMetadata )
         {
             sAllMetadata.push_back( newMetadata );
 
-            return (uint16_t)sAllMetadata.size() - 1;
+            return (node_id)sAllMetadata.size() - 1;
         }
 
         static Vector<const Metadata*> sAllMetadata;
diff --git a/src/FastNoise/FastNoise_C.cpp b/src/FastNoise/FastNoise_C.cpp
index 954fbcd7..7c28f0ea 100644
--- a/src/FastNoise/FastNoise_C.cpp
+++ b/src/FastNoise/FastNoise_C.cpp
@@ -102,7 +102,7 @@ int fnGetMetadataCount()
 
 const char* fnGetMetadataName( int id )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         return metadata->name;
     }
@@ -111,7 +111,7 @@ const char* fnGetMetadataName( int id )
 
 void* fnNewFromMetadata( int id, unsigned simdLevel )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         return new FastNoise::SmartNode<>( metadata->CreateNode( (FastSIMD::FeatureSet)simdLevel ) );
     }
@@ -120,7 +120,7 @@ void* fnNewFromMetadata( int id, unsigned simdLevel )
 
 int fnGetMetadataVariableCount( int id )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         return (int)metadata->memberVariables.size();
     }
@@ -129,7 +129,7 @@ int fnGetMetadataVariableCount( int id )
 
 const char* fnGetMetadataVariableName( int id, int variableIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)variableIndex < metadata->memberVariables.size() )
         {
@@ -142,7 +142,7 @@ const char* fnGetMetadataVariableName( int id, int variableIndex )
 
 int fnGetMetadataVariableType( int id, int variableIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)variableIndex < metadata->memberVariables.size() )
         {
@@ -155,7 +155,7 @@ int fnGetMetadataVariableType( int id, int variableIndex )
 
 int fnGetMetadataVariableDimensionIdx( int id, int variableIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)variableIndex < metadata->memberVariables.size() )
         {
@@ -168,7 +168,7 @@ int fnGetMetadataVariableDimensionIdx( int id, int variableIndex )
 
 int fnGetMetadataEnumCount( int id, int variableIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)variableIndex < metadata->memberVariables.size() )
         {
@@ -181,7 +181,7 @@ int fnGetMetadataEnumCount( int id, int variableIndex )
 
 const char* fnGetMetadataEnumName( int id, int variableIndex, int enumIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)variableIndex < metadata->memberVariables.size() )
         {
@@ -218,7 +218,7 @@ bool fnSetVariableIntEnum( void* node, int variableIndex, int value )
 
 int fnGetMetadataNodeLookupCount( int id )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         return (int)metadata->memberNodeLookups.size();
     }
@@ -227,7 +227,7 @@ int fnGetMetadataNodeLookupCount( int id )
 
 const char* fnGetMetadataNodeLookupName( int id, int nodeLookupIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)nodeLookupIndex < metadata->memberNodeLookups.size() )
         {
@@ -240,7 +240,7 @@ const char* fnGetMetadataNodeLookupName( int id, int nodeLookupIndex )
 
 int fnGetMetadataNodeLookupDimensionIdx( int id, int nodeLookupIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)nodeLookupIndex < metadata->memberNodeLookups.size() )
         {
@@ -263,7 +263,7 @@ bool fnSetNodeLookup( void* node, int nodeLookupIndex, const void* nodeLookup )
 
 int fnGetMetadataHybridCount( int id )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         return (int)metadata->memberHybrids.size();
     }
@@ -272,7 +272,7 @@ int fnGetMetadataHybridCount( int id )
 
 const char* fnGetMetadataHybridName( int id, int hybridIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)hybridIndex < metadata->memberHybrids.size() )
         {
@@ -285,7 +285,7 @@ const char* fnGetMetadataHybridName( int id, int hybridIndex )
 
 int fnGetMetadataHybridDimensionIdx( int id, int hybridIndex )
 {
-    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (uint16_t)id ) )
+    if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
         if( (size_t)hybridIndex < metadata->memberHybrids.size() )
         {
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index adb97fee..c186f999 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -138,7 +138,7 @@ bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint
     {
         // UINT16_MAX where node ID should be
         // Referenced by index in reference array, array ordering will match on decode
-        AddToDataStream( dataStream, std::numeric_limits<uint16_t>::max() );
+        AddToDataStream( dataStream, std::numeric_limits<Metadata::node_id>::max() );
         AddToDataStream( dataStream, reference->second );
         return true;
     }
@@ -245,14 +245,14 @@ bool GetFromDataStream( const std::vector<uint8_t>& dataStream, size_t& idx, T&
 
 SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialisedNodeData, size_t& serialIdx, std::vector<SmartNode<>>& referenceNodes, FastSIMD::FeatureSet level = FastSIMD::FeatureSet::Max )
 {
-    uint16_t nodeId;
+    Metadata::node_id nodeId;
     if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
     {
         return nullptr;
     }
 
     // UINT16_MAX indicates a reference node
-    if( nodeId == std::numeric_limits<uint16_t>::max() )
+    if( nodeId == std::numeric_limits<Metadata::node_id>::max() )
     {
         uint16_t referenceId;
         if( !GetFromDataStream( serialisedNodeData, serialIdx, referenceId ) )
@@ -359,14 +359,14 @@ SmartNode<> FastNoise::NewFromEncodedNodeTree( const char* serialisedBase64NodeD
 
 NodeData* DeserialiseNodeDataInternal( const std::vector<uint8_t>& serialisedNodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut, size_t& serialIdx )
 {
-    uint16_t nodeId;
+    Metadata::node_id nodeId;
     if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
     {
         return nullptr;
     }
 
     // UINT16_MAX indicates a reference node
-    if( nodeId == std::numeric_limits<uint16_t>::max() )
+    if( nodeId == std::numeric_limits<Metadata::node_id>::max() )
     {
         uint16_t referenceId;
         if( !GetFromDataStream( serialisedNodeData, serialIdx, referenceId ) )

From 94a8aa3b33a90e899533bf14e755428c93259faf Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 18 May 2024 21:29:02 +0100
Subject: [PATCH 096/139] DomainWarp Gradient calculate contribution per point
 to avoid lerp

---
 include/FastNoise/Generators/DomainWarp.inl | 117 +++++++++++---------
 1 file changed, 63 insertions(+), 54 deletions(-)

diff --git a/include/FastNoise/Generators/DomainWarp.inl b/include/FastNoise/Generators/DomainWarp.inl
index e086163a..e8a12f06 100644
--- a/include/FastNoise/Generators/DomainWarp.inl
+++ b/include/FastNoise/Generators/DomainWarp.inl
@@ -38,25 +38,32 @@ public:
         int32v x1 = x0 + int32v( Primes::X );
         int32v y1 = y0 + int32v( Primes::Y );
 
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
+        float32v xs1 = InterpHermite( x - xs );
+        float32v ys1 = InterpHermite( y - ys );
+        float32v xs0 = float32v( 1 ) - xs1;
+        float32v ys0 = float32v( 1 ) - ys1;
+
+        float32v normalise( 1.0f / (0xffff / 2.0f) );
 
     #define GRADIENT_COORD( _x, _y )\
         int32v hash##_x##_y = HashPrimesHB(seed, x##_x, y##_y );\
-        float32v x##_x##_y = FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) );\
-        float32v y##_x##_y = FS::Convert<float>( (hash##_x##_y >> 16) & int32v( 0xffff ) );
+        float32v contrib##_x##_y = normalise * xs##_x * ys##_y;\
+        xWarp = FS::FMulAdd( contrib##_x##_y, FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) ), xWarp );\
+        yWarp = FS::FMulAdd( contrib##_x##_y, FS::Convert<float>( FS::BitShiftRightZeroExtend( hash##_x##_y, 16) ), yWarp )
+
+        int32v hash00 = HashPrimesHB(seed, x0, y0 );
+        float32v contrib00 = normalise * xs0 * ys0;
+        float32v xWarp = contrib00 * FS::Convert<float>( hash00 & int32v( 0xffff ) );
+        float32v yWarp = contrib00 * FS::Convert<float>( FS::BitShiftRightZeroExtend( hash00, 16) );
 
-        GRADIENT_COORD( 0, 0 );
         GRADIENT_COORD( 1, 0 );
         GRADIENT_COORD( 0, 1 );
         GRADIENT_COORD( 1, 1 );
 
     #undef GRADIENT_COORD
 
-        float32v normalise = float32v( 1.0f / (0xffff / 2.0f) );
-
-        float32v xWarp = (Lerp( Lerp( x00, x10, xs ), Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( Lerp( y00, y10, xs ), Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+        xWarp -= float32v( 1 );
+        yWarp -= float32v( 1 );
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
@@ -79,17 +86,28 @@ public:
         int32v y1 = y0 + int32v( Primes::Y );
         int32v z1 = z0 + int32v( Primes::Z );
 
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
-        zs = InterpHermite( z - zs );
+        float32v xs1 = InterpHermite( x - xs );
+        float32v ys1 = InterpHermite( y - ys );
+        float32v zs1 = InterpHermite( z - zs );
+        float32v xs0 = float32v( 1 ) - xs1;
+        float32v ys0 = float32v( 1 ) - ys1;
+        float32v zs0 = float32v( 1 ) - zs1;
+
+        float32v normalise( 1.0f / (0x3ff / 2.0f) );
 
     #define GRADIENT_COORD( _x, _y, _z )\
         int32v hash##_x##_y##_z = HashPrimesHB( seed, x##_x, y##_y, z##_z );\
-        float32v x##_x##_y##_z = FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) );\
-        float32v y##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
-        float32v z##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
+        float32v contrib##_x##_y##_z = normalise * xs##_x * ys##_y * zs##_z;\
+        xWarp = FS::FMulAdd( contrib##_x##_y##_z, FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) ), xWarp );\
+        yWarp = FS::FMulAdd( contrib##_x##_y##_z, FS::Convert<float>( (hash##_x##_y##_z >> 11) & int32v( 0x3ff ) ), yWarp );\
+        zWarp = FS::FMulAdd( contrib##_x##_y##_z, FS::Convert<float>( FS::BitShiftRightZeroExtend( hash##_x##_y##_z, 22 ) ), zWarp )
+
+        int32v hash000 = HashPrimesHB( seed, x0, y0, z0 );
+        float32v contrib000 = normalise * xs0 * ys0 * zs0;
+        float32v xWarp = contrib000 * FS::Convert<float>( hash000 & int32v( 0x3ff ) );
+        float32v yWarp = contrib000 * FS::Convert<float>( (hash000 >> 11) & int32v( 0x3ff ) );
+        float32v zWarp = contrib000 * FS::Convert<float>( FS::BitShiftRightZeroExtend( hash000, 22 ) );
 
-        GRADIENT_COORD( 0, 0, 0 );
         GRADIENT_COORD( 1, 0, 0 );
         GRADIENT_COORD( 0, 1, 0 );
         GRADIENT_COORD( 1, 1, 0 );
@@ -100,19 +118,9 @@ public:
 
     #undef GRADIENT_COORD
 
-        float32v x0z = Lerp( Lerp( x000, x100, xs ), Lerp( x010, x110, xs ), ys );
-        float32v y0z = Lerp( Lerp( y000, y100, xs ), Lerp( y010, y110, xs ), ys );
-        float32v z0z = Lerp( Lerp( z000, z100, xs ), Lerp( z010, z110, xs ), ys );
-                   
-        float32v x1z = Lerp( Lerp( x001, x101, xs ), Lerp( x011, x111, xs ), ys );
-        float32v y1z = Lerp( Lerp( y001, y101, xs ), Lerp( y011, y111, xs ), ys );
-        float32v z1z = Lerp( Lerp( z001, z101, xs ), Lerp( z011, z111, xs ), ys );
-
-        float32v normalise = float32v( 1.0f / (0x3ff / 2.0f) );
-
-        float32v xWarp = (Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v zWarp = (Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
+        xWarp -= float32v( 1 );
+        yWarp -= float32v( 1 );
+        zWarp -= float32v( 1 );
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
@@ -139,19 +147,32 @@ public:
         int32v z1 = z0 + int32v( Primes::Z );
         int32v w1 = w0 + int32v( Primes::W );
 
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
-        zs = InterpHermite( z - zs );
-        ws = InterpHermite( w - ws );
+        float32v xs1 = InterpHermite( x - xs );
+        float32v ys1 = InterpHermite( y - ys );
+        float32v zs1 = InterpHermite( z - zs );
+        float32v ws1 = InterpHermite( w - ws );
+        float32v xs0 = float32v( 1 ) - xs1;
+        float32v ys0 = float32v( 1 ) - ys1;
+        float32v zs0 = float32v( 1 ) - zs1;
+        float32v ws0 = float32v( 1 ) - ws1;
+
+        float32v normalise( 1.0f / (0xff / 2.0f) );
 
     #define GRADIENT_COORD( _x, _y, _z, _w )\
         int32v hash##_x##_y##_z##_w = HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
-        float32v x##_x##_y##_z##_w = FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) );\
-        float32v y##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
-        float32v z##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
-        float32v w##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
+        float32v contrib##_x##_y##_z##_w = normalise * xs##_x * ys##_y * zs##_z * ws##_w;\
+        xWarp = FS::FMulAdd( contrib##_x##_y##_z##_w, FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) ), xWarp );\
+        yWarp = FS::FMulAdd( contrib##_x##_y##_z##_w, FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) ), yWarp );\
+        zWarp = FS::FMulAdd( contrib##_x##_y##_z##_w, FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) ), zWarp );\
+        wWarp = FS::FMulAdd( contrib##_x##_y##_z##_w, FS::Convert<float>( FS::BitShiftRightZeroExtend( hash##_x##_y##_z##_w, 24 ) ), wWarp )
+
+        int32v hash0000 = HashPrimesHB( seed, x0, y0, z0, w0 );
+        float32v contrib0000 = normalise * xs0 * ys0 * zs0 * ws0;
+        float32v xWarp = contrib0000 * FS::Convert<float>( hash0000 & int32v( 0xff ) );
+        float32v yWarp = contrib0000 * FS::Convert<float>( (hash0000 >> 8) & int32v( 0xff ) );
+        float32v zWarp = contrib0000 * FS::Convert<float>( (hash0000 >> 16) & int32v( 0xff ) );
+        float32v wWarp = contrib0000 * FS::Convert<float>( FS::BitShiftRightZeroExtend( hash0000, 24 ) );
 
-        GRADIENT_COORD( 0, 0, 0, 0 );
         GRADIENT_COORD( 1, 0, 0, 0 );
         GRADIENT_COORD( 0, 1, 0, 0 );
         GRADIENT_COORD( 1, 1, 0, 0 );
@@ -170,22 +191,10 @@ public:
 
     #undef GRADIENT_COORD
 
-        float32v x0w = Lerp( Lerp( Lerp( x0000, x1000, xs ), Lerp( x0100, x1100, xs ), ys ), Lerp( Lerp( x0010, x1010, xs ), Lerp( x0110, x1110, xs ), ys ), zs );
-        float32v y0w = Lerp( Lerp( Lerp( y0000, y1000, xs ), Lerp( y0100, y1100, xs ), ys ), Lerp( Lerp( y0010, y1010, xs ), Lerp( y0110, y1110, xs ), ys ), zs );
-        float32v z0w = Lerp( Lerp( Lerp( z0000, z1000, xs ), Lerp( z0100, z1100, xs ), ys ), Lerp( Lerp( z0010, z1010, xs ), Lerp( z0110, z1110, xs ), ys ), zs );
-        float32v w0w = Lerp( Lerp( Lerp( w0000, w1000, xs ), Lerp( w0100, w1100, xs ), ys ), Lerp( Lerp( w0010, w1010, xs ), Lerp( w0110, w1110, xs ), ys ), zs );
-
-        float32v x1w = Lerp( Lerp( Lerp( x0001, x1001, xs ), Lerp( x0101, x1101, xs ), ys ), Lerp( Lerp( x0011, x1011, xs ), Lerp( x0111, x1111, xs ), ys ), zs );
-        float32v y1w = Lerp( Lerp( Lerp( y0001, y1001, xs ), Lerp( y0101, y1101, xs ), ys ), Lerp( Lerp( y0011, y1011, xs ), Lerp( y0111, y1111, xs ), ys ), zs );
-        float32v z1w = Lerp( Lerp( Lerp( z0001, z1001, xs ), Lerp( z0101, z1101, xs ), ys ), Lerp( Lerp( z0011, z1011, xs ), Lerp( z0111, z1111, xs ), ys ), zs );
-        float32v w1w = Lerp( Lerp( Lerp( w0001, w1001, xs ), Lerp( w0101, w1101, xs ), ys ), Lerp( Lerp( w0011, w1011, xs ), Lerp( w0111, w1111, xs ), ys ), zs );                        
-
-        float32v normalise = float32v( 1.0f / (0xff / 2.0f) );
-
-        float32v xWarp = (Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v zWarp = (Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v wWarp = (Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
+        xWarp -= float32v( 1 );
+        yWarp -= float32v( 1 );
+        zWarp -= float32v( 1 );
+        wWarp -= float32v( 1 );
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );

From 4e36b8d94efa1d427539c766f09a8d5e79e4af82 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 19 May 2024 19:32:56 +0100
Subject: [PATCH 097/139] Change serialisation to cope with variables being
 added/removed from nodes

---
 include/FastNoise/Generators/Generator.h |   6 +-
 src/FastNoise/FastSIMD_Build.inl         |  40 ++--
 src/FastNoise/Metadata.cpp               | 221 +++++++++++++++++------
 tools/DemoNodeTrees.inl                  |   4 +-
 4 files changed, 188 insertions(+), 83 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 2958e60f..19a93318 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -303,7 +303,7 @@ namespace FastNoise
         template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
         void AddPerDimensionVariable( NameDesc nameDesc, T defaultV, U&& func, T minV = 0, T maxV = 0, float uiDragSpeed = std::is_same_v<T, float> ? Metadata::kDefaultUiDragSpeedFloat : Metadata::kDefaultUiDragSpeedInt )
         {
-            for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<T>::varArray ) / sizeof( *PerDimensionVariable<T>::varArray ); idx++ )
+            for( int idx = 0; (size_t)idx < (size_t)Dim::Count; idx++ )
             {
                 MemberVariable member;
                 member.name = nameDesc.name;
@@ -360,7 +360,7 @@ namespace FastNoise
             using GeneratorSourceT = typename std::invoke_result_t<U, GetArg<U, 0>>::type::Type;
             using T = typename GeneratorSourceT::Type;
 
-            for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<GeneratorSourceT>::varArray ) / sizeof( *PerDimensionVariable<GeneratorSourceT>::varArray ); idx++ )
+            for( int idx = 0; (size_t)idx < (size_t)Dim::Count; idx++ )
             {
                 MemberNodeLookup member;
                 member.name = nameDesc.name;
@@ -428,7 +428,7 @@ namespace FastNoise
             using HybridSourceT = typename std::invoke_result_t<U, GetArg<U, 0>>::type::Type;
             using T = typename HybridSourceT::Type;
 
-            for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<HybridSourceT>::varArray ) / sizeof( *PerDimensionVariable<HybridSourceT>::varArray ); idx++ )
+            for( int idx = 0; (size_t)idx < (size_t)Dim::Count; idx++ )
             {
                 MemberHybrid member;
                 member.name = nameDesc.name;
diff --git a/src/FastNoise/FastSIMD_Build.inl b/src/FastNoise/FastSIMD_Build.inl
index cab7aa85..ec7a8a2d 100644
--- a/src/FastNoise/FastSIMD_Build.inl
+++ b/src/FastNoise/FastSIMD_Build.inl
@@ -90,11 +90,10 @@ FASTNOISE_REGISTER_NODE( SineWave );
 FASTNOISE_REGISTER_NODE( PositionOutput );
 FASTNOISE_REGISTER_NODE( DistanceToPoint );
 
-FASTNOISE_REGISTER_NODE( Value );
-FASTNOISE_REGISTER_NODE( Perlin );
 FASTNOISE_REGISTER_NODE( Simplex );
 FASTNOISE_REGISTER_NODE( OpenSimplex2 );
-FASTNOISE_REGISTER_NODE( OpenSimplex2S );
+FASTNOISE_REGISTER_NODE( Perlin );
+FASTNOISE_REGISTER_NODE( Value );
                        
 FASTNOISE_REGISTER_NODE( CellularValue );
 FASTNOISE_REGISTER_NODE( CellularDistance );
@@ -103,36 +102,39 @@ FASTNOISE_REGISTER_NODE( CellularLookup );
 FASTNOISE_REGISTER_NODE( FractalFBm );
 FASTNOISE_REGISTER_NODE( FractalPingPong );
 FASTNOISE_REGISTER_NODE( FractalRidged );
-                       
+
+FASTNOISE_REGISTER_NODE( DomainWarpOpenSimplex );
+FASTNOISE_REGISTER_NODE( OpenSimplex2S );
 FASTNOISE_REGISTER_NODE( DomainWarpGradient );
+
 FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );
 FASTNOISE_REGISTER_NODE( DomainWarpFractalIndependant );
                        
-FASTNOISE_REGISTER_NODE( DomainScale );
-FASTNOISE_REGISTER_NODE( DomainOffset );
-FASTNOISE_REGISTER_NODE( DomainRotate );
-FASTNOISE_REGISTER_NODE( SeedOffset );
-FASTNOISE_REGISTER_NODE( Remap );
-FASTNOISE_REGISTER_NODE( ConvertRGBA8 );
-                       
 FASTNOISE_REGISTER_NODE( Add );
 FASTNOISE_REGISTER_NODE( Subtract );
 FASTNOISE_REGISTER_NODE( Multiply );
 FASTNOISE_REGISTER_NODE( Divide );
+
+FASTNOISE_REGISTER_NODE( Abs );
 FASTNOISE_REGISTER_NODE( Min );
 FASTNOISE_REGISTER_NODE( Max );
 FASTNOISE_REGISTER_NODE( MinSmooth );
 FASTNOISE_REGISTER_NODE( MaxSmooth );
-FASTNOISE_REGISTER_NODE( Fade );
-                       
-FASTNOISE_REGISTER_NODE( Terrace );
+FASTNOISE_REGISTER_NODE( SquareRoot );
 FASTNOISE_REGISTER_NODE( PowFloat );
 FASTNOISE_REGISTER_NODE( PowInt );
+
+FASTNOISE_REGISTER_NODE( DomainScale );
+FASTNOISE_REGISTER_NODE( DomainOffset );
+FASTNOISE_REGISTER_NODE( DomainRotate );
 FASTNOISE_REGISTER_NODE( DomainAxisScale );
-FASTNOISE_REGISTER_NODE( AddDimension );
-FASTNOISE_REGISTER_NODE( RemoveDimension );
+
+FASTNOISE_REGISTER_NODE( SeedOffset );
+FASTNOISE_REGISTER_NODE( ConvertRGBA8 );
 FASTNOISE_REGISTER_NODE( GeneratorCache );
-FASTNOISE_REGISTER_NODE( SquareRoot );
-FASTNOISE_REGISTER_NODE( Abs );
 
-FASTNOISE_REGISTER_NODE( DomainWarpOpenSimplex );
\ No newline at end of file
+FASTNOISE_REGISTER_NODE( Fade );
+FASTNOISE_REGISTER_NODE( Remap );
+FASTNOISE_REGISTER_NODE( Terrace );
+FASTNOISE_REGISTER_NODE( AddDimension );
+FASTNOISE_REGISTER_NODE( RemoveDimension );
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index c186f999..4119d04e 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -16,7 +16,7 @@ using namespace FastNoise;
 Metadata::Vector<const Metadata*> Metadata::sAllMetadata;
 
 template<typename T>
-constexpr static size_t gMetadataVectorSize = SIZE_MAX;
+constexpr static std::nullptr_t gMetadataVectorSize = nullptr; // Invalid
 
 // Setting these values avoids needless vector resizing and oversizing on startup
 // Sadly there is no way to automate this as they fill up as part of static init
@@ -85,8 +85,19 @@ template class Metadata::Vector<Metadata::MemberVariable>;
 template class Metadata::Vector<Metadata::MemberNodeLookup>;
 template class Metadata::Vector<Metadata::MemberHybrid>;
 
+union MemberLookup
+{
+    struct
+    {
+        uint8_t type : 2;
+        uint8_t index : 6;
+    } member;
+
+    uint8_t data;
+};
+
 template<typename T>
-void AddToDataStream( std::vector<uint8_t>& dataStream, T value )
+static void AddToDataStream( std::vector<uint8_t>& dataStream, T value )
 {
     for( size_t i = 0; i < sizeof( T ); i++ )
     {
@@ -94,7 +105,16 @@ void AddToDataStream( std::vector<uint8_t>& dataStream, T value )
     }
 }
 
-bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint8_t>& dataStream, std::unordered_map<const NodeData*, uint16_t>& referenceIds, std::unordered_set<const NodeData*> dependencies = {} )
+static void AddMemberLookupToDataStream( std::vector<uint8_t>& dataStream,  uint8_t type, uint8_t index )
+{
+    MemberLookup memberLookup;
+    memberLookup.member.type = type;
+    memberLookup.member.index = index;
+    AddToDataStream( dataStream, memberLookup.data );
+}
+
+
+static bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint8_t>& dataStream, std::unordered_map<const NodeData*, uint16_t>& referenceIds, std::unordered_set<const NodeData*> dependencies = {} )
 {
     // dependencies passed by value to avoid false positives from other branches in the node tree
 
@@ -136,7 +156,7 @@ bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint
 
     if( reference != referenceIds.end() )
     {
-        // UINT16_MAX where node ID should be
+        // UINT8_MAX where node ID should be
         // Referenced by index in reference array, array ordering will match on decode
         AddToDataStream( dataStream, std::numeric_limits<Metadata::node_id>::max() );
         AddToDataStream( dataStream, reference->second );
@@ -149,7 +169,17 @@ bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint
     // Member variables
     for( size_t i = 0; i < metadata->memberVariables.size(); i++ )
     {
-        AddToDataStream( dataStream, nodeData->variables[i].i );
+        if( nodeData->variables[i].i != metadata->memberVariables[i].valueDefault.i )
+        {
+            AddMemberLookupToDataStream( dataStream, 0, i );
+
+            AddToDataStream( dataStream, nodeData->variables[i].i );
+        }
+    }
+
+    if( metadata->memberNodeLookups.size() )
+    {
+        AddMemberLookupToDataStream( dataStream, 1, (uint8_t)metadata->memberNodeLookups.size() );
     }
 
     // Member nodes
@@ -177,17 +207,16 @@ bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint
     // Member hybrids
     for( size_t i = 0; i < metadata->memberHybrids.size(); i++ )
     {
-        // 1 byte to indicate:
-        // 0 = constant float value
-        // 1 = node lookup
-
         if( !nodeData->hybrids[i].first )
         {
-            AddToDataStream( dataStream, (uint8_t)0 );
+            if( nodeData->hybrids[i].second != metadata->memberHybrids[i].valueDefault )
+            {
+                AddMemberLookupToDataStream( dataStream, 2, i );
 
-            Metadata::MemberVariable::ValueUnion v = nodeData->hybrids[i].second;
+                Metadata::MemberVariable::ValueUnion v = nodeData->hybrids[i].second;
 
-            AddToDataStream( dataStream, v.i );
+                AddToDataStream( dataStream, v.i );
+            }
         }
         else
         {
@@ -204,7 +233,8 @@ bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint
                 }
             }
 
-            AddToDataStream( dataStream, (uint8_t)1 );
+            AddMemberLookupToDataStream( dataStream, 3, i );
+
             if( !SerialiseNodeDataInternal( nodeData->hybrids[i].first, fixUp, dataStream, referenceIds, dependencies ) )
             {
                 return false;
@@ -212,6 +242,9 @@ bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint
         }
     }
 
+    // Mark end of node
+    AddToDataStream( dataStream, (uint8_t)255 );
+
     referenceIds.emplace( nodeData, (uint16_t)referenceIds.size() );
 
     return true;
@@ -230,7 +263,7 @@ std::string Metadata::SerialiseNodeData( NodeData* nodeData, bool fixUp )
 }
 
 template<typename T>
-bool GetFromDataStream( const std::vector<uint8_t>& dataStream, size_t& idx, T& value )
+static bool GetFromDataStream( const std::vector<uint8_t>& dataStream, size_t& idx, T& value )
 {
     if( dataStream.size() < idx + sizeof( T ) )
     {
@@ -243,7 +276,7 @@ bool GetFromDataStream( const std::vector<uint8_t>& dataStream, size_t& idx, T&
     return true;
 }
 
-SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialisedNodeData, size_t& serialIdx, std::vector<SmartNode<>>& referenceNodes, FastSIMD::FeatureSet level = FastSIMD::FeatureSet::Max )
+static SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialisedNodeData, size_t& serialIdx, std::vector<SmartNode<>>& referenceNodes, FastSIMD::FeatureSet level = FastSIMD::FeatureSet::Max )
 {
     Metadata::node_id nodeId;
     if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
@@ -251,7 +284,7 @@ SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialised
         return nullptr;
     }
 
-    // UINT16_MAX indicates a reference node
+    // UINT8_MAX indicates a reference node
     if( nodeId == std::numeric_limits<Metadata::node_id>::max() )
     {
         uint16_t referenceId;
@@ -283,8 +316,14 @@ SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialised
         return nullptr;
     }
 
+    MemberLookup memberLookup;
+    if( !GetFromDataStream( serialisedNodeData, serialIdx, memberLookup ) )
+    {
+        return nullptr;
+    }
+
     // Member variables
-    for( const auto& var : metadata->memberVariables )
+    while( memberLookup.member.type == 0 )
     {
         Metadata::MemberVariable::ValueUnion v;
 
@@ -293,40 +332,66 @@ SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialised
             return nullptr;
         }
 
-        var.setFunc( generator.get(), v );
-    }
-
-    // Member nodes
-    for( const auto& node : metadata->memberNodeLookups )
-    {
-        SmartNode<> nodeGen = DeserialiseSmartNodeInternal( serialisedNodeData, serialIdx, referenceNodes, level );
+        if( memberLookup.member.index < metadata->memberVariables.size() )
+        {
+            metadata->memberVariables[memberLookup.member.index].setFunc( generator.get(), v );
+        }
 
-        if( !nodeGen || !node.setFunc( generator.get(), nodeGen ) )
+        if( !GetFromDataStream( serialisedNodeData, serialIdx, memberLookup ) )
         {
             return nullptr;
         }
     }
 
-    // Member variables
-    for( const auto& hybrid : metadata->memberHybrids )
+    // Member nodes
+    if( memberLookup.member.type == 1 )
     {
-        uint8_t isGenerator;
-        // 1 byte to indicate:
-        // 0 = constant float value
-        // 1 = node lookup
+        size_t i = 0;
+        for( ; i < std::min<size_t>( memberLookup.member.index, metadata->memberNodeLookups.size() ); i++ )
+        {
+            SmartNode<> nodeGen = DeserialiseSmartNodeInternal( serialisedNodeData, serialIdx, referenceNodes, level );
 
-        if( !GetFromDataStream( serialisedNodeData, serialIdx, isGenerator ) || isGenerator > 1 )
+            if( !nodeGen || !metadata->memberNodeLookups[i].setFunc( generator.get(), nodeGen ) )
+            {
+                return nullptr;
+            }
+        }
+        for( ; i < memberLookup.member.index; i++ )
+        {
+            // Still need to deserialise this even if there is nowhere to put it
+            if( !DeserialiseSmartNodeInternal( serialisedNodeData, serialIdx, referenceNodes, level ) )
+            {
+                return nullptr;
+            }
+        }
+        for( ; i < metadata->memberNodeLookups.size(); i++ )
+        {
+            // Attempt to use a dummy node to fill the new node lookup
+            if( !metadata->memberNodeLookups[i].setFunc( generator.get(), FastNoise::New<FastNoise::Constant>( level ) ) )
+            {
+                return nullptr;
+            }
+        }
+
+        if( !GetFromDataStream( serialisedNodeData, serialIdx, memberLookup ) )
         {
             return nullptr;
         }
+    }
 
-        if( isGenerator )
+    // Member hybrids
+    while( memberLookup.data != 255 )
+    {
+        if( memberLookup.member.type == 3 )
         {
             SmartNode<> nodeGen = DeserialiseSmartNodeInternal( serialisedNodeData, serialIdx, referenceNodes, level );
 
-            if( !nodeGen || !hybrid.setNodeFunc( generator.get(), nodeGen ) )
+            if( memberLookup.member.index < metadata->memberHybrids.size() )
             {
-                return nullptr;
+                if( !nodeGen || !metadata->memberHybrids[memberLookup.member.index].setNodeFunc( generator.get(), nodeGen ) )
+                {
+                    return nullptr;
+                }
             }
         }
         else
@@ -338,7 +403,15 @@ SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialised
                 return nullptr;
             }
 
-            hybrid.setValueFunc( generator.get(), v );
+            if( memberLookup.member.index < metadata->memberHybrids.size() )
+            {
+                metadata->memberHybrids[memberLookup.member.index].setValueFunc( generator.get(), v );
+            }
+        }
+
+        if( !GetFromDataStream( serialisedNodeData, serialIdx, memberLookup ) )
+        {
+            return nullptr;
         }
     }
 
@@ -357,7 +430,7 @@ SmartNode<> FastNoise::NewFromEncodedNodeTree( const char* serialisedBase64NodeD
     return DeserialiseSmartNodeInternal( dataStream, startIdx, referenceNodes, level );
 }
 
-NodeData* DeserialiseNodeDataInternal( const std::vector<uint8_t>& serialisedNodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut, size_t& serialIdx )
+static NodeData* DeserialiseNodeDataInternal( const std::vector<uint8_t>& serialisedNodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut, size_t& serialIdx )
 {
     Metadata::node_id nodeId;
     if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
@@ -365,7 +438,7 @@ NodeData* DeserialiseNodeDataInternal( const std::vector<uint8_t>& serialisedNod
         return nullptr;
     }
 
-    // UINT16_MAX indicates a reference node
+    // UINT8_MAX indicates a reference node
     if( nodeId == std::numeric_limits<Metadata::node_id>::max() )
     {
         uint16_t referenceId;
@@ -392,54 +465,84 @@ NodeData* DeserialiseNodeDataInternal( const std::vector<uint8_t>& serialisedNod
 
     std::unique_ptr<NodeData> nodeData( new NodeData( metadata ) );
 
+
+    MemberLookup memberLookup;
+    if( !GetFromDataStream( serialisedNodeData, serialIdx, memberLookup ) )
+    {
+        return nullptr;
+    }
+
     // Member variables
-    for( auto& var : nodeData->variables )
+    while( memberLookup.member.type == 0 )
     {
-        if( !GetFromDataStream( serialisedNodeData, serialIdx, var ) )
+        Metadata::MemberVariable::ValueUnion v;
+
+        if( !GetFromDataStream( serialisedNodeData, serialIdx, v.i ) )
         {
             return nullptr;
         }
-    }
 
-    // Member nodes
-    for( auto& node : nodeData->nodeLookups )
-    {
-        node = DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx );
+        if( memberLookup.member.index < metadata->memberVariables.size() )
+        {
+            nodeData->variables[memberLookup.member.index] = v;
+        }
 
-        if( !node )
+        if( !GetFromDataStream( serialisedNodeData, serialIdx, memberLookup ) )
         {
             return nullptr;
         }
     }
 
-    // Member hybrids
-    for( auto& hybrid : nodeData->hybrids )
+    // Member nodes
+    if( memberLookup.member.type == 1 )
     {
-        uint8_t isGenerator;
-        // 1 byte to indicate:
-        // 0 = constant float value
-        // 1 = node lookup
+        size_t i = 0;
+        for( ; i < std::min<size_t>( memberLookup.member.index, metadata->memberNodeLookups.size() ); i++ )
+        {
+            nodeData->nodeLookups[i] = DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx );
+        }
+        for( ; i < memberLookup.member.index; i++ )
+        {
+            // Still need to deserialise this even if there is nowhere to put it
+            DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx );
+        }
 
-        if( !GetFromDataStream( serialisedNodeData, serialIdx, isGenerator ) || isGenerator > 1 )
+        if( !GetFromDataStream( serialisedNodeData, serialIdx, memberLookup ) )
         {
             return nullptr;
         }
+    }
 
-        if( isGenerator )
+    // Member hybrids
+    while( memberLookup.data != 255 )
+    {
+        if( memberLookup.member.type == 3 )
         {
-            hybrid.first = DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx );
+            NodeData* node = DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx );
 
-            if( !hybrid.first )
+            if( memberLookup.member.index < metadata->memberHybrids.size() )
             {
-                return nullptr;
+                nodeData->hybrids[memberLookup.member.index].first = node;
             }
         }
         else
         {
-            if( !GetFromDataStream( serialisedNodeData, serialIdx, hybrid.second ) )
+            float v;
+
+            if( !GetFromDataStream( serialisedNodeData, serialIdx, v ) )
             {
                 return nullptr;
             }
+
+            if( memberLookup.member.index < metadata->memberHybrids.size() )
+            {
+                nodeData->hybrids[memberLookup.member.index].second = v;
+            }
+        }
+
+        if( !GetFromDataStream( serialisedNodeData, serialIdx, memberLookup ) )
+        {
+            return nullptr;
         }
     }
 
@@ -513,7 +616,7 @@ namespace FastNoise
 }
 
 template<typename T>
-std::unique_ptr<const MetadataT<T>> CreateMetadataInstance( const char* className )
+static std::unique_ptr<const MetadataT<T>> CreateMetadataInstance( const char* className )
 {
     auto* newMetadata = new MetadataT<T>;
     newMetadata->name = className;
diff --git a/tools/DemoNodeTrees.inl b/tools/DemoNodeTrees.inl
index 154175f0..52e98253 100644
--- a/tools/DemoNodeTrees.inl
+++ b/tools/DemoNodeTrees.inl
@@ -2,6 +2,6 @@
 
 inline const char* gDemoNodeTrees[][2] =
 {
-    { "Simple Terrain", "EgAC@EgQBE@ADIQhoADgAE@EgQAg@ACWQwAAgL8AAIA/AGZmJj8@CPwEE@DCPwnU8@hIEIAMzMzPw@D" },
-    { "Cellular Caves", "EwAC@DDgQBE@BgQiAAFQAM@BJUTNzMw9@AgwAM@BC@Cw@BI@BBBg@AD5CH4XrPoXrUT8@AClQw@AKVD@BpUM@EH//wEAAOxROD4@BgQQCamZk+AM3MTD8=" },
+    { "Simple Terrain", "EwAC@BB@AIEAFEgUVBQ0AB@CQAACBABQY@ACWQ/8CZmYmPwY@B//wMEBI/CdTz//wIAACBC/wIzMzM/BpqZmT7/" },
+    { "Cellular Caves", "FAAC@BB@A4EAFEg@ACBCBRwFIwUlBQs@BlRATNzMw9C@AIMAMAw@ABAC@BFAM@BYAg@BcJ@BPkIEH4XrPgiF61E/////AgAApUMGAAClQwoAAKVD/wP/AQAG7FE4Pv8C@AgQf8CmpmZPgbNzEw//w==" },
 };

From 9fa777ef646a87c76fb97d3589bdf629b95fd296 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 23 May 2024 21:19:04 +0100
Subject: [PATCH 098/139] Switch domain warp fractal grouping

---
 .../FastNoise/Generators/DomainWarpFractal.h  |  88 +++----
 include/FastNoise/Generators/Fractal.h        | 219 +++++++++---------
 2 files changed, 158 insertions(+), 149 deletions(-)

diff --git a/include/FastNoise/Generators/DomainWarpFractal.h b/include/FastNoise/Generators/DomainWarpFractal.h
index 34a9b0e7..daeba68d 100644
--- a/include/FastNoise/Generators/DomainWarpFractal.h
+++ b/include/FastNoise/Generators/DomainWarpFractal.h
@@ -1,42 +1,46 @@
-#pragma once
-#include "Fractal.h"
-#include "DomainWarp.h"
-
-namespace FastNoise
-{
-    class DomainWarpFractalProgressive : public virtual Fractal<DomainWarp>
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarpFractalProgressive> : MetadataT<Fractal<DomainWarp>>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source"  )
-        {
-            groups.push_back( "Domain Warp" );
-        }
-    };
-#endif
-
-    class DomainWarpFractalIndependant : public virtual Fractal<DomainWarp>
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarpFractalIndependant> : MetadataT<Fractal<DomainWarp>>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source"  )
-        {
-            groups.push_back( "Domain Warp" );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Fractal.h"
+#include "DomainWarp.h"
+
+namespace FastNoise
+{
+    class DomainWarpFractalProgressive : public virtual Fractal<DomainWarp>
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<DomainWarpFractalProgressive> : MetadataT<Fractal<DomainWarp>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source", false )
+        {
+            groups.push_back( "Domain Warp" );
+            groups.push_back( "Fractal" );
+        }
+    };
+#endif
+
+    class DomainWarpFractalIndependant : public virtual Fractal<DomainWarp>
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<DomainWarpFractalIndependant> : MetadataT<Fractal<DomainWarp>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source", false )
+        {
+            groups.push_back( "Domain Warp" );
+            groups.push_back( "Fractal" );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Fractal.h b/include/FastNoise/Generators/Fractal.h
index 5ea2a5f1..b8e30d57 100644
--- a/include/FastNoise/Generators/Fractal.h
+++ b/include/FastNoise/Generators/Fractal.h
@@ -1,107 +1,112 @@
-#pragma once
-#include "Generator.h"
-
-namespace FastNoise
-{
-    template<typename T = Generator>
-    class Fractal : public virtual Generator
-    {
-    public:
-        void SetSource( SmartNodeArg<T> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
-        void SetGain( float value ) { mGain = value; CalculateFractalBounding(); } 
-        void SetGain( SmartNodeArg<> gen ) { mGain = 1.0f; this->SetSourceMemberVariable( mGain, gen ); CalculateFractalBounding(); }
-        void SetWeightedStrength( float value ) { mWeightedStrength = value; } 
-        void SetWeightedStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWeightedStrength, gen ); }
-        void SetOctaveCount( int value ) { mOctaves = value; CalculateFractalBounding(); } 
-        void SetLacunarity( float value ) { mLacunarity = value; } 
-
-    protected:
-        GeneratorSourceT<T> mSource;
-        HybridSource mGain = 0.5f;
-        HybridSource mWeightedStrength = 0.0f;
-
-        int   mOctaves = 3;
-        float mLacunarity = 2.0f;
-        float mFractalBounding = 1.0f / 1.75f;
-
-        void CalculateFractalBounding()
-        {
-            float gain = std::abs( mGain.constant );
-            float amp = gain;
-            float ampFractal = 1.0f;
-            for( int i = 1; i < mOctaves; i++ )
-            {
-                ampFractal += amp;
-                amp *= gain;
-            }
-            mFractalBounding = 1.0f / ampFractal;
-        }     
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<typename T>
-    struct MetadataT<Fractal<T>> : MetadataT<Generator>
-    {
-        MetadataT( const char* sourceName = "Source" )
-        {
-            groups.push_back( "Fractal" );
-
-            this->AddGeneratorSource( sourceName, &Fractal<T>::SetSource );
-            this->AddHybridSource( "Gain", 0.5f, &Fractal<T>::SetGain, &Fractal<T>::SetGain );
-            this->AddHybridSource( "Weighted Strength", 0.0f, &Fractal<T>::SetWeightedStrength, &Fractal<T>::SetWeightedStrength );
-            this->AddVariable( "Octaves", 3, &Fractal<T>::SetOctaveCount, 2, 16 );
-            this->AddVariable( "Lacunarity", 2.0f, &Fractal<T>::SetLacunarity );
-        }
-    };
-#endif
-
-    class FractalFBm : public virtual Fractal<>
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<FractalFBm> : MetadataT<Fractal<>>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-
-    class FractalRidged : public virtual Fractal<>
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<FractalRidged> : MetadataT<Fractal<>>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-
-    class FractalPingPong : public virtual Fractal<>
-    {
-    public:        const Metadata& GetMetadata() const override;
-
-        void SetPingPongStrength( float value ) { mPingPongStrength = value; }
-        void SetPingPongStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPingPongStrength, gen ); }
-
-    protected:
-        HybridSource mPingPongStrength = 0.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<FractalPingPong> : MetadataT<Fractal<>>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            this->AddHybridSource( "Ping Pong Strength", 2.0f, &FractalPingPong::SetPingPongStrength, &FractalPingPong::SetPingPongStrength );
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+namespace FastNoise
+{
+    template<typename T = Generator>
+    class Fractal : public virtual Generator
+    {
+    public:
+        void SetSource( SmartNodeArg<T> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
+        void SetGain( float value ) { mGain = value; CalculateFractalBounding(); } 
+        void SetGain( SmartNodeArg<> gen ) { mGain = 1.0f; this->SetSourceMemberVariable( mGain, gen ); CalculateFractalBounding(); }
+        void SetWeightedStrength( float value ) { mWeightedStrength = value; } 
+        void SetWeightedStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWeightedStrength, gen ); }
+        void SetOctaveCount( int value ) { mOctaves = value; CalculateFractalBounding(); } 
+        void SetLacunarity( float value ) { mLacunarity = value; } 
+
+    protected:
+        GeneratorSourceT<T> mSource;
+        HybridSource mGain = 0.5f;
+        HybridSource mWeightedStrength = 0.0f;
+
+        int   mOctaves = 3;
+        float mLacunarity = 2.0f;
+        float mFractalBounding = 1.0f / 1.75f;
+
+        void CalculateFractalBounding()
+        {
+            float gain = std::abs( mGain.constant );
+            float amp = gain;
+            float ampFractal = 1.0f;
+            for( int i = 1; i < mOctaves; i++ )
+            {
+                ampFractal += amp;
+                amp *= gain;
+            }
+            mFractalBounding = 1.0f / ampFractal;
+        }     
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<typename T>
+    struct MetadataT<Fractal<T>> : MetadataT<Generator>
+    {
+        MetadataT( const char* sourceName = "Source", bool addGroup = true )
+        {
+            if( addGroup )
+            {
+                groups.push_back( "Fractal" );
+            }
+            this->AddGeneratorSource( sourceName, &Fractal<T>::SetSource );
+            this->AddHybridSource( "Gain", 0.5f, &Fractal<T>::SetGain, &Fractal<T>::SetGain );
+            this->AddHybridSource( "Weighted Strength", 0.0f, &Fractal<T>::SetWeightedStrength, &Fractal<T>::SetWeightedStrength );
+            this->AddVariable( "Octaves", 3, &Fractal<T>::SetOctaveCount, 2, 16 );
+            this->AddVariable( "Lacunarity", 2.0f, &Fractal<T>::SetLacunarity );
+        }
+    };
+#endif
+
+    class FractalFBm : public virtual Fractal<>
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<FractalFBm> : MetadataT<Fractal<>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class FractalRidged : public virtual Fractal<>
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<FractalRidged> : MetadataT<Fractal<>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class FractalPingPong : public virtual Fractal<>
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        void SetPingPongStrength( float value ) { mPingPongStrength = value; }
+        void SetPingPongStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPingPongStrength, gen ); }
+
+    protected:
+        HybridSource mPingPongStrength = 0.0f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<FractalPingPong> : MetadataT<Fractal<>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            this->AddHybridSource( "Ping Pong Strength", 2.0f, &FractalPingPong::SetPingPongStrength, &FractalPingPong::SetPingPongStrength );
+        }
+    };
+#endif
+}

From 5ef9688e68698ed07813b52fc1ae34af5fb098dc Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 23 May 2024 22:40:53 +0100
Subject: [PATCH 099/139] Domain warp normalise in fmulsub

---
 include/FastNoise/Generators/DomainWarp.inl | 30 ++++++++++-----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/include/FastNoise/Generators/DomainWarp.inl b/include/FastNoise/Generators/DomainWarp.inl
index e8a12f06..2de616c8 100644
--- a/include/FastNoise/Generators/DomainWarp.inl
+++ b/include/FastNoise/Generators/DomainWarp.inl
@@ -47,12 +47,12 @@ public:
 
     #define GRADIENT_COORD( _x, _y )\
         int32v hash##_x##_y = HashPrimesHB(seed, x##_x, y##_y );\
-        float32v contrib##_x##_y = normalise * xs##_x * ys##_y;\
+        float32v contrib##_x##_y = xs##_x * ys##_y;\
         xWarp = FS::FMulAdd( contrib##_x##_y, FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) ), xWarp );\
         yWarp = FS::FMulAdd( contrib##_x##_y, FS::Convert<float>( FS::BitShiftRightZeroExtend( hash##_x##_y, 16) ), yWarp )
 
         int32v hash00 = HashPrimesHB(seed, x0, y0 );
-        float32v contrib00 = normalise * xs0 * ys0;
+        float32v contrib00 = xs0 * ys0;
         float32v xWarp = contrib00 * FS::Convert<float>( hash00 & int32v( 0xffff ) );
         float32v yWarp = contrib00 * FS::Convert<float>( FS::BitShiftRightZeroExtend( hash00, 16) );
 
@@ -62,8 +62,8 @@ public:
 
     #undef GRADIENT_COORD
 
-        xWarp -= float32v( 1 );
-        yWarp -= float32v( 1 );
+        xWarp = FS::FMulSub( xWarp, normalise, float32v( 1 ) );
+        yWarp = FS::FMulSub( yWarp, normalise, float32v( 1 ) );
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
@@ -97,13 +97,13 @@ public:
 
     #define GRADIENT_COORD( _x, _y, _z )\
         int32v hash##_x##_y##_z = HashPrimesHB( seed, x##_x, y##_y, z##_z );\
-        float32v contrib##_x##_y##_z = normalise * xs##_x * ys##_y * zs##_z;\
+        float32v contrib##_x##_y##_z = xs##_x * ys##_y * zs##_z;\
         xWarp = FS::FMulAdd( contrib##_x##_y##_z, FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) ), xWarp );\
         yWarp = FS::FMulAdd( contrib##_x##_y##_z, FS::Convert<float>( (hash##_x##_y##_z >> 11) & int32v( 0x3ff ) ), yWarp );\
         zWarp = FS::FMulAdd( contrib##_x##_y##_z, FS::Convert<float>( FS::BitShiftRightZeroExtend( hash##_x##_y##_z, 22 ) ), zWarp )
 
         int32v hash000 = HashPrimesHB( seed, x0, y0, z0 );
-        float32v contrib000 = normalise * xs0 * ys0 * zs0;
+        float32v contrib000 = xs0 * ys0 * zs0;
         float32v xWarp = contrib000 * FS::Convert<float>( hash000 & int32v( 0x3ff ) );
         float32v yWarp = contrib000 * FS::Convert<float>( (hash000 >> 11) & int32v( 0x3ff ) );
         float32v zWarp = contrib000 * FS::Convert<float>( FS::BitShiftRightZeroExtend( hash000, 22 ) );
@@ -118,9 +118,9 @@ public:
 
     #undef GRADIENT_COORD
 
-        xWarp -= float32v( 1 );
-        yWarp -= float32v( 1 );
-        zWarp -= float32v( 1 );
+        xWarp = FS::FMulSub( xWarp, normalise, float32v( 1 ) );
+        yWarp = FS::FMulSub( yWarp, normalise, float32v( 1 ) );
+        zWarp = FS::FMulSub( zWarp, normalise, float32v( 1 ) );
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
@@ -160,14 +160,14 @@ public:
 
     #define GRADIENT_COORD( _x, _y, _z, _w )\
         int32v hash##_x##_y##_z##_w = HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
-        float32v contrib##_x##_y##_z##_w = normalise * xs##_x * ys##_y * zs##_z * ws##_w;\
+        float32v contrib##_x##_y##_z##_w = xs##_x * ys##_y * zs##_z * ws##_w;\
         xWarp = FS::FMulAdd( contrib##_x##_y##_z##_w, FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) ), xWarp );\
         yWarp = FS::FMulAdd( contrib##_x##_y##_z##_w, FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) ), yWarp );\
         zWarp = FS::FMulAdd( contrib##_x##_y##_z##_w, FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) ), zWarp );\
         wWarp = FS::FMulAdd( contrib##_x##_y##_z##_w, FS::Convert<float>( FS::BitShiftRightZeroExtend( hash##_x##_y##_z##_w, 24 ) ), wWarp )
 
         int32v hash0000 = HashPrimesHB( seed, x0, y0, z0, w0 );
-        float32v contrib0000 = normalise * xs0 * ys0 * zs0 * ws0;
+        float32v contrib0000 = xs0 * ys0 * zs0 * ws0;
         float32v xWarp = contrib0000 * FS::Convert<float>( hash0000 & int32v( 0xff ) );
         float32v yWarp = contrib0000 * FS::Convert<float>( (hash0000 >> 8) & int32v( 0xff ) );
         float32v zWarp = contrib0000 * FS::Convert<float>( (hash0000 >> 16) & int32v( 0xff ) );
@@ -191,10 +191,10 @@ public:
 
     #undef GRADIENT_COORD
 
-        xWarp -= float32v( 1 );
-        yWarp -= float32v( 1 );
-        zWarp -= float32v( 1 );
-        wWarp -= float32v( 1 );
+        xWarp = FS::FMulSub( xWarp, normalise, float32v( 1 ) );
+        yWarp = FS::FMulSub( yWarp, normalise, float32v( 1 ) );
+        zWarp = FS::FMulSub( zWarp, normalise, float32v( 1 ) );
+        wWarp = FS::FMulSub( wWarp, normalise, float32v( 1 ) );
 
         xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
         yOut = FS::FMulAdd( yWarp, warpAmp, yOut );

From 49aabb94d974e0810300cb5031139f22bfd9e78c Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 3 Jun 2024 23:27:33 +0100
Subject: [PATCH 100/139] Move node editor into its own tools folder

---
 tools/CMakeLists.txt                          | 178 +-----------------
 tools/NodeEditor/CMakeLists.txt               | 177 +++++++++++++++++
 .../{ => NodeEditor}/FastNoiseNodeEditor.cpp  |   6 +-
 tools/{ => NodeEditor}/FastNoiseNodeEditor.h  |   0
 tools/{ => NodeEditor}/MeshNoisePreview.cpp   |   4 +-
 tools/{ => NodeEditor}/MeshNoisePreview.h     |   0
 tools/{ => NodeEditor}/MultiThreadQueues.h    |   0
 tools/{ => NodeEditor}/NodeEditorApp.cpp      |   2 +-
 tools/{ => NodeEditor}/NodeEditorApp.h        |   0
 tools/{ => NodeEditor}/NoiseTexture.cpp       |   2 +-
 tools/{ => NodeEditor}/NoiseTexture.h         |   0
 .../resources}/VertexLight.frag               |   0
 .../resources}/VertexLight.vert               |   0
 .../resources}/WindowsHiDPI.manifest          |   0
 .../emscripten_enable_shared_array_buffer.js  |   0
 .../resources}/emscripten_pre.js              |   0
 .../resources}/emscripten_shell.html          |   0
 .../{ => NodeEditor/resources}/resources.conf |   0
 tools/{ => NodeEditor/util}/DemoNodeTrees.inl |   0
 tools/{ => NodeEditor/util}/DmcTable.inl      |   0
 tools/{ => NodeEditor/util}/ImGuiExtra.h      |   0
 .../{ => NodeEditor/util}/SharedMemoryIpc.inl |   0
 22 files changed, 185 insertions(+), 184 deletions(-)
 create mode 100644 tools/NodeEditor/CMakeLists.txt
 rename tools/{ => NodeEditor}/FastNoiseNodeEditor.cpp (99%)
 rename tools/{ => NodeEditor}/FastNoiseNodeEditor.h (100%)
 rename tools/{ => NodeEditor}/MeshNoisePreview.cpp (99%)
 rename tools/{ => NodeEditor}/MeshNoisePreview.h (100%)
 rename tools/{ => NodeEditor}/MultiThreadQueues.h (100%)
 rename tools/{ => NodeEditor}/NodeEditorApp.cpp (99%)
 rename tools/{ => NodeEditor}/NodeEditorApp.h (100%)
 rename tools/{ => NodeEditor}/NoiseTexture.cpp (99%)
 rename tools/{ => NodeEditor}/NoiseTexture.h (100%)
 rename tools/{ => NodeEditor/resources}/VertexLight.frag (100%)
 rename tools/{ => NodeEditor/resources}/VertexLight.vert (100%)
 rename tools/{ => NodeEditor/resources}/WindowsHiDPI.manifest (100%)
 rename tools/{ => NodeEditor/resources}/emscripten_enable_shared_array_buffer.js (100%)
 rename tools/{ => NodeEditor/resources}/emscripten_pre.js (100%)
 rename tools/{ => NodeEditor/resources}/emscripten_shell.html (100%)
 rename tools/{ => NodeEditor/resources}/resources.conf (100%)
 rename tools/{ => NodeEditor/util}/DemoNodeTrees.inl (100%)
 rename tools/{ => NodeEditor/util}/DmcTable.inl (100%)
 rename tools/{ => NodeEditor/util}/ImGuiExtra.h (100%)
 rename tools/{ => NodeEditor/util}/SharedMemoryIpc.inl (100%)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 7dd7ad95..ecbfe153 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,177 +1 @@
-CPMAddPackage(
-    NAME corrade
-    GITHUB_REPOSITORY mosra/corrade
-    GIT_TAG 295bbba1f49887da060465f88b8501965f6acd7d
-    GIT_SUBMODULES "src"
-    EXCLUDE_FROM_ALL YES
-    OPTIONS
-        "CORRADE_BUILD_STATIC ON"
-        "CORRADE_BUILD_STATIC_UNIQUE_GLOBALS OFF"
-        "CORRADE_MSVC_COMPATIBILITY ON"
-        "CORRADE_WITH_INTERCONNECT OFF"
-        "CORRADE_WITH_TESTSUITE OFF"
-)
-
-if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-    set(NODE_EDITOR_APP_TYPE_CAPS "EMSCRIPTEN")   
-    set(NODE_EDITOR_APP_TYPE "Emscripten")   
-else()
-    set(NODE_EDITOR_APP_TYPE_CAPS "GLFW")   
-    set(NODE_EDITOR_APP_TYPE "Glfw")   
-
-    CPMAddPackage(
-        NAME GLFW
-        GITHUB_REPOSITORY glfw/glfw
-        GIT_TAG 3.3.9
-        EXCLUDE_FROM_ALL YES
-        OPTIONS
-            "BUILD_SHARED_LIBS OFF"
-            "GLFW_INSTALL OFF"
-            "GLFW_BUILD_TESTS OFF"
-            "GLFW_BUILD_EXAMPLES OFF"
-            "GLFW_BUILD_DOCS OFF"
-    )
-endif()
-
-CPMAddPackage(
-    NAME magnum
-    GITHUB_REPOSITORY mosra/magnum
-    GIT_TAG c9a884938c606b7d4555da6d278d1f3e09588c3e
-    GIT_SUBMODULES "src"
-    EXCLUDE_FROM_ALL YES
-    OPTIONS
-        "MAGNUM_BUILD_STATIC ON"
-        "MAGNUM_BUILD_PLUGINS_STATIC ON"
-        "MAGNUM_BUILD_STATIC_UNIQUE_GLOBALS OFF"
-        "MAGNUM_WITH_${NODE_EDITOR_APP_TYPE_CAPS}APPLICATION ON"
-        "MAGNUM_WITH_MESHTOOLS OFF"
-        "MAGNUM_WITH_TRADE OFF"
-        "MAGNUM_WITH_TEXT OFF"
-        "MAGNUM_WITH_TEXTURETOOLS OFF"
-        "MAGNUM_TARGET_GLES2 OFF"
-)
-    
-CPMAddPackage(
-    NAME imgui
-    GITHUB_REPOSITORY ocornut/imgui
-    GIT_TAG v1.90.1-docking
-    EXCLUDE_FROM_ALL YES
-    DOWNLOAD_ONLY YES
-)
-# Set dir for find_package(ImGui)
-set(IMGUI_DIR ${imgui_SOURCE_DIR})
-
-CPMAddPackage(
-    NAME magnum-integration
-    GITHUB_REPOSITORY mosra/magnum-integration
-    GIT_TAG f01593fc94556bff23a848ac71187c56e034b6d9
-    GIT_SUBMODULES "src"
-    EXCLUDE_FROM_ALL YES
-    OPTIONS
-        "BUILD_STATIC ON"
-        "MAGNUM_WITH_IMGUI ON"
-)
-
-# Use modules from magnum-integration since it has everything we need
-set(CMAKE_MODULE_PATH "${magnum-integration_SOURCE_DIR}/modules" ${CMAKE_MODULE_PATH})
-
-find_package(Magnum REQUIRED GL ${NODE_EDITOR_APP_TYPE}Application)
-find_package(MagnumIntegration REQUIRED ImGui)
-find_package(ImGui REQUIRED SourcesMiscCpp)
-  
-CPMAddPackage(
-    NAME imnodes
-    GITHUB_REPOSITORY Auburn/imnodes
-    GIT_TAG 26b70c528d48beeb839035f3da71550f8b0adfa7
-    GIT_SUBMODULES ".github"
-    EXCLUDE_FROM_ALL YES
-    OPTIONS
-        "BUILD_SHARED_LIBS OFF"
-        "IMNODES_IMGUI_TARGET_NAME MagnumIntegration::ImGui"
-)
-  
-CPMAddPackage(
-    NAME robinhoodhashing
-    GITHUB_REPOSITORY martinus/robin-hood-hashing
-    GIT_TAG 3.11.5    
-    EXCLUDE_FROM_ALL YES
-)
-
-# Ensure FastNoise.dll is built into the same dir as NodeEditor.exe
-set_target_properties(FastNoise
-    PROPERTIES
-    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}
-    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
-    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}
-)
-
-# Bundle a better font
-# Configure resource file for imgui source dir variable
-set(NodeEditor_RESOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-configure_file("resources.conf" "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
-corrade_add_resource(NodeEditor_RESOURCES "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
-
-add_executable(NodeEditor
-    
-    "NodeEditorApp.cpp"
-    "FastNoiseNodeEditor.cpp"
-    "MeshNoisePreview.cpp"
-    "NoiseTexture.cpp"
-    ${NodeEditor_RESOURCES}
-) 
-
-target_link_libraries(NodeEditor PRIVATE
-    FastNoise
-    #FastSIMD_FastNoise
-    Magnum::Application
-    Magnum::Shaders
-    Magnum::SceneGraph
-    MagnumIntegration::ImGui
-    ImGui::SourcesMiscCpp
-    imnodes
-    robin_hood
-)
-
-target_compile_features(NodeEditor PRIVATE cxx_std_20)
-
-# Windows HiDPI support
-if(CORRADE_TARGET_WINDOWS)
-    target_sources(NodeEditor PRIVATE WindowsHiDPI.manifest)
-endif()
-
-if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-    set(CMAKE_EXECUTABLE_SUFFIX ".html")
-    target_compile_options(NodeEditor PRIVATE -pthread -msimd128)
-    target_link_options(NodeEditor PRIVATE 
-        "-sPTHREAD_POOL_SIZE=Math.max(2,navigator.hardwareConcurrency)+3-navigator.hardwareConcurrency/4"
-        -pthread -sALLOW_MEMORY_GROWTH=1 -lidbfs.js -s FORCE_FILESYSTEM
-        --shell-file "${CMAKE_CURRENT_SOURCE_DIR}/emscripten_shell.html" 
-        --pre-js "${CMAKE_CURRENT_SOURCE_DIR}/emscripten_pre.js"
-        -Wl,-u,_emscripten_run_callback_on_thread
-    )
-    add_custom_command(TARGET NodeEditor POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy_if_different
-        "${CMAKE_CURRENT_SOURCE_DIR}/emscripten_enable_shared_array_buffer.js"
-        $<TARGET_FILE_DIR:NodeEditor>
-    )
-
-elseif (UNIX)
-    target_link_options(NodeEditor PRIVATE -pthread)
-
-    if(APPLE)
-        set_property(TARGET NodeEditor PROPERTY
-            INSTALL_RPATH "@loader_path/../lib")
-    else()
-        set_property(TARGET NodeEditor PROPERTY
-            INSTALL_RPATH "\$ORIGIN/../lib")
-    endif()
-endif()
-
-if (MSVC)
-    target_compile_definitions(NodeEditor PRIVATE _CRT_SECURE_NO_WARNINGS=1)
-endif()
-
-set(install_targets ${install_targets} NodeEditor PARENT_SCOPE)
-
-# Make the executable a default target to build & run in Visual Studio
-set_property(DIRECTORY ${PROJECT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT NodeEditor)
+add_subdirectory(NodeEditor)
\ No newline at end of file
diff --git a/tools/NodeEditor/CMakeLists.txt b/tools/NodeEditor/CMakeLists.txt
new file mode 100644
index 00000000..ab0a861f
--- /dev/null
+++ b/tools/NodeEditor/CMakeLists.txt
@@ -0,0 +1,177 @@
+CPMAddPackage(
+    NAME corrade
+    GITHUB_REPOSITORY mosra/corrade
+    GIT_TAG 295bbba1f49887da060465f88b8501965f6acd7d
+    GIT_SUBMODULES "src"
+    EXCLUDE_FROM_ALL YES
+    OPTIONS
+        "CORRADE_BUILD_STATIC ON"
+        "CORRADE_BUILD_STATIC_UNIQUE_GLOBALS OFF"
+        "CORRADE_MSVC_COMPATIBILITY ON"
+        "CORRADE_WITH_INTERCONNECT OFF"
+        "CORRADE_WITH_TESTSUITE OFF"
+)
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    set(NODE_EDITOR_APP_TYPE_CAPS "EMSCRIPTEN")   
+    set(NODE_EDITOR_APP_TYPE "Emscripten")   
+else()
+    set(NODE_EDITOR_APP_TYPE_CAPS "GLFW")   
+    set(NODE_EDITOR_APP_TYPE "Glfw")   
+
+    CPMAddPackage(
+        NAME GLFW
+        GITHUB_REPOSITORY glfw/glfw
+        GIT_TAG 3.3.9
+        EXCLUDE_FROM_ALL YES
+        OPTIONS
+            "BUILD_SHARED_LIBS OFF"
+            "GLFW_INSTALL OFF"
+            "GLFW_BUILD_TESTS OFF"
+            "GLFW_BUILD_EXAMPLES OFF"
+            "GLFW_BUILD_DOCS OFF"
+    )
+endif()
+
+CPMAddPackage(
+    NAME magnum
+    GITHUB_REPOSITORY mosra/magnum
+    GIT_TAG c9a884938c606b7d4555da6d278d1f3e09588c3e
+    GIT_SUBMODULES "src"
+    EXCLUDE_FROM_ALL YES
+    OPTIONS
+        "MAGNUM_BUILD_STATIC ON"
+        "MAGNUM_BUILD_PLUGINS_STATIC ON"
+        "MAGNUM_BUILD_STATIC_UNIQUE_GLOBALS OFF"
+        "MAGNUM_WITH_${NODE_EDITOR_APP_TYPE_CAPS}APPLICATION ON"
+        "MAGNUM_WITH_MESHTOOLS OFF"
+        "MAGNUM_WITH_TRADE OFF"
+        "MAGNUM_WITH_TEXT OFF"
+        "MAGNUM_WITH_TEXTURETOOLS OFF"
+        "MAGNUM_TARGET_GLES2 OFF"
+)
+    
+CPMAddPackage(
+    NAME imgui
+    GITHUB_REPOSITORY ocornut/imgui
+    GIT_TAG v1.90.1-docking
+    EXCLUDE_FROM_ALL YES
+    DOWNLOAD_ONLY YES
+)
+# Set dir for find_package(ImGui)
+set(IMGUI_DIR ${imgui_SOURCE_DIR})
+
+CPMAddPackage(
+    NAME magnum-integration
+    GITHUB_REPOSITORY mosra/magnum-integration
+    GIT_TAG f01593fc94556bff23a848ac71187c56e034b6d9
+    GIT_SUBMODULES "src"
+    EXCLUDE_FROM_ALL YES
+    OPTIONS
+        "BUILD_STATIC ON"
+        "MAGNUM_WITH_IMGUI ON"
+)
+
+# Use modules from magnum-integration since it has everything we need
+set(CMAKE_MODULE_PATH "${magnum-integration_SOURCE_DIR}/modules" ${CMAKE_MODULE_PATH})
+
+find_package(Magnum REQUIRED GL ${NODE_EDITOR_APP_TYPE}Application)
+find_package(MagnumIntegration REQUIRED ImGui)
+find_package(ImGui REQUIRED SourcesMiscCpp)
+  
+CPMAddPackage(
+    NAME imnodes
+    GITHUB_REPOSITORY Auburn/imnodes
+    GIT_TAG 26b70c528d48beeb839035f3da71550f8b0adfa7
+    GIT_SUBMODULES ".github"
+    EXCLUDE_FROM_ALL YES
+    OPTIONS
+        "BUILD_SHARED_LIBS OFF"
+        "IMNODES_IMGUI_TARGET_NAME MagnumIntegration::ImGui"
+)
+  
+CPMAddPackage(
+    NAME robinhoodhashing
+    GITHUB_REPOSITORY martinus/robin-hood-hashing
+    GIT_TAG 3.11.5    
+    EXCLUDE_FROM_ALL YES
+)
+
+# Ensure FastNoise.dll is built into the same dir as NodeEditor.exe
+set_target_properties(FastNoise
+    PROPERTIES
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}
+    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}
+)
+
+# Bundle a better font
+# Configure resource file for imgui source dir variable
+set(NodeEditor_RESOURCES_DIR "${CMAKE_CURRENT_LIST_DIR}/resources")
+configure_file("resources/resources.conf" "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
+corrade_add_resource(NodeEditor_RESOURCES "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
+
+add_executable(NodeEditor
+    
+    "NodeEditorApp.cpp"
+    "FastNoiseNodeEditor.cpp"
+    "MeshNoisePreview.cpp"
+    "NoiseTexture.cpp"
+    ${NodeEditor_RESOURCES}
+) 
+
+target_link_libraries(NodeEditor PRIVATE
+    FastNoise
+    #FastSIMD_FastNoise
+    Magnum::Application
+    Magnum::Shaders
+    Magnum::SceneGraph
+    MagnumIntegration::ImGui
+    ImGui::SourcesMiscCpp
+    imnodes
+    robin_hood
+)
+
+target_compile_features(NodeEditor PRIVATE cxx_std_20)
+
+# Windows HiDPI support
+if(CORRADE_TARGET_WINDOWS)
+    target_sources(NodeEditor PRIVATE resources/WindowsHiDPI.manifest)
+endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    set(CMAKE_EXECUTABLE_SUFFIX ".html")
+    target_compile_options(NodeEditor PRIVATE -pthread -msimd128)
+    # Resource paths must be absolute: the linker and the POST_BUILD copy run from the build tree, not this directory
+    target_link_options(NodeEditor PRIVATE
+        "-sPTHREAD_POOL_SIZE=Math.max(2,navigator.hardwareConcurrency)+3-navigator.hardwareConcurrency/4"
+        -pthread -sALLOW_MEMORY_GROWTH=1 -lidbfs.js -s FORCE_FILESYSTEM
+        --shell-file "${CMAKE_CURRENT_SOURCE_DIR}/resources/emscripten_shell.html"
+        --pre-js "${CMAKE_CURRENT_SOURCE_DIR}/resources/emscripten_pre.js"
+        -Wl,-u,_emscripten_run_callback_on_thread
+    )
+    add_custom_command(TARGET NodeEditor POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        "${CMAKE_CURRENT_SOURCE_DIR}/resources/emscripten_enable_shared_array_buffer.js"
+        $<TARGET_FILE_DIR:NodeEditor>
+    )
+elseif (UNIX)
+    target_link_options(NodeEditor PRIVATE -pthread)
+    # Installed executable looks for shared libraries in ../lib relative to itself
+    if(APPLE)
+        set_property(TARGET NodeEditor PROPERTY
+            INSTALL_RPATH "@loader_path/../lib")
+    else()
+        set_property(TARGET NodeEditor PROPERTY
+            INSTALL_RPATH "\$ORIGIN/../lib")
+    endif()
+endif()
+
+if (MSVC)
+    target_compile_definitions(NodeEditor PRIVATE _CRT_SECURE_NO_WARNINGS=1)
+endif()
+
+set(install_targets ${install_targets} NodeEditor PARENT_SCOPE)
+# NOTE(review): PARENT_SCOPE only reaches tools/CMakeLists.txt from here - confirm that file forwards install_targets to the root
+# Make the executable a default target to build & run in Visual Studio
+set_property(DIRECTORY ${PROJECT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT NodeEditor)
diff --git a/tools/FastNoiseNodeEditor.cpp b/tools/NodeEditor/FastNoiseNodeEditor.cpp
similarity index 99%
rename from tools/FastNoiseNodeEditor.cpp
rename to tools/NodeEditor/FastNoiseNodeEditor.cpp
index 6a1a13ec..a31db346 100644
--- a/tools/FastNoiseNodeEditor.cpp
+++ b/tools/NodeEditor/FastNoiseNodeEditor.cpp
@@ -13,14 +13,14 @@
 #include <Magnum/ImGuiIntegration/Widgets.h>
 #include <Corrade/Containers/ArrayViewStl.h>
 
-#include "ImGuiExtra.h"
+#include "util/ImGuiExtra.h"
+#include "util/DemoNodeTrees.inl"
 #include "FastNoiseNodeEditor.h"
-#include "DemoNodeTrees.inl"
 #include "NodeEditorApp.h"
 
 using namespace Magnum;
 
-#include "SharedMemoryIpc.inl"
+#include "util/SharedMemoryIpc.inl"
 
 static constexpr const char* kNodeGraphSettingsFile = FILESYSTEM_ROOT "NodeGraph.ini";
 
diff --git a/tools/FastNoiseNodeEditor.h b/tools/NodeEditor/FastNoiseNodeEditor.h
similarity index 100%
rename from tools/FastNoiseNodeEditor.h
rename to tools/NodeEditor/FastNoiseNodeEditor.h
diff --git a/tools/MeshNoisePreview.cpp b/tools/NodeEditor/MeshNoisePreview.cpp
similarity index 99%
rename from tools/MeshNoisePreview.cpp
rename to tools/NodeEditor/MeshNoisePreview.cpp
index ce44f4c8..246acb0d 100644
--- a/tools/MeshNoisePreview.cpp
+++ b/tools/NodeEditor/MeshNoisePreview.cpp
@@ -11,9 +11,9 @@
 #include <Magnum/GL/Context.h>
 #include <Magnum/GL/Extensions.h>
 
-#include "ImGuiExtra.h"
+#include "util/ImGuiExtra.h"
+#include "util/DmcTable.inl"
 #include "MeshNoisePreview.h"
-#include "DmcTable.inl"
 
 
 using namespace Magnum;
diff --git a/tools/MeshNoisePreview.h b/tools/NodeEditor/MeshNoisePreview.h
similarity index 100%
rename from tools/MeshNoisePreview.h
rename to tools/NodeEditor/MeshNoisePreview.h
diff --git a/tools/MultiThreadQueues.h b/tools/NodeEditor/MultiThreadQueues.h
similarity index 100%
rename from tools/MultiThreadQueues.h
rename to tools/NodeEditor/MultiThreadQueues.h
diff --git a/tools/NodeEditorApp.cpp b/tools/NodeEditor/NodeEditorApp.cpp
similarity index 99%
rename from tools/NodeEditorApp.cpp
rename to tools/NodeEditor/NodeEditorApp.cpp
index 380904f1..5f377336 100644
--- a/tools/NodeEditorApp.cpp
+++ b/tools/NodeEditor/NodeEditorApp.cpp
@@ -13,7 +13,7 @@
 #endif
 
 #include "NodeEditorApp.h"
-#include "ImGuiExtra.h"
+#include "util/ImGuiExtra.h"
 #include "FastSIMD/FastSIMD_FastNoise_config.h"
 
 using namespace Magnum;
diff --git a/tools/NodeEditorApp.h b/tools/NodeEditor/NodeEditorApp.h
similarity index 100%
rename from tools/NodeEditorApp.h
rename to tools/NodeEditor/NodeEditorApp.h
diff --git a/tools/NoiseTexture.cpp b/tools/NodeEditor/NoiseTexture.cpp
similarity index 99%
rename from tools/NoiseTexture.cpp
rename to tools/NodeEditor/NoiseTexture.cpp
index 4ec4e64a..9061407f 100644
--- a/tools/NoiseTexture.cpp
+++ b/tools/NodeEditor/NoiseTexture.cpp
@@ -20,8 +20,8 @@
 
 #include <FastNoise/Metadata.h>
 
+#include "util/ImGuiExtra.h"
 #include "NoiseTexture.h"
-#include "ImGuiExtra.h"
 
 
 using namespace Magnum;
diff --git a/tools/NoiseTexture.h b/tools/NodeEditor/NoiseTexture.h
similarity index 100%
rename from tools/NoiseTexture.h
rename to tools/NodeEditor/NoiseTexture.h
diff --git a/tools/VertexLight.frag b/tools/NodeEditor/resources/VertexLight.frag
similarity index 100%
rename from tools/VertexLight.frag
rename to tools/NodeEditor/resources/VertexLight.frag
diff --git a/tools/VertexLight.vert b/tools/NodeEditor/resources/VertexLight.vert
similarity index 100%
rename from tools/VertexLight.vert
rename to tools/NodeEditor/resources/VertexLight.vert
diff --git a/tools/WindowsHiDPI.manifest b/tools/NodeEditor/resources/WindowsHiDPI.manifest
similarity index 100%
rename from tools/WindowsHiDPI.manifest
rename to tools/NodeEditor/resources/WindowsHiDPI.manifest
diff --git a/tools/emscripten_enable_shared_array_buffer.js b/tools/NodeEditor/resources/emscripten_enable_shared_array_buffer.js
similarity index 100%
rename from tools/emscripten_enable_shared_array_buffer.js
rename to tools/NodeEditor/resources/emscripten_enable_shared_array_buffer.js
diff --git a/tools/emscripten_pre.js b/tools/NodeEditor/resources/emscripten_pre.js
similarity index 100%
rename from tools/emscripten_pre.js
rename to tools/NodeEditor/resources/emscripten_pre.js
diff --git a/tools/emscripten_shell.html b/tools/NodeEditor/resources/emscripten_shell.html
similarity index 100%
rename from tools/emscripten_shell.html
rename to tools/NodeEditor/resources/emscripten_shell.html
diff --git a/tools/resources.conf b/tools/NodeEditor/resources/resources.conf
similarity index 100%
rename from tools/resources.conf
rename to tools/NodeEditor/resources/resources.conf
diff --git a/tools/DemoNodeTrees.inl b/tools/NodeEditor/util/DemoNodeTrees.inl
similarity index 100%
rename from tools/DemoNodeTrees.inl
rename to tools/NodeEditor/util/DemoNodeTrees.inl
diff --git a/tools/DmcTable.inl b/tools/NodeEditor/util/DmcTable.inl
similarity index 100%
rename from tools/DmcTable.inl
rename to tools/NodeEditor/util/DmcTable.inl
diff --git a/tools/ImGuiExtra.h b/tools/NodeEditor/util/ImGuiExtra.h
similarity index 100%
rename from tools/ImGuiExtra.h
rename to tools/NodeEditor/util/ImGuiExtra.h
diff --git a/tools/SharedMemoryIpc.inl b/tools/NodeEditor/util/SharedMemoryIpc.inl
similarity index 100%
rename from tools/SharedMemoryIpc.inl
rename to tools/NodeEditor/util/SharedMemoryIpc.inl

From 72aaf75762b5b266040769daebfd8bd2548300d1 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 3 Jun 2024 23:33:02 +0100
Subject: [PATCH 101/139] Fix demo node include

---
 tests/FastNoiseBenchmark.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/FastNoiseBenchmark.cpp b/tests/FastNoiseBenchmark.cpp
index 6a96b54a..cefce3ea 100644
--- a/tests/FastNoiseBenchmark.cpp
+++ b/tests/FastNoiseBenchmark.cpp
@@ -6,7 +6,7 @@
 #include "FastNoise/Metadata.h"
 #include "FastSIMD/FastSIMD_FastNoise_config.h"
 
-#include "../tools/DemoNodeTrees.inl"
+#include "../tools/NodeEditor/util/DemoNodeTrees.inl"
 
 static const size_t gPositionCount = 8192;
 static float gPositionFloats[gPositionCount]; 

From 71c4c568dd81a53b62eb008ad86879a30400532f Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Fri, 19 Jul 2024 22:28:15 +0100
Subject: [PATCH 102/139] Github wiki generator tool

---
 CMakeLists.txt                    |   5 +
 util/CMakeLists.txt               |   1 +
 util/WikiGenerator/CMakeLists.txt |   8 ++
 util/WikiGenerator/main.cpp       | 223 ++++++++++++++++++++++++++++++
 4 files changed, 237 insertions(+)
 create mode 100644 util/CMakeLists.txt
 create mode 100644 util/WikiGenerator/CMakeLists.txt
 create mode 100644 util/WikiGenerator/main.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a3b68704..51bffeba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,7 @@ endif()
 
 option(FASTNOISE2_TOOLS "Build \"Node Editor\" executable" ${FASTNOISE2_STANDALONE_PROJECT})
 option(FASTNOISE2_TESTS "Build tests" OFF)
+option(FASTNOISE2_UTILITY "Build utility tools" OFF)
 
 option(FASTNOISE2_STRICT_FP "Enable strict floating point calculations to ensure output from different SIMD feature sets match EXACTLY" OFF)
 
@@ -49,6 +50,10 @@ if(FASTNOISE2_TESTS)
     add_subdirectory(tests)
 endif()
 
+if(FASTNOISE2_UTILITY)
+    add_subdirectory(util)
+endif()
+
 
 #Install -----------------------------------------------------------
 
diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt
new file mode 100644
index 00000000..87ceb847
--- /dev/null
+++ b/util/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(WikiGenerator)
\ No newline at end of file
diff --git a/util/WikiGenerator/CMakeLists.txt b/util/WikiGenerator/CMakeLists.txt
new file mode 100644
index 00000000..242b4225
--- /dev/null
+++ b/util/WikiGenerator/CMakeLists.txt
@@ -0,0 +1,8 @@
+
+add_executable(WikiGenerator
+    main.cpp
+)
+
+target_link_libraries(WikiGenerator PRIVATE
+    FastNoise
+)
\ No newline at end of file
diff --git a/util/WikiGenerator/main.cpp b/util/WikiGenerator/main.cpp
new file mode 100644
index 00000000..16cada6f
--- /dev/null
+++ b/util/WikiGenerator/main.cpp
@@ -0,0 +1,229 @@
+#include <FastNoise/Metadata.h>
+#include <FastNoise/FastNoise.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+static constexpr int imageSizeX = 256;
+static constexpr int imageSizeY = 256;
+
+FastNoise::SmartNode<> BuildGenerator( const FastNoise::Metadata* metadata )
+{
+    // Creates a node for `metadata` and tries to feed a 0.5 Constant into each of its node inputs
+    FastNoise::SmartNode<> node = metadata->CreateNode();
+    auto constant = FastNoise::New<FastNoise::Constant>();
+    constant->SetValue( 0.5f );
+    for( const auto& input : metadata->memberNodeLookups )
+    {
+        if( input.setFunc( node.get(), constant ) )
+        {
+            continue;
+        }
+        // The input rejected the constant: walk every node type, in order, until one is accepted
+        for( const FastNoise::Metadata* candidateMetadata : FastNoise::Metadata::GetAll() )
+        {
+            // Built recursively, since the candidate may have node inputs of its own
+            FastNoise::SmartNode<> candidate = BuildGenerator( candidateMetadata );
+            if( !candidate || !input.setFunc( node.get(), candidate ) )
+            {
+                continue;
+            }
+            for( const auto& candidateInput : candidateMetadata->memberNodeLookups )
+            {
+                if( !candidateInput.setFunc( candidate.get(), constant ) )
+                {
+                    return {}; // give up with a default-constructed SmartNode
+                }
+            }
+            break;
+        }
+    }
+    return node;
+}
+
+// Renders the node described by metadata to "<outDir>/images/<nodeName>.png": an 8-bit greyscale BMP is written
+// to the temp directory and converted by ImageMagick. Returns false when no image could be produced.
+bool CreateImage( const FastNoise::Metadata* metadata, const std::string& outDir, const std::string& nodeName )
+{
+    FastNoise::SmartNode<> generator = BuildGenerator( metadata );
+    if( !generator )
+    {
+        return false; // BuildGenerator can return an empty node; there is nothing to render in that case
+    }
+    auto node = FastNoise::New<FastNoise::DomainScale>();
+    node->SetSource( generator );
+    node->SetScaling( 3.f );
+
+    std::vector<float> noiseData( imageSizeX * imageSizeY );
+    auto noiseMinMax = node->GenUniformGrid2D( noiseData.data(), imageSizeX / -2, imageSizeY / -2, imageSizeX, imageSizeY, 1337 );
+    // A flat or non-finite range cannot be scaled to 0-255
+    if( noiseMinMax.min == noiseMinMax.max || !std::isfinite( noiseMinMax.min ) || !std::isfinite( noiseMinMax.max ) )
+    {
+        return false;
+    }
+
+    std::filesystem::path tempFile = std::filesystem::temp_directory_path() / (nodeName + ".bmp");
+    std::ofstream file( tempFile, std::ofstream::binary | std::ofstream::out | std::ofstream::trunc );
+
+    if( file.is_open() )
+    {
+        float scale = 255 / (noiseMinMax.max - noiseMinMax.min);
+
+        struct BmpHeader
+        {
+            // File header (14)
+            // char b = 'B';
+            // char m = 'M';
+            uint32_t fileSize;
+            uint32_t reserved = 0;
+            uint32_t dataOffset = 14u + 12u + (256u * 3u);
+            // Bmp Info Header (12)
+            uint32_t headerSize = 12u;
+            uint16_t sizeX;
+            uint16_t sizeY;
+            uint16_t colorPlanes = 1u;
+            uint16_t bitDepth = 8u;
+        };
+        static_assert( sizeof( BmpHeader ) == 24, "BmpHeader is written raw and must not contain padding" );
+        int paddedSizeX = imageSizeX;
+        int padding = paddedSizeX % 4;
+        if( padding )
+        {
+            padding = 4 - padding;
+            paddedSizeX += padding;
+        }
+
+        BmpHeader header;
+        header.fileSize = header.dataOffset + (uint32_t)(paddedSizeX * imageSizeY);
+        header.sizeX = (uint16_t)imageSizeX;
+        header.sizeY = (uint16_t)imageSizeY;
+
+        file << 'B' << 'M';
+        file.write( reinterpret_cast<char*>( &header ), sizeof( BmpHeader ) );
+
+        // Colour map
+        for (int i = 0; i < 256; i++)
+        {
+            char colourB = static_cast<char>( i );
+            file.write( &colourB, 1 );
+            file.write( &colourB, 1 );
+            file.write( &colourB, 1 );
+        }
+
+        int xIdx = padding ? imageSizeX : 0;
+        for( float noise : noiseData )
+        {
+            unsigned char pix = (unsigned char)std::clamp( (noise - noiseMinMax.min) * scale, 0.0f, 255.0f );
+            file.write( reinterpret_cast<char*>( &pix ), 1 );
+            if( --xIdx == 0 )
+            {
+                xIdx = imageSizeX;
+                int zero( 0 );
+                file.write( reinterpret_cast<char*>( &zero ), padding );
+            }
+        }
+
+        file.close();
+        // Quote both paths so spaces in them survive the shell; the closing quote after ".png" is required
+        std::string convertCmd = "magick convert \"";
+        convertCmd += tempFile.string();
+        convertCmd += "\" \"" + outDir + "/images/" + nodeName + ".png\"";
+        return std::system( convertCmd.c_str() ) == 0;
+    }
+    return false;
+}
+
+// Appends the markdown section for one node (heading, description, preview image, inputs and variables) to output
+void DoNode( std::stringstream& output, const FastNoise::Metadata* metadata, const std::string& outDir )
+{
+    std::string nodeName = FastNoise::Metadata::FormatMetadataNodeName( metadata, false );
+    output << "## " << nodeName << '\n';
+    output << metadata->description << "\n\n";
+
+    if( CreateImage( metadata, outDir, nodeName ) )
+    {
+        output << "[[images/" << nodeName << ".png]]\n";
+    }
+
+    for( auto& node_lookup : metadata->memberNodeLookups )
+    {
+        output << "### " << node_lookup.name << " - Node Lookup\n" << node_lookup.description << '\n';
+    }
+
+    for( auto& hybrid_lookup : metadata->memberHybrids )
+    {
+        // The opening backtick pairs with the closing one after the "f" suffix, so the default renders as inline code
+        output << "### " << hybrid_lookup.name << " - Hybrid Lookup `= " << hybrid_lookup.valueDefault << "f`\n" << hybrid_lookup.description << '\n';
+    }
+
+    for( auto& variable : metadata->memberVariables )
+    {
+        switch( variable.type )
+        {
+        case FastNoise::Metadata::MemberVariable::EFloat:
+            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.valueDefault.f << "f`\n" << variable.description << '\n';
+            break;
+        case FastNoise::Metadata::MemberVariable::EInt:
+            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.valueDefault.i << "`\n" << variable.description << '\n';
+            break;
+        case FastNoise::Metadata::MemberVariable::EEnum:
+            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.enumNames[variable.valueDefault.i] << "`\n" << variable.description << '\n';
+            for( size_t i = 0; i < variable.enumNames.size(); i++ )
+            {
+                output << "* " << variable.enumNames[i] << (static_cast<size_t>( variable.valueDefault.i ) == i ? " (Default)\n" : "\n");
+            }
+            break;
+        }
+    }
+}
+
+// Generates the wiki pages: one markdown file per node group, written into argv[1] (or "." when omitted)
+int main( int argc, char* argv[] )
+{
+    std::string outputDir = ".";
+    if( argc > 1 )
+    {
+        outputDir = argv[1];
+        std::filesystem::create_directories( outputDir );
+    }
+    std::filesystem::create_directories( outputDir + "/images" );
+    std::unordered_map<std::string, std::stringstream> outputStreams;
+    int exitCode = 0;
+    for( const FastNoise::Metadata* metadata : FastNoise::Metadata::GetAll() )
+    {
+        const char* groupName = metadata->groups[0];
+        if( outputStreams.try_emplace( groupName ).second ) // first node of this group: start its page
+        {
+            outputStreams[groupName] << "# " << groupName << '\n';
+            outputStreams[groupName].setf(std::ios::fixed);
+            outputStreams[groupName].precision(1);
+        }
+        DoNode( outputStreams[groupName], metadata, outputDir );
+    }
+
+    for( auto& stream : outputStreams )
+    {
+        std::string fileName = stream.first;
+        std::replace( fileName.begin(), fileName.end(), ' ', '-' );
+        std::ofstream outFile( outputDir + "/Nodes#-" + fileName + ".md" );
+        outFile << stream.second.str();
+        outFile.close();
+        if( !outFile ) // covers both a failed open and a failed write
+        {
+            std::cerr << "Failed to write " << fileName << ".md\n";
+            exitCode = 1;
+            continue;
+        }
+        std::cout << "Written " << fileName << ".md\n";
+    }
+    return exitCode;
+}
\ No newline at end of file

From 4918ee09a6afd7a85c656052d88c74b51609619e Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 27 Aug 2024 23:41:24 +0100
Subject: [PATCH 103/139] Update to latest FastSIMD

---
 src/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e13b7520..5bce5301 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,7 +2,7 @@
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 504e54fe5ec580e933ede0fb70a11bbc25dff714
+    GIT_TAG 868e77272ac0681a6b2936c1ef9087bb3ec8c153
 )
 
 set(install_targets ${install_targets}

From d24cd37790673294e717bb63df033fb676721c48 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 27 Aug 2024 23:48:44 +0100
Subject: [PATCH 104/139] Enable MacOS universal build

---
 .github/workflows/main.yml | 56 +-------------------------------------
 1 file changed, 1 insertion(+), 55 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 461d6462..28d02222 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -40,7 +40,7 @@ jobs:
             cmake_options: -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
           - os: macos-latest
             name: MacOSARM64-Clang
-            cmake_options: -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+            cmake_options: -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64"
 
     steps:
     - name: 'Install OpenGL & xorg'
@@ -89,57 +89,3 @@ jobs:
         file: ${{ github.workspace }}/${{ matrix.name }}.zip
         asset_name: FastNoise2-${{ github.event.release.tag_name }}-${{ matrix.name }}.zip
         tag: ${{ github.ref }}
-
-  macos-universal:
-    if: ${{ always() }}
-    needs: [ ci-matrix ]
-    name: macos Universal Build
-    runs-on: macos-latest
-    outputs:
-      matrix: ${{ steps.matrix.outputs.matrix }}
-    steps:
-    - name: 'Download artifact'
-      uses: actions/download-artifact@v3
-      with:
-        name: MacOSaarch64-Clang
-        path: MacOSaarch64-Clang
-    - name: 'Download artifact'
-      uses: actions/download-artifact@v3
-      with:
-        name: MacOSx86_64-Clang
-        path: MacOSx86_64-Clang
-    - name: 'Create Universal Binary'
-      run: |
-        mkdir -p universal/FastNoise2/lib universal/FastNoise2/bin
-        lipo -create \
-          -output universal/FastNoise2/lib/libFastNoise.dylib \
-            MacOSaarch64-Clang/FastNoise2/lib/libFastNoise.dylib \
-            MacOSx86_64-Clang/FastNoise2/lib/libFastNoise.dylib
-        lipo -create \
-          -output universal/FastNoise2/bin/NoiseTool \
-            MacOSaarch64-Clang/FastNoise2/bin/NoiseTool \
-            MacOSx86_64-Clang/FastNoise2/bin/NoiseTool
-        chmod +x universal/FastNoise2/bin/NoiseTool
-
-    - name: 'Upload artifact'
-      uses: actions/upload-artifact@v3
-      with:
-        name: MacOSUniversal-Clang
-        path: ${{ github.workspace }}/universal/
-
-    - name: 'Zip artifacts'
-      if: github.event_name == 'release'
-      uses: papeloto/action-zip@v1
-      with:
-        files: universal/
-        recursive: true
-        dest: MacOSUniversal-Clang.zip
-
-    - name: 'Upload release artifacts'
-      if: github.event_name == 'release'
-      uses: svenstaro/upload-release-action@v2
-      with:
-        repo_token: ${{ secrets.GITHUB_TOKEN }}
-        file: ${{ github.workspace }}/MacOSUniversal-Clang.zip
-        asset_name: FastNoise2-${{ github.event.release.tag_name }}-MacOSUniversal-Clang.zip
-        tag: ${{ github.ref }}

From fbf6cae64fd1cb999d63b4198466e165b96dbec3 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 28 Aug 2024 00:20:47 +0100
Subject: [PATCH 105/139] Update CPM

---
 cmake/CPM.cmake | 130 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 120 insertions(+), 10 deletions(-)

diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake
index 1b4cfcca..8269a8bf 100644
--- a/cmake/CPM.cmake
+++ b/cmake/CPM.cmake
@@ -42,7 +42,7 @@ if(NOT COMMAND cpm_message)
   endfunction()
 endif()
 
-set(CURRENT_CPM_VERSION 0.39.0)
+set(CURRENT_CPM_VERSION 0.40.2)
 
 get_filename_component(CPM_CURRENT_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
 if(CPM_DIRECTORY)
@@ -391,8 +391,8 @@ function(cpm_parse_add_package_single_arg arg outArgs)
     # We don't try to parse the version if it's not provided explicitly. cpm_get_version_from_url
     # should do this at a later point
   else()
-    # We should never get here. This is an assertion and hitting it means there's a bug in the code
-    # above. A packageType was set, but not handled by this if-else.
+    # We should never get here. This is an assertion and hitting it means there's a problem with the
+    # code above. A packageType was set, but not handled by this if-else.
     message(FATAL_ERROR "${CPM_INDENT} Unsupported package type '${packageType}' of '${arg}'")
   endif()
 
@@ -464,6 +464,72 @@ function(cpm_check_git_working_dir_is_clean repoPath gitTag isClean)
 
 endfunction()
 
+# Add PATCH_COMMAND to CPM_ARGS_UNPARSED_ARGUMENTS. This method consumes a list of files in ARGN
+# then generates a `PATCH_COMMAND` appropriate for `ExternalProject_Add()`. This command is appended
+# to the parent scope's `CPM_ARGS_UNPARSED_ARGUMENTS`.
+function(cpm_add_patches)
+  # Return if no patch files are supplied.
+  if(NOT ARGN)
+    return()
+  endif()
+
+  # Find the patch program.
+  find_program(PATCH_EXECUTABLE patch)
+  if(WIN32 AND NOT PATCH_EXECUTABLE)
+    # The Windows git executable is distributed with patch.exe. Find the path to the executable, if
+    # it exists, then search `../usr/bin` and `../../usr/bin` for patch.exe.
+    find_package(Git QUIET)
+    if(GIT_EXECUTABLE)
+      get_filename_component(extra_search_path ${GIT_EXECUTABLE} DIRECTORY)
+      get_filename_component(extra_search_path_1up ${extra_search_path} DIRECTORY)
+      get_filename_component(extra_search_path_2up ${extra_search_path_1up} DIRECTORY)
+      find_program(
+        PATCH_EXECUTABLE patch HINTS "${extra_search_path_1up}/usr/bin"
+                                     "${extra_search_path_2up}/usr/bin"
+      )
+    endif()
+  endif()
+  if(NOT PATCH_EXECUTABLE)
+    message(FATAL_ERROR "Couldn't find `patch` executable to use with PATCHES keyword.")
+  endif()
+
+  # Create a temporary list, seeded with the caller's unparsed arguments, to append patch commands to.
+  set(temp_list ${CPM_ARGS_UNPARSED_ARGUMENTS})
+
+  # Ensure each file exists (or error out) and add it to the list.
+  set(first_item True)
+  foreach(PATCH_FILE ${ARGN})
+    # Make sure the patch file exists, if we can't find it, try again in the current directory.
+    if(NOT EXISTS "${PATCH_FILE}")
+      if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/${PATCH_FILE}")
+        message(FATAL_ERROR "Couldn't find patch file: '${PATCH_FILE}'")
+      endif()
+      set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/${PATCH_FILE}")
+    endif()
+
+    # Convert to absolute path for use with patch file command.
+    get_filename_component(PATCH_FILE "${PATCH_FILE}" ABSOLUTE)
+
+    # The first patch entry must be preceded by "PATCH_COMMAND" while the following items are
+    # preceded by "&&".
+    if(first_item)
+      set(first_item False)
+      list(APPEND temp_list "PATCH_COMMAND")
+    else()
+      list(APPEND temp_list "&&")
+    endif()
+    # Add the patch command to the list
+    list(APPEND temp_list "${PATCH_EXECUTABLE}" "-p1" "<" "${PATCH_FILE}")
+  endforeach()
+
+  # Move temp out into parent scope.
+  set(CPM_ARGS_UNPARSED_ARGUMENTS
+      ${temp_list}
+      PARENT_SCOPE
+  )
+
+endfunction()
+
 # method to overwrite internal FetchContent properties, to allow using CPM.cmake to overload
 # FetchContent calls. As these are internal cmake properties, this method should be used carefully
 # and may need modification in future CMake versions. Source:
@@ -537,7 +603,7 @@ function(CPMAddPackage)
       CUSTOM_CACHE_KEY
   )
 
-  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND)
+  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND PATCHES)
 
   cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
 
@@ -628,6 +694,7 @@ function(CPMAddPackage)
       SOURCE_DIR "${PACKAGE_SOURCE}"
       EXCLUDE_FROM_ALL "${CPM_ARGS_EXCLUDE_FROM_ALL}"
       SYSTEM "${CPM_ARGS_SYSTEM}"
+      PATCHES "${CPM_ARGS_PATCHES}"
       OPTIONS "${CPM_ARGS_OPTIONS}"
       SOURCE_SUBDIR "${CPM_ARGS_SOURCE_SUBDIR}"
       DOWNLOAD_ONLY "${DOWNLOAD_ONLY}"
@@ -683,6 +750,8 @@ function(CPMAddPackage)
     set(CPM_FETCHCONTENT_BASE_DIR ${CMAKE_BINARY_DIR}/_deps)
   endif()
 
+  cpm_add_patches(${CPM_ARGS_PATCHES})
+
   if(DEFINED CPM_ARGS_DOWNLOAD_COMMAND)
     list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS DOWNLOAD_COMMAND ${CPM_ARGS_DOWNLOAD_COMMAND})
   elseif(DEFINED CPM_ARGS_SOURCE_DIR)
@@ -796,14 +865,38 @@ function(CPMAddPackage)
   )
 
   if(NOT CPM_SKIP_FETCH)
+    # CMake 3.28 added EXCLUDE_FROM_ALL, SYSTEM (3.25), and SOURCE_SUBDIR (3.18) to FetchContent_Declare.
+    # Calling FetchContent_MakeAvailable will then internally forward these options to
+    # add_subdirectory. Up until these changes, we had to call FetchContent_Populate and
+    # add_subdirectory separately, which is no longer necessary and has been deprecated as of 3.30.
+    set(fetchContentDeclareExtraArgs "")
+    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.28.0")
+      if(${CPM_ARGS_EXCLUDE_FROM_ALL})
+        list(APPEND fetchContentDeclareExtraArgs EXCLUDE_FROM_ALL)
+      endif()
+      if(${CPM_ARGS_SYSTEM})
+        list(APPEND fetchContentDeclareExtraArgs SYSTEM)
+      endif()
+      if(DEFINED CPM_ARGS_SOURCE_SUBDIR)
+        list(APPEND fetchContentDeclareExtraArgs SOURCE_SUBDIR ${CPM_ARGS_SOURCE_SUBDIR})
+      endif()
+      # For CMake version <3.28 OPTIONS are parsed in cpm_add_subdirectory
+      if(CPM_ARGS_OPTIONS AND NOT DOWNLOAD_ONLY)
+        foreach(OPTION ${CPM_ARGS_OPTIONS})
+          cpm_parse_option("${OPTION}")
+          set(${OPTION_KEY} "${OPTION_VALUE}")
+        endforeach()
+      endif()
+    endif()
     cpm_declare_fetch(
-      "${CPM_ARGS_NAME}" "${CPM_ARGS_VERSION}" "${PACKAGE_INFO}" "${CPM_ARGS_UNPARSED_ARGUMENTS}"
+      "${CPM_ARGS_NAME}" ${fetchContentDeclareExtraArgs} "${CPM_ARGS_UNPARSED_ARGUMENTS}"
     )
-    cpm_fetch_package("${CPM_ARGS_NAME}" populated)
+
+    cpm_fetch_package("${CPM_ARGS_NAME}" ${DOWNLOAD_ONLY} populated ${CPM_ARGS_UNPARSED_ARGUMENTS})
     if(CPM_SOURCE_CACHE AND download_directory)
       file(LOCK ${download_directory}/../cmake.lock RELEASE)
     endif()
-    if(${populated})
+    if(${populated} AND ${CMAKE_VERSION} VERSION_LESS "3.28.0")
       cpm_add_subdirectory(
         "${CPM_ARGS_NAME}"
         "${DOWNLOAD_ONLY}"
@@ -914,7 +1007,7 @@ function(CPMGetPackageVersion PACKAGE OUTPUT)
 endfunction()
 
 # declares a package in FetchContent_Declare
-function(cpm_declare_fetch PACKAGE VERSION INFO)
+function(cpm_declare_fetch PACKAGE)
   if(${CPM_DRY_RUN})
     cpm_message(STATUS "${CPM_INDENT} Package not declared (dry run)")
     return()
@@ -990,7 +1083,7 @@ endfunction()
 
 # downloads a previously declared package via FetchContent and exports the variables
 # `${PACKAGE}_SOURCE_DIR` and `${PACKAGE}_BINARY_DIR` to the parent scope
-function(cpm_fetch_package PACKAGE populated)
+function(cpm_fetch_package PACKAGE DOWNLOAD_ONLY populated)
   set(${populated}
       FALSE
       PARENT_SCOPE
@@ -1005,7 +1098,24 @@ function(cpm_fetch_package PACKAGE populated)
   string(TOLOWER "${PACKAGE}" lower_case_name)
 
   if(NOT ${lower_case_name}_POPULATED)
-    FetchContent_Populate(${PACKAGE})
+    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.28.0")
+      if(DOWNLOAD_ONLY)
+        # MakeAvailable will call add_subdirectory internally which is not what we want when
+        # DOWNLOAD_ONLY is set. Populate will only download the dependency without adding it to the
+        # build
+        FetchContent_Populate(
+          ${PACKAGE}
+          SOURCE_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-src"
+          BINARY_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
+          SUBBUILD_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild"
+          ${ARGN}
+        )
+      else()
+        FetchContent_MakeAvailable(${PACKAGE})
+      endif()
+    else()
+      FetchContent_Populate(${PACKAGE})
+    endif()
     set(${populated}
         TRUE
         PARENT_SCOPE

From 8cb93ff1e7fa94f03da6cf6696b717051d7d09af Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 28 Aug 2024 00:21:04 +0100
Subject: [PATCH 106/139] Don't CI newfastsimd twice

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 28d02222..f4db4b15 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -5,7 +5,7 @@ name: CI
 on:
   workflow_dispatch:
   push:
-    branches: [master,NewFastSIMD]
+    branches: [master]
   pull_request:
     branches: [master,NewFastSIMD]
   release:

From 9096fa6248e33067b5eb9618be0c9a0a47122225 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 28 Aug 2024 00:31:59 +0100
Subject: [PATCH 107/139] Fix Cpp11 include

---
 include/FastNoise/Generators/Generator.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 19a93318..66daa4b2 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -175,7 +175,10 @@ namespace FastNoise
         T varArray[(int)Dim::Count];
 
         template<typename U = T>
-        constexpr PerDimensionVariable( U value = 0 )
+#if __cplusplus >= 201402L
+        constexpr
+#endif
+        PerDimensionVariable( U value = 0 )
         {
             for( T& element : varArray )
             {

From 0ba4ba6f617a9f38342a08a55811ab867a693dc0 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 28 Aug 2024 00:52:00 +0100
Subject: [PATCH 108/139] Fix unix compile

---
 include/FastNoise/Generators/Generator.h | 5 +----
 tools/NodeEditor/CMakeLists.txt          | 4 +++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 66daa4b2..248452ee 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -89,10 +89,7 @@ namespace FastNoise
     {
         float constant;
 
-        constexpr HybridSourceT( float f = 0.0f )
-        {
-            constant = f;
-        }
+        constexpr HybridSourceT( float f = 0.0f ) : constant( f ) { }
     };
 
     class FASTNOISE_API Generator
diff --git a/tools/NodeEditor/CMakeLists.txt b/tools/NodeEditor/CMakeLists.txt
index ab0a861f..7cf3f926 100644
--- a/tools/NodeEditor/CMakeLists.txt
+++ b/tools/NodeEditor/CMakeLists.txt
@@ -171,7 +171,9 @@ if (MSVC)
     target_compile_definitions(NodeEditor PRIVATE _CRT_SECURE_NO_WARNINGS=1)
 endif()
 
-set(install_targets ${install_targets} . PARENT_SCOPE)
+set(install_targets ${install_targets}
+    NodeEditor
+    PARENT_SCOPE)
 
 # Make the executable a default target to build & run in Visual Studio
 set_property(DIRECTORY ../.. PROPERTY VS_STARTUP_PROJECT .)

From 35b1239050166f3acf8f7f5ee217c22598ffb6ba Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 28 Aug 2024 00:58:45 +0100
Subject: [PATCH 109/139] Fix unix compile

---
 tools/CMakeLists.txt            | 4 +++-
 tools/NodeEditor/CMakeLists.txt | 6 +-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index ecbfe153..9b9c6b6e 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1 +1,3 @@
-add_subdirectory(NodeEditor)
\ No newline at end of file
+add_subdirectory(NodeEditor)
+
+set(install_targets ${install_targets} PARENT_SCOPE)
\ No newline at end of file
diff --git a/tools/NodeEditor/CMakeLists.txt b/tools/NodeEditor/CMakeLists.txt
index 7cf3f926..9f681d2a 100644
--- a/tools/NodeEditor/CMakeLists.txt
+++ b/tools/NodeEditor/CMakeLists.txt
@@ -112,7 +112,6 @@ configure_file("resources/resources.conf" "${CMAKE_CURRENT_BINARY_DIR}/resources
 corrade_add_resource(NodeEditor_RESOURCES "${CMAKE_CURRENT_BINARY_DIR}/resources.conf")
 
 add_executable(NodeEditor
-    
     "NodeEditorApp.cpp"
     "FastNoiseNodeEditor.cpp"
     "MeshNoisePreview.cpp"
@@ -122,7 +121,6 @@ add_executable(NodeEditor
 
 target_link_libraries(NodeEditor PRIVATE
     FastNoise
-    #FastSIMD_FastNoise
     Magnum::Application
     Magnum::Shaders
     Magnum::SceneGraph
@@ -171,9 +169,7 @@ if (MSVC)
     target_compile_definitions(NodeEditor PRIVATE _CRT_SECURE_NO_WARNINGS=1)
 endif()
 
-set(install_targets ${install_targets}
-    NodeEditor
-    PARENT_SCOPE)
+set(install_targets ${install_targets} NodeEditor PARENT_SCOPE)
 
 # Make the executable a default target to build & run in Visual Studio
 set_property(DIRECTORY ../.. PROPERTY VS_STARTUP_PROJECT .)

From 75eae50c898eafa9b12c3061fd406deaaee49c0b Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 31 Aug 2024 23:25:16 +0100
Subject: [PATCH 110/139] Position output make offset hybrid, fix distance to
 point when using node inputs

---
 include/FastNoise/Generators/BasicGenerators.h   | 10 +++++++---
 include/FastNoise/Generators/BasicGenerators.inl | 13 +++++++++----
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 994bb3c7..7004a92d 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -156,11 +156,15 @@ namespace FastNoise
         const Metadata& GetMetadata() const override;
 
         template<Dim D>
-        void SetAxis( float multiplier, float offset = 0.0f ) { mMultiplier[(int)D] = multiplier; mOffset[(int)D] = offset; }
+        void SetMultiplier( float multiplier ) { mMultiplier[(int)D] = multiplier; }
+        template<Dim D>
+        void SetOffset( float offset ) { mOffset[(int)D] = offset; }
+        template<Dim D>
+        void SetOffset( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mOffset[(int)D], gen ); }
 
     protected:
         PerDimensionVariable<float> mMultiplier = 0.0f;
-        PerDimensionVariable<float> mOffset = 0.0f;
+        PerDimensionVariable<HybridSource> mOffset = 0.0f;
 
         template<typename T>
         friend struct MetadataT;
@@ -176,7 +180,7 @@ namespace FastNoise
         {
             groups.push_back( "Basic Generators" );
             this->AddPerDimensionVariable( { "Multiplier", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mMultiplier ); }, 0.f, 0.f, 0.001f );
-            this->AddPerDimensionVariable( { "Offset", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mOffset ); }, 0.f, 0.f, 0.25f );
+            this->AddPerDimensionHybridSource( { "Offset", "Read node description" }, 0.0f, []( PositionOutput* p ) { return std::ref( p->mOffset ); }, 0.25f );
 
             description =
                 "Takes the input position and does the following per dimension\n"
diff --git a/include/FastNoise/Generators/BasicGenerators.inl b/include/FastNoise/Generators/BasicGenerators.inl
index 98cede2d..bca77a63 100644
--- a/include/FastNoise/Generators/BasicGenerators.inl
+++ b/include/FastNoise/Generators/BasicGenerators.inl
@@ -90,9 +90,10 @@ class FastSIMD::DispatchClass<FastNoise::PositionOutput, SIMD> final : public vi
     {
         size_t offsetIdx = 0;
         size_t multiplierIdx = 0;
+        float32v r( 0 );
 
-        (((pos += float32v( mOffset[offsetIdx++] )) *= float32v( mMultiplier[multiplierIdx++] )), ...);
-        return (pos + ...);
+        ((r = FS::FMulAdd( pos + this->GetSourceValue( mOffset[offsetIdx++], seed, pos... ), float32v( mMultiplier[multiplierIdx++]), r )), ...);
+        return r;
     }
 };
 
@@ -104,9 +105,13 @@ class FastSIMD::DispatchClass<FastNoise::DistanceToPoint, SIMD> final : public v
     template<typename... P>
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        size_t pointIdx = 0;
+        [this, seed] ( P&... out, std::remove_reference_t<P>... pos )
+        {
+            size_t pointIdx = 0;
+            ((out -= this->GetSourceValue( mPoint[pointIdx++], seed, pos... )), ...);
+
+        }( pos..., pos... );
 
-        ((pos -= this->GetSourceValue( mPoint[pointIdx++], seed, pos... ) ), ...);
         return CalcDistance( mDistanceFunction, mMinkowskiP, seed, pos... );
     }
 };

From 3651da877a21f2d49a588ee8cbe57ea88b78e258 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 31 Aug 2024 23:26:16 +0100
Subject: [PATCH 111/139] C API doesn't new a smartnode instead it manually
 increments the node ref count

---
 include/FastNoise/Generators/Generator.h |  6 ++++++
 src/FastNoise/FastNoise_C.cpp            | 23 ++++++++++++++++++-----
 src/FastNoise/Metadata.cpp               |  6 +++---
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 248452ee..3139cad5 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -92,6 +92,11 @@ namespace FastNoise
         constexpr HybridSourceT( float f = 0.0f ) : constant( f ) { }
     };
 
+    namespace Internal
+    {
+        void BumpNodeRefences( const Generator*, bool );
+    }
+
     class FASTNOISE_API Generator
     {
     public:
@@ -159,6 +164,7 @@ namespace FastNoise
 
         template<typename>
         friend class SmartNode;
+        friend void Internal::BumpNodeRefences( const Generator*, bool );
     };
 
     using GeneratorSource = GeneratorSourceT<Generator>;
diff --git a/src/FastNoise/FastNoise_C.cpp b/src/FastNoise/FastNoise_C.cpp
index 7c28f0ea..527ea793 100644
--- a/src/FastNoise/FastNoise_C.cpp
+++ b/src/FastNoise/FastNoise_C.cpp
@@ -2,14 +2,22 @@
 #include <FastNoise/FastNoise.h>
 #include <FastNoise/Metadata.h>
 
+namespace FastNoise::Internal
+{
+    void BumpNodeRefences( const Generator* ptr, bool up ) // Adds 1 (up) or -1 (!up) to the node's reference count; called by the C API functions in this file
+    {
+        ptr->ReferencesFetchAdd( up ? 1 : -1 ); // NOTE(review): no null check, and nothing here destroys the node when the count drops to zero - confirm the release path cannot leak
+    }
+}
+
 FastNoise::Generator* ToGen( void* p )
 {
-    return static_cast<FastNoise::SmartNode<>*>( p )->get();
+    return static_cast<FastNoise::Generator*>( p );
 }
 
 const FastNoise::Generator* ToGen( const void* p )
 {
-    return static_cast<const FastNoise::SmartNode<>*>( p )->get();
+    return static_cast<const FastNoise::Generator*>( p );
 }
 
 void StoreMinMax( float* floatArray2, FastNoise::OutputMinMax minMax )
@@ -25,14 +33,16 @@ void* fnNewFromEncodedNodeTree( const char* encodedString, unsigned simdLevel )
 {
     if( FastNoise::SmartNode<> node = FastNoise::NewFromEncodedNodeTree( encodedString, (FastSIMD::FeatureSet)simdLevel ) )
     {
-        return new FastNoise::SmartNode<>( std::move( node ) );
+        FastNoise::Internal::BumpNodeRefences( node.get(), true );
+
+        return node.get();
     }
     return nullptr;
 }
 
 void fnDeleteNodeRef( void* node )
 {
-    delete static_cast<FastNoise::SmartNode<>*>( node );
+    FastNoise::Internal::BumpNodeRefences( ToGen( node ), false );
 }
 
 unsigned fnGetSIMDLevel( const void* node )
@@ -113,7 +123,10 @@ void* fnNewFromMetadata( int id, unsigned simdLevel )
 {
     if( const FastNoise::Metadata* metadata = FastNoise::Metadata::GetFromId( (FastNoise::Metadata::node_id)id ) )
     {
-        return new FastNoise::SmartNode<>( metadata->CreateNode( (FastSIMD::FeatureSet)simdLevel ) );
+        FastNoise::SmartNode<> node = metadata->CreateNode( (FastSIMD::FeatureSet)simdLevel );
+        FastNoise::Internal::BumpNodeRefences( node.get(), true );
+
+        return node.get();
     }
     return nullptr;
 }
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 4119d04e..8d52d946 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -25,11 +25,11 @@ constexpr size_t gMetadataVectorSize<const Metadata*> = 45;
 template<>
 constexpr size_t gMetadataVectorSize<const char*> = 83;
 template<>
-constexpr size_t gMetadataVectorSize<Metadata::MemberVariable> = 75;
+constexpr size_t gMetadataVectorSize<Metadata::MemberVariable> = 71;
 template<>
 constexpr size_t gMetadataVectorSize<Metadata::MemberNodeLookup> = 30;
 template<>
-constexpr size_t gMetadataVectorSize<Metadata::MemberHybrid> = 50;
+constexpr size_t gMetadataVectorSize<Metadata::MemberHybrid> = 54;
 
 template<typename T>
 static std::vector<T>& GetVectorStorage()
@@ -105,7 +105,7 @@ static void AddToDataStream( std::vector<uint8_t>& dataStream, T value )
     }
 }
 
-static void AddMemberLookupToDataStream( std::vector<uint8_t>& dataStream,  uint8_t type, uint8_t index )
+static void AddMemberLookupToDataStream( std::vector<uint8_t>& dataStream, uint8_t type, uint8_t index )
 {
     MemberLookup memberLookup;
     memberLookup.member.type = type;

From 73d22bb2dfb6a054b0196ae2e7ab945fb3eebdd7 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 19 Sep 2024 22:00:06 +0100
Subject: [PATCH 112/139] More node metadata descriptions

---
 include/FastNoise/Generators/Cellular.h       |  2 +-
 include/FastNoise/Generators/DomainWarp.h     | 95 ++++++++++---------
 .../FastNoise/Generators/DomainWarpFractal.h  | 19 +++-
 include/FastNoise/Generators/Fractal.h        |  2 +-
 4 files changed, 68 insertions(+), 50 deletions(-)

diff --git a/include/FastNoise/Generators/Cellular.h b/include/FastNoise/Generators/Cellular.h
index 153de1d7..fa1ad6a3 100644
--- a/include/FastNoise/Generators/Cellular.h
+++ b/include/FastNoise/Generators/Cellular.h
@@ -34,7 +34,7 @@ namespace FastNoise
                 DistanceFunction::EuclideanSquared, &Cellular<PARENT>::SetDistanceFunction, kDistanceFunction_Strings );
             this->AddHybridSource( { "Minkowski P", "Only affects Minkowski distance function\n1 = Manhattan\n2 = Euclidean" }, 1.5f, &Cellular<PARENT>::SetMinkowskiP, &Cellular<PARENT>::SetMinkowskiP );
 
-            this->AddHybridSource( { "Jitter Modifier", "Above 1.0 will cause grid artifacts" }, 1.0f, &Cellular<PARENT>::SetJitterModifier, &Cellular<PARENT>::SetJitterModifier );
+            this->AddHybridSource( { "Jitter Modifier", "Above 1.0 will cause grid artifacts\n0.0 will output a uniform grid" }, 1.0f, &Cellular<PARENT>::SetJitterModifier, &Cellular<PARENT>::SetJitterModifier );
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/DomainWarp.h b/include/FastNoise/Generators/DomainWarp.h
index 49080ec4..ea22f28e 100644
--- a/include/FastNoise/Generators/DomainWarp.h
+++ b/include/FastNoise/Generators/DomainWarp.h
@@ -1,43 +1,52 @@
-#pragma once
-#include "Generator.h"
-
-namespace FastNoise
-{
-    class DomainWarp : public virtual ScalableGenerator
-    {
-    public:
-        void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
-        void SetWarpAmplitude( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWarpAmplitude, gen ); }
-        void SetWarpAmplitude( float value ) { mWarpAmplitude = value; } 
-
-    protected:
-        GeneratorSource mSource;
-        HybridSource mWarpAmplitude = 50.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarp> : MetadataT<ScalableGenerator>
-    {
-        MetadataT()
-        {
-            groups.push_back( "Domain Warp" );
-            this->AddGeneratorSource( "Source", &DomainWarp::SetSource );
-            this->AddHybridSource( "Warp Amplitude", 50.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude, 0.1f );
-        }
-    };
-#endif
-
-    class DomainWarpGradient : public virtual DomainWarp
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarpGradient> : MetadataT<DomainWarp>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+namespace FastNoise
+{
+    class DomainWarp : public virtual ScalableGenerator // Common base of the domain warp nodes: holds the source node and the warp amplitude
+    {
+    public:
+        void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
+        void SetWarpAmplitude( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWarpAmplitude, gen ); } // Amplitude supplied by another node
+        void SetWarpAmplitude( float value ) { mWarpAmplitude = value; } // Constant amplitude
+
+    protected:
+        GeneratorSource mSource;
+        HybridSource mWarpAmplitude = 50.0f; // Same default as the 50.0f registered in MetadataT<DomainWarp>
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<DomainWarp> : MetadataT<ScalableGenerator> // Registers the members shared by every domain warp node
+    {
+        MetadataT()
+        {
+            groups.push_back( "Domain Warp" );
+            this->AddGeneratorSource( "Source", &DomainWarp::SetSource );
+            this->AddHybridSource( { "Warp Amplitude", "Maximum (euclidean) distance the position can be moved from its original location" }, 50.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude, 0.1f );
+        }
+    };
+#endif
+
+    class DomainWarpGradient : public virtual DomainWarp // Gradient based domain warp node; adds no members of its own here
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<DomainWarpGradient> : MetadataT<DomainWarp> // Inherits the DomainWarp members and only adds the node description
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT ()
+        {
+            description =
+                "Warps the input position using a simple uniform grid gradient, similar to perlin noise gradients.\n"
+                "The warped position is used when generating the attached source node\n"
+                "This node does not change the output value of the source node";
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/DomainWarpFractal.h b/include/FastNoise/Generators/DomainWarpFractal.h
index daeba68d..59c35521 100644
--- a/include/FastNoise/Generators/DomainWarpFractal.h
+++ b/include/FastNoise/Generators/DomainWarpFractal.h
@@ -16,10 +16,16 @@ namespace FastNoise
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
-        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source", false )
+        MetadataT() : MetadataT<Fractal<DomainWarp>>( { "Domain Warp Source", "Uses the algorithm from this domain warp node for each octave of the fractal" }, false )
         {
             groups.push_back( "Domain Warp" );
             groups.push_back( "Fractal" );
+
+            description =
+                "The original input position is passed into the first domain warp octave\n"
+                "The warped output position from the previous octave is passed into\n"
+                "the next octave's input position and so on for each octave\n"
+                "The final position is used to generate the source node on the attached domain warp node";
         }
     };
 #endif
@@ -32,14 +38,17 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<DomainWarpFractalIndependant> : MetadataT<Fractal<DomainWarp>>
+    struct MetadataT<DomainWarpFractalIndependant> : MetadataT<DomainWarpFractalProgressive> // Inherits from DomainWarpFractalProgressive just to avoid duplicate code
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
-        MetadataT() : MetadataT<Fractal<DomainWarp>>( "Domain Warp Source", false )
+        MetadataT()
         {
-            groups.push_back( "Domain Warp" );
-            groups.push_back( "Fractal" );
+            description = // Describes how the independent fractal combines the warps of its octaves
+                "The original input position is passed into all domain warp octaves\n"
+                "The warped offset from all octaves is accumulated\n"
+                "and added to the original input position\n"
+                "This position is then used to generate the source node on the attached domain warp node";
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Fractal.h b/include/FastNoise/Generators/Fractal.h
index b8e30d57..6d767491 100644
--- a/include/FastNoise/Generators/Fractal.h
+++ b/include/FastNoise/Generators/Fractal.h
@@ -42,7 +42,7 @@ namespace FastNoise
     template<typename T>
     struct MetadataT<Fractal<T>> : MetadataT<Generator>
     {
-        MetadataT( const char* sourceName = "Source", bool addGroup = true )
+        MetadataT( NameDesc sourceName = "Source", bool addGroup = true )
         {
             if( addGroup )
             {

From 38e3e6aab6a8481a1bd4e5d75c690c390dd83f38 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 3 Dec 2024 21:27:57 +0000
Subject: [PATCH 113/139] Reorganised node groups, added modulus node

---
 include/FastNoise/Generators/Blends.h      | 661 +++++++++++----------
 include/FastNoise/Generators/Blends.inl    |  15 +
 include/FastNoise/Generators/Generator.inl |   2 +-
 include/FastNoise/Generators/Modifiers.h   |  12 +-
 src/CMakeLists.txt                         |   6 +-
 src/FastNoise/FastSIMD_Build.inl           |   2 +
 src/FastNoise/Metadata.cpp                 |  12 +-
 7 files changed, 381 insertions(+), 329 deletions(-)

diff --git a/include/FastNoise/Generators/Blends.h b/include/FastNoise/Generators/Blends.h
index 189b6099..a2a026c0 100644
--- a/include/FastNoise/Generators/Blends.h
+++ b/include/FastNoise/Generators/Blends.h
@@ -1,313 +1,348 @@
-#pragma once
-#include "Generator.h"
-
-#include <climits>
-
-namespace FastNoise
-{
-    class OperatorSourceLHS : public virtual Generator
-    {
-    public:
-        void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); }
-        void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); }
-        void SetRHS( float value ) { mRHS = value; }
-
-    protected:
-        GeneratorSource mLHS;
-        HybridSource mRHS = 0.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<OperatorSourceLHS> : MetadataT<Generator>
-    {
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddGeneratorSource( "LHS", &OperatorSourceLHS::SetLHS );
-            this->AddHybridSource( "RHS", 0.0f, &OperatorSourceLHS::SetRHS, &OperatorSourceLHS::SetRHS );
-        }
-    };
-#endif
-
-    class OperatorHybridLHS : public virtual Generator
-    {
-    public:
-        void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); }
-        void SetLHS( float value ) { mLHS = value; }
-        void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); }
-        void SetRHS( float value ) { mRHS = value; }
-
-    protected:
-        HybridSource mLHS = 0.0f;
-        HybridSource mRHS = 0.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<OperatorHybridLHS> : MetadataT<Generator>
-    {
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddHybridSource( "LHS", 0.0f, &OperatorHybridLHS::SetLHS, &OperatorHybridLHS::SetLHS );
-            this->AddHybridSource( "RHS", 0.0f, &OperatorHybridLHS::SetRHS, &OperatorHybridLHS::SetRHS );
-        }
-    };
-#endif
-
-    class Add : public virtual OperatorSourceLHS
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Add> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-
-    class Subtract : public virtual OperatorHybridLHS
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Subtract> : MetadataT<OperatorHybridLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-
-    class Multiply : public virtual OperatorSourceLHS
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Multiply> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-
-    class Divide : public virtual OperatorHybridLHS
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Divide> : MetadataT<OperatorHybridLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-
-    class Min : public virtual OperatorSourceLHS
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Min> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-
-    class Max : public virtual OperatorSourceLHS
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Max> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-
-    class PowFloat : public virtual Generator
-    {
-    public:        const Metadata& GetMetadata() const override;
-
-        void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
-        void SetValue( float value ) { mValue = value; }
-        void SetPow( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPow, gen ); }
-        void SetPow( float value ) { mPow = value; }
-
-    protected:
-        HybridSource mValue = 2.0f;
-        HybridSource mPow = 2.0f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<PowFloat> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddHybridSource( "Value", 2.0f, &PowFloat::SetValue, &PowFloat::SetValue );
-            this->AddHybridSource( "Pow", 2.0f, &PowFloat::SetPow, &PowFloat::SetPow );
-
-            description = "Equivalent to std::powf( value, pow )";
-        }
-    };
-#endif
-
-    class PowInt : public virtual Generator
-    {
-    public:        const Metadata& GetMetadata() const override;
-
-        void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
-        void SetPow( int value ) { mPow = value; }
-
-    protected:
-        GeneratorSource mValue;
-        int mPow = 2;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<PowInt> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddGeneratorSource( "Value", &PowInt::SetValue );
-            this->AddVariable( "Pow", 2, &PowInt::SetPow, 2 );
-
-            description = "Faster than PowFloat node but only for int powers";
-        }
-    };
-#endif
-
-    class MinSmooth : public virtual OperatorSourceLHS
-    {
-    public:        const Metadata& GetMetadata() const override;
-
-        void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); }
-        void SetSmoothness( float value ) { mSmoothness = value; }
-
-    protected:
-        HybridSource mSmoothness = 0.1f;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<MinSmooth> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            this->AddHybridSource( "Smoothness", 0.1f, &MinSmooth::SetSmoothness, &MinSmooth::SetSmoothness );
-
-            description = 
-                "Quadratic Smooth Minimum\n"
-                "Smoothes the transition between the 2 inputs\n"
-                "For explanation see:\n"
-                "https://iquilezles.org/articles/smin/";
-        }
-    };
-#endif
-
-    class MaxSmooth : public virtual OperatorSourceLHS
-    {
-    public:        const Metadata& GetMetadata() const override;
-
-        void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); }
-        void SetSmoothness( float value ) { mSmoothness = value; }
-
-    protected:
-        HybridSource mSmoothness = 0.1f;  
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<MaxSmooth> : MetadataT<OperatorSourceLHS>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            this->AddHybridSource( "Smoothness", 0.1f, &MaxSmooth::SetSmoothness, &MaxSmooth::SetSmoothness );
-
-            description =
-                "Quadratic Smooth Maximum\n"
-                "Smoothes the transition between the 2 inputs\n"
-                "For explanation see:\n"
-                "https://iquilezles.org/articles/smin/";
-        }
-    };
-#endif
-
-    class Fade : public virtual Generator
-    {
-    public:
-        enum class Interpolation
-        {
-            Linear,
-            Hermite,
-            Quintic,
-        };
-        const Metadata& GetMetadata() const override;
-        void SetA( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mA, gen ); }
-        void SetB( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mB, gen ); }
-
-        void SetFade( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFade, gen ); }
-        void SetFade( float value ) { mFade = value; }
-
-        void SetFadeMin( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFadeMin, gen ); }
-        void SetFadeMin( float value ) { mFadeMin = value; }
-
-        void SetFadeMax( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFadeMax, gen ); }
-        void SetFadeMax( float value ) { mFadeMax = value; }
-
-        void SetInterpolation( Interpolation interpolation ) { mInterpolation = interpolation; }
-
-    protected:
-        GeneratorSource mA;
-        GeneratorSource mB;
-        HybridSource mFade = 0;
-        HybridSource mFadeMin = -1.f;
-        HybridSource mFadeMax = 1.f;
-        Interpolation mInterpolation = Interpolation::Linear;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<Fade> : MetadataT<Generator>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Blends" );
-            this->AddGeneratorSource( { "A", "From" }, &Fade::SetA );
-            this->AddGeneratorSource( { "B", "To" }, &Fade::SetB );
-            this->AddHybridSource( "Fade", 0, &Fade::SetFade, &Fade::SetFade );
-            this->AddHybridSource( "Fade Min", -1.f, &Fade::SetFadeMin, &Fade::SetFadeMin );
-            this->AddHybridSource( "Fade Max", 1.f, &Fade::SetFadeMax, &Fade::SetFadeMax );
-            this->AddVariableEnum( { "Interpolation", "Easing function" }, Fade::Interpolation::Linear, &Fade::SetInterpolation, "Linear", "Hermite", "Quintic" );            
-
-            description =
-                "Output fades between inputs A and B\n"
-                "Fade Min = 100% A\n"
-                "Fade Max = 100% B";
-        }
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+
+#include <climits>
+
+namespace FastNoise
+{
+    class OperatorSourceLHS : public virtual Generator // Shared base for two-input nodes: LHS is always a node, RHS is a node or a constant
+    {
+    public:
+        void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); } // LHS can only be driven by another node
+        void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); } // RHS driven by another node
+        void SetRHS( float value ) { mRHS = value; } // RHS as a constant value
+
+    protected:
+        GeneratorSource mLHS;
+        HybridSource mRHS = 0.0f; // Constant used until a node or another value is supplied
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<OperatorSourceLHS> : MetadataT<Generator>
+    {
+        MetadataT( const char* group = "Blends" ) // group: node group label, derived metadata may pass a different one (e.g. "Operators")
+        {
+            groups.push_back( group );
+            this->AddGeneratorSource( "LHS", &OperatorSourceLHS::SetLHS );
+            this->AddHybridSource( "RHS", 0.0f, &OperatorSourceLHS::SetRHS, &OperatorSourceLHS::SetRHS ); // 0.0f matches the mRHS member default
+        }
+    };
+#endif
+
+    class OperatorHybridLHS : public virtual Generator // Shared base for two-input nodes where both LHS and RHS accept a node or a constant
+    {
+    public:
+        void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); } // LHS driven by another node
+        void SetLHS( float value ) { mLHS = value; } // LHS as a constant value
+        void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); } // RHS driven by another node
+        void SetRHS( float value ) { mRHS = value; } // RHS as a constant value
+
+    protected:
+        HybridSource mLHS = 0.0f; // Both sides start as the constant 0
+        HybridSource mRHS = 0.0f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<OperatorHybridLHS> : MetadataT<Generator>
+    {
+        MetadataT( const char* group = "Blends" ) // group: node group label, derived metadata may pass a different one (e.g. "Operators")
+        {
+            groups.push_back( group );
+            this->AddHybridSource( "LHS", 0.0f, &OperatorHybridLHS::SetLHS, &OperatorHybridLHS::SetLHS ); // Defaults match the member initialisers
+            this->AddHybridSource( "RHS", 0.0f, &OperatorHybridLHS::SetRHS, &OperatorHybridLHS::SetRHS );
+        }
+    };
+#endif
+
+    class Add : public virtual OperatorSourceLHS // Inputs (node LHS, hybrid RHS) are inherited from OperatorSourceLHS
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Add> : MetadataT<OperatorSourceLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT() : MetadataT<OperatorSourceLHS>( "Operators" ) {} // Listed under "Operators" rather than the base default "Blends"
+    };
+#endif
+
+    class Subtract : public virtual OperatorHybridLHS // Inputs (hybrid LHS and RHS) are inherited from OperatorHybridLHS
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Subtract> : MetadataT<OperatorHybridLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT() : MetadataT<OperatorHybridLHS>( "Operators" ) {} // Listed under "Operators" rather than the base default "Blends"
+    };
+#endif
+
+    class Multiply : public virtual OperatorSourceLHS // Inputs (node LHS, hybrid RHS) are inherited from OperatorSourceLHS
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Multiply> : MetadataT<OperatorSourceLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT() : MetadataT<OperatorSourceLHS>( "Operators" ) {} // Listed under "Operators" rather than the base default "Blends"
+    };
+#endif
+
+    class Divide : public virtual OperatorHybridLHS // Inputs (hybrid LHS and RHS) are inherited from OperatorHybridLHS
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Divide> : MetadataT<OperatorHybridLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT() : MetadataT<OperatorHybridLHS>( "Operators" ) {} // Listed under "Operators" rather than the base default "Blends"
+    };
+#endif
+
+    class Modulus : public virtual OperatorHybridLHS // New node; LHS and RHS are inherited hybrids that both default to 0.0f
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Modulus> : MetadataT<OperatorHybridLHS>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT() : MetadataT<OperatorHybridLHS>( "Operators" ) {} // Listed under "Operators" rather than the base default "Blends"
+    };
+#endif
+
+    class Min : public virtual OperatorSourceLHS // Inputs (node LHS, hybrid RHS) are inherited from OperatorSourceLHS
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Min> : MetadataT<OperatorSourceLHS> // No constructor: base default group "Blends" applies
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class Max : public virtual OperatorSourceLHS // Inputs (node LHS, hybrid RHS) are inherited from OperatorSourceLHS
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Max> : MetadataT<OperatorSourceLHS> // No constructor: base default group "Blends" applies
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+    };
+#endif
+
+    class PowFloat : public virtual Generator // Power node where both the base value and the exponent accept a node or a constant
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); } // Base value driven by another node
+        void SetValue( float value ) { mValue = value; } // Base value as a constant
+        void SetPow( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPow, gen ); } // Exponent driven by another node
+        void SetPow( float value ) { mPow = value; } // Exponent as a constant
+
+    protected:
+        HybridSource mValue = 2.0f;
+        HybridSource mPow = 2.0f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<PowFloat> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Blends" );
+            this->AddHybridSource( "Value", 2.0f, &PowFloat::SetValue, &PowFloat::SetValue ); // Defaults match the member initialisers
+            this->AddHybridSource( "Pow", 2.0f, &PowFloat::SetPow, &PowFloat::SetPow );
+
+            description = "Equivalent to std::powf( value, pow )";
+        }
+    };
+#endif
+
+    class PowInt : public virtual Generator // Power node with a node-driven base value and a fixed integer exponent
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); } // Base value can only come from another node
+        void SetPow( int value ) { mPow = value; } // Stored as given; no range check happens here
+
+    protected:
+        GeneratorSource mValue;
+        int mPow = 2;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<PowInt> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Blends" );
+            this->AddGeneratorSource( "Value", &PowInt::SetValue );
+            this->AddVariable( "Pow", 2, &PowInt::SetPow, 2 ); // NOTE(review): the trailing 2 is presumably a minimum bound - confirm against AddVariable
+
+            description = "Faster than PowFloat node but only for int powers";
+        }
+    };
+#endif
+
+    class MinSmooth : public virtual OperatorSourceLHS // Adds a hybrid "smoothness" input on top of the inherited LHS/RHS
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); } // Smoothness driven by another node
+        void SetSmoothness( float value ) { mSmoothness = value; } // Smoothness as a constant
+
+    protected:
+        HybridSource mSmoothness = 0.1f;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<MinSmooth> : MetadataT<OperatorSourceLHS> // Base is default-constructed, so the group stays "Blends"
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            this->AddHybridSource( "Smoothness", 0.1f, &MinSmooth::SetSmoothness, &MinSmooth::SetSmoothness ); // 0.1f matches the member default
+
+            description = 
+                "Quadratic Smooth Minimum\n"
+                "Smoothes the transition between the 2 inputs\n"
+                "For explanation see:\n"
+                "https://iquilezles.org/articles/smin/";
+        }
+    };
+#endif
+
+    class MaxSmooth : public virtual OperatorSourceLHS // Adds a hybrid "smoothness" input on top of the inherited LHS/RHS
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+
+        void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); } // Smoothness driven by another node
+        void SetSmoothness( float value ) { mSmoothness = value; } // Smoothness as a constant
+
+    protected:
+        HybridSource mSmoothness = 0.1f;  // Same default as MinSmooth
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<MaxSmooth> : MetadataT<OperatorSourceLHS> // Base is default-constructed, so the group stays "Blends"
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            this->AddHybridSource( "Smoothness", 0.1f, &MaxSmooth::SetSmoothness, &MaxSmooth::SetSmoothness ); // 0.1f matches the member default
+
+            description =
+                "Quadratic Smooth Maximum\n"
+                "Smoothes the transition between the 2 inputs\n"
+                "For explanation see:\n"
+                "https://iquilezles.org/articles/smin/";
+        }
+    };
+#endif
+
+    class Fade : public virtual Generator // Blends between two node inputs (A and B) using a fade value and a selectable easing
+    {
+    public:
+        enum class Interpolation // Easing options exposed to metadata as "Linear", "Hermite", "Quintic"
+        {
+            Linear,
+            Hermite,
+            Quintic,
+        };
+
+        const Metadata& GetMetadata() const override;
+        void SetA( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mA, gen ); } // "From" input, node only
+        void SetB( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mB, gen ); } // "To" input, node only
+
+        void SetFade( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFade, gen ); } // Fade amount driven by another node
+        void SetFade( float value ) { mFade = value; } // Fade amount as a constant
+
+        void SetFadeMin( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFadeMin, gen ); }
+        void SetFadeMin( float value ) { mFadeMin = value; } // Fade value that yields 100% A (per the node description)
+
+        void SetFadeMax( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFadeMax, gen ); }
+        void SetFadeMax( float value ) { mFadeMax = value; } // Fade value that yields 100% B (per the node description)
+
+        void SetInterpolation( Interpolation interpolation ) { mInterpolation = interpolation; }
+
+    protected:
+        GeneratorSource mA;
+        GeneratorSource mB;
+        HybridSource mFade = 0; // Sits midway between the default min (-1) and max (1)
+        HybridSource mFadeMin = -1.f;
+        HybridSource mFadeMax = 1.f;
+        Interpolation mInterpolation = Interpolation::Linear;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<Fade> : MetadataT<Generator>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Blends" );
+            this->AddGeneratorSource( { "A", "From" }, &Fade::SetA ); // { name, description } pairs
+            this->AddGeneratorSource( { "B", "To" }, &Fade::SetB );
+            this->AddHybridSource( "Fade", 0, &Fade::SetFade, &Fade::SetFade ); // Defaults below match the member initialisers
+            this->AddHybridSource( "Fade Min", -1.f, &Fade::SetFadeMin, &Fade::SetFadeMin );
+            this->AddHybridSource( "Fade Max", 1.f, &Fade::SetFadeMax, &Fade::SetFadeMax );
+            this->AddVariableEnum( { "Interpolation", "Easing function" }, Fade::Interpolation::Linear, &Fade::SetInterpolation, "Linear", "Hermite", "Quintic" );            // Names listed in enum declaration order
+
+            description =
+                "Output fades between inputs A and B\n"
+                "Fade Min = 100% A\n"
+                "Fade Max = 100% B";
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/Blends.inl b/include/FastNoise/Generators/Blends.inl
index 7d4c0af9..821e13a8 100644
--- a/include/FastNoise/Generators/Blends.inl
+++ b/include/FastNoise/Generators/Blends.inl
@@ -48,6 +48,21 @@ class FastSIMD::DispatchClass<FastNoise::Divide, SIMD> final : public virtual Fa
     }
 };
 
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::Modulus, SIMD> final : public virtual FastNoise::Modulus, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD> // SIMD implementation of the Modulus node
+{
+    FASTNOISE_IMPL_GEN_T;
+
+    template<typename... P>
+    FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const // Evaluates both inputs at the same seed/position and combines them
+    {
+        float32v a = this->GetSourceValue( mLHS, seed, pos... ); // Dividend
+        float32v b = this->GetSourceValue( mRHS, seed, pos... ); // Divisor; member default is 0.0f
+
+        return FS::Modulus( a, b ); // NOTE(review): with both inputs left at their 0.0f defaults this is a modulus by zero - confirm the FS::Modulus result for that case
+    }
+};
+
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::PowFloat, SIMD> final : public virtual FastNoise::PowFloat, public FastSIMD::DispatchClass<FastNoise::Generator, SIMD>
 {
diff --git a/include/FastNoise/Generators/Generator.inl b/include/FastNoise/Generators/Generator.inl
index adc32bc4..86f28a9a 100644
--- a/include/FastNoise/Generators/Generator.inl
+++ b/include/FastNoise/Generators/Generator.inl
@@ -161,7 +161,7 @@ public:
 
             index += int32v::ElementCount;
             xIdx += int32v( int32v::ElementCount );
-            
+
             AxisReset<false>( xIdx, yIdx, xMax, xSizeV, xSize );
             AxisReset<false>( yIdx, zIdx, yMax, ySizeV, xSize * ySize );
         }
diff --git a/include/FastNoise/Generators/Modifiers.h b/include/FastNoise/Generators/Modifiers.h
index f8f8179e..9e3df51c 100644
--- a/include/FastNoise/Generators/Modifiers.h
+++ b/include/FastNoise/Generators/Modifiers.h
@@ -24,7 +24,7 @@ namespace FastNoise
 
         MetadataT()
         {
-            groups.push_back( "Modifiers" );
+            groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &DomainScale::SetSource );
             this->AddVariable( "Scaling", 1.0f, &DomainScale::SetScaling );
         }
@@ -60,7 +60,7 @@ namespace FastNoise
 
         MetadataT()
         {
-            groups.push_back( "Modifiers" );
+            groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &DomainOffset::SetSource );
             this->AddPerDimensionHybridSource( "Offset", 0.0f, []( DomainOffset* p ) { return std::ref( p->mOffset ); }, 0.25f );
         }
@@ -121,7 +121,7 @@ namespace FastNoise
 
         MetadataT()
         {
-            groups.push_back( "Modifiers" );
+            groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &DomainRotate::SetSource );
             this->AddVariable( "Yaw", 0.0f, &DomainRotate::SetYaw );
             this->AddVariable( "Pitch", 0.0f, &DomainRotate::SetPitch );
@@ -309,7 +309,7 @@ namespace FastNoise
 
         MetadataT()
         {
-            groups.push_back( "Modifiers" );
+            groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &DomainAxisScale::SetSource );
             this->AddPerDimensionVariable( "Scaling", 1.0f, []( DomainAxisScale* p ) { return std::ref( p->mScale ); } );
         }
@@ -338,7 +338,7 @@ namespace FastNoise
 
         MetadataT()
         {
-            groups.push_back( "Modifiers" );
+            groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &AddDimension::SetSource );
             this->AddHybridSource( "New Dimension Position", 0.0f, &AddDimension::SetNewDimensionPosition, &AddDimension::SetNewDimensionPosition );
         }
@@ -366,7 +366,7 @@ namespace FastNoise
 
         MetadataT()
         {
-            groups.push_back( "Modifiers" );
+            groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &RemoveDimension::SetSource );
             this->AddVariableEnum( "Remove Dimension", Dim::Y, &RemoveDimension::SetRemoveDimension, kDim_Strings );
         }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5bce5301..f4db5094 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,7 +2,7 @@
 CPMAddPackage(
     NAME FastSIMD
     GITHUB_REPOSITORY Auburn/FastSIMD
-    GIT_TAG 868e77272ac0681a6b2936c1ef9087bb3ec8c153
+    GIT_TAG 2417e5b938d7e0aa4f4293d11682db0582a83ce8
 )
 
 set(install_targets ${install_targets}
@@ -69,13 +69,13 @@ endif()
 target_link_libraries(FastNoise PUBLIC FastSIMD FastSIMD_FastNoise)
 
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
-    target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /wd4251 /d2vzeroupper-)
+    target_compile_options(FastSIMD_FastNoise PRIVATE /GL- /GS- /wd4251 /permissive- /d2vzeroupper-)
     
 elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
     if(MSVC)
         target_compile_options(FastSIMD_FastNoise PRIVATE /GS-)
     else()
-        target_compile_options(FastSIMD_FastNoise PRIVATE -fno-stack-protector)        
+        target_compile_options(FastSIMD_FastNoise PRIVATE -fno-stack-protector -Wno-nan-infinity-disabled)
     endif()
 
     if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
diff --git a/src/FastNoise/FastSIMD_Build.inl b/src/FastNoise/FastSIMD_Build.inl
index ec7a8a2d..97330116 100644
--- a/src/FastNoise/FastSIMD_Build.inl
+++ b/src/FastNoise/FastSIMD_Build.inl
@@ -138,3 +138,5 @@ FASTNOISE_REGISTER_NODE( Remap );
 FASTNOISE_REGISTER_NODE( Terrace );
 FASTNOISE_REGISTER_NODE( AddDimension );
 FASTNOISE_REGISTER_NODE( RemoveDimension );
+
+FASTNOISE_REGISTER_NODE( Modulus );
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 8d52d946..87db80b8 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -21,15 +21,15 @@ constexpr static std::nullptr_t gMetadataVectorSize = nullptr; // Invalid
 // Setting these values avoids needless vector resizing and oversizing on startup
 // Sadly there is no way to automate this as they fill up as part of static init
 template<>
-constexpr size_t gMetadataVectorSize<const Metadata*> = 45;
+constexpr size_t gMetadataVectorSize<const Metadata*> = 46;
 template<>
-constexpr size_t gMetadataVectorSize<const char*> = 83;
+constexpr size_t gMetadataVectorSize<const char*> = 84;
 template<>
 constexpr size_t gMetadataVectorSize<Metadata::MemberVariable> = 71;
 template<>
 constexpr size_t gMetadataVectorSize<Metadata::MemberNodeLookup> = 30;
 template<>
-constexpr size_t gMetadataVectorSize<Metadata::MemberHybrid> = 54;
+constexpr size_t gMetadataVectorSize<Metadata::MemberHybrid> = 56;
 
 template<typename T>
 static std::vector<T>& GetVectorStorage()
@@ -171,7 +171,7 @@ static bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vect
     {
         if( nodeData->variables[i].i != metadata->memberVariables[i].valueDefault.i )
         {
-            AddMemberLookupToDataStream( dataStream, 0, i );
+            AddMemberLookupToDataStream( dataStream, 0, (uint8_t)i );
 
             AddToDataStream( dataStream, nodeData->variables[i].i );
         }
@@ -211,7 +211,7 @@ static bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vect
         {
             if( nodeData->hybrids[i].second != metadata->memberHybrids[i].valueDefault )
             {
-                AddMemberLookupToDataStream( dataStream, 2, i );
+                AddMemberLookupToDataStream( dataStream, 2, (uint8_t)i );
 
                 Metadata::MemberVariable::ValueUnion v = nodeData->hybrids[i].second;
 
@@ -233,7 +233,7 @@ static bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vect
                 }
             }
 
-            AddMemberLookupToDataStream( dataStream, 3, i );
+            AddMemberLookupToDataStream( dataStream, 3, (uint8_t)i );
 
             if( !SerialiseNodeDataInternal( nodeData->hybrids[i].first, fixUp, dataStream, referenceIds, dependencies ) )
             {

From ca97e96fb5344b2e68fed743bdaf9af32547727f Mon Sep 17 00:00:00 2001
From: KdotJPG <KdotJPG@users.noreply.github.com>
Date: Tue, 3 Dec 2024 17:28:08 -0500
Subject: [PATCH 114/139] Simplex Rework + Domain Warp (#135)

---
 .../FastNoise/Generators/DomainWarpSimplex.h  |   60 +-
 .../Generators/DomainWarpSimplex.inl          | 1216 +++++++++++++--
 include/FastNoise/Generators/Generator.h      |   24 +
 include/FastNoise/Generators/Perlin.inl       |   28 +-
 include/FastNoise/Generators/Simplex.h        |   54 +-
 include/FastNoise/Generators/Simplex.inl      | 1363 +++++++++++------
 include/FastNoise/Generators/Utils.inl        |  936 +++++++++--
 src/FastNoise/FastSIMD_Build.inl              |    4 +-
 8 files changed, 2866 insertions(+), 819 deletions(-)

diff --git a/include/FastNoise/Generators/DomainWarpSimplex.h b/include/FastNoise/Generators/DomainWarpSimplex.h
index 5c1f72f2..6f1dfb8f 100644
--- a/include/FastNoise/Generators/DomainWarpSimplex.h
+++ b/include/FastNoise/Generators/DomainWarpSimplex.h
@@ -1,19 +1,41 @@
-#pragma once
-#include "Generator.h"
-#include "DomainWarp.h"
-
-namespace FastNoise
-{
-    class DomainWarpOpenSimplex : public virtual DomainWarp
-    {
-    public:        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<DomainWarpOpenSimplex> : MetadataT<DomainWarp>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-    };
-#endif
-}
+#pragma once
+#include "Generator.h"
+#include "DomainWarp.h"
+
+namespace FastNoise
+{
+    // Simplex-based domain warp node; stores the two settings the SIMD Warp() overloads switch on.
+    class DomainWarpSimplex : public virtual DomainWarp
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+        void SetType( SimplexType value ) { this->mType = value; }
+        void SetVectorizationScheme( VectorizationScheme value ) { this->mVectorizationScheme = value; }
+
+    protected:
+        SimplexType mType{ SimplexType::Standard };
+        VectorizationScheme mVectorizationScheme{ VectorizationScheme::OrthogonalGradientMatrix };
+    };
+
+#ifdef FASTNOISE_METADATA
+    // Metadata for DomainWarpSimplex: derives from the DomainWarp metadata and registers the
+    // two enum variables below, each with its default value, setter and display-name table.
+    template<>
+    struct MetadataT<DomainWarpSimplex> : MetadataT<DomainWarp>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            // Simplex variant, bound to the DomainWarpSimplex::SetType setter.
+            this->AddVariableEnum( { "Type", "Noise character style" },
+                SimplexType::Standard, &DomainWarpSimplex::SetType, kSimplexType_Strings );
+
+            // Vector construction, bound to the DomainWarpSimplex::SetVectorizationScheme setter.
+            this->AddVariableEnum( { "Vectorization Scheme", "Construction used by the noise to produce a vector output" },
+                VectorizationScheme::OrthogonalGradientMatrix, &DomainWarpSimplex::SetVectorizationScheme,
+                kVectorizationScheme_Strings );
+        }
+    };
+#endif
+}
diff --git a/include/FastNoise/Generators/DomainWarpSimplex.inl b/include/FastNoise/Generators/DomainWarpSimplex.inl
index 8e6e3612..d40c2d08 100644
--- a/include/FastNoise/Generators/DomainWarpSimplex.inl
+++ b/include/FastNoise/Generators/DomainWarpSimplex.inl
@@ -2,176 +2,1084 @@
 #include "Utils.inl"
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::DomainWarpOpenSimplex, SIMD> final : public virtual FastNoise::DomainWarpOpenSimplex, public FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpSimplex, SIMD> final : public virtual FastNoise::DomainWarpSimplex, public FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD>
 {
 public:
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
     {
-        float32v xs = FS::Floor( x );
-        float32v ys = FS::Floor( y );
+        // 2D entry point: forwards to the kernel instantiated for the configured simplex type and
+        // vectorization scheme. Every path returns; member values outside the enumerators fall back
+        // to Standard / OrthogonalGradientMatrix instead of running off the end of the function.
+        const bool outerProduct = mVectorizationScheme == VectorizationScheme::GradientOuterProduct;
+
+        if( mType == SimplexType::Smooth )
+        {
+            if( outerProduct )
+                return Warp_Smooth<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, xOut, yOut );
+            return Warp_Smooth<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, xOut, yOut );
+        }
+
+        // SimplexType::Standard, which is also the fallback.
+        if( outerProduct )
+            return Warp_Standard<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, xOut, yOut );
+        return Warp_Standard<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, xOut, yOut );
+    }
+
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
+    {
+        // 3D entry point: forwards to the kernel instantiated for the configured simplex type and
+        // vectorization scheme. Every path returns; member values outside the enumerators fall back
+        // to Standard / OrthogonalGradientMatrix instead of running off the end of the function.
+        const bool outerProduct = mVectorizationScheme == VectorizationScheme::GradientOuterProduct;
+
+        if( mType == SimplexType::Smooth )
+        {
+            if( outerProduct )
+                return Warp_Smooth<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
+            return Warp_Smooth<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
+        }
+
+        // SimplexType::Standard, which is also the fallback.
+        if( outerProduct )
+            return Warp_Standard<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
+        return Warp_Standard<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
+    }
+
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
+    {
+        // 4D entry point: forwards to the kernel instantiated for the configured simplex type and
+        // vectorization scheme. Every path returns; member values outside the enumerators fall back
+        // to Standard / OrthogonalGradientMatrix instead of running off the end of the function.
+        const bool outerProduct = mVectorizationScheme == VectorizationScheme::GradientOuterProduct;
+
+        if( mType == SimplexType::Smooth )
+        {
+            if( outerProduct )
+                return Warp_Smooth<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
+            return Warp_Smooth<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
+        }
+
+        // SimplexType::Standard, which is also the fallback.
+        if( outerProduct )
+            return Warp_Standard<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
+        return Warp_Standard<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
+    }
 
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
-        int32v x1 = x0 + int32v( Primes::X );
-        int32v y1 = y0 + int32v( Primes::Y );
+protected:
+    // 2D standard simplex warp: sums the vector contributions of the three simplex vertices around
+    // ( x, y ), adds the sum scaled by warpAmp * kBounding to xOut / yOut and returns its scaled length.
+    template<VectorizationScheme Scheme>
+    float32v FS_VECTORCALL Warp_Standard( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const
+    {
+        constexpr double kRoot3 = 1.7320508075688772935274463415059;
+        constexpr double kSkew2 = 1.0 / ( kRoot3 + 1.0 );
+        constexpr double kUnskew2 = -1.0 / ( kRoot3 + 3.0 );
+        constexpr double kFalloffRadiusSquared = 0.5;
+
+        float32v skewDelta = float32v( kSkew2 ) * ( x + y );
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed );
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase;
+        float32v dySkewed = ySkewed - ySkewedBase;
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X );
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
 
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
+        mask32v xGreaterEqualY = dxSkewed >= dySkewed; // decides whether the middle vertex steps along X or along Y
 
-    #define GRADIENT_COORD( _x, _y )\
-        int32v hash##_x##_y = HashPrimesHB(seed, x##_x, y##_y );\
-        float32v x##_x##_y = FS::Convert<float>( hash##_x##_y & int32v( 0xffff ) );\
-        float32v y##_x##_y = FS::Convert<float>( (hash##_x##_y >> 16) & int32v( 0xffff ) );
+        float32v unskewDelta = float32v( kUnskew2 ) * ( dxSkewed + dySkewed );
+        float32v dx0 = dxSkewed + unskewDelta;
+        float32v dy0 = dySkewed + unskewDelta;
 
-        GRADIENT_COORD( 0, 0 );
-        GRADIENT_COORD( 1, 0 );
-        GRADIENT_COORD( 0, 1 );
-        GRADIENT_COORD( 1, 1 );
+        float32v dx1 = FS::MaskedIncrement( ~xGreaterEqualY, dx0 ) - float32v( kUnskew2 + 1 );
+        float32v dy1 = FS::MaskedIncrement( xGreaterEqualY, dy0 ) - float32v( kUnskew2 + 1 );
+        float32v dx2 = dx0 - float32v( kUnskew2 * 2 + 1 );
+        float32v dy2 = dy0 - float32v( kUnskew2 * 2 + 1 );
 
-    #undef GRADIENT_COORD
+        float32v falloff0 = FS::FNMulAdd( dx0, dx0, FS::FNMulAdd( dy0, dy0, float32v( kFalloffRadiusSquared ) ) );
+        float32v falloff1 = FS::FNMulAdd( dx1, dx1, FS::FNMulAdd( dy1, dy1, float32v( kFalloffRadiusSquared ) ) );
+        float32v falloff2 = falloff0 + FS::FMulAdd( unskewDelta,
+            float32v( -4.0 * ( kRoot3 + 2.0 ) / ( kRoot3 + 3.0 ) ),
+            float32v( -2.0 / 3.0 ) ); // closed form of kFalloffRadiusSquared - dx2 * dx2 - dy2 * dy2
 
-        float32v normalise = float32v( 1.0f / (0xffff / 2.0f) );
+        falloff0 = FS::Max( falloff0, float32v( 0 ) );
+        falloff1 = FS::Max( falloff1, float32v( 0 ) );
+        falloff2 = FS::Max( falloff2, float32v( 0 ) );
 
-        float32v xWarp = (Lerp( Lerp( x00, x10, xs ), Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( Lerp( y00, y10, xs ), Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f )) * normalise;
+        falloff0 *= falloff0; falloff0 *= falloff0; // clamped falloff raised to the fourth power
+        falloff1 *= falloff1; falloff1 *= falloff1;
+        falloff2 *= falloff2; falloff2 *= falloff2;
 
-        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
-        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
+        float32v valueX( 0 );
+        float32v valueY( 0 );
 
-        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, yWarp * yWarp );
+        ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase ), dx0, dy0, falloff0, valueX, valueY );
+        ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, FS::MaskedAdd( xGreaterEqualY, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( xGreaterEqualY, yPrimedBase, int32v( Primes::Y ) ) ), dx1, dy1, falloff1, valueX, valueY );
+        ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) ), dx2, dy2, falloff2, valueX, valueY );
 
-        return warpLengthSq * FS::InvSqrt( warpLengthSq );
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+            49.918426513671875 / 2.0 :
+            70.1480577066486;
+
+        warpAmp *= float32v( kBounding );
+        xOut = FS::FMulAdd( valueX, warpAmp, xOut );
+        yOut = FS::FMulAdd( valueY, warpAmp, yOut );
+        // lengthSq * InvSqrt( lengthSq ) is the vector length, assuming FS::InvSqrt( v ) is 1 / sqrt( v ).
+        float32v warpLengthSq = FS::FMulAdd( valueY, valueY, valueX * valueX );
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ) * warpAmp;
     }
-            
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const
+
+    // 3D standard simplex warp: sums the vector contributions of the four simplex vertices around
+    // ( x, y, z ), adds the sum scaled by warpAmp * kBounding to the outputs and returns its scaled length.
+    template<VectorizationScheme Scheme>
+    float32v FS_VECTORCALL Warp_Standard( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const
     {
-        float32v xs = FS::Floor( x );
-        float32v ys = FS::Floor( y );
-        float32v zs = FS::Floor( z );
-
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
-        int32v x1 = x0 + int32v( Primes::X );
-        int32v y1 = y0 + int32v( Primes::Y );
-        int32v z1 = z0 + int32v( Primes::Z );
-
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
-        zs = InterpHermite( z - zs );
-
-    #define GRADIENT_COORD( _x, _y, _z )\
-        int32v hash##_x##_y##_z = HashPrimesHB( seed, x##_x, y##_y, z##_z );\
-        float32v x##_x##_y##_z = FS::Convert<float>( hash##_x##_y##_z & int32v( 0x3ff ) );\
-        float32v y##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
-        float32v z##_x##_y##_z = FS::Convert<float>( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
-
-        GRADIENT_COORD( 0, 0, 0 );
-        GRADIENT_COORD( 1, 0, 0 );
-        GRADIENT_COORD( 0, 1, 0 );
-        GRADIENT_COORD( 1, 1, 0 );
-        GRADIENT_COORD( 0, 0, 1 );
-        GRADIENT_COORD( 1, 0, 1 );
-        GRADIENT_COORD( 0, 1, 1 );
-        GRADIENT_COORD( 1, 1, 1 );
-
-    #undef GRADIENT_COORD
-
-        float32v x0z = Lerp( Lerp( x000, x100, xs ), Lerp( x010, x110, xs ), ys );
-        float32v y0z = Lerp( Lerp( y000, y100, xs ), Lerp( y010, y110, xs ), ys );
-        float32v z0z = Lerp( Lerp( z000, z100, xs ), Lerp( z010, z110, xs ), ys );
-                   
-        float32v x1z = Lerp( Lerp( x001, x101, xs ), Lerp( x011, x111, xs ), ys );
-        float32v y1z = Lerp( Lerp( y001, y101, xs ), Lerp( y011, y111, xs ), ys );
-        float32v z1z = Lerp( Lerp( z001, z101, xs ), Lerp( z011, z111, xs ), ys );
-
-        float32v normalise = float32v( 1.0f / (0x3ff / 2.0f) );
-
-        float32v xWarp = (Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-        float32v zWarp = (Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f )) * normalise;
-
-        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
-        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
-        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
-
-        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, zWarp * zWarp ) );
-
-        return warpLengthSq * FS::InvSqrt( warpLengthSq );
+        constexpr double kSkew3 = 1.0 / 3.0;
+        constexpr double kReflectUnskew3 = -1.0 / 2.0;
+        constexpr double kFalloffRadiusSquared = 0.6;
+
+        float32v skewDelta = float32v( kSkew3 ) * ( x + y + z );
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v zSkewed = z + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed );
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v zSkewedBase = FS::Floor( zSkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase;
+        float32v dySkewed = ySkewed - ySkewedBase;
+        float32v dzSkewed = zSkewed - zSkewedBase;
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X );
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
+
+        mask32v xGreaterEqualY = dxSkewed >= dySkewed;
+        mask32v yGreaterEqualZ = dySkewed >= dzSkewed;
+        mask32v xGreaterEqualZ = dxSkewed >= dzSkewed;
+
+        float32v unskewDelta = float32v( kReflectUnskew3 ) * ( dxSkewed + dySkewed + dzSkewed );
+        float32v dx0 = dxSkewed + unskewDelta;
+        float32v dy0 = dySkewed + unskewDelta;
+        float32v dz0 = dzSkewed + unskewDelta;
+        // mask?1 selects the axis with the largest skewed offset; nMask?2 selects the smallest one.
+        mask32v maskX1 = xGreaterEqualY & xGreaterEqualZ;
+        mask32v maskY1 = FS::BitwiseAndNot( yGreaterEqualZ, xGreaterEqualY );
+        mask32v maskZ1 = FS::BitwiseAndNot( ~xGreaterEqualZ, yGreaterEqualZ );
+        mask32v nMaskX2 = ~( xGreaterEqualY | xGreaterEqualZ );
+        mask32v nMaskY2 = xGreaterEqualY & ~yGreaterEqualZ;
+        mask32v nMaskZ2 = xGreaterEqualZ & yGreaterEqualZ;
+
+        float32v dx3 = dx0 - float32v( kReflectUnskew3 * 3 + 1 );
+        float32v dy3 = dy0 - float32v( kReflectUnskew3 * 3 + 1 );
+        float32v dz3 = dz0 - float32v( kReflectUnskew3 * 3 + 1 );
+        float32v dx1 = FS::MaskedSub( maskX1, dx3, float32v( 1 ) ); // kReflectUnskew3 * 3 + 1 = kReflectUnskew3, so dx0 - kReflectUnskew3 = dx3
+        float32v dy1 = FS::MaskedSub( maskY1, dy3, float32v( 1 ) );
+        float32v dz1 = FS::MaskedSub( maskZ1, dz3, float32v( 1 ) );
+        float32v dx2 = FS::MaskedIncrement( nMaskX2, dx0 ); // kReflectUnskew3 * 2 + 1 = 0, so dx0 - ( kReflectUnskew3 * 2 + 1 ) = dx0
+        float32v dy2 = FS::MaskedIncrement( nMaskY2, dy0 );
+        float32v dz2 = FS::MaskedIncrement( nMaskZ2, dz0 );
+
+        float32v falloff0 = FS::FNMulAdd( dz0, dz0, FS::FNMulAdd( dy0, dy0, FS::FNMulAdd( dx0, dx0, float32v( kFalloffRadiusSquared ) ) ) );
+        float32v falloff1 = FS::FNMulAdd( dz1, dz1, FS::FNMulAdd( dy1, dy1, FS::FNMulAdd( dx1, dx1, float32v( kFalloffRadiusSquared ) ) ) );
+        float32v falloff2 = FS::FNMulAdd( dz2, dz2, FS::FNMulAdd( dy2, dy2, FS::FNMulAdd( dx2, dx2, float32v( kFalloffRadiusSquared ) ) ) );
+        float32v falloff3 = falloff0 - ( unskewDelta + float32v( 3.0 / 4.0 ) );
+
+        falloff0 = FS::Max( falloff0, float32v( 0 ) );
+        falloff1 = FS::Max( falloff1, float32v( 0 ) );
+        falloff2 = FS::Max( falloff2, float32v( 0 ) );
+        falloff3 = FS::Max( falloff3, float32v( 0 ) );
+        // Raise each clamped falloff to the fourth power.
+        falloff0 *= falloff0; falloff0 *= falloff0;
+        falloff1 *= falloff1; falloff1 *= falloff1;
+        falloff2 *= falloff2; falloff2 *= falloff2;
+        falloff3 *= falloff3; falloff3 *= falloff3;
+
+        float32v valueX( 0 );
+        float32v valueY( 0 );
+        float32v valueZ( 0 );
+
+        ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase ), dx0, dy0, dz0, falloff0, valueX, valueY, valueZ );
+        ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ), FS::MaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ) ), dx1, dy1, dz1, falloff1, valueX, valueY, valueZ );
+        ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::InvMaskedAdd( nMaskX2, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( nMaskY2, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( nMaskZ2, zPrimedBase, int32v( Primes::Z ) ) ), dx2, dy2, dz2, falloff2, valueX, valueY, valueZ );
+        ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ) ), dx3, dy3, dz3, falloff3, valueX, valueY, valueZ );
+
+        if constexpr( Scheme != VectorizationScheme::OrthogonalGradientMatrix )
+        {
+            // Match gradient orientation: skew then unskew composes to I - ( 2 / 3 ) * ones, a reflection that is its own inverse.
+            constexpr double kReflect3D = -2.0 / 3.0;
+            float32v valueTransformDelta = float32v( kReflect3D ) * ( valueX + valueY + valueZ );
+            valueX += valueTransformDelta;
+            valueY += valueTransformDelta;
+            valueZ += valueTransformDelta;
+        }
+
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+            32.69428253173828125 / 1.4142135623730951 :
+            16.281631889139874;
+
+        warpAmp *= float32v( kBounding );
+        xOut = FS::FMulAdd( valueX, warpAmp, xOut );
+        yOut = FS::FMulAdd( valueY, warpAmp, yOut );
+        zOut = FS::FMulAdd( valueZ, warpAmp, zOut );
+        // lengthSq * InvSqrt( lengthSq ) is the vector length, assuming FS::InvSqrt( v ) is 1 / sqrt( v ).
+        float32v warpLengthSq = FS::FMulAdd( valueZ, valueZ, FS::FMulAdd( valueY, valueY, valueX * valueX ) );
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ) * warpAmp;
     }
-            
-    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
+
+    // 4D standard simplex warp: sums the vector contributions of the five simplex vertices around
+    // ( x, y, z, w ), adds the sum scaled by warpAmp * kBounding to all four outputs and returns its scaled length.
+    template<VectorizationScheme Scheme>
+    float32v FS_VECTORCALL Warp_Standard( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
     {
-        float32v xs = FS::Floor( x );
-        float32v ys = FS::Floor( y );
-        float32v zs = FS::Floor( z );
-        float32v ws = FS::Floor( w );
-
-        int32v x0 = FS::Convert<int32_t>( xs ) * int32v( Primes::X );
-        int32v y0 = FS::Convert<int32_t>( ys ) * int32v( Primes::Y );
-        int32v z0 = FS::Convert<int32_t>( zs ) * int32v( Primes::Z );
-        int32v w0 = FS::Convert<int32_t>( ws ) * int32v( Primes::W );
-        int32v x1 = x0 + int32v( Primes::X );
-        int32v y1 = y0 + int32v( Primes::Y );
-        int32v z1 = z0 + int32v( Primes::Z );
-        int32v w1 = w0 + int32v( Primes::W );
-
-        xs = InterpHermite( x - xs );
-        ys = InterpHermite( y - ys );
-        zs = InterpHermite( z - zs );
-        ws = InterpHermite( w - ws );
-
-    #define GRADIENT_COORD( _x, _y, _z, _w )\
-        int32v hash##_x##_y##_z##_w = HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
-        float32v x##_x##_y##_z##_w = FS::Convert<float>( hash##_x##_y##_z##_w & int32v( 0xff ) );\
-        float32v y##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
-        float32v z##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
-        float32v w##_x##_y##_z##_w = FS::Convert<float>( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
-
-        GRADIENT_COORD( 0, 0, 0, 0 );
-        GRADIENT_COORD( 1, 0, 0, 0 );
-        GRADIENT_COORD( 0, 1, 0, 0 );
-        GRADIENT_COORD( 1, 1, 0, 0 );
-        GRADIENT_COORD( 0, 0, 1, 0 );
-        GRADIENT_COORD( 1, 0, 1, 0 );
-        GRADIENT_COORD( 0, 1, 1, 0 );
-        GRADIENT_COORD( 1, 1, 1, 0 );
-        GRADIENT_COORD( 0, 0, 0, 1 );
-        GRADIENT_COORD( 1, 0, 0, 1 );
-        GRADIENT_COORD( 0, 1, 0, 1 );
-        GRADIENT_COORD( 1, 1, 0, 1 );
-        GRADIENT_COORD( 0, 0, 1, 1 );
-        GRADIENT_COORD( 1, 0, 1, 1 );
-        GRADIENT_COORD( 0, 1, 1, 1 );
-        GRADIENT_COORD( 1, 1, 1, 1 );
-
-    #undef GRADIENT_COORD
-
-        float32v x0w = Lerp( Lerp( Lerp( x0000, x1000, xs ), Lerp( x0100, x1100, xs ), ys ), Lerp( Lerp( x0010, x1010, xs ), Lerp( x0110, x1110, xs ), ys ), zs );
-        float32v y0w = Lerp( Lerp( Lerp( y0000, y1000, xs ), Lerp( y0100, y1100, xs ), ys ), Lerp( Lerp( y0010, y1010, xs ), Lerp( y0110, y1110, xs ), ys ), zs );
-        float32v z0w = Lerp( Lerp( Lerp( z0000, z1000, xs ), Lerp( z0100, z1100, xs ), ys ), Lerp( Lerp( z0010, z1010, xs ), Lerp( z0110, z1110, xs ), ys ), zs );
-        float32v w0w = Lerp( Lerp( Lerp( w0000, w1000, xs ), Lerp( w0100, w1100, xs ), ys ), Lerp( Lerp( w0010, w1010, xs ), Lerp( w0110, w1110, xs ), ys ), zs );
-
-        float32v x1w = Lerp( Lerp( Lerp( x0001, x1001, xs ), Lerp( x0101, x1101, xs ), ys ), Lerp( Lerp( x0011, x1011, xs ), Lerp( x0111, x1111, xs ), ys ), zs );
-        float32v y1w = Lerp( Lerp( Lerp( y0001, y1001, xs ), Lerp( y0101, y1101, xs ), ys ), Lerp( Lerp( y0011, y1011, xs ), Lerp( y0111, y1111, xs ), ys ), zs );
-        float32v z1w = Lerp( Lerp( Lerp( z0001, z1001, xs ), Lerp( z0101, z1101, xs ), ys ), Lerp( Lerp( z0011, z1011, xs ), Lerp( z0111, z1111, xs ), ys ), zs );
-        float32v w1w = Lerp( Lerp( Lerp( w0001, w1001, xs ), Lerp( w0101, w1101, xs ), ys ), Lerp( Lerp( w0011, w1011, xs ), Lerp( w0111, w1111, xs ), ys ), zs );                        
-
-        float32v normalise = float32v( 1.0f / (0xff / 2.0f) );
-
-        float32v xWarp = (Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v yWarp = (Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v zWarp = (Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-        float32v wWarp = (Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f )) * normalise;
-
-        xOut = FS::FMulAdd( xWarp, warpAmp, xOut );
-        yOut = FS::FMulAdd( yWarp, warpAmp, yOut );
-        zOut = FS::FMulAdd( zWarp, warpAmp, zOut );
-        wOut = FS::FMulAdd( wWarp, warpAmp, wOut );
-
-        float32v warpLengthSq = FS::FMulAdd( xWarp, xWarp, FS::FMulAdd( yWarp, yWarp, FS::FMulAdd( zWarp, zWarp, wWarp * wWarp ) ) );
-
-        return warpLengthSq * FS::InvSqrt( warpLengthSq );
+        constexpr double kRoot5 = 2.2360679774997896964091736687313;
+        constexpr double kSkew4 = 1.0 / ( kRoot5 + 1.0 );
+        constexpr double kUnskew4 = -1.0 / ( kRoot5 + 5.0 );
+        constexpr double kFalloffRadiusSquared = 0.6;
+
+        float32v skewDelta = float32v( kSkew4 ) * ( x + y + z + w );
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v zSkewed = z + skewDelta;
+        float32v wSkewed = w + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed );
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v zSkewedBase = FS::Floor( zSkewed );
+        float32v wSkewedBase = FS::Floor( wSkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase;
+        float32v dySkewed = ySkewed - ySkewedBase;
+        float32v dzSkewed = zSkewed - zSkewedBase;
+        float32v dwSkewed = wSkewed - wSkewedBase;
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X );
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
+        int32v wPrimedBase = FS::Convert<int32_t>( wSkewedBase ) * int32v( Primes::W );
+
+        float32v unskewDelta = float32v( kUnskew4 ) * ( dxSkewed + dySkewed + dzSkewed + dwSkewed );
+        float32v dx0 = dxSkewed + unskewDelta;
+        float32v dy0 = dySkewed + unskewDelta;
+        float32v dz0 = dzSkewed + unskewDelta;
+        float32v dw0 = dwSkewed + unskewDelta;
+
+        int32v rankX( 0 );
+        int32v rankY( 0 );
+        int32v rankZ( 0 );
+        int32v rankW( 0 );
+        // Each of the six pairwise comparisons bumps exactly one rank, so the ranks are a permutation of 0..3.
+        mask32v xGreaterEqualY = dx0 >= dy0;
+        rankX = FS::MaskedIncrement( xGreaterEqualY, rankX );
+        rankY = FS::MaskedIncrement( ~xGreaterEqualY, rankY );
+        mask32v xGreaterEqualZ = dx0 >= dz0;
+        rankX = FS::MaskedIncrement( xGreaterEqualZ, rankX );
+        rankZ = FS::MaskedIncrement( ~xGreaterEqualZ, rankZ );
+        mask32v xGreaterEqualW = dx0 >= dw0;
+        rankX = FS::MaskedIncrement( xGreaterEqualW, rankX );
+        rankW = FS::MaskedIncrement( ~xGreaterEqualW, rankW );
+
+        mask32v yGreaterEqualZ = dy0 >= dz0;
+        rankY = FS::MaskedIncrement( yGreaterEqualZ, rankY );
+        rankZ = FS::MaskedIncrement( ~yGreaterEqualZ, rankZ );
+
+        mask32v yGreaterEqualW = dy0 >= dw0;
+        rankY = FS::MaskedIncrement( yGreaterEqualW, rankY );
+        rankW = FS::MaskedIncrement( ~yGreaterEqualW, rankW );
+
+        mask32v zGreaterEqualW = dz0 >= dw0;
+        rankZ = FS::MaskedIncrement( zGreaterEqualW, rankZ );
+        rankW = FS::MaskedIncrement( ~zGreaterEqualW, rankW );
+        // Vertex k ( 1..3 ) steps along the k highest-ranked axes.
+        mask32v maskX1 = rankX > int32v( 2 );
+        mask32v maskY1 = rankY > int32v( 2 );
+        mask32v maskZ1 = rankZ > int32v( 2 );
+        mask32v maskW1 = rankW > int32v( 2 );
+
+        mask32v maskX2 = rankX > int32v( 1 );
+        mask32v maskY2 = rankY > int32v( 1 );
+        mask32v maskZ2 = rankZ > int32v( 1 );
+        mask32v maskW2 = rankW > int32v( 1 );
+
+        mask32v maskX3 = rankX > int32v( 0 );
+        mask32v maskY3 = rankY > int32v( 0 );
+        mask32v maskZ3 = rankZ > int32v( 0 );
+        mask32v maskW3 = rankW > int32v( 0 );
+
+        float32v dx1 = FS::MaskedSub( maskX1, dx0, float32v( 1 ) ) - float32v( kUnskew4 );
+        float32v dy1 = FS::MaskedSub( maskY1, dy0, float32v( 1 ) ) - float32v( kUnskew4 );
+        float32v dz1 = FS::MaskedSub( maskZ1, dz0, float32v( 1 ) ) - float32v( kUnskew4 );
+        float32v dw1 = FS::MaskedSub( maskW1, dw0, float32v( 1 ) ) - float32v( kUnskew4 );
+        float32v dx2 = FS::MaskedSub( maskX2, dx0, float32v( 1 ) ) - float32v( kUnskew4 * 2 );
+        float32v dy2 = FS::MaskedSub( maskY2, dy0, float32v( 1 ) ) - float32v( kUnskew4 * 2 );
+        float32v dz2 = FS::MaskedSub( maskZ2, dz0, float32v( 1 ) ) - float32v( kUnskew4 * 2 );
+        float32v dw2 = FS::MaskedSub( maskW2, dw0, float32v( 1 ) ) - float32v( kUnskew4 * 2 );
+        float32v dx3 = FS::MaskedSub( maskX3, dx0, float32v( 1 ) ) - float32v( kUnskew4 * 3 );
+        float32v dy3 = FS::MaskedSub( maskY3, dy0, float32v( 1 ) ) - float32v( kUnskew4 * 3 );
+        float32v dz3 = FS::MaskedSub( maskZ3, dz0, float32v( 1 ) ) - float32v( kUnskew4 * 3 );
+        float32v dw3 = FS::MaskedSub( maskW3, dw0, float32v( 1 ) ) - float32v( kUnskew4 * 3 );
+        float32v dx4 = dx0 - float32v( kUnskew4 * 4 + 1 );
+        float32v dy4 = dy0 - float32v( kUnskew4 * 4 + 1 );
+        float32v dz4 = dz0 - float32v( kUnskew4 * 4 + 1 );
+        float32v dw4 = dw0 - float32v( kUnskew4 * 4 + 1 );
+
+        float32v falloff0 = FS::FNMulAdd( dw0, dw0, FS::FNMulAdd( dz0, dz0, FS::FNMulAdd( dy0, dy0, FS::FNMulAdd( dx0, dx0, float32v( kFalloffRadiusSquared ) ) ) ) );
+        float32v falloff1 = FS::FNMulAdd( dw1, dw1, FS::FNMulAdd( dz1, dz1, FS::FNMulAdd( dy1, dy1, FS::FNMulAdd( dx1, dx1, float32v( kFalloffRadiusSquared ) ) ) ) );
+        float32v falloff2 = FS::FNMulAdd( dw2, dw2, FS::FNMulAdd( dz2, dz2, FS::FNMulAdd( dy2, dy2, FS::FNMulAdd( dx2, dx2, float32v( kFalloffRadiusSquared ) ) ) ) );
+        float32v falloff3 = FS::FNMulAdd( dw3, dw3, FS::FNMulAdd( dz3, dz3, FS::FNMulAdd( dy3, dy3, FS::FNMulAdd( dx3, dx3, float32v( kFalloffRadiusSquared ) ) ) ) );
+        float32v falloff4 = falloff0 + FS::FMulAdd( unskewDelta,
+            float32v( -4.0 * ( kRoot5 + 3.0 ) / ( kRoot5 + 5.0 ) ),
+            float32v( -4.0 / 5.0 ) );
+
+        falloff0 = FS::Max( falloff0, float32v( 0 ) );
+        falloff1 = FS::Max( falloff1, float32v( 0 ) );
+        falloff2 = FS::Max( falloff2, float32v( 0 ) );
+        falloff3 = FS::Max( falloff3, float32v( 0 ) );
+        falloff4 = FS::Max( falloff4, float32v( 0 ) );
+
+        falloff0 *= falloff0; falloff0 *= falloff0;
+        falloff1 *= falloff1; falloff1 *= falloff1;
+        falloff2 *= falloff2; falloff2 *= falloff2;
+        falloff3 *= falloff3; falloff3 *= falloff3;
+        falloff4 *= falloff4; falloff4 *= falloff4;
+
+        float32v valueX( 0 );
+        float32v valueY( 0 );
+        float32v valueZ( 0 );
+        float32v valueW( 0 );
+
+        ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimedBase ), dx0, dy0, dz0, dw0, falloff0, valueX, valueY, valueZ, valueW );
+        ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed,
+            FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ),
+            FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ),
+            FS::MaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ),
+            FS::MaskedAdd( maskW1, wPrimedBase, int32v( Primes::W ) ) ), dx1, dy1, dz1, dw1, falloff1, valueX, valueY, valueZ, valueW );
+        ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed,
+            FS::MaskedAdd( maskX2, xPrimedBase, int32v( Primes::X ) ),
+            FS::MaskedAdd( maskY2, yPrimedBase, int32v( Primes::Y ) ),
+            FS::MaskedAdd( maskZ2, zPrimedBase, int32v( Primes::Z ) ),
+            FS::MaskedAdd( maskW2, wPrimedBase, int32v( Primes::W ) ) ), dx2, dy2, dz2, dw2, falloff2, valueX, valueY, valueZ, valueW );
+        ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed,
+            FS::MaskedAdd( maskX3, xPrimedBase, int32v( Primes::X ) ),
+            FS::MaskedAdd( maskY3, yPrimedBase, int32v( Primes::Y ) ),
+            FS::MaskedAdd( maskZ3, zPrimedBase, int32v( Primes::Z ) ),
+            FS::MaskedAdd( maskW3, wPrimedBase, int32v( Primes::W ) ) ), dx3, dy3, dz3, dw3, falloff3, valueX, valueY, valueZ, valueW );
+        ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed,
+            xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ), wPrimedBase + int32v( Primes::W ) ),
+            dx4, dy4, dz4, dw4, falloff4, valueX, valueY, valueZ, valueW );
+
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+            33.653125584827855 / 1.4142135623730951 :
+            30.88161777516092;
+
+        warpAmp *= float32v( kBounding );
+        xOut = FS::FMulAdd( valueX, warpAmp, xOut );
+        yOut = FS::FMulAdd( valueY, warpAmp, yOut );
+        zOut = FS::FMulAdd( valueZ, warpAmp, zOut );
+        wOut = FS::FMulAdd( valueW, warpAmp, wOut );
+        // lengthSq * InvSqrt( lengthSq ) is the vector length, assuming FS::InvSqrt( v ) is 1 / sqrt( v ).
+        float32v warpLengthSq = FS::FMulAdd( valueW, valueW, FS::FMulAdd( valueZ, valueZ, FS::FMulAdd( valueY, valueY, valueX * valueX ) ) );
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ) * warpAmp;
     }
-};
 
+    // 2D smooth domain warp: feeds four lattice vertices (hash, offset, falloff) around the skewed input to
+    // ApplyVectorContributionSimplex, adds the resulting valueX/valueY scaled by warpAmp * kBounding to xOut/yOut, and returns that vector's length times the same scale.
+    template<VectorizationScheme Scheme>
+    float32v FS_VECTORCALL Warp_Smooth( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const
+    {
+        constexpr double kRoot3 = 1.7320508075688772935274463415059;
+        constexpr double kSkew2 = 1.0 / ( kRoot3 + 1.0 );
+        constexpr double kUnskew2 = -1.0 / ( kRoot3 + 3.0 );
+        constexpr double kFalloffRadiusSquared = 2.0 / 3.0;
+        // Skew the input, split it into a floored cell base plus a fractional offset, and pre-multiply the base by the hash primes.
+        float32v skewDelta = float32v( kSkew2 ) * ( x + y );
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed );
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase;
+        float32v dySkewed = ySkewed - ySkewedBase;
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X );
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        // Masks computed from the fractional offsets; the last two vertex blocks use them to choose between their alternative vertices.
+        mask32v forwardXY = dxSkewed + dySkewed > float32v( 1.0f );
+        float32v boundaryXY = FS::Masked( forwardXY, float32v( -1.0f ) );
+        mask32v forwardX = FS::FMulAdd( dxSkewed, float32v( -2.0f ), dySkewed ) < boundaryXY;
+        mask32v forwardY = FS::FMulAdd( dySkewed, float32v( -2.0f ), dxSkewed ) < boundaryXY;
+        // Unskew the fractional offsets to obtain the displacement passed for the <0, 0> vertex.
+        float32v unskewDelta = float32v( kUnskew2 ) * ( dxSkewed + dySkewed );
+        float32v dxBase = dxSkewed + unskewDelta;
+        float32v dyBase = dySkewed + unskewDelta;
+        float32v falloffBase0;
+        float32v valueX( 0 );
+        float32v valueY( 0 );
+
+        // Vertex <0, 0>
+        {
+            int32v hash = HashPrimes( seed, xPrimedBase, yPrimedBase );
+            falloffBase0 = FS::FNMulAdd( dxBase, dxBase, FS::FNMulAdd( dyBase, dyBase, float32v( kFalloffRadiusSquared ) ) );
+            float32v falloff = falloffBase0; falloff *= falloff; falloff *= falloff;
+            ApplyVectorContributionSimplex<Scheme>( hash, dxBase, dyBase, falloff, valueX, valueY );
+        }
+
+        // Vertex <1, 1>
+        {
+            int32v hash = HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) );
+            float32v falloff = FS::FMulAdd( unskewDelta,
+                float32v( -4.0 * ( kRoot3 + 2.0 ) / ( kRoot3 + 3.0 ) ),
+                falloffBase0 - float32v( kFalloffRadiusSquared ) );
+            falloff *= falloff; falloff *= falloff;
+            ApplyVectorContributionSimplex<Scheme>( hash, dxBase - float32v( 2 * kUnskew2 + 1 ), dyBase - float32v( 2 * kUnskew2 + 1 ), falloff, valueX, valueY );
+        }
+
+        float32v xyDelta = FS::Select( forwardXY, float32v( kUnskew2 + 1 ), float32v( -kUnskew2 ) );
+        dxBase -= xyDelta;
+        dyBase -= xyDelta;
+
+        // Vertex <1, 0> or <-1, 0> or <1, 2>
+        {
+            int32v hash = HashPrimes( seed,
+                FS::InvMaskedSub( forwardXY, FS::MaskedAdd( forwardX, xPrimedBase, int32v( Primes::X * 2 ) ), int32v( Primes::X ) ),
+                FS::MaskedAdd( forwardXY, yPrimedBase, int32v( Primes::Y ) ) );
+            float32v dx = dxBase - FS::Select( forwardX, float32v( 1 + 2 * kUnskew2 ), float32v( -1 ) );
+            float32v dy = FS::MaskedSub( forwardX, dyBase, float32v( 2 * kUnskew2 ) );
+            float32v falloff = FS::Max( FS::FNMulAdd( dx, dx, FS::FNMulAdd( dy, dy, float32v( kFalloffRadiusSquared ) ) ), float32v( 0 ) );
+            falloff *= falloff; falloff *= falloff;
+            ApplyVectorContributionSimplex<Scheme>( hash, dx, dy, falloff, valueX, valueY );
+        }
+
+        // Vertex <0, 1> or <0, -1> or <2, 1>
+        {
+            int32v hash = HashPrimes( seed,
+                FS::MaskedAdd( forwardXY, xPrimedBase, int32v( Primes::X ) ),
+                FS::InvMaskedSub( forwardXY, FS::MaskedAdd( forwardY, yPrimedBase, int32v( (int32_t)( Primes::Y * 2LL ) ) ), int32v( Primes::Y ) ) );
+            float32v dx = FS::MaskedSub( forwardY, dxBase, float32v( 2 * kUnskew2 ) );
+            float32v dy = dyBase - FS::Select( forwardY, float32v( 1 + 2 * kUnskew2 ), float32v( -1 ) );
+            float32v falloff = FS::Max( FS::FNMulAdd( dx, dx, FS::FNMulAdd( dy, dy, float32v( kFalloffRadiusSquared ) ) ), float32v( 0 ) );
+            falloff *= falloff; falloff *= falloff;
+            ApplyVectorContributionSimplex<Scheme>( hash, dx, dy, falloff, valueX, valueY );
+        }
+        // Scheme-dependent constant, selected at compile time and folded into warpAmp before the result is applied.
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+            9.28993664146183 / 2.0 :
+            12.814453124999995;
+        warpAmp *= float32v( kBounding );
+        xOut = FS::FMulAdd( valueX, warpAmp, xOut );
+        yOut = FS::FMulAdd( valueY, warpAmp, yOut );
+        // lengthSq * InvSqrt( lengthSq ) is the vector length, assuming FS::InvSqrt is a reciprocal square root.
+        float32v warpLengthSq = FS::FMulAdd( valueY, valueY, valueX * valueX );
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ) * warpAmp;
+    }
+
+    // 3D smooth domain warp: re-bases onto the closest lattice vertex, feeds that vertex and seven sign-selected neighbours (hash, offset, falloff) to
+    // ApplyVectorContributionCommon, adds the resulting vector scaled by warpAmp * kBounding to xOut/yOut/zOut, and returns that vector's length times the same scale.
+    template<VectorizationScheme Scheme>
+    float32v FS_VECTORCALL Warp_Smooth( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const
+    {
+        constexpr double kSkew3 = 1.0 / 3.0;
+        constexpr double kReflectUnskew3 = -1.0 / 2.0;
+        constexpr double kTwiceUnskew3 = -1.0 / 4.0;
+
+        constexpr double kDistanceSquaredA = 3.0 / 4.0;
+        constexpr double kDistanceSquaredB = 1.0;
+        constexpr double kFalloffRadiusSquared = kDistanceSquaredA;
+        // Skew the input and split it into a floored cell base plus a fractional offset.
+        float32v skewDelta = float32v( kSkew3 ) * ( x + y + z );
+
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v zSkewed = z + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed );
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v zSkewedBase = FS::Floor( zSkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase;
+        float32v dySkewed = ySkewed - ySkewedBase;
+        float32v dzSkewed = zSkewed - zSkewedBase;
+
+        // From unit cell base, find closest vertex
+        {
+            // Perform a double unskew to get the vector whose dot product with skewed vectors produces the unskewed result.
+            float32v twiceUnskewDelta = float32v( kTwiceUnskew3 ) * ( dxSkewed + dySkewed + dzSkewed );
+            float32v xNormal = dxSkewed + twiceUnskewDelta;
+            float32v yNormal = dySkewed + twiceUnskewDelta;
+            float32v zNormal = dzSkewed + twiceUnskewDelta;
+            float32v xyzNormal = -twiceUnskewDelta; // xNormal + yNormal + zNormal
+
+            // Using those, compare scores to determine which vertex is closest.
+            constexpr auto considerVertex = [] ( float32v& maxScore, int32v& moveMaskBits, float32v score, int32v bits ) constexpr
+                {
+                    moveMaskBits = FS::Select( score > maxScore, bits, moveMaskBits );
+                    maxScore = FS::Max( maxScore, score );
+                };
+            float32v maxScore = float32v( 0.375f );
+            int32v moveMaskBits = FS::Masked( xyzNormal > maxScore, int32v( -1 ) );
+            maxScore = FS::Max( maxScore, xyzNormal );
+            considerVertex( maxScore, moveMaskBits, xNormal, 0b001 );
+            considerVertex( maxScore, moveMaskBits, yNormal, 0b010 );
+            considerVertex( maxScore, moveMaskBits, zNormal, 0b100 );
+            maxScore += float32v( 0.125f ) - xyzNormal;
+            considerVertex( maxScore, moveMaskBits, -zNormal, 0b011 );
+            considerVertex( maxScore, moveMaskBits, -yNormal, 0b101 );
+            considerVertex( maxScore, moveMaskBits, -xNormal, 0b110 );
+
+            mask32v moveX = ( moveMaskBits & int32v( 0b001 ) ) != int32v( 0 );
+            mask32v moveY = ( moveMaskBits & int32v( 0b010 ) ) != int32v( 0 );
+            mask32v moveZ = ( moveMaskBits & int32v( 0b100 ) ) != int32v( 0 );
+
+            xSkewedBase = FS::MaskedIncrement( moveX, xSkewedBase );
+            ySkewedBase = FS::MaskedIncrement( moveY, ySkewedBase );
+            zSkewedBase = FS::MaskedIncrement( moveZ, zSkewedBase );
+
+            dxSkewed = FS::MaskedDecrement( moveX, dxSkewed );
+            dySkewed = FS::MaskedDecrement( moveY, dySkewed );
+            dzSkewed = FS::MaskedDecrement( moveZ, dzSkewed );
+        }
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X );
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
+        // Recompute the sum, normals and unskewed offsets from the fractional offsets as adjusted by the block above.
+        float32v skewedCoordinateSum = dxSkewed + dySkewed + dzSkewed;
+        float32v twiceUnskewDelta = float32v( kTwiceUnskew3 ) * skewedCoordinateSum;
+        float32v xNormal = dxSkewed + twiceUnskewDelta;
+        float32v yNormal = dySkewed + twiceUnskewDelta;
+        float32v zNormal = dzSkewed + twiceUnskewDelta;
+        float32v xyzNormal = -twiceUnskewDelta; // xNormal + yNormal + zNormal
+
+        float32v unskewDelta = float32v( kReflectUnskew3 ) * skewedCoordinateSum;
+        float32v dxBase = dxSkewed + unskewDelta;
+        float32v dyBase = dySkewed + unskewDelta;
+        float32v dzBase = dzSkewed + unskewDelta;
+
+        float32v coordinateSum = float32v( 1 + 3 * kReflectUnskew3 ) * skewedCoordinateSum; // dxBase + dyBase + dzBase
+
+        float32v valueX( 0 );
+        float32v valueY( 0 );
+        float32v valueZ( 0 );
+        float32v falloffBaseStemA, falloffBaseStemB;
+
+        // Vertex <0, 0, 0>
+        {
+            float32v falloffBase = FS::FNMulAdd( dzBase, dzBase, FS::FNMulAdd( dyBase, dyBase, FS::FNMulAdd( dxBase, dxBase, float32v( kFalloffRadiusSquared ) ) ) ) * float32v( 0.5f );
+            falloffBaseStemA = falloffBase - float32v( kDistanceSquaredA * 0.5 );
+            falloffBaseStemB = falloffBase - float32v( kDistanceSquaredB * 0.5 );
+            ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase ), dxBase, dyBase, dzBase,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ );
+        }
+
+        // Vertex <1, 1, 1> or <-1, -1, -1>
+        {
+            mask32v signMask = xyzNormal < float32v( 0 );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) ); // float sign bit in signMask lanes (assuming Masked zeroes the rest); `v ^ sign` negates v there
+            float32v offset = float32v( 3 * kReflectUnskew3 + 1 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset, coordinateSum, falloffBaseStemA ), float32v( 0.0f ) );
+
+            ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimed, yPrimed, zPrimed ), dxBase - offset, dyBase - offset, dzBase - offset,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ );
+        }
+
+        // Vertex <1, 1, 0> or <-1, -1, 0>
+        {
+            mask32v signMask = xyzNormal < zNormal;
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( 2 * kReflectUnskew3 ) ^ sign;
+
+            float32v falloffBase = FS::Min( ( sign ^ dzBase ) - falloffBaseStemB, float32v( 0.0f ) );
+
+            ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase ), dxBase, dyBase, dzBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ );
+        }
+
+        // Vertex <1, 0, 1> or <-1, 0, -1>
+        {
+            mask32v signMask = xyzNormal < yNormal;
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( 2 * kReflectUnskew3 ) ^ sign;
+
+            float32v falloffBase = FS::Min( ( sign ^ dyBase ) - falloffBaseStemB, float32v( 0.0f ) );
+
+            ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed ), dxBase, dyBase - offset0, dzBase,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ );
+        }
+
+        // Vertex <0, 1, 1> or <0, -1, -1>
+        {
+            mask32v signMask = xyzNormal < xNormal;
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( 2 * kReflectUnskew3 ) ^ sign;
+
+            float32v falloffBase = FS::Min( ( sign ^ dxBase ) - falloffBaseStemB, float32v( 0.0f ) );
+
+            ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed ), dxBase - offset0, dyBase, dzBase,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ );
+        }
+
+        // Vertex <1, 0, 0> or <-1, 0, 0>
+        {
+            mask32v signMask = xNormal < float32v( 0 );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( kReflectUnskew3 ) ^ sign; // offset1 = -offset0 because kReflectUnskew3 + 1 = -kReflectUnskew3
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dxBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase ), dxBase + offset0, dyBase - offset0, dzBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ );
+        }
+
+        // Vertex <0, 1, 0> or <0, -1, 0>
+        {
+            mask32v signMask = yNormal < float32v( 0 );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( kReflectUnskew3 ) ^ sign; // offset1 = -offset0 because kReflectUnskew3 + 1 = -kReflectUnskew3
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dyBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase ), dxBase - offset0, dyBase + offset0, dzBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ );
+        }
+
+        // Vertex <0, 0, 1> or <0, 0, -1>
+        {
+            mask32v signMask = zNormal < float32v( 0 );
+
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( kReflectUnskew3 ) ^ sign; // offset1 = -offset0 because kReflectUnskew3 + 1 = -kReflectUnskew3
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dzBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed ), dxBase - offset0, dyBase - offset0, dzBase + offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ );
+        }
+
+        if constexpr( Scheme != VectorizationScheme::OrthogonalGradientMatrix )
+        {
+            // Match gradient orientation.
+            constexpr double kReflect3D = -2.0 / 3.0;
+            float32v valueTransformDelta = float32v( kReflect3D ) * ( valueX + valueY + valueZ );
+            valueX += valueTransformDelta;
+            valueY += valueTransformDelta;
+            valueZ += valueTransformDelta;
+        }
+        // Scheme-dependent constant, selected at compile time and folded into warpAmp before the result is applied.
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+            144.736422163332608 / 1.4142135623730951 :
+            37.63698669623629;
+        warpAmp *= float32v( kBounding );
+        xOut = FS::FMulAdd( valueX, warpAmp, xOut );
+        yOut = FS::FMulAdd( valueY, warpAmp, yOut );
+        zOut = FS::FMulAdd( valueZ, warpAmp, zOut );
+        // lengthSq * InvSqrt( lengthSq ) is the vector length, assuming FS::InvSqrt is a reciprocal square root.
+        float32v warpLengthSq = FS::FMulAdd( valueZ, valueZ, FS::FMulAdd( valueY, valueY, valueX * valueX ) );
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ) * warpAmp;
+    }
+
+    template<VectorizationScheme Scheme>
+    float32v FS_VECTORCALL Warp_Smooth( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
+    {
+        constexpr double kRoot5 = 2.2360679774997896964091736687313;
+        constexpr double kSkew4 = 1.0 / ( kRoot5 + 1.0 );
+        constexpr double kUnskew4 = -1.0 / ( kRoot5 + 5.0 );
+        constexpr double kTwiceUnskew4 = -1.0 / 5.0;
+
+        constexpr double kDistanceSquaredA = 4.0 / 5.0;
+        constexpr double kDistanceSquaredB = 6.0 / 5.0;
+        constexpr double kFalloffRadiusSquared = kDistanceSquaredA;
+
+        float32v skewDelta = float32v( kSkew4 ) * ( x + y + z + w );
+
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v zSkewed = z + skewDelta;
+        float32v wSkewed = w + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed );
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v zSkewedBase = FS::Floor( zSkewed );
+        float32v wSkewedBase = FS::Floor( wSkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase;
+        float32v dySkewed = ySkewed - ySkewedBase;
+        float32v dzSkewed = zSkewed - zSkewedBase;
+        float32v dwSkewed = wSkewed - wSkewedBase;
+
+        // From unit cell base, find closest vertex
+        {
+            // Perform a double unskew to get the vector whose dot product with skewed vectors produces the unskewed result.
+            float32v twiceUnskewDelta = float32v( kTwiceUnskew4 ) * ( dxSkewed + dySkewed + dzSkewed + dwSkewed );
+            float32v xNormal = dxSkewed + twiceUnskewDelta;
+            float32v yNormal = dySkewed + twiceUnskewDelta;
+            float32v zNormal = dzSkewed + twiceUnskewDelta;
+            float32v wNormal = dwSkewed + twiceUnskewDelta;
+            float32v xyzwNormal = -twiceUnskewDelta; // xNormal + yNormal + zNormal + wNormal
+
+            // Using those, compare scores to determine which vertex is closest.
+            constexpr auto considerVertex = [] ( float32v& maxScore, int32v& moveMaskBits, float32v score, int32v bits ) constexpr
+                {
+                    moveMaskBits = FS::Select( score > maxScore, bits, moveMaskBits );
+                    maxScore = FS::Max( maxScore, score );
+                };
+            float32v maxScore = float32v( 0.6f ) - xyzwNormal;
+            int32v moveMaskBits = FS::Masked( float32v( 0.2f ) > maxScore, int32v( -1 ) );
+            maxScore = FS::Max( maxScore, float32v( 0.2f ) );
+            considerVertex( maxScore, moveMaskBits, -wNormal, 0b0111 );
+            considerVertex( maxScore, moveMaskBits, -zNormal, 0b1011 );
+            considerVertex( maxScore, moveMaskBits, -yNormal, 0b1101 );
+            considerVertex( maxScore, moveMaskBits, -xNormal, 0b1110 );
+            maxScore += xyzwNormal - float32v( 0.2f );
+            considerVertex( maxScore, moveMaskBits, xNormal, 0b0001 );
+            considerVertex( maxScore, moveMaskBits, yNormal, 0b0010 );
+            considerVertex( maxScore, moveMaskBits, zNormal, 0b0100 );
+            considerVertex( maxScore, moveMaskBits, wNormal, 0b1000 );
+            maxScore += float32v( 0.2f ) - xNormal;
+            considerVertex( maxScore, moveMaskBits, yNormal, 0b0011 );
+            considerVertex( maxScore, moveMaskBits, zNormal, 0b0101 );
+            considerVertex( maxScore, moveMaskBits, wNormal, 0b1001 );
+            maxScore += xNormal;
+            considerVertex( maxScore, moveMaskBits, yNormal + zNormal, 0b0110 );
+            maxScore -= wNormal;
+            considerVertex( maxScore, moveMaskBits, yNormal, 0b1010 );
+            considerVertex( maxScore, moveMaskBits, zNormal, 0b1100 );
+
+            mask32v moveX = ( moveMaskBits & int32v( 0b0001 ) ) != int32v( 0 );
+            mask32v moveY = ( moveMaskBits & int32v( 0b0010 ) ) != int32v( 0 );
+            mask32v moveZ = ( moveMaskBits & int32v( 0b0100 ) ) != int32v( 0 );
+            mask32v moveW = ( moveMaskBits & int32v( 0b1000 ) ) != int32v( 0 );
+
+            xSkewedBase = FS::MaskedIncrement( moveX, xSkewedBase );
+            ySkewedBase = FS::MaskedIncrement( moveY, ySkewedBase );
+            zSkewedBase = FS::MaskedIncrement( moveZ, zSkewedBase );
+            wSkewedBase = FS::MaskedIncrement( moveW, wSkewedBase );
+
+            dxSkewed = FS::MaskedDecrement( moveX, dxSkewed );
+            dySkewed = FS::MaskedDecrement( moveY, dySkewed );
+            dzSkewed = FS::MaskedDecrement( moveZ, dzSkewed );
+            dwSkewed = FS::MaskedDecrement( moveW, dwSkewed );
+        }
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X );
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
+        int32v wPrimedBase = FS::Convert<int32_t>( wSkewedBase ) * int32v( Primes::W );
+
+        float32v skewedCoordinateSum = dxSkewed + dySkewed + dzSkewed + dwSkewed;
+        float32v twiceUnskewDelta = float32v( kTwiceUnskew4 ) * skewedCoordinateSum;
+        float32v xNormal = dxSkewed + twiceUnskewDelta;
+        float32v yNormal = dySkewed + twiceUnskewDelta;
+        float32v zNormal = dzSkewed + twiceUnskewDelta;
+        float32v wNormal = dwSkewed + twiceUnskewDelta;
+        float32v xyzwNormal = -twiceUnskewDelta; // xNormal + yNormal + zNormal + wNormal
+
+        float32v unskewDelta = float32v( kUnskew4 ) * skewedCoordinateSum;
+        float32v dxBase = dxSkewed + unskewDelta;
+        float32v dyBase = dySkewed + unskewDelta;
+        float32v dzBase = dzSkewed + unskewDelta;
+        float32v dwBase = dwSkewed + unskewDelta;
+
+        float32v coordinateSum = float32v( 1 + 4 * kUnskew4 ) * skewedCoordinateSum; // dxBase + dyBase + dzBase + dwBase
+
+        float32v valueX( 0 );
+        float32v valueY( 0 );
+        float32v valueZ( 0 );
+        float32v valueW( 0 );
+        float32v falloffBaseStemA, falloffBaseStemB;
+
+        // Vertex <0, 0, 0, 0>
+        {
+            float32v falloffBase = FS::FNMulAdd( dwBase, dwBase, FS::FNMulAdd( dzBase, dzBase, FS::FNMulAdd( dyBase, dyBase, FS::FNMulAdd( dxBase, dxBase, float32v( kFalloffRadiusSquared ) ) ) ) ) * float32v( 0.5f );
+            falloffBaseStemA = falloffBase - float32v( kDistanceSquaredA * 0.5 );
+            falloffBaseStemB = falloffBase - float32v( kDistanceSquaredB * 0.5 );
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimedBase ), dxBase, dyBase, dzBase, dwBase,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <1, 1, 1, 1> or <-1, -1, -1, -1>
+        {
+            mask32v signMask = xyzwNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset = float32v( 4 * kUnskew4 + 1 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset, coordinateSum, falloffBaseStemA ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimed, yPrimed, zPrimed, wPrimed ), dxBase - offset, dyBase - offset, dzBase - offset, dwBase - offset,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <1, 1, 1, 0> or <-1, -1, -1, 0>
+        {
+            mask32v signMask = xyzwNormal < wNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dwBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimed, yPrimed, zPrimed, wPrimedBase ), dxBase - offset1, dyBase - offset1, dzBase - offset1, dwBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <1, 1, 0, 1> or <-1, -1, 0, -1>
+        {
+            mask32v signMask = xyzwNormal < zNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dzBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase, wPrimed ), dxBase - offset1, dyBase - offset1, dzBase - offset0, dwBase - offset1,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <1, 0, 1, 1> or <-1, 0, -1, -1>
+        {
+            mask32v signMask = xyzwNormal < yNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dyBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed, wPrimed ), dxBase - offset1, dyBase - offset0, dzBase - offset1, dwBase - offset1,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <0, 1, 1, 1> or <0, -1, -1, -1>
+        {
+            mask32v signMask = xyzwNormal < xNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dxBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed, wPrimed ), dxBase - offset0, dyBase - offset1, dzBase - offset1, dwBase - offset1,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <1, 0, 0, 0> or <-1, 0, 0, 0>
+        {
+            mask32v signMask = xNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+
+            float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dxBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase, wPrimedBase ), dxBase - offset1, dyBase - offset0, dzBase - offset0, dwBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <1, 1, 0, 0> or <-1, -1, 0, 0>
+        {
+            mask32v signMask = xNormal < -yNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dyBase ) ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase, wPrimedBase ), dxBase - offset1, dyBase - offset1, dzBase - offset0, dwBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <1, 0, 1, 0> or <-1, 0, -1, 0>
+        {
+            mask32v signMask = xNormal < -zNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dzBase ) ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed, wPrimedBase ), dxBase - offset1, dyBase - offset0, dzBase - offset1, dwBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <1, 0, 0, 1> or <-1, 0, 0, -1>
+        {
+            mask32v signMask = xNormal < -wNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dwBase ) ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase, wPrimed ), dxBase - offset1, dyBase - offset0, dzBase - offset0, dwBase - offset1,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <0, 1, 0, 0> or <0, -1, 0, 0>
+        {
+            mask32v signMask = yNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+
+            float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dyBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase, wPrimedBase ), dxBase - offset0, dyBase - offset1, dzBase - offset0, dwBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <0, 1, 1, 0> or <0, -1, -1, 0>
+        {
+            mask32v signMask = yNormal < -zNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dyBase + dzBase ) ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed, wPrimedBase ), dxBase - offset0, dyBase - offset1, dzBase - offset1, dwBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <0, 1, 0, 1> or <0, -1, 0, -1>
+        {
+            mask32v signMask = yNormal < -wNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dyBase + dwBase ) ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase, wPrimed ), dxBase - offset0, dyBase - offset1, dzBase - offset0, dwBase - offset1,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <0, 0, 1, 0> or <0, 0, -1, 0>
+        {
+            mask32v signMask = zNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dzBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed, wPrimedBase ), dxBase - offset0, dyBase - offset0, dzBase - offset1, dwBase - offset0,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <0, 0, 1, 1> or <0, 0, -1, -1>
+        {
+            mask32v signMask = zNormal < -wNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dzBase + dwBase ) ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed, wPrimed ), dxBase - offset0, dyBase - offset0, dzBase - offset1, dwBase - offset1,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        // Vertex <0, 0, 0, 1> or <0, 0, 0, -1>
+        {
+            mask32v signMask = wNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( kUnskew4 ) ^ sign;
+
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dwBase ), float32v( 0.0f ) );
+
+            ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimed ), dxBase - offset0, dyBase - offset0, dzBase - offset0, dwBase - offset1,
+                ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
+        }
+
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ) ? // scheme-dependent factor multiplied into warpAmp below; `constexpr( ... )` is not a valid expression, plain parentheses suffice in a constexpr initializer
+            115.21625311930542 / 1.4142135623730951 :
+            48.80058117543753;
+
+        warpAmp *= float32v( kBounding );
+        xOut = FS::FMulAdd( valueX, warpAmp, xOut );
+        yOut = FS::FMulAdd( valueY, warpAmp, yOut );
+        zOut = FS::FMulAdd( valueZ, warpAmp, zOut );
+
+        float32v warpLengthSq = FS::FMulAdd( valueW, valueW, FS::FMulAdd( valueZ, valueZ, FS::FMulAdd( valueY, valueY, valueX * valueX ) ) );
+        return warpLengthSq * FS::InvSqrt( warpLengthSq ) * warpAmp;
+    }
+};
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index 3139cad5..fc651526 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -48,6 +48,30 @@ namespace FastNoise
         "Minkowski",
     };
 
+    enum class SimplexType // Variant selector for Simplex noise, set through Simplex::SetType
+    {
+        Standard, // Simplex::Gen dispatches to Gen_Standard
+        Smooth // Simplex::Gen dispatches to Gen_Smooth
+    };
+
+    constexpr static const char* kSimplexType_Strings[] = // Names for SimplexType, listed in enumerator order; handed to AddVariableEnum by the Simplex metadata
+    {
+        "Standard",
+        "Smooth",
+    };
+
+    enum class VectorizationScheme // Compared against the `Scheme` template argument in the simplex domain-warp code, e.g. to pick its bounding constant
+    {
+        OrthogonalGradientMatrix, // NOTE(review): exact construction is defined by ApplyVectorContributionSimplex - confirm there
+        GradientOuterProduct
+    };
+
+    constexpr static const char* kVectorizationScheme_Strings[] = // Names for VectorizationScheme, listed in enumerator order
+    {
+        "Orthogonal Gradient Matrix",
+        "Gradient Outer Product",
+    };
+
     struct OutputMinMax
     {
         float min =  INFINITY;
diff --git a/include/FastNoise/Generators/Perlin.inl b/include/FastNoise/Generators/Perlin.inl
index 6f19b475..edaa6f44 100644
--- a/include/FastNoise/Generators/Perlin.inl
+++ b/include/FastNoise/Generators/Perlin.inl
@@ -26,8 +26,8 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
         constexpr float kBounding = 0.579106986522674560546875f;
 
         return this->ScaleOutput( Lerp(
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y0 ), xf0, yf0 ), GetGradientDot( HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1 ), xf0, yf1 ), GetGradientDot( HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys ),
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y0 ), xf0, yf0 ), GetGradientDotPerlin( HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y1 ), xf0, yf1 ), GetGradientDotPerlin( HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys ),
             -1 / kBounding, 1 / kBounding );
     }
 
@@ -60,11 +60,11 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
         constexpr float kBounding = 0.964921414852142333984375f;
 
         return this->ScaleOutput( Lerp( Lerp(
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), GetGradientDot( HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), GetGradientDot( HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
+            Lerp( GetGradientDotCommon( HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), GetGradientDotCommon( HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
+            Lerp( GetGradientDotCommon( HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), GetGradientDotCommon( HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
             Lerp( 
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), GetGradientDot( HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),    
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs ),
+            Lerp( GetGradientDotCommon( HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), GetGradientDotCommon( HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),
+            Lerp( GetGradientDotCommon( HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), GetGradientDotCommon( HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs ),
             -1 / kBounding, 1 / kBounding );
     }
 
@@ -103,17 +103,17 @@ class FastSIMD::DispatchClass<FastNoise::Perlin, SIMD> final : public virtual Fa
         constexpr float kBounding = 0.964921414852142333984375f;
 
         return this->ScaleOutput( Lerp( Lerp( Lerp(
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), GetGradientDot( HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), GetGradientDot( HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), GetGradientDotPerlin( HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), GetGradientDotPerlin( HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
             Lerp(                                                                                                                                                     
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z1, w0 ), xf0, yf0, zf1, wf0 ), GetGradientDot( HashPrimes( seed, x1, y0, z1, w0 ), xf1, yf0, zf1, wf0 ), xs ),    
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1, w0 ), xf0, yf1, zf1, wf0 ), GetGradientDot( HashPrimes( seed, x1, y1, z1, w0 ), xf1, yf1, zf1, wf0 ), xs ), ys ), zs ),
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y0, z1, w0 ), xf0, yf0, zf1, wf0 ), GetGradientDotPerlin( HashPrimes( seed, x1, y0, z1, w0 ), xf1, yf0, zf1, wf0 ), xs ),    
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y1, z1, w0 ), xf0, yf1, zf1, wf0 ), GetGradientDotPerlin( HashPrimes( seed, x1, y1, z1, w0 ), xf1, yf1, zf1, wf0 ), xs ), ys ), zs ),
             Lerp( Lerp(
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z0, w1 ), xf0, yf0, zf0, wf1 ), GetGradientDot( HashPrimes( seed, x1, y0, z0, w1 ), xf1, yf0, zf0, wf1 ), xs ),
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y0, z0, w1 ), xf0, yf0, zf0, wf1 ), GetGradientDotPerlin( HashPrimes( seed, x1, y0, z0, w1 ), xf1, yf0, zf0, wf1 ), xs ),
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), GetGradientDotPerlin( HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
             Lerp(                                                                                                                                                     
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), GetGradientDot( HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),    
-            Lerp( GetGradientDot( HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), GetGradientDot( HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws ),
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), GetGradientDotPerlin( HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),    
+            Lerp( GetGradientDotPerlin( HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), GetGradientDotPerlin( HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws ),
             -1 / kBounding, 1 / kBounding );
     }
 };
diff --git a/include/FastNoise/Generators/Simplex.h b/include/FastNoise/Generators/Simplex.h
index cbed8109..f56949bf 100644
--- a/include/FastNoise/Generators/Simplex.h
+++ b/include/FastNoise/Generators/Simplex.h
@@ -6,7 +6,11 @@ namespace FastNoise
     class Simplex : public virtual VariableRange<ScalableGenerator>
     {
     public:
+        void SetType( SimplexType value ) { mType = value; } // Picks the variant (Standard or Smooth) the SIMD Gen() overloads dispatch to
         const Metadata& GetMetadata() const override;
+
+    protected:
+        SimplexType mType = SimplexType::Standard; // Read by the Gen() dispatchers; initialised to Standard
     };
 
 #ifdef FASTNOISE_METADATA
@@ -22,52 +26,12 @@ namespace FastNoise
             description = 
                 "Smooth gradient noise from an N dimensional simplex grid\n"
                 "Developed by Ken Perlin in 2001";
-        }
-    };
-#endif
-
-    class OpenSimplex2 : public virtual VariableRange<ScalableGenerator>
-    {
-    public:
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<OpenSimplex2> : MetadataT<VariableRange<ScalableGenerator>>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
-
-            description = 
-                "Smooth gradient noise from an N dimensional simplex grid, alternate implementation\n"
-                "Developed by K.jpg in 2019";
-        }
-    };
-#endif
-
-    class OpenSimplex2S : public virtual VariableRange<ScalableGenerator>
-    {
-    public:
-        const Metadata& GetMetadata() const override;
-    };
-
-#ifdef FASTNOISE_METADATA
-    template<>
-    struct MetadataT<OpenSimplex2S> : MetadataT<VariableRange<ScalableGenerator>>
-    {
-        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
-
-        MetadataT()
-        {
-            groups.push_back( "Coherent Noise" );
 
-            description =
-                "Smoother gradient noise from an N dimensional simplex grid\n"
-                "Developed by K.jpg in 2017";
+            this->AddVariableEnum(
+                { "Type", "Noise character style" },
+                SimplexType::Standard, &Simplex::SetType,
+                kSimplexType_Strings
+            );
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index 3209d60c..d35fc151 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -4,542 +4,951 @@
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
     {
-        this->ScalePositions( x, y );
-
-        const float SQRT3 = 1.7320508075688772935274463415059f;
-        const float F2 = 0.5f * (SQRT3 - 1.0f);
-        const float G2 = (3.0f - SQRT3) / 6.0f;
-
-        float32v f = float32v( F2 ) * (x + y);
-        float32v x0 = FS::Floor( x + f );
-        float32v y0 = FS::Floor( y + f );
-
-        int32v i = FS::Convert<int32_t>( x0 ) * int32v( Primes::X );
-        int32v j = FS::Convert<int32_t>( y0 ) * int32v( Primes::Y );
-
-        float32v g = float32v( G2 ) * (x0 + y0);
-        x0 = x - (x0 - g);
-        y0 = y - (y0 - g);
-
-        mask32v i1 = x0 > y0;
-        //mask32v j1 = ~i1; //InvMasked funcs
-
-        float32v x1 = FS::MaskedSub( i1, x0, float32v( 1.f ) ) + float32v( G2 );
-        float32v y1 = FS::InvMaskedSub( i1, y0, float32v( 1.f ) ) + float32v( G2 );
-
-        float32v x2 = x0 + float32v( G2 * 2 - 1 );
-        float32v y2 = y0 + float32v( G2 * 2 - 1 );
-
-        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, float32v( 0.5f ) ) );
-        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, float32v( 0.5f ) ) );
-        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, float32v( 0.5f ) ) );
-
-        t0 = FS::Max( t0, float32v( 0 ) );
-        t1 = FS::Max( t1, float32v( 0 ) );
-        t2 = FS::Max( t2, float32v( 0 ) );
+        switch( mType ) { // 2D entry point: forward to the implementation selected by SetType()
+        case SimplexType::Smooth:
+            return Gen_Smooth( seed, x, y );
+        default: // SimplexType::Standard; also catches out-of-range values so control never falls off the end
+            return Gen_Standard( seed, x, y );
+        }
+    }
 
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final // 3D entry point: forward to the implementation selected by SetType()
+    {
+        switch( mType ) {
+        case SimplexType::Smooth:
+            return Gen_Smooth( seed, x, y, z );
+        default: // SimplexType::Standard; also catches out-of-range values so control never falls off the end
+            return Gen_Standard( seed, x, y, z );
+        }
+    }
 
-        float32v n0 = GetGradientDot( HashPrimes( seed, i, j ), x0, y0 );
-        float32v n1 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::InvMaskedAdd( i1, j, int32v( Primes::Y ) ) ), x1, y1 );
-        float32v n2 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ) ), x2, y2 );
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final // 4D entry point: forward to the implementation selected by SetType()
+    {
+        switch( mType ) {
+        case SimplexType::Smooth:
+            return Gen_Smooth( seed, x, y, z, w );
+        default: // SimplexType::Standard; also catches out-of-range values so control never falls off the end
+            return Gen_Standard( seed, x, y, z, w );
+        }
+    }
 
-        constexpr float kBounding = 38.283687591552734375f;
+    float32v FS_VECTORCALL Gen_Standard( int32v seed, float32v x, float32v y ) const
+    {
+        this->ScalePositions( x, y );
 
-        return this->ScaleOutput( FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) ),
+        constexpr double kRoot3 = 1.7320508075688772935274463415059;
+        constexpr double kSkew2 = 1.0 / ( kRoot3 + 1.0 ); // equals ( sqrt(3) - 1 ) / 2; shear added to both inputs
+        constexpr double kUnskew2 = -1.0 / ( kRoot3 + 3.0 ); // equals -( 3 - sqrt(3) ) / 6; inverse shear applied to the in-cell offsets
+        constexpr double kFalloffRadiusSquared = 0.5; // constant each squared vertex distance is subtracted from
+
+        float32v skewDelta = float32v( kSkew2 ) * ( x + y );
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+
+        float32v xSkewedBase = FS::Floor( xSkewed ); // integer cell in skewed space
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase; // fractional position inside that cell
+        float32v dySkewed = ySkewed - ySkewedBase;
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X ); // cell coordinates pre-multiplied by the hash primes
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+
+        mask32v xGreaterEqualY = dxSkewed >= dySkewed; // selects the middle vertex: stepped along X where set, along Y otherwise
+        
+        float32v unskewDelta = float32v( kUnskew2 ) * ( dxSkewed + dySkewed );
+        float32v dx0 = dxSkewed + unskewDelta; // offset from the base vertex in unskewed space
+        float32v dy0 = dySkewed + unskewDelta;
+        
+        float32v dx1 = FS::MaskedIncrement( ~xGreaterEqualY, dx0 ) - float32v( kUnskew2 + 1 ); // assuming MaskedIncrement adds 1 where the mask is set: the +1 cancels the -1 on lanes that did not step along X
+        float32v dy1 = FS::MaskedIncrement( xGreaterEqualY, dy0 ) - float32v( kUnskew2 + 1 );
+        float32v dx2 = dx0 - float32v( kUnskew2 * 2 + 1 ); // offset from the vertex stepped along both axes
+        float32v dy2 = dy0 - float32v( kUnskew2 * 2 + 1 );
+
+        float32v falloff0 = FS::FNMulAdd( dx0, dx0, FS::FNMulAdd( dy0, dy0, float32v( kFalloffRadiusSquared ) ) ); // kFalloffRadiusSquared - dx0^2 - dy0^2, assuming FNMulAdd( a, b, c ) is c - a * b
+        float32v falloff1 = FS::FNMulAdd( dx1, dx1, FS::FNMulAdd( dy1, dy1, float32v( kFalloffRadiusSquared ) ) );
+        float32v falloff2 = falloff0 + FS::FMulAdd( unskewDelta, // algebraically kFalloffRadiusSquared - dx2^2 - dy2^2, rewritten in terms of falloff0 and unskewDelta
+            float32v( -4.0 * ( kRoot3 + 2.0 ) / ( kRoot3 + 3.0 ) ),
+            float32v( -2.0 / 3.0 ) );
+
+        falloff0 = FS::Max( falloff0, float32v( 0 ) ); // clamp negative falloffs to zero
+        falloff1 = FS::Max( falloff1, float32v( 0 ) );
+        falloff2 = FS::Max( falloff2, float32v( 0 ) );
+
+        falloff0 *= falloff0; falloff0 *= falloff0; // raise each clamped falloff to the 4th power
+        falloff1 *= falloff1; falloff1 *= falloff1;
+        falloff2 *= falloff2; falloff2 *= falloff2;
+
+        float32v gradientRampValue0 = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase ), dx0, dy0 );
+        float32v gradientRampValue1 = GetGradientDotSimplex( HashPrimes( seed, FS::MaskedAdd( xGreaterEqualY, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( xGreaterEqualY, yPrimedBase, int32v( Primes::Y ) ) ), dx1, dy1 );
+        float32v gradientRampValue2 = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) ), dx2, dy2 );
+
+        constexpr double kBounding = 49.918426513671875; // its reciprocal, positive and negative, is passed to ScaleOutput below
+
+        return this->ScaleOutput( FS::FMulAdd( gradientRampValue0, falloff0, FS::FMulAdd( gradientRampValue1, falloff1, gradientRampValue2 * falloff2 ) ),
             -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
+    float32v FS_VECTORCALL Gen_Standard( int32v seed, float32v x, float32v y, float32v z ) const
     {
         this->ScalePositions( x, y, z );
 
-        const float F3 = 1.0f / 3.0f;
-        const float G3 = 1.0f / 2.0f;
-
-        float32v s = float32v( F3 ) * (x + y + z);
-        x += s;
-        y += s;
-        z += s;
-
-        float32v x0 = FS::Floor( x );
-        float32v y0 = FS::Floor( y );
-        float32v z0 = FS::Floor( z );
-        float32v xi = x - x0;
-        float32v yi = y - y0;
-        float32v zi = z - z0;
-
-        int32v i = FS::Convert<int32_t>( x0 ) * int32v( Primes::X );
-        int32v j = FS::Convert<int32_t>( y0 ) * int32v( Primes::Y );
-        int32v k = FS::Convert<int32_t>( z0 ) * int32v( Primes::Z );
-
-        mask32v x_ge_y = xi >= yi;
-        mask32v y_ge_z = yi >= zi;
-        mask32v x_ge_z = xi >= zi;
-
-        float32v g = float32v( G3 ) * (xi + yi + zi);
-        x0 = xi - g;
-        y0 = yi - g;
-        z0 = zi - g;
-
-        mask32v i1 = x_ge_y & x_ge_z;
-        mask32v j1 = FS::BitwiseAndNot( y_ge_z, x_ge_y );
-        mask32v k1 = FS::BitwiseAndNot( ~x_ge_z, y_ge_z );
-
-        mask32v i2 = x_ge_y | x_ge_z;
-        mask32v j2 = ~x_ge_y | y_ge_z;
-        mask32v k2 = x_ge_z & y_ge_z; //InvMasked
-
-        float32v x1 = FS::MaskedSub( i1, x0, float32v( 1 ) ) + float32v( G3 );
-        float32v y1 = FS::MaskedSub( j1, y0, float32v( 1 ) ) + float32v( G3 );
-        float32v z1 = FS::MaskedSub( k1, z0, float32v( 1 ) ) + float32v( G3 );
-        float32v x2 = FS::MaskedSub( i2, x0, float32v( 1 ) ) + float32v( G3 * 2 );
-        float32v y2 = FS::MaskedSub( j2, y0, float32v( 1 ) ) + float32v( G3 * 2 );
-        float32v z2 = FS::InvMaskedSub( k2, z0, float32v( 1 ) ) + float32v( G3 * 2 );
-        float32v x3 = x0 + float32v( G3 * 3 - 1 );
-        float32v y3 = y0 + float32v( G3 * 3 - 1 );
-        float32v z3 = z0 + float32v( G3 * 3 - 1 );
-
-        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, FS::FNMulAdd( z0, z0, float32v( 0.6f ) ) ) );
-        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, FS::FNMulAdd( z1, z1, float32v( 0.6f ) ) ) );
-        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, FS::FNMulAdd( z2, z2, float32v( 0.6f ) ) ) );
-        float32v t3 = FS::FNMulAdd( x3, x3, FS::FNMulAdd( y3, y3, FS::FNMulAdd( z3, z3, float32v( 0.6f ) ) ) );
-
-        t0 = FS::Max( t0, float32v( 0 ) );
-        t1 = FS::Max( t1, float32v( 0 ) );
-        t2 = FS::Max( t2, float32v( 0 ) );
-        t3 = FS::Max( t3, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-        t3 *= t3; t3 *= t3;             
-
-        float32v n0 = GetGradientDot( HashPrimes( seed, i, j, k ), x0, y0, z0 );
-        float32v n1 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::MaskedAdd( j1, j, int32v( Primes::Y ) ), FS::MaskedAdd( k1, k, int32v( Primes::Z ) ) ), x1, y1, z1 );
-        float32v n2 = GetGradientDot( HashPrimes( seed, FS::MaskedAdd( i2, i, int32v( Primes::X ) ), FS::MaskedAdd( j2, j, int32v( Primes::Y ) ), FS::InvMaskedAdd( k2, k, int32v( Primes::Z ) ) ), x2, y2, z2 );
-        float32v n3 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ), k + int32v( Primes::Z ) ), x3, y3, z3 );
-                
-        constexpr float kBounding = 32.69428253173828125f;
-
-        return this->ScaleOutput( FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, n3 * t3 ) ) ),
+        constexpr double kSkew3 = 1.0 / 3.0; // shear added to all three inputs
+        constexpr double kReflectUnskew3 = -1.0 / 2.0; // factor applied to the in-cell offset sum when unskewing
+        constexpr double kFalloffRadiusSquared = 0.6; // constant each squared vertex distance is subtracted from
+
+        float32v skewDelta = float32v( kSkew3 ) * ( x + y + z );
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v zSkewed = z + skewDelta;
+
+        float32v xSkewedBase = FS::Floor( xSkewed ); // integer cell in skewed space
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v zSkewedBase = FS::Floor( zSkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase; // fractional position inside that cell
+        float32v dySkewed = ySkewed - ySkewedBase;
+        float32v dzSkewed = zSkewed - zSkewedBase;
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X ); // cell coordinates pre-multiplied by the hash primes
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
+
+        mask32v xGreaterEqualY = dxSkewed >= dySkewed; // pairwise ordering of the fractional parts drives vertex selection
+        mask32v yGreaterEqualZ = dySkewed >= dzSkewed;
+        mask32v xGreaterEqualZ = dxSkewed >= dzSkewed;
+
+        float32v unskewDelta = float32v( kReflectUnskew3 ) * ( dxSkewed + dySkewed + dzSkewed );
+        float32v dx0 = dxSkewed + unskewDelta; // offset from the base vertex
+        float32v dy0 = dySkewed + unskewDelta;
+        float32v dz0 = dzSkewed + unskewDelta;
+
+        mask32v maskX1 = xGreaterEqualY & xGreaterEqualZ; // axis along which the second vertex steps
+        mask32v maskY1 = FS::BitwiseAndNot( yGreaterEqualZ, xGreaterEqualY );
+        mask32v maskZ1 = FS::BitwiseAndNot( ~xGreaterEqualZ, yGreaterEqualZ );
+
+        mask32v nMaskX2 = ~( xGreaterEqualY | xGreaterEqualZ ); // inverted masks: set on the axis the third vertex does NOT step along
+        mask32v nMaskY2 = xGreaterEqualY & ~yGreaterEqualZ;
+        mask32v nMaskZ2 = xGreaterEqualZ & yGreaterEqualZ;
+
+        float32v dx3 = dx0 - float32v( kReflectUnskew3 * 3 + 1 ); // offset from the vertex stepped along all three axes
+        float32v dy3 = dy0 - float32v( kReflectUnskew3 * 3 + 1 );
+        float32v dz3 = dz0 - float32v( kReflectUnskew3 * 3 + 1 );
+        float32v dx1 = FS::MaskedSub( maskX1, dx3, float32v( 1 ) ); // kReflectUnskew3 * 3 + 1 = kReflectUnskew3, so dx0 - kReflectUnskew3 = dx3
+        float32v dy1 = FS::MaskedSub( maskY1, dy3, float32v( 1 ) );
+        float32v dz1 = FS::MaskedSub( maskZ1, dz3, float32v( 1 ) );
+        float32v dx2 = FS::MaskedIncrement( nMaskX2, dx0 ); // kReflectUnskew3 * 2 + 1 = 0, so dx0 - ( kReflectUnskew3 * 2 + 1 ) = dx0
+        float32v dy2 = FS::MaskedIncrement( nMaskY2, dy0 );
+        float32v dz2 = FS::MaskedIncrement( nMaskZ2, dz0 );
+
+        float32v falloff0 = FS::FNMulAdd( dz0, dz0, FS::FNMulAdd( dy0, dy0, FS::FNMulAdd( dx0, dx0, float32v( kFalloffRadiusSquared ) ) ) ); // kFalloffRadiusSquared minus squared distance, assuming FNMulAdd( a, b, c ) is c - a * b
+        float32v falloff1 = FS::FNMulAdd( dz1, dz1, FS::FNMulAdd( dy1, dy1, FS::FNMulAdd( dx1, dx1, float32v( kFalloffRadiusSquared ) ) ) );
+        float32v falloff2 = FS::FNMulAdd( dz2, dz2, FS::FNMulAdd( dy2, dy2, FS::FNMulAdd( dx2, dx2, float32v( kFalloffRadiusSquared ) ) ) );
+        float32v falloff3 = falloff0 - ( unskewDelta + float32v( 3.0 / 4.0 ) ); // algebraically kFalloffRadiusSquared - dx3^2 - dy3^2 - dz3^2, because dx3 = dx0 + 1/2 and dx0 + dy0 + dz0 = unskewDelta
+
+        falloff0 = FS::Max( falloff0, float32v( 0 ) ); // clamp negative falloffs to zero
+        falloff1 = FS::Max( falloff1, float32v( 0 ) );
+        falloff2 = FS::Max( falloff2, float32v( 0 ) );
+        falloff3 = FS::Max( falloff3, float32v( 0 ) );
+
+        falloff0 *= falloff0; falloff0 *= falloff0; // raise each clamped falloff to the 4th power
+        falloff1 *= falloff1; falloff1 *= falloff1;
+        falloff2 *= falloff2; falloff2 *= falloff2;
+        falloff3 *= falloff3; falloff3 *= falloff3;
+
+        float32v gradientRampValue0 = GetGradientDotCommon( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase ), dx0, dy0, dz0 );
+        float32v gradientRampValue1 = GetGradientDotCommon( HashPrimes( seed, FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ), FS::MaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ) ), dx1, dy1, dz1 );
+        float32v gradientRampValue2 = GetGradientDotCommon( HashPrimes( seed, FS::InvMaskedAdd( nMaskX2, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( nMaskY2, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( nMaskZ2, zPrimedBase, int32v( Primes::Z ) ) ), dx2, dy2, dz2 );
+        float32v gradientRampValue3 = GetGradientDotCommon( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ) ), dx3, dy3, dz3 );
+
+        constexpr double kBounding = 32.69428253173828125; // its reciprocal, positive and negative, is passed to ScaleOutput below
+
+        return this->ScaleOutput( FS::FMulAdd( gradientRampValue3, falloff3, FS::FMulAdd( gradientRampValue2, falloff2, FS::FMulAdd( gradientRampValue1, falloff1, gradientRampValue0 * falloff0 ) ) ),
             -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
+    float32v FS_VECTORCALL Gen_Standard( int32v seed, float32v x, float32v y, float32v z, float32v w ) const // 4D variant: skews the scaled position onto the lattice and sums five falloff-weighted gradient contributions
     {
         this->ScalePositions( x, y, z, w );
 
-        const float SQRT5 = 2.236067977499f;
-        const float F4 = (SQRT5 - 1.0f) / 4.0f;
-        const float G4 = (5.0f - SQRT5) / 20.0f;
-
-        float32v s = float32v( F4 ) * (x + y + z + w);
-        x += s;
-        y += s;
-        z += s;
-        w += s;
-
-        float32v x0 = FS::Floor( x );
-        float32v y0 = FS::Floor( y );
-        float32v z0 = FS::Floor( z );
-        float32v w0 = FS::Floor( w );
-        float32v xi = x - x0;
-        float32v yi = y - y0;
-        float32v zi = z - z0;
-        float32v wi = w - w0;
-
-        int32v i = FS::Convert<int32_t>( x0 ) * int32v( Primes::X );
-        int32v j = FS::Convert<int32_t>( y0 ) * int32v( Primes::Y );
-        int32v k = FS::Convert<int32_t>( z0 ) * int32v( Primes::Z );
-        int32v l = FS::Convert<int32_t>( w0 ) * int32v( Primes::W );
-
-        float32v g = float32v( G4 ) * (xi + yi + zi + wi);
-        x0 = xi - g;
-        y0 = yi - g;
-        z0 = zi - g;
-        w0 = wi - g;
-
-        int32v rankx( 0 );
-        int32v ranky( 0 );
-        int32v rankz( 0 );
-        int32v rankw( 0 );
-
-        mask32v x_ge_y = x0 >= y0;
-        rankx = FS::MaskedIncrement( x_ge_y, rankx );
-        ranky = FS::MaskedIncrement( ~x_ge_y, ranky );
-
-        mask32v x_ge_z = x0 >= z0;
-        rankx = FS::MaskedIncrement( x_ge_z, rankx );
-        rankz = FS::MaskedIncrement( ~x_ge_z, rankz );
-
-        mask32v x_ge_w = x0 >= w0;
-        rankx = FS::MaskedIncrement( x_ge_w, rankx );
-        rankw = FS::MaskedIncrement( ~x_ge_w, rankw );
-
-        mask32v y_ge_z = y0 >= z0;
-        ranky = FS::MaskedIncrement( y_ge_z, ranky );
-        rankz = FS::MaskedIncrement( ~y_ge_z, rankz );
-
-        mask32v y_ge_w = y0 >= w0;
-        ranky = FS::MaskedIncrement( y_ge_w, ranky );
-        rankw = FS::MaskedIncrement( ~y_ge_w, rankw );
-
-        mask32v z_ge_w = z0 >= w0;
-        rankz = FS::MaskedIncrement( z_ge_w, rankz );
-        rankw = FS::MaskedIncrement( ~z_ge_w, rankw );
-
-        mask32v i1 = rankx > int32v( 2 );
-        mask32v j1 = ranky > int32v( 2 );
-        mask32v k1 = rankz > int32v( 2 );
-        mask32v l1 = rankw > int32v( 2 );
-
-        mask32v i2 = rankx > int32v( 1 );
-        mask32v j2 = ranky > int32v( 1 );
-        mask32v k2 = rankz > int32v( 1 );
-        mask32v l2 = rankw > int32v( 1 );
-
-        mask32v i3 = rankx > int32v( 0 );
-        mask32v j3 = ranky > int32v( 0 );
-        mask32v k3 = rankz > int32v( 0 );
-        mask32v l3 = rankw > int32v( 0 );
-
-        float32v x1 = FS::MaskedSub( i1, x0, float32v( 1 ) ) + float32v( G4 );
-        float32v y1 = FS::MaskedSub( j1, y0, float32v( 1 ) ) + float32v( G4 );
-        float32v z1 = FS::MaskedSub( k1, z0, float32v( 1 ) ) + float32v( G4 );
-        float32v w1 = FS::MaskedSub( l1, w0, float32v( 1 ) ) + float32v( G4 );
-        float32v x2 = FS::MaskedSub( i2, x0, float32v( 1 ) ) + float32v( G4 * 2 );
-        float32v y2 = FS::MaskedSub( j2, y0, float32v( 1 ) ) + float32v( G4 * 2 );
-        float32v z2 = FS::MaskedSub( k2, z0, float32v( 1 ) ) + float32v( G4 * 2 );
-        float32v w2 = FS::MaskedSub( l2, w0, float32v( 1 ) ) + float32v( G4 * 2 );
-        float32v x3 = FS::MaskedSub( i3, x0, float32v( 1 ) ) + float32v( G4 * 3 );
-        float32v y3 = FS::MaskedSub( j3, y0, float32v( 1 ) ) + float32v( G4 * 3 );
-        float32v z3 = FS::MaskedSub( k3, z0, float32v( 1 ) ) + float32v( G4 * 3 );
-        float32v w3 = FS::MaskedSub( l3, w0, float32v( 1 ) ) + float32v( G4 * 3 );
-        float32v x4 = x0 + float32v( G4 * 4 - 1 );
-        float32v y4 = y0 + float32v( G4 * 4 - 1 );
-        float32v z4 = z0 + float32v( G4 * 4 - 1 );
-        float32v w4 = w0 + float32v( G4 * 4 - 1 );
-
-        float32v t0 = FS::FNMulAdd( x0, x0, FS::FNMulAdd( y0, y0, FS::FNMulAdd( z0, z0, FS::FNMulAdd( w0, w0, float32v( 0.6f ) ) ) ) );
-        float32v t1 = FS::FNMulAdd( x1, x1, FS::FNMulAdd( y1, y1, FS::FNMulAdd( z1, z1, FS::FNMulAdd( w1, w1, float32v( 0.6f ) ) ) ) );
-        float32v t2 = FS::FNMulAdd( x2, x2, FS::FNMulAdd( y2, y2, FS::FNMulAdd( z2, z2, FS::FNMulAdd( w2, w2, float32v( 0.6f ) ) ) ) );
-        float32v t3 = FS::FNMulAdd( x3, x3, FS::FNMulAdd( y3, y3, FS::FNMulAdd( z3, z3, FS::FNMulAdd( w3, w3, float32v( 0.6f ) ) ) ) );
-        float32v t4 = FS::FNMulAdd( x4, x4, FS::FNMulAdd( y4, y4, FS::FNMulAdd( z4, z4, FS::FNMulAdd( w4, w4, float32v( 0.6f ) ) ) ) );
-
-        t0 = FS::Max( t0, float32v( 0 ) );
-        t1 = FS::Max( t1, float32v( 0 ) );
-        t2 = FS::Max( t2, float32v( 0 ) );
-        t3 = FS::Max( t3, float32v( 0 ) );
-        t4 = FS::Max( t4, float32v( 0 ) );
-
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
-        t3 *= t3; t3 *= t3;
-        t4 *= t4; t4 *= t4;
-
-        float32v n0 = GetGradientDot( HashPrimes( seed, i, j, k, l ), x0, y0, z0, w0 );
-        float32v n1 = GetGradientDot( HashPrimes( seed, 
-            FS::MaskedAdd( i1, i, int32v( Primes::X ) ),
-            FS::MaskedAdd( j1, j, int32v( Primes::Y ) ),
-            FS::MaskedAdd( k1, k, int32v( Primes::Z ) ),
-            FS::MaskedAdd( l1, l, int32v( Primes::W ) ) ), x1, y1, z1, w1 );
-        float32v n2 = GetGradientDot( HashPrimes( seed, 
-            FS::MaskedAdd( i2, i, int32v( Primes::X ) ),
-            FS::MaskedAdd( j2, j, int32v( Primes::Y ) ),
-            FS::MaskedAdd( k2, k, int32v( Primes::Z ) ),
-            FS::MaskedAdd( l2, l, int32v( Primes::W ) ) ), x2, y2, z2, w2 );
-        float32v n3 = GetGradientDot( HashPrimes( seed,
-            FS::MaskedAdd( i3, i, int32v( Primes::X ) ),
-            FS::MaskedAdd( j3, j, int32v( Primes::Y ) ),
-            FS::MaskedAdd( k3, k, int32v( Primes::Z ) ),
-            FS::MaskedAdd( l3, l, int32v( Primes::W ) ) ), x3, y3, z3, w3 );
-        float32v n4 = GetGradientDot( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ), k + int32v( Primes::Z ), l + int32v( Primes::W ) ), x4, y4, z4, w4 );
-
-        constexpr float kBounding = 27.f;
-
-        return this->ScaleOutput( FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, FS::FMulAdd( n2, t2, FS::FMulAdd( n3, t3, n4 * t4 ) ) ) ),
+        constexpr double kRoot5 = 2.2360679774997896964091736687313;
+        constexpr double kSkew4 = 1.0 / ( kRoot5 + 1.0 ); // algebraically ( sqrt(5) - 1 ) / 4, the F4 factor of the removed code
+        constexpr double kUnskew4 = -1.0 / ( kRoot5 + 5.0 ); // algebraically -( 5 - sqrt(5) ) / 20, i.e. the removed G4 with its sign folded in
+        constexpr double kFalloffRadiusSquared = 0.6; // same 0.6 the removed code passed inline to each falloff term
+
+        float32v skewDelta = float32v( kSkew4 ) * ( x + y + z + w ); // one shared offset, added to every axis
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v zSkewed = z + skewDelta;
+        float32v wSkewed = w + skewDelta;
+
+        float32v xSkewedBase = FS::Floor( xSkewed ); // floor of each skewed coordinate: the base lattice vertex
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v zSkewedBase = FS::Floor( zSkewed );
+        float32v wSkewedBase = FS::Floor( wSkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase; // offset from the base vertex, still in skewed space
+        float32v dySkewed = ySkewed - ySkewedBase;
+        float32v dzSkewed = zSkewed - zSkewedBase;
+        float32v dwSkewed = wSkewed - wSkewedBase;
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X ); // base vertex pre-multiplied by the per-axis prime fed to HashPrimes
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
+        int32v wPrimedBase = FS::Convert<int32_t>( wSkewedBase ) * int32v( Primes::W );
+
+        float32v unskewDelta = float32v( kUnskew4 ) * ( dxSkewed + dySkewed + dzSkewed + dwSkewed ); // kUnskew4 is negative, so it is added here where the removed code subtracted G4 * sum
+        float32v dx0 = dxSkewed + unskewDelta; // offset from the base vertex after unskewing
+        float32v dy0 = dySkewed + unskewDelta;
+        float32v dz0 = dzSkewed + unskewDelta;
+        float32v dw0 = dwSkewed + unskewDelta;
+
+        int32v rankX( 0 ); // rank = how many pairwise comparisons this axis wins (0..3); each of the six pairs below awards exactly one point
+        int32v rankY( 0 );
+        int32v rankZ( 0 );
+        int32v rankW( 0 );
+
+        mask32v xGreaterEqualY = dx0 >= dy0;
+        rankX = FS::MaskedIncrement(  xGreaterEqualY, rankX );
+        rankY = FS::MaskedIncrement( ~xGreaterEqualY, rankY );
+
+        mask32v xGreaterEqualZ = dx0 >= dz0;
+        rankX = FS::MaskedIncrement(  xGreaterEqualZ, rankX );
+        rankZ = FS::MaskedIncrement( ~xGreaterEqualZ, rankZ );
+
+        mask32v xGreaterEqualW = dx0 >= dw0;
+        rankX = FS::MaskedIncrement(  xGreaterEqualW, rankX );
+        rankW = FS::MaskedIncrement( ~xGreaterEqualW, rankW );
+
+        mask32v yGreaterEqualZ = dy0 >= dz0;
+        rankY = FS::MaskedIncrement(  yGreaterEqualZ, rankY );
+        rankZ = FS::MaskedIncrement( ~yGreaterEqualZ, rankZ );
+
+        mask32v yGreaterEqualW = dy0 >= dw0;
+        rankY = FS::MaskedIncrement(  yGreaterEqualW, rankY );
+        rankW = FS::MaskedIncrement( ~yGreaterEqualW, rankW );
+
+        mask32v zGreaterEqualW = dz0 >= dw0;
+        rankZ = FS::MaskedIncrement(  zGreaterEqualW, rankZ );
+        rankW = FS::MaskedIncrement( ~zGreaterEqualW, rankW );
+
+        mask32v maskX1 = rankX > int32v( 2 ); // vertex 1 steps only along an axis with rank 3
+        mask32v maskY1 = rankY > int32v( 2 );
+        mask32v maskZ1 = rankZ > int32v( 2 );
+        mask32v maskW1 = rankW > int32v( 2 );
+
+        mask32v maskX2 = rankX > int32v( 1 ); // vertex 2 steps along axes with rank >= 2
+        mask32v maskY2 = rankY > int32v( 1 );
+        mask32v maskZ2 = rankZ > int32v( 1 );
+        mask32v maskW2 = rankW > int32v( 1 );
+
+        mask32v maskX3 = rankX > int32v( 0 ); // vertex 3 steps along axes with rank >= 1
+        mask32v maskY3 = rankY > int32v( 0 );
+        mask32v maskZ3 = rankZ > int32v( 0 );
+        mask32v maskW3 = rankW > int32v( 0 );
+
+        float32v dx1 = FS::MaskedSub( maskX1, dx0, float32v( 1 ) ) - float32v( kUnskew4 ); // offsets to vertex N: subtract 1 on stepped axes, then N * kUnskew4 on every axis
+        float32v dy1 = FS::MaskedSub( maskY1, dy0, float32v( 1 ) ) - float32v( kUnskew4 );
+        float32v dz1 = FS::MaskedSub( maskZ1, dz0, float32v( 1 ) ) - float32v( kUnskew4 );
+        float32v dw1 = FS::MaskedSub( maskW1, dw0, float32v( 1 ) ) - float32v( kUnskew4 );
+        float32v dx2 = FS::MaskedSub( maskX2, dx0, float32v( 1 ) ) - float32v( kUnskew4 * 2 );
+        float32v dy2 = FS::MaskedSub( maskY2, dy0, float32v( 1 ) ) - float32v( kUnskew4 * 2 );
+        float32v dz2 = FS::MaskedSub( maskZ2, dz0, float32v( 1 ) ) - float32v( kUnskew4 * 2 );
+        float32v dw2 = FS::MaskedSub( maskW2, dw0, float32v( 1 ) ) - float32v( kUnskew4 * 2 );
+        float32v dx3 = FS::MaskedSub( maskX3, dx0, float32v( 1 ) ) - float32v( kUnskew4 * 3 );
+        float32v dy3 = FS::MaskedSub( maskY3, dy0, float32v( 1 ) ) - float32v( kUnskew4 * 3 );
+        float32v dz3 = FS::MaskedSub( maskZ3, dz0, float32v( 1 ) ) - float32v( kUnskew4 * 3 );
+        float32v dw3 = FS::MaskedSub( maskW3, dw0, float32v( 1 ) ) - float32v( kUnskew4 * 3 );
+        float32v dx4 = dx0 - float32v( kUnskew4 * 4 + 1 ); // vertex 4 steps along all four axes, so no mask is needed
+        float32v dy4 = dy0 - float32v( kUnskew4 * 4 + 1 );
+        float32v dz4 = dz0 - float32v( kUnskew4 * 4 + 1 );
+        float32v dw4 = dw0 - float32v( kUnskew4 * 4 + 1 );
+
+        float32v falloff0 = FS::FNMulAdd( dw0, dw0, FS::FNMulAdd( dz0, dz0, FS::FNMulAdd( dy0, dy0, FS::FNMulAdd( dx0, dx0, float32v( kFalloffRadiusSquared ) ) ) ) ); // kFalloffRadiusSquared minus the squared length of the offset
+        float32v falloff1 = FS::FNMulAdd( dw1, dw1, FS::FNMulAdd( dz1, dz1, FS::FNMulAdd( dy1, dy1, FS::FNMulAdd( dx1, dx1, float32v( kFalloffRadiusSquared ) ) ) ) );
+        float32v falloff2 = FS::FNMulAdd( dw2, dw2, FS::FNMulAdd( dz2, dz2, FS::FNMulAdd( dy2, dy2, FS::FNMulAdd( dx2, dx2, float32v( kFalloffRadiusSquared ) ) ) ) );
+        float32v falloff3 = FS::FNMulAdd( dw3, dw3, FS::FNMulAdd( dz3, dz3, FS::FNMulAdd( dy3, dy3, FS::FNMulAdd( dx3, dx3, float32v( kFalloffRadiusSquared ) ) ) ) );
+        float32v falloff4 = falloff0 + FS::FMulAdd( unskewDelta, // vertex 4 falloff derived from falloff0: expanding |d0 - ( 4 * kUnskew4 + 1 )|^2 leaves a term linear in unskewDelta plus a constant
+            float32v( -4.0 * ( kRoot5 + 3.0 ) / ( kRoot5 + 5.0 ) ), // equals 2 * ( 4 * kUnskew4 + 1 )^2 / kUnskew4
+            float32v( -4.0 / 5.0 ) ); // equals -4 * ( 4 * kUnskew4 + 1 )^2
+
+        falloff0 = FS::Max( falloff0, float32v( 0 ) ); // negative falloff is clamped to zero so that vertex contributes nothing
+        falloff1 = FS::Max( falloff1, float32v( 0 ) );
+        falloff2 = FS::Max( falloff2, float32v( 0 ) );
+        falloff3 = FS::Max( falloff3, float32v( 0 ) );
+        falloff4 = FS::Max( falloff4, float32v( 0 ) );
+
+        falloff0 *= falloff0; falloff0 *= falloff0; // squaring twice raises each falloff to the 4th power
+        falloff1 *= falloff1; falloff1 *= falloff1;
+        falloff2 *= falloff2; falloff2 *= falloff2;
+        falloff3 *= falloff3; falloff3 *= falloff3;
+        falloff4 *= falloff4; falloff4 *= falloff4;
+
+        float32v gradientRampValue0 = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimedBase ), dx0, dy0, dz0, dw0 ); // per-vertex value from the hashed lattice position and the offset to that vertex
+        float32v gradientRampValue1 = GetGradientDotSimplex( HashPrimes( seed,
+            FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), // adding the axis prime is a +1 lattice step in primed space
+            FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ),
+            FS::MaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ),
+            FS::MaskedAdd( maskW1, wPrimedBase, int32v( Primes::W ) ) ), dx1, dy1, dz1, dw1 );
+        float32v gradientRampValue2 = GetGradientDotSimplex( HashPrimes( seed,
+            FS::MaskedAdd( maskX2, xPrimedBase, int32v( Primes::X ) ),
+            FS::MaskedAdd( maskY2, yPrimedBase, int32v( Primes::Y ) ),
+            FS::MaskedAdd( maskZ2, zPrimedBase, int32v( Primes::Z ) ),
+            FS::MaskedAdd( maskW2, wPrimedBase, int32v( Primes::W ) ) ), dx2, dy2, dz2, dw2 );
+        float32v gradientRampValue3 = GetGradientDotSimplex( HashPrimes( seed,
+            FS::MaskedAdd( maskX3, xPrimedBase, int32v( Primes::X ) ),
+            FS::MaskedAdd( maskY3, yPrimedBase, int32v( Primes::Y ) ),
+            FS::MaskedAdd( maskZ3, zPrimedBase, int32v( Primes::Z ) ),
+            FS::MaskedAdd( maskW3, wPrimedBase, int32v( Primes::W ) ) ), dx3, dy3, dz3, dw3 );
+        float32v gradientRampValue4 = GetGradientDotSimplex( HashPrimes( seed,
+            xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ), wPrimedBase + int32v( Primes::W ) ),
+            dx4, dy4, dz4, dw4 );
+
+        constexpr double kBounding = 33.653125584827855; // replaces the removed 27.f; handed to ScaleOutput as -1 / kBounding and 1 / kBounding. NOTE(review): presumably the peak magnitude of the raw sum - confirm derivation
+
+        return this->ScaleOutput( FS::FMulAdd( gradientRampValue0, falloff0, FS::FMulAdd( gradientRampValue1, falloff1, FS::FMulAdd( gradientRampValue2, falloff2, FS::FMulAdd( gradientRampValue3, falloff3, gradientRampValue4 * falloff4 ) ) ) ), // sum of gradient * falloff over all five vertices
             -1 / kBounding, 1 / kBounding );
     }
-};
 
-template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2, SIMD> final : public virtual FastNoise::OpenSimplex2, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
-{
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
+    float32v FS_VECTORCALL Gen_Smooth( int32v seed, float32v x, float32v y ) const // 2D smooth variant: accumulates falloff^4 * gradient over four lattice vertices into `value`
     {
         this->ScalePositions( x, y );
 
-        const float SQRT3 = 1.7320508075f;
-        const float F2 = 0.5f * (SQRT3 - 1.0f);
-        const float G2 = (3.0f - SQRT3) / 6.0f;
-
-        float32v f = float32v( F2 ) * (x + y);
-        float32v x0 = FS::Floor( x + f );
-        float32v y0 = FS::Floor( y + f );
-
-        int32v i = FS::Convert<int32_t>( x0 ) * int32v( Primes::X );
-        int32v j = FS::Convert<int32_t>( y0 ) * int32v( Primes::Y );
+        constexpr double kRoot3 = 1.7320508075688772935274463415059;
+        constexpr double kSkew2 = 1.0 / ( kRoot3 + 1.0 ); // algebraically ( sqrt(3) - 1 ) / 2
+        constexpr double kUnskew2 = -1.0 / ( kRoot3 + 3.0 ); // algebraically ( sqrt(3) - 3 ) / 6, a negative value
+        constexpr double kFalloffRadiusSquared = 2.0 / 3.0;
+
+        float32v skewDelta = float32v( kSkew2 ) * ( x + y ); // one shared offset, added to both axes
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed ); // floor of each skewed coordinate: the base lattice vertex
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase; // offset from the base vertex, still in skewed space
+        float32v dySkewed = ySkewed - ySkewedBase;
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X ); // base vertex pre-multiplied by the per-axis prime fed to HashPrimes
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+
+        mask32v forwardXY = dxSkewed + dySkewed > float32v( 1.0f ); // true when the skewed offsets sum past 1
+        float32v boundaryXY = FS::Masked( forwardXY, float32v( -1.0f ) ); // -1 where forwardXY; presumably 0 elsewhere - confirm FS::Masked semantics
+        mask32v forwardX = FS::FMulAdd( dxSkewed, float32v( -2.0f ), dySkewed ) < boundaryXY; // tests dySkewed - 2 * dxSkewed against the boundary
+        mask32v forwardY = FS::FMulAdd( dySkewed, float32v( -2.0f ), dxSkewed ) < boundaryXY; // tests dxSkewed - 2 * dySkewed against the boundary
 
-        float32v g = float32v( G2 ) * (x0 + y0);
-        x0 = x - (x0 - g);
-        y0 = y - (y0 - g);
+        float32v unskewDelta = float32v( kUnskew2 ) * ( dxSkewed + dySkewed );
+        float32v dxBase = dxSkewed + unskewDelta; // offset from the base vertex after unskewing
+        float32v dyBase = dySkewed + unskewDelta;
 
-        mask32v i1 = x0 > y0;
-        //mask32v j1 = ~i1; //InvMasked funcs
+        float32v falloffBase0, value; // falloffBase0 is reused by the <1, 1> vertex; value accumulates the result
 
-        float32v x1 = FS::MaskedSub( i1, x0, float32v( 1.f ) ) + float32v( G2 );
-        float32v y1 = FS::InvMaskedSub( i1, y0, float32v( 1.f ) ) + float32v( G2 );
-        float32v x2 = x0 + float32v( (G2 * 2) - 1 );
-        float32v y2 = y0 + float32v( (G2 * 2) - 1 );
+        // Vertex <0, 0>
+        {
+            int32v hash = HashPrimes( seed, xPrimedBase, yPrimedBase );
+            float32v gradientRampValue = GetGradientDotSimplex( hash, dxBase, dyBase );
+            falloffBase0 = FS::FNMulAdd( dxBase, dxBase, FS::FNMulAdd( dyBase, dyBase, float32v( kFalloffRadiusSquared ) ) ); // kFalloffRadiusSquared - dxBase^2 - dyBase^2
+            float32v falloff = falloffBase0; falloff *= falloff; falloff *= falloff; // 4th power; no FS::Max clamp here, unlike the two outer vertices below
+            value = falloff * gradientRampValue;
+        }
 
-        float32v t0 = float32v( 0.5f ) - (x0 * x0) - (y0 * y0);
-        float32v t1 = float32v( 0.5f ) - (x1 * x1) - (y1 * y1);
-        float32v t2 = float32v( 0.5f ) - (x2 * x2) - (y2 * y2);
+        // Vertex <1, 1>
+        {
+            int32v hash = HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) );
+            float32v gradientRampValue = GetGradientDotSimplex( hash, dxBase - float32v( 2 * kUnskew2 + 1 ), dyBase - float32v( 2 * kUnskew2 + 1 ) );
+            float32v falloff = FS::FMulAdd( unskewDelta, // derived from falloffBase0: expanding |d - ( 2 * kUnskew2 + 1 )|^2 leaves a term linear in unskewDelta plus a constant
+                float32v( -4.0 * ( kRoot3 + 2.0 ) / ( kRoot3 + 3.0 ) ), // equals 2 * ( 2 * kUnskew2 + 1 )^2 / kUnskew2
+                falloffBase0 - float32v( kFalloffRadiusSquared ) ); // the constant 2 * ( 2 * kUnskew2 + 1 )^2 equals 2/3, the same value as kFalloffRadiusSquared
+            falloff *= falloff; falloff *= falloff;
+            value = FS::FMulAdd( falloff, gradientRampValue, value );
+        }
 
-        t0 = FS::Max( t0, float32v( 0 ) );
-        t1 = FS::Max( t1, float32v( 0 ) );
-        t2 = FS::Max( t2, float32v( 0 ) );
+        float32v xyDelta = FS::Select( forwardXY, float32v( kUnskew2 + 1 ), float32v( -kUnskew2 ) ); // shared part of the offset to the two outer vertices; the blocks below add the per-case remainder
+        dxBase -= xyDelta;
+        dyBase -= xyDelta;
 
-        t0 *= t0; t0 *= t0;
-        t1 *= t1; t1 *= t1;
-        t2 *= t2; t2 *= t2;
+        // Vertex <1, 0> or <-1, 0> or <2, 1> or <0, 1>
+        {
+            int32v hash = HashPrimes( seed,
+                FS::InvMaskedSub( forwardXY, FS::MaskedAdd( forwardX, xPrimedBase, int32v( Primes::X * 2 ) ), int32v( Primes::X ) ), // x step: +2 or 0 when forwardXY, otherwise +1 or -1, chosen by forwardX
+                FS::MaskedAdd( forwardXY, yPrimedBase, int32v( Primes::Y ) ) ); // y step: +1 only when forwardXY
+            float32v dx = dxBase - FS::Select( forwardX, float32v( 1 + 2 * kUnskew2 ), float32v( -1 ) );
+            float32v dy = FS::MaskedSub( forwardX, dyBase, float32v( 2 * kUnskew2 ) );
+            float32v gradientRampValue = GetGradientDotSimplex( hash, dx, dy );
+            float32v falloff = FS::Max( FS::FNMulAdd( dx, dx, FS::FNMulAdd( dy, dy, float32v( kFalloffRadiusSquared ) ) ), float32v( 0 ) ); // clamped: negative falloff contributes nothing
+            falloff *= falloff; falloff *= falloff;
+            value = FS::FMulAdd( falloff, gradientRampValue, value );
+        }
 
-        float32v n0 = GetGradientDotFancy( HashPrimes( seed, i, j ), x0, y0 );
-        float32v n1 = GetGradientDotFancy( HashPrimes( seed, FS::MaskedAdd( i1, i, int32v( Primes::X ) ), FS::InvMaskedAdd( i1, j, int32v( Primes::Y ) ) ), x1, y1 );
-        float32v n2 = GetGradientDotFancy( HashPrimes( seed, i + int32v( Primes::X ), j + int32v( Primes::Y ) ), x2, y2 );
+        // Vertex <0, 1> or <0, -1> or <1, 2> or <1, 0>
+        {
+            int32v hash = HashPrimes( seed,
+                FS::MaskedAdd( forwardXY, xPrimedBase, int32v( Primes::X ) ), // x step: +1 only when forwardXY
+                FS::InvMaskedSub( forwardXY, FS::MaskedAdd( forwardY, yPrimedBase, int32v( (int32_t)( Primes::Y * 2LL ) ) ), int32v( Primes::Y ) ) ); // y step mirrors the x step above; NOTE(review): the 2LL widening presumably avoids int overflow when doubling Primes::Y - confirm against Primes
+            float32v dx = FS::MaskedSub( forwardY, dxBase, float32v( 2 * kUnskew2 ) );
+            float32v dy = dyBase - FS::Select( forwardY, float32v( 1 + 2 * kUnskew2 ), float32v( -1 ) );
+            float32v gradientRampValue = GetGradientDotSimplex( hash, dx, dy );
+            float32v falloff = FS::Max( FS::FNMulAdd( dx, dx, FS::FNMulAdd( dy, dy, float32v( kFalloffRadiusSquared ) ) ), float32v( 0 ) ); // clamped: negative falloff contributes nothing
+            falloff *= falloff; falloff *= falloff;
+            value = FS::FMulAdd( falloff, gradientRampValue, value );
+        }
 
-        constexpr float kBounding = 49.918426513671875f;
+        constexpr double kBounding = 9.28993664146183; // handed to ScaleOutput as -1 / kBounding and 1 / kBounding. NOTE(review): presumably the peak magnitude of the raw sum - confirm derivation
 
-        return this->ScaleOutput( FS::FMulAdd( n0, t0, FS::FMulAdd( n1, t1, n2 * t2 ) ),
-            -1 / kBounding, 1 / kBounding );
+        return this->ScaleOutput( value, -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
+    float32v FS_VECTORCALL Gen_Smooth( int32v seed, float32v x, float32v y, float32v z ) const
     {
         this->ScalePositions( x, y, z );
 
-        float32v f = float32v( 2.0f / 3.0f ) * (x + y + z);
-        float32v xr = f - x;
-        float32v yr = f - y;
-        float32v zr = f - z;
+        constexpr double kSkew3 = 1.0 / 3.0;
+        constexpr double kReflectUnskew3 = -1.0 / 2.0;
+        constexpr double kTwiceUnskew3 = -1.0 / 4.0;
+
+        constexpr double kDistanceSquaredA = 3.0 / 4.0;
+        constexpr double kDistanceSquaredB = 1.0;
+        constexpr double kFalloffRadiusSquared = kDistanceSquaredA;
+
+        float32v skewDelta = float32v( kSkew3 ) * ( x + y + z );
 
-        float32v val( 0 );
-        for( size_t i = 0; ; i++ )
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v zSkewed = z + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed );
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v zSkewedBase = FS::Floor( zSkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase;
+        float32v dySkewed = ySkewed - ySkewedBase;
+        float32v dzSkewed = zSkewed - zSkewedBase;
+
+        // From unit cell base, find closest vertex
         {
-            float32v v0xr = FS::Round( xr );
-            float32v v0yr = FS::Round( yr );
-            float32v v0zr = FS::Round( zr );
-            float32v d0xr = xr - v0xr;
-            float32v d0yr = yr - v0yr;
-            float32v d0zr = zr - v0zr;
-
-            float32v score0xr = FS::Abs( d0xr );
-            float32v score0yr = FS::Abs( d0yr );
-            float32v score0zr = FS::Abs( d0zr );
-            mask32v dir0xr = FS::Max( score0yr, score0zr ) <= score0xr;
-            mask32v dir0yr = FS::BitwiseAndNot( FS::Max( score0zr, score0xr ) <= score0yr, dir0xr );
-            mask32v dir0zr = ~(dir0xr | dir0yr);
-            float32v v1xr = FS::MaskedAdd( dir0xr, v0xr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0xr ) );
-            float32v v1yr = FS::MaskedAdd( dir0yr, v0yr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0yr ) );
-            float32v v1zr = FS::MaskedAdd( dir0zr, v0zr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0zr ) );
-            float32v d1xr = xr - v1xr;
-            float32v d1yr = yr - v1yr;
-            float32v d1zr = zr - v1zr;
-
-            int32v hv0xr = FS::Convert<int32_t>( v0xr ) * int32v( Primes::X );
-            int32v hv0yr = FS::Convert<int32_t>( v0yr ) * int32v( Primes::Y );
-            int32v hv0zr = FS::Convert<int32_t>( v0zr ) * int32v( Primes::Z );
-
-            int32v hv1xr = FS::Convert<int32_t>( v1xr ) * int32v( Primes::X );
-            int32v hv1yr = FS::Convert<int32_t>( v1yr ) * int32v( Primes::Y );
-            int32v hv1zr = FS::Convert<int32_t>( v1zr ) * int32v( Primes::Z );
-
-            float32v t0 = FS::FNMulAdd( d0zr, d0zr, FS::FNMulAdd( d0yr, d0yr, FS::FNMulAdd( d0xr, d0xr, float32v( 0.6f ) ) ) );
-            float32v t1 = FS::FNMulAdd( d1zr, d1zr, FS::FNMulAdd( d1yr, d1yr, FS::FNMulAdd( d1xr, d1xr, float32v( 0.6f ) ) ) );
-            t0 = FS::Max( t0, float32v( 0 ) );
-            t1 = FS::Max( t1, float32v( 0 ) );
-            t0 *= t0; t0 *= t0;
-            t1 *= t1; t1 *= t1;
-
-            float32v v0 = GetGradientDot( HashPrimes( seed, hv0xr, hv0yr, hv0zr ), d0xr, d0yr, d0zr );
-            float32v v1 = GetGradientDot( HashPrimes( seed, hv1xr, hv1yr, hv1zr ), d1xr, d1yr, d1zr );
-
-            val = FS::FMulAdd( v0, t0, FS::FMulAdd( v1, t1, val ) );
-
-            if( i == 1 )
+            // Perform a double unskew to get the vector whose dot product with skewed vectors produces the unskewed result.
+            float32v twiceUnskewDelta = float32v( kTwiceUnskew3 ) * ( dxSkewed + dySkewed + dzSkewed );
+            float32v xNormal = dxSkewed + twiceUnskewDelta;
+            float32v yNormal = dySkewed + twiceUnskewDelta;
+            float32v zNormal = dzSkewed + twiceUnskewDelta;
+            float32v xyzNormal = -twiceUnskewDelta; // xNormal + yNormal + zNormal
+
+            // Using those, compare scores to determine which vertex is closest.
+            constexpr auto considerVertex = [] ( float32v& maxScore, int32v& moveMaskBits, float32v score, int32v bits ) constexpr
             {
-                break;
-            }
+                moveMaskBits = FS::Select( score > maxScore, bits, moveMaskBits );
+                maxScore = FS::Max( maxScore, score );
+            };
+            float32v maxScore = float32v( 0.375f );
+            int32v moveMaskBits = FS::Masked( xyzNormal > maxScore, int32v( -1 ) );
+            maxScore = FS::Max( maxScore, xyzNormal );
+            considerVertex( maxScore, moveMaskBits, xNormal, 0b001 );
+            considerVertex( maxScore, moveMaskBits, yNormal, 0b010 );
+            considerVertex( maxScore, moveMaskBits, zNormal, 0b100 );
+            maxScore += float32v( 0.125f ) - xyzNormal;
+            considerVertex( maxScore, moveMaskBits, -zNormal, 0b011 );
+            considerVertex( maxScore, moveMaskBits, -yNormal, 0b101 );
+            considerVertex( maxScore, moveMaskBits, -xNormal, 0b110 );
+
+            mask32v moveX = ( moveMaskBits & int32v( 0b001 ) ) != int32v( 0 );
+            mask32v moveY = ( moveMaskBits & int32v( 0b010 ) ) != int32v( 0 );
+            mask32v moveZ = ( moveMaskBits & int32v( 0b100 ) ) != int32v( 0 );
+
+            xSkewedBase = FS::MaskedIncrement( moveX, xSkewedBase );
+            ySkewedBase = FS::MaskedIncrement( moveY, ySkewedBase );
+            zSkewedBase = FS::MaskedIncrement( moveZ, zSkewedBase );
+
+            dxSkewed = FS::MaskedDecrement( moveX, dxSkewed );
+            dySkewed = FS::MaskedDecrement( moveY, dySkewed );
+            dzSkewed = FS::MaskedDecrement( moveZ, dzSkewed );
+        }
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X );
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
+
+        float32v skewedCoordinateSum = dxSkewed + dySkewed + dzSkewed;
+        float32v twiceUnskewDelta = float32v( kTwiceUnskew3 ) * skewedCoordinateSum;
+        float32v xNormal = dxSkewed + twiceUnskewDelta;
+        float32v yNormal = dySkewed + twiceUnskewDelta;
+        float32v zNormal = dzSkewed + twiceUnskewDelta;
+        float32v xyzNormal = -twiceUnskewDelta; // xNormal + yNormal + zNormal
+
+        float32v unskewDelta = float32v( kReflectUnskew3 ) * skewedCoordinateSum;
+        float32v dxBase = dxSkewed + unskewDelta;
+        float32v dyBase = dySkewed + unskewDelta;
+        float32v dzBase = dzSkewed + unskewDelta;
+
+        float32v coordinateSum = float32v( 1 + 3 * kReflectUnskew3 ) * skewedCoordinateSum; // dxBase + dyBase + dzBase
+
+        // Vertex <0, 0, 0>
+        float32v value, falloffBaseStemA, falloffBaseStemB;
+        {
+            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase ), dxBase, dyBase, dzBase );
+            float32v falloffBase = FS::FNMulAdd( dzBase, dzBase, FS::FNMulAdd( dyBase, dyBase, FS::FNMulAdd( dxBase, dxBase, float32v( kFalloffRadiusSquared ) ) ) ) * float32v( 0.5f );
+            falloffBaseStemA = falloffBase - float32v( kDistanceSquaredA * 0.5 );
+            falloffBaseStemB = falloffBase - float32v( kDistanceSquaredB * 0.5 );
+            value = ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ) * gradientRampValue;
+        }
+
+        // Vertex <1, 1, 1> or <-1, -1, -1>
+        {
+            mask32v signMask = xyzNormal < float32v( 0 );
 
-            xr += float32v( 0.5f );
-            yr += float32v( 0.5f );
-            zr += float32v( 0.5f );
-            seed = ~seed;
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset = float32v( 3 * kReflectUnskew3 + 1 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimed, yPrimed, zPrimed ), dxBase - offset, dyBase - offset, dzBase - offset );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset, coordinateSum, falloffBaseStemA ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
 
-        constexpr float kBounding = 32.69428253173828125f;
+        // Vertex <1, 1, 0> or <-1, -1, 0>
+        {
+            mask32v signMask = xyzNormal < zNormal;
 
-        return this->ScaleOutput( val, -1 / kBounding, 1 / kBounding );
-    }
-};
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
 
-template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::OpenSimplex2S, SIMD> final : public virtual FastNoise::OpenSimplex2S, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
-{
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
-    {
-        this->ScalePositions( x, y );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( 2 * kReflectUnskew3 ) ^ sign;
 
-        const float SQRT3 = 1.7320508075688772935274463415059f;
-        const float F2 = 0.5f * ( SQRT3 - 1.0f );
-        const float G2 = ( SQRT3 - 3.0f ) / 6.0f;
-
-        float32v s = float32v( F2 ) * ( x + y );
-        float32v xs = x + s;
-        float32v ys = y + s;
-        float32v xsb = FS::Floor( xs );
-        float32v ysb = FS::Floor( ys );
-        float32v xsi = xs - xsb;
-        float32v ysi = ys - ysb;
-        int32v xsbp = FS::Convert<int32_t>( xsb ) * int32v( Primes::X );
-        int32v ysbp = FS::Convert<int32_t>( ysb ) * int32v( Primes::Y );
-
-        mask32v forwardXY = xsi + ysi > float32v( 1.0f );
-        float32v boundaryXY = FS::Masked( forwardXY, float32v( -1.0f ) );
-        mask32v forwardX = FS::FMulAdd( xsi, float32v( -2.0f ), ysi ) < boundaryXY;
-        mask32v forwardY = FS::FMulAdd( ysi, float32v( -2.0f ), xsi ) < boundaryXY;
-
-        float32v t = float32v( G2 ) * ( xsi + ysi );
-        float32v xi = xsi + t;
-        float32v yi = ysi + t;
-
-        int32v h0 = HashPrimes( seed, xsbp, ysbp );
-        float32v v0 = GetGradientDotFancy( h0, xi, yi );
-        float32v a = FS::FNMulAdd( xi, xi, FS::FNMulAdd( yi, yi, float32v( 2.0f / 3.0f ) ) );
-        float32v a0 = a; a0 *= a0; a0 *= a0;
-        float32v value = a0 * v0;
-
-        int32v h1 = HashPrimes( seed, xsbp + int32v( Primes::X ), ysbp + int32v( Primes::Y ) );
-        float32v v1 = GetGradientDotFancy( h1, xi - float32v( 2 * G2 + 1 ), yi - float32v( 2 * G2 + 1 ) );
-        float32v a1 = FS::FMulAdd( float32v( 2 * ( 1 + 2 * G2 ) * ( 1 / G2 + 2 ) ), t, a + float32v( -2 * ( 1 + 2 * G2 ) * ( 1 + 2 * G2 ) ) );
-        a1 *= a1; a1 *= a1;
-        value = FS::FMulAdd( a1, v1, value );
-
-        float32v xyDelta = FS::Select( forwardXY, float32v( G2 + 1 ), float32v( -G2 ) );
-        xi -= xyDelta;
-        yi -= xyDelta;
-
-        int32v h2 = HashPrimes( seed,
-            FS::InvMaskedSub( forwardXY, FS::MaskedAdd( forwardX, xsbp, int32v( Primes::X * 2 ) ), int32v( Primes::X ) ),
-            FS::MaskedAdd( forwardXY, ysbp, int32v( Primes::Y ) ) );
-        float32v xi2 = xi - FS::Select( forwardX, float32v( 1 + 2 * G2 ), float32v( -1 ) );
-        float32v yi2 = FS::MaskedSub( forwardX, yi, float32v( 2 * G2 ) );
-        float32v v2 = GetGradientDotFancy( h2, xi2, yi2 );
-        float32v a2 = FS::Max( FS::FNMulAdd( xi2, xi2, FS::FNMulAdd( yi2, yi2, float32v( 2.0f / 3.0f ) ) ), float32v( 0 ) );
-        a2 *= a2; a2 *= a2;
-        value = FS::FMulAdd( a2, v2, value );
-
-        int32v h3 = HashPrimes( seed,
-            FS::MaskedAdd( forwardXY, xsbp, int32v( Primes::X ) ),
-            FS::InvMaskedSub( forwardXY, FS::MaskedAdd( forwardY, ysbp, int32v( (int32_t)( Primes::Y * 2LL ) ) ), int32v( Primes::Y ) ) );
-        float32v xi3 = FS::MaskedSub( forwardY, xi, float32v( 2 * G2 ) );
-        float32v yi3 = yi - FS::Select( forwardY, float32v( 1 + 2 * G2 ), float32v( -1 ) );
-        float32v v3 = GetGradientDotFancy( h3, xi3, yi3 );
-        float32v a3 = FS::Max( FS::FNMulAdd( xi3, xi3, FS::FNMulAdd( yi3, yi3, float32v( 2.0f / 3.0f ) ) ), float32v( 0 ) );
-        a3 *= a3; a3 *= a3;
-        value = FS::FMulAdd( a3, v3, value );
-                
-        constexpr float kBounding = 9.28993664146183f;
+            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase ), dxBase, dyBase, dzBase - offset0 );
+            float32v falloffBase = FS::Min( ( sign ^ dzBase ) - falloffBaseStemB, float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <1, 0, 1> or <-1, 0, -1>
+        {
+            mask32v signMask = xyzNormal < yNormal;
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( 2 * kReflectUnskew3 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed ), dxBase, dyBase - offset0, dzBase );
+            float32v falloffBase = FS::Min( ( sign ^ dyBase ) - falloffBaseStemB, float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 1, 1> or <0, -1, -1>
+        {
+            mask32v signMask = xyzNormal < xNormal;
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( 2 * kReflectUnskew3 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed ), dxBase - offset0, dyBase, dzBase );
+            float32v falloffBase = FS::Min( ( sign ^ dxBase ) - falloffBaseStemB, float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+        
+        // Vertex <1, 0, 0> or <-1, 0, 0>
+        {
+            mask32v signMask = xNormal < float32v( 0 );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( kReflectUnskew3 ) ^ sign; // offset1 = -offset0 because kReflectUnskew3 + 1 = -kReflectUnskew3
+
+            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase ), dxBase + offset0, dyBase - offset0, dzBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dxBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 1, 0> or <0, -1, 0>
+        {
+            mask32v signMask = yNormal < float32v( 0 );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( kReflectUnskew3 ) ^ sign; // offset1 = -offset0 because kReflectUnskew3 + 1 = -kReflectUnskew3
+
+            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase ), dxBase - offset0, dyBase + offset0, dzBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dyBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 0, 1> or <0, 0, -1>
+        {
+            mask32v signMask = zNormal < float32v( 0 );
+
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+            float32v offset0 = float32v( kReflectUnskew3 ) ^ sign; // offset1 = -offset0 because kReflectUnskew3 + 1 = -kReflectUnskew3
+
+            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed ), dxBase - offset0, dyBase - offset0, dzBase + offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dzBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        constexpr double kBounding = 144.736422163332608;
 
         return this->ScaleOutput( value, -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
+    float32v FS_VECTORCALL Gen_Smooth( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
-        this->ScalePositions( x, y, z );
-
-        float32v f = float32v( 2.0f / 3.0f ) * ( x + y + z );
-        float32v xr = f - x;
-        float32v yr = f - y;
-        float32v zr = f - z;
-
-        float32v xrb = FS::Floor( xr );
-        float32v yrb = FS::Floor( yr );
-        float32v zrb = FS::Floor( zr );
-        float32v xri = xr - xrb;
-        float32v yri = yr - yrb;
-        float32v zri = zr - zrb;
-        int32v xrbp = FS::Convert<int32_t>( xrb ) * int32v( Primes::X );
-        int32v yrbp = FS::Convert<int32_t>( yrb ) * int32v( Primes::Y );
-        int32v zrbp = FS::Convert<int32_t>( zrb ) * int32v( Primes::Z );
-
-        float32v value( 0 );
-        for( size_t i = 0; ; i++ )
+        this->ScalePositions( x, y, z, w );
+        
+        constexpr double kRoot5 = 2.2360679774997896964091736687313;
+        constexpr double kSkew4 = 1.0 / ( kRoot5 + 1.0 );
+        constexpr double kUnskew4 = -1.0 / ( kRoot5 + 5.0 );
+        constexpr double kTwiceUnskew4 = -1.0 / 5.0;
+
+        constexpr double kDistanceSquaredA = 4.0 / 5.0;
+        constexpr double kDistanceSquaredB = 6.0 / 5.0;
+        constexpr double kFalloffRadiusSquared = kDistanceSquaredA;
+
+        float32v skewDelta = float32v( kSkew4 ) * ( x + y + z + w );
+
+        float32v xSkewed = x + skewDelta;
+        float32v ySkewed = y + skewDelta;
+        float32v zSkewed = z + skewDelta;
+        float32v wSkewed = w + skewDelta;
+        float32v xSkewedBase = FS::Floor( xSkewed );
+        float32v ySkewedBase = FS::Floor( ySkewed );
+        float32v zSkewedBase = FS::Floor( zSkewed );
+        float32v wSkewedBase = FS::Floor( wSkewed );
+        float32v dxSkewed = xSkewed - xSkewedBase;
+        float32v dySkewed = ySkewed - ySkewedBase;
+        float32v dzSkewed = zSkewed - zSkewedBase;
+        float32v dwSkewed = wSkewed - wSkewedBase;
+
+        // From unit cell base, find closest vertex
         {
-            float32v a = FS::FNMulAdd( xri, xri, FS::FNMulAdd( yri, yri, FS::FNMulAdd( zri, zri, float32v( 0.75f ) ) ) ) * float32v( 0.5f );
-
-            float32v p0 = zri + yri + xri - float32v( 1.5f );
-            mask32v flip0 = p0 >= float32v( 0.0f );
-            float32v a0 = FS::Max( FS::MaskedAdd( flip0, a, p0 ), float32v( 0 ) );
-            a0 *= a0; a0 *= a0;
-            int32v h0 = HashPrimes( seed, FS::MaskedAdd( flip0, xrbp, int32v( Primes::X ) ), FS::MaskedAdd( flip0, yrbp, int32v( Primes::Y )), FS::MaskedAdd( flip0, zrbp, int32v( Primes::Z )));
-            float32v v0 = GetGradientDot( h0, FS::MaskedDecrement( flip0, xri ), FS::MaskedDecrement( flip0, yri ), FS::MaskedDecrement( flip0, zri ) );
-            value = FS::FMulAdd( a0, v0, value );
-            a -= float32v( 0.5f );
-
-            float32v p1 = zri + yri - xri + float32v( -0.5f );
-            mask32v flip1 = p1 >= float32v( 0.0f );
-            float32v a1 = FS::Max( FS::MaskedAdd( flip1, a + xri, p1 ), float32v( 0 ) );
-            a1 *= a1; a1 *= a1;
-            int32v h1 = HashPrimes( seed, FS::InvMaskedAdd( flip1, xrbp, int32v( Primes::X )), FS::MaskedAdd( flip1, yrbp, int32v( Primes::Y ) ), FS::MaskedAdd( flip1, zrbp, int32v( Primes::Z )));
-            float32v v1 = GetGradientDot( h1, FS::InvMaskedSub( flip1, xri, float32v( 1.0f ) ), FS::MaskedDecrement( flip1, yri ), FS::MaskedDecrement( flip1, zri ) );
-            value = FS::FMulAdd( a1, v1, value );
-
-            float32v p2 = xri + float32v( -0.5f ) + ( zri - yri );
-            mask32v flip2 = p2 >= float32v( 0.0f );
-            float32v a2 = FS::Max( FS::MaskedAdd( flip2, a + yri, p2 ), float32v( 0 ) );
-            a2 *= a2; a2 *= a2;
-            int32v h2 = HashPrimes( seed, FS::MaskedAdd( flip2, xrbp, int32v( Primes::X )), FS::InvMaskedAdd( flip2, yrbp, int32v( Primes::Y )), FS::MaskedAdd( flip2, zrbp, int32v( Primes::Z )));
-            float32v v2 = GetGradientDot( h2, FS::MaskedDecrement( flip2, xri ), FS::InvMaskedSub( flip2, yri, float32v( 1.0f ) ), FS::MaskedDecrement( flip2, zri ) );
-            value = FS::FMulAdd( a2, v2, value );
-
-            float32v p3 = xri + float32v( -0.5f ) - ( zri - yri );
-            mask32v flip3 = p3 >= float32v( 0.0f );
-            float32v a3 = FS::Max( FS::MaskedAdd( flip3, a + zri, p3 ), float32v( 0 ) );
-            a3 *= a3; a3 *= a3;
-            int32v h3 = HashPrimes( seed, FS::MaskedAdd( flip3, xrbp, int32v( Primes::X )), FS::MaskedAdd( flip3, yrbp, int32v( Primes::Y )), FS::InvMaskedAdd( flip3, zrbp, int32v( Primes::Z )));
-            float32v v3 = GetGradientDot( h3, FS::MaskedDecrement( flip3, xri ), FS::MaskedDecrement( flip3, yri ), FS::InvMaskedSub( flip3, zri, float32v( 1.0f ) ) );
-            value = FS::FMulAdd( a3, v3, value );
-
-            if( i == 1 )
+            // Perform a double unskew to get the vector whose dot product with skewed vectors produces the unskewed result.
+            float32v twiceUnskewDelta = float32v( kTwiceUnskew4 ) * ( dxSkewed + dySkewed + dzSkewed + dwSkewed );
+            float32v xNormal = dxSkewed + twiceUnskewDelta;
+            float32v yNormal = dySkewed + twiceUnskewDelta;
+            float32v zNormal = dzSkewed + twiceUnskewDelta;
+            float32v wNormal = dwSkewed + twiceUnskewDelta;
+            float32v xyzwNormal = -twiceUnskewDelta; // xNormal + yNormal + zNormal + wNormal
+
+            // Using those, compare scores to determine which vertex is closest.
+            constexpr auto considerVertex = [] ( float32v& maxScore, int32v& moveMaskBits, float32v score, int32v bits ) constexpr // Per-lane running-best update: offers one candidate (score, bits) against the best so far; both maxScore and moveMaskBits are updated in place.
             {
-                break;
-            }
+                moveMaskBits = FS::Select( score > maxScore, bits, moveMaskBits ); // Strict '>' so ties keep the earlier candidate's bits (presumably Select( mask, a, b ) yields a where mask is set - confirm against FastSIMD). Must run before maxScore is overwritten below.
+                maxScore = FS::Max( maxScore, score ); // Raise the running best; the callers below also shift maxScore between groups of candidates to re-base the comparison.
+            };
+            float32v maxScore = float32v( 0.6f ) - xyzwNormal;
+            int32v moveMaskBits = FS::Masked( float32v( 0.2f ) > maxScore, int32v( -1 ) );
+            maxScore = FS::Max( maxScore, float32v( 0.2f ) );
+            considerVertex( maxScore, moveMaskBits, -wNormal, 0b0111 );
+            considerVertex( maxScore, moveMaskBits, -zNormal, 0b1011 );
+            considerVertex( maxScore, moveMaskBits, -yNormal, 0b1101 );
+            considerVertex( maxScore, moveMaskBits, -xNormal, 0b1110 );
+            maxScore += xyzwNormal - float32v( 0.2f );
+            considerVertex( maxScore, moveMaskBits, xNormal, 0b0001 );
+            considerVertex( maxScore, moveMaskBits, yNormal, 0b0010 );
+            considerVertex( maxScore, moveMaskBits, zNormal, 0b0100 );
+            considerVertex( maxScore, moveMaskBits, wNormal, 0b1000 );
+            maxScore += float32v( 0.2f ) - xNormal;
+            considerVertex( maxScore, moveMaskBits, yNormal, 0b0011 );
+            considerVertex( maxScore, moveMaskBits, zNormal, 0b0101 );
+            considerVertex( maxScore, moveMaskBits, wNormal, 0b1001 );
+            maxScore += xNormal;
+            considerVertex( maxScore, moveMaskBits, yNormal + zNormal, 0b0110 );
+            maxScore -= wNormal;
+            considerVertex( maxScore, moveMaskBits, yNormal, 0b1010 );
+            considerVertex( maxScore, moveMaskBits, zNormal, 0b1100 );
+            
+            mask32v moveX = ( moveMaskBits & int32v( 0b0001 ) ) != int32v( 0 );
+            mask32v moveY = ( moveMaskBits & int32v( 0b0010 ) ) != int32v( 0 );
+            mask32v moveZ = ( moveMaskBits & int32v( 0b0100 ) ) != int32v( 0 );
+            mask32v moveW = ( moveMaskBits & int32v( 0b1000 ) ) != int32v( 0 );
+
+            xSkewedBase = FS::MaskedIncrement( moveX, xSkewedBase );
+            ySkewedBase = FS::MaskedIncrement( moveY, ySkewedBase );
+            zSkewedBase = FS::MaskedIncrement( moveZ, zSkewedBase );
+            wSkewedBase = FS::MaskedIncrement( moveW, wSkewedBase );
+
+            dxSkewed = FS::MaskedDecrement( moveX, dxSkewed );
+            dySkewed = FS::MaskedDecrement( moveY, dySkewed );
+            dzSkewed = FS::MaskedDecrement( moveZ, dzSkewed );
+            dwSkewed = FS::MaskedDecrement( moveW, dwSkewed );
+        }
+
+        int32v xPrimedBase = FS::Convert<int32_t>( xSkewedBase ) * int32v( Primes::X );
+        int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
+        int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
+        int32v wPrimedBase = FS::Convert<int32_t>( wSkewedBase ) * int32v( Primes::W );
+        
+        float32v skewedCoordinateSum = dxSkewed + dySkewed + dzSkewed + dwSkewed;
+        float32v twiceUnskewDelta = float32v( kTwiceUnskew4 ) * skewedCoordinateSum;
+        float32v xNormal = dxSkewed + twiceUnskewDelta;
+        float32v yNormal = dySkewed + twiceUnskewDelta;
+        float32v zNormal = dzSkewed + twiceUnskewDelta;
+        float32v wNormal = dwSkewed + twiceUnskewDelta;
+        float32v xyzwNormal = -twiceUnskewDelta; // xNormal + yNormal + zNormal + wNormal
+
+        float32v unskewDelta = float32v( kUnskew4 ) * skewedCoordinateSum;
+        float32v dxBase = dxSkewed + unskewDelta;
+        float32v dyBase = dySkewed + unskewDelta;
+        float32v dzBase = dzSkewed + unskewDelta;
+        float32v dwBase = dwSkewed + unskewDelta;
+
+        float32v coordinateSum = float32v( 1 + 4 * kUnskew4 ) * skewedCoordinateSum; // dxBase + dyBase + dzBase + dwBase
+
+        // Vertex <0, 0, 0, 0>
+        float32v value, falloffBaseStemA, falloffBaseStemB;
+        {
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimedBase ), dxBase, dyBase, dzBase, dwBase );
+            float32v falloffBase = FS::FNMulAdd( dwBase, dwBase, FS::FNMulAdd( dzBase, dzBase, FS::FNMulAdd( dyBase, dyBase, FS::FNMulAdd( dxBase, dxBase, float32v( kFalloffRadiusSquared ) ) ) ) ) * float32v( 0.5f );
+            falloffBaseStemA = falloffBase - float32v( kDistanceSquaredA * 0.5 );
+            falloffBaseStemB = falloffBase - float32v( kDistanceSquaredB * 0.5 );
+            value = ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ) * gradientRampValue;
+        }
+
+        // Vertex <1, 1, 1, 1> or <-1, -1, -1, -1>
+        {
+            mask32v signMask = xyzwNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset = float32v( 4 * kUnskew4 + 1 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimed, zPrimed, wPrimed ), dxBase - offset, dyBase - offset, dzBase - offset, dwBase - offset );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset, coordinateSum, falloffBaseStemA ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <1, 1, 1, 0> or <-1, -1, -1, 0>
+        {
+            mask32v signMask = xyzwNormal < wNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimed, zPrimed, wPrimedBase ), dxBase - offset1, dyBase - offset1, dzBase - offset1, dwBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dwBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <1, 1, 0, 1> or <-1, -1, 0, -1>
+        {
+            mask32v signMask = xyzwNormal < zNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase, wPrimed ), dxBase - offset1, dyBase - offset1, dzBase - offset0, dwBase - offset1 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dzBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <1, 0, 1, 1> or <-1, 0, -1, -1>
+        {
+            mask32v signMask = xyzwNormal < yNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed, wPrimed ), dxBase - offset1, dyBase - offset0, dzBase - offset1, dwBase - offset1 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dyBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 1, 1, 1> or <0, -1, -1, -1>
+        {
+            mask32v signMask = xyzwNormal < xNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed, wPrimed ), dxBase - offset0, dyBase - offset1, dzBase - offset1, dwBase - offset1 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dxBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <1, 0, 0, 0> or <-1, 0, 0, 0>
+        {
+            mask32v signMask = xNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+
+            float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase, wPrimedBase ), dxBase - offset1, dyBase - offset0, dzBase - offset0, dwBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dxBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <1, 1, 0, 0> or <-1, -1, 0, 0>
+        {
+            mask32v signMask = xNormal < -yNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase, wPrimedBase ), dxBase - offset1, dyBase - offset1, dzBase - offset0, dwBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dyBase ) ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <1, 0, 1, 0> or <-1, 0, -1, 0>
+        {
+            mask32v signMask = xNormal < -zNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed, wPrimedBase ), dxBase - offset1, dyBase - offset0, dzBase - offset1, dwBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dzBase ) ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <1, 0, 0, 1> or <-1, 0, 0, -1>
+        {
+            mask32v signMask = xNormal < -wNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v xPrimed = xPrimedBase + FS::Select( signMask, int32v( -Primes::X ), int32v( Primes::X ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase, wPrimed ), dxBase - offset1, dyBase - offset0, dzBase - offset0, dwBase - offset1 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dwBase ) ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 1, 0, 0> or <0, -1, 0, 0>
+        {
+            mask32v signMask = yNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+
+            float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase, wPrimedBase ), dxBase - offset0, dyBase - offset1, dzBase - offset0, dwBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dyBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 1, 1, 0> or <0, -1, -1, 0>
+        {
+            mask32v signMask = yNormal < -zNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
 
-            mask32v sideX = xri >= float32v( 0.5f );
-            mask32v sideY = yri >= float32v( 0.5f );
-            mask32v sideZ = zri >= float32v( 0.5f );
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed, wPrimedBase ), dxBase - offset0, dyBase - offset1, dzBase - offset1, dwBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dyBase + dzBase ) ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 1, 0, 1> or <0, -1, 0, -1>
+        {
+            mask32v signMask = yNormal < -wNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v yPrimed = yPrimedBase + FS::Select( signMask, int32v( -Primes::Y ), int32v( Primes::Y ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase, wPrimed ), dxBase - offset0, dyBase - offset1, dzBase - offset0, dwBase - offset1 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dyBase + dwBase ) ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 0, 1, 0> or <0, 0, -1, 0>
+        {
+            mask32v signMask = zNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
 
-            xrbp = FS::MaskedAdd( sideX, xrbp, int32v( Primes::X ) );
-            yrbp = FS::MaskedAdd( sideY, yrbp, int32v( Primes::Y ) );
-            zrbp = FS::MaskedAdd( sideZ, zrbp, int32v( Primes::Z ) );
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
 
-            xri += FS::Select( sideX, float32v( -0.5f ), float32v( 0.5f ) );
-            yri += FS::Select( sideY, float32v( -0.5f ), float32v( 0.5f ) );
-            zri += FS::Select( sideZ, float32v( -0.5f ), float32v( 0.5f ) );
+            float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( kUnskew4 ) ^ sign;
 
-            seed = ~seed;
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed, wPrimedBase ), dxBase - offset0, dyBase - offset0, dzBase - offset1, dwBase - offset0 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dzBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
-                
-        constexpr float kBounding = 144.736422163332608f;
+
+        // Vertex <0, 0, 1, 1> or <0, 0, -1, -1>
+        {
+            mask32v signMask = zNormal < -wNormal;
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v zPrimed = zPrimedBase + FS::Select( signMask, int32v( -Primes::Z ), int32v( Primes::Z ) );
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed, wPrimed ), dxBase - offset0, dyBase - offset0, dzBase - offset1, dwBase - offset1 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dzBase + dwBase ) ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        // Vertex <0, 0, 0, 1> or <0, 0, 0, -1>
+        {
+            mask32v signMask = wNormal < float32v( 0 );
+            float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
+
+            int32v wPrimed = wPrimedBase + FS::Select( signMask, int32v( -Primes::W ), int32v( Primes::W ) );
+
+            float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
+            float32v offset0 = float32v( kUnskew4 ) ^ sign;
+
+            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimed ), dxBase - offset0, dyBase - offset0, dzBase - offset0, dwBase - offset1 );
+            float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dwBase ), float32v( 0.0f ) );
+            value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
+        }
+
+        constexpr double kBounding = 115.21625311930542;
 
         return this->ScaleOutput( value, -1 / kBounding, 1 / kBounding );
     }
 };
-
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index e706e757..62acc447 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -13,210 +13,932 @@ namespace FastNoise
         static constexpr int Lookup[] = { X,Y,Z,W };
     }
 
+    static constexpr double kRoot2 = 1.4142135623730950488016887242097; // sqrt( 2 )
+    static constexpr double kRoot3 = 1.7320508075688772935274463415059; // sqrt( 3 )
+    static constexpr double kRoot5 = 2.2360679774997896964091736687313; // sqrt( 5 )
+    static constexpr double kSkew2 = 1.0 / ( kRoot3 + 1.0 ); // 1 / ( sqrt( 3 ) + 1 )
+    static constexpr double kSkew4 = 1.0 / ( kRoot5 + 1.0 ); // 1 / ( sqrt( 5 ) + 1 )
+
     static constexpr float kValueBounds = 2147483648.f;
-    static constexpr float kRoot2 = 1.4142135623730950488f;
-    static constexpr float kRoot3 = 1.7320508075688772935f;
+    static constexpr float kRoot2f = kRoot2; // Single-precision copy of kRoot2
+    static constexpr float kRoot3f = kRoot3; // Single-precision copy of kRoot3
+    static constexpr float kSkew2f = kSkew2; // Single-precision copy of kSkew2
+    static constexpr float kSkew4f = kSkew4; // Single-precision copy of kSkew4
 
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
-    FS_FORCEINLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
+    FS_FORCEINLINE static float32v GetGradientDotSimplex( int32v hash31, float32v fX, float32v fY ) // Dots ( fX, fY ) with a 2D gradient selected from the hash
     {
-        int32v index = FS::Convert<int32_t>( FS::Convert<float>( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
+        int32v index = FS::BitShiftRightZeroExtend( hash31, 1 ) * int32v( 12 >> 2 ); // [0,12) in the upper four bits
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( kRoot3, kRoot3, 2, 2, 1, -1, 0, 0, -kRoot3, -kRoot3, -2, -2, -1, 1, 0, 0 ) );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, -1, 0, 0, kRoot3, kRoot3, 2, 2, -1, 1, 0, 0, -kRoot3, -kRoot3, -2, -2 ) );
+            index >>= 28; // Move the four index bits down to the low bits, which act as the permute selector
+
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( kRoot3f, -kRoot3f, 1, -1, kRoot3f, -kRoot3f, -1, 1, 2, -2, 0, 0, 0, 0, 0, 0 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, -1, kRoot3f, -kRoot3f, -1, 1, kRoot3f, -kRoot3f, 0, 0, 2, -2, 0, 0, 0, 0 ) );
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
         else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( kRoot3, kRoot3, 2, 2, 1, -1, 0, 0 ), index );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, -1, 0, 0, kRoot3, kRoot3, 2, 2 ), index );
+            float32v finalSign = FS::Cast<float>( ( index >> 28 ) << 31 ); // Index bit 28 is moved into the float sign-bit position
+            index >>= 29; // The remaining three index bits select one of the eight table lanes
+
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( kRoot3f, 1, kRoot3f, -1, 2, 0, 0, 0 ), index );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, kRoot3f, -1, kRoot3f, 0, 2, 0, 0 ), index );
 
-            // Bit-8 = Flip sign of a + b 
-            return FS::FMulAdd( gX, fX, fY * gY ) ^ FS::Cast<float>( ( index >> 3 ) << 31 );
+            return FS::FMulAdd( gX, fX, fY * gY ) ^ finalSign;
         }
         else
         {
-            // Bit-3 = Choose X Y ordering
-            mask32v bit3;
+            float32v u = FS::SelectHighBit( index << 2, fY, fX ); // Index bit 29 (the high bit after << 2) chooses between fY and fX
+            float32v v = FS::SelectHighBit( index << 2, fX, fY ); // Same bit, opposite choice, so u and v are always the two different inputs
 
-            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE2 )
-            {
-                if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
-                {
-                    bit3 = FS::Cast<FS::Mask<32>>( index << 29 );
-                }
-                else
-                {
-                    bit3 = FS::Cast<FS::Mask<32>>( ( index << 29 ) >> 31 );
-                }
-            }
-            else
-            {
-                bit3 = ( index & int32v( 1 << 2 ) ) != int32v( 0 );
-            }
+            float32v a = u * FS::SelectHighBit( index, float32v( 2 ), float32v( kRoot3f ) ); // Top index bit picks the scale: 2 or kRoot3f
+            float32v b = v ^ FS::Cast<float>( ( index >> 30 ) << 31 ); // Index bit 30 flips the sign of v
 
-            float32v a = FS::Select( bit3, fY, fX );
-            float32v b = FS::Select( bit3, fX, fY );
+            return FS::MaskedAdd( index >= int32v( 0 ), a, b ) ^ FS::Cast<float>( ( index >> 28 ) << 31 ); // Presumably adds b only where index >= 0 (verify MaskedAdd); index bit 28 flips the final sign
+        }
+    }
 
-            // Bit-1 = b flip sign
-            b ^= FS::Cast<float>( index << 31 );
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
+    FS_FORCEINLINE static float32v GetGradientDotSimplex( int32v hash31, float32v fX, float32v fY, float32v fZ, float32v fW ) // Returns gX * fX + gY * fY + gZ * fZ + gW * fW for a hash-selected 4D gradient
+    {
+        int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 2 );
+        int32v index = hashShifted * int32v( 20 >> 2 ); // [0,20) in the upper five bits
+
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            index = FS::BitShiftRightZeroExtend( index, 27 ); // Bring the five index bits down to a plain [0,20) permute selector
 
-            // Bit-2 = Mul a by 2 or Root3
-            mask32v bit2 = ( index & int32v( 2 ) ) == int32v( 0 );
+            const auto tableX = FS::Constant<float>( kSkew4f + 1, kSkew4f, kSkew4f, kSkew4f, -1, 1, 0, 0, -1, 0, 1, 0, -1, 0, 0, 1 );
+            const auto tableY = FS::Constant<float>( kSkew4f, kSkew4f + 1, kSkew4f, kSkew4f, 1, -1, 0, 0, 0, -1, 0, 1, 0, -1, 1, 0 );
+            const auto tableZ = FS::Constant<float>( kSkew4f, kSkew4f, kSkew4f + 1, kSkew4f, 0, 0, -1, 1, 1, 0, -1, 0, 0, 1, -1, 0 );
+            const auto tableW = FS::Constant<float>( kSkew4f, kSkew4f, kSkew4f, kSkew4f + 1, 0, 0, 1, -1, 0, 1, 0, -1, 1, 0, 0, -1 );
 
-            a *= FS::Select( bit2, float32v( 2 ), float32v( kRoot3 ) );
-            // b zero value if a mul 2
-            float32v c = FS::MaskedAdd( bit2, a, b );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableX, index, -tableX ); // Selectors 16..19 read the second operand, i.e. the negated first four entries (assumes NativeExec forwards arguments in order)
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableY, index, -tableY );
+            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableZ, index, -tableZ );
+            float32v gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableW, index, -tableW );
 
-            // Bit-4 = Flip sign of a + b
-            return c ^ FS::Cast<float>( ( index >> 3 ) << 31 );
+            return FS::FMulAdd( gW, fW, FS::FMulAdd( gZ, fZ, FS::FMulAdd( gY, fY, gX * fX ) ) );
+        }
+        else
+        {
+            int32v indexA = index & int32v( 0x03 << 27 ); // Lowest two of the five index bits (bits 27..28)
+            int32v indexB = ( index >> 2 ) & int32v( 0x07 << 27 ); // Upper three index bits (29..31) moved down to bits 27..29
+            indexB ^= indexA; // Simplifies the AVX512_F case.
+
+            mask32v extra = indexB >= int32v( 0x04 << 27 ); // True where bit 29 of indexB is set
+            mask32v equal = ( indexA == indexB );
+            indexA |= FS::Cast<int32_t>( equal ); // Forces decrement conditions to fail.
+
+            float32v neutral = FS::Masked( equal | extra, FS::MaskedMul( extra, float32v( kSkew4f ), float32v( -1.0f ) ) ); // Shared starting value for all four components before the per-axis increment/decrement
+
+            float32v gX = FS::MaskedIncrement( indexB == int32v( 0 << 27 ), FS::MaskedDecrement( indexA == int32v( 0 << 27 ), neutral ) );
+            float32v gY = FS::MaskedIncrement( indexB == int32v( 1 << 27 ), FS::MaskedDecrement( indexA == int32v( 1 << 27 ), neutral ) );
+            float32v gZ = FS::MaskedIncrement( indexB == int32v( 2 << 27 ), FS::MaskedDecrement( indexA == int32v( 2 << 27 ), neutral ) );
+            float32v gW = FS::MaskedIncrement( indexB == int32v( 3 << 27 ), FS::MaskedDecrement( indexA == int32v( 3 << 27 ), neutral ) );
+
+            return FS::FMulAdd( gW, fW, FS::FMulAdd( gZ, fZ, FS::FMulAdd( gY, fY, gX * fX ) ) );
         }
     }
 
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
-    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
+    FS_FORCEINLINE static float32v GetGradientDotCommon( int32v hash31, float32v fX, float32v fY, float32v fZ ) // Dots ( fX, fY, fZ ) with one of 12 hash-selected gradients whose components are -1, 0 or 1
+    {
+        int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 1 );
+        int32v index = FS::BitShiftRightZeroExtend( hashShifted * int32v( 12 >> 2 ), 28 ); // [0,12)
+
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 0, 0, 0 ) );
+            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 0, 0, 0 ) );
+
+            return FS::FMulAdd( gZ, fZ, FS::FMulAdd( fY, gY, fX * gX ) );
+        }
+        else
+        {
+            float32v sign0 = FS::Cast<float>( index << 31 ); // Index bit 0 moved into the float sign-bit position
+            float32v sign1 = FS::Cast<float>( ( index >> 1 ) << 31 ); // Index bit 1 moved into the float sign-bit position
+
+            mask32v thirdCombo = constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 ) ? // NOTE(review): `constexpr( ... ) ?` is not a standard C++ expression form - confirm this compiles on all target toolchains
+                FS::Cast<FS::Mask<32>>( index << ( 31 - 3 ) ) : // Index bit 3 moved to the high bit and reinterpreted as a mask
+                index >= int32v( 8 );
+
+            float32v u = FS::Select( thirdCombo, fY, fX ); // First term: chosen between fY and fX by thirdCombo
+            float32v v = FS::Select( index >= int32v( 4 ), fZ, fY ); // Second term: chosen between fZ and fY by index >= 4
+
+            return ( u ^ sign0 ) + ( v ^ sign1 );
+        }
+    }
+
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
+    FS_FORCEINLINE static float32v GetGradientDotPerlin( int32v hash, float32v fX, float32v fY ) // Dots ( fX, fY ) with one of the eight gradients listed in the comment below, selected by hash
     {
         // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
         // ( 1, 1+R2 ) ( 1, -1-R2 ) ( -1, 1+R2 ) ( -1, -1-R2 )
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1 + kRoot2, -1 - kRoot2, 1 + kRoot2, -1 - kRoot2, 1, -1, 1, -1, 1 + kRoot2, -1 - kRoot2, 1 + kRoot2, -1 - kRoot2, 1, -1, 1, -1 ) );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, 1, -1, -1, 1 + kRoot2, 1 + kRoot2, -1 - kRoot2, -1 - kRoot2, 1, 1, -1, -1, 1 + kRoot2, 1 + kRoot2, -1 - kRoot2, -1 - kRoot2 ) );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1 + kRoot2f, -1 - kRoot2f, 1 + kRoot2f, -1 - kRoot2f, 1, -1, 1, -1, 1 + kRoot2f, -1 - kRoot2f, 1 + kRoot2f, -1 - kRoot2f, 1, -1, 1, -1 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, 1, -1, -1, 1 + kRoot2f, 1 + kRoot2f, -1 - kRoot2f, -1 - kRoot2f, 1, 1, -1, -1, 1 + kRoot2f, 1 + kRoot2f, -1 - kRoot2f, -1 - kRoot2f ) );
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
         else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1 + kRoot2, -1 - kRoot2, 1 + kRoot2, -1 - kRoot2, 1, -1, 1, -1 ), hash );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, 1, -1, -1, 1 + kRoot2, 1 + kRoot2, -1 - kRoot2, -1 - kRoot2 ), hash );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1 + kRoot2f, -1 - kRoot2f, 1 + kRoot2f, -1 - kRoot2f, 1, -1, 1, -1 ), hash );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, 1, -1, -1, 1 + kRoot2f, 1 + kRoot2f, -1 - kRoot2f, -1 - kRoot2f ), hash );
 
             return FS::FMulAdd( gX, fX, fY * gY );
         }
         else
         {
-            int32v bit1 = hash << 31;
-            int32v bit2 = ( hash >> 1 ) << 31;
-            int32v bit4 = hash << 29;
+            fX ^= FS::Cast<float>( hash << 31 ); // Hash bit 0 flips the sign of fX
+            fY ^= FS::Cast<float>( ( hash >> 1 ) << 31 ); // Hash bit 1 flips the sign of fY
+
+            float32v u = FS::SelectHighBit( hash << 29, fY, fX ); // Hash bit 2 (the high bit after << 29) chooses which input is scaled
+            float32v v = FS::SelectHighBit( hash << 29, fX, fY ); // Same bit, opposite choice: the unscaled input
+
+            return FS::FMulAdd( float32v( 1.0f + kRoot2f ), u, v ); // ( 1 + kRoot2f ) * u + v
+        }
+    }
+    
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
+    FS_FORCEINLINE static float32v GetGradientDotPerlin( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW ) // Dots ( fX, fY, fZ, fW ) with a hash-selected gradient whose components are -1, 0 or 1
+    {
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) ); // Two 16-entry tables per component give a 32-entry lookup
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, FS::Constant<float>( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
+            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, FS::Constant<float>( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
+            float32v gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, FS::Constant<float>( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
+
+            return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, FS::FMulAdd( fZ, gZ, fW * gW ) ));
+        }
+        else
+        {
+            int32v p = hash & int32v( 3 << 3 ); // Hash bits 3..4 decide which three inputs are used
+
+            float32v a = FS::Select( p > int32v( 0 ), fX, fY ); // Chosen between fX and fY by whether bits 3..4 are non-zero
+            float32v b = FS::SelectHighBit( hash << 27, fY, fZ ); // Chosen between fY and fZ by hash bit 4 (the high bit after << 27)
+            float32v c = FS::Select( p > int32v( 2 << 3 ), fZ, fW ); // Chosen between fZ and fW by whether both of bits 3..4 are set
+
+            float32v aSign = FS::Cast<float>( hash << 31 ); // Hash bit 0 moved into the sign-bit position
+            float32v bSign = FS::Cast<float>( ( hash >> 1 ) << 31 ); // Hash bit 1 moved into the sign-bit position
+            float32v cSign = FS::Cast<float>( ( hash >> 2 ) << 31 ); // Hash bit 2 moved into the sign-bit position
+
+            return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
+        }
+    }
+
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
+    FS_FORCEINLINE static void ApplyGradientOuterProductVectorProductSimplex( int32v hash31, float32v fX, float32v fY, float32v multiplier, float32v& valueX, float32v& valueY ) // Accumulates multiplier * dot( gradient, ( fX, fY ) ) * outerVector into ( valueX, valueY )
+    {
+        int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 1 );
+        int32v indexGradient = hashShifted * int32v( 12 >> 2 ); // [0,12) in the upper four bits
+        int32v indexOuterVector = ( hashShifted * int32v( ( -4LL << 30 ) / 3 ) ) & int32v( 0xC0000003 ); // [0,12) in bits 0,1,30,31 - NOTE(review): left-shifting a negative value is undefined before C++20; confirm the targeted standard
+
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            indexGradient >>= 28; // Move the four index bits down to the low bits, which act as the permute selector
+            indexOuterVector |= indexOuterVector >> 28; // Fold bits 30,31 down onto bits 2,3 so the low four bits hold the index
+
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexGradient, FS::Constant<float>( kRoot3f, -kRoot3f, 1, -1, kRoot3f, -kRoot3f, -1, 1, 2, -2, 0, 0, 0, 0, 0, 0 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexGradient, FS::Constant<float>( 1, -1, kRoot3f, -kRoot3f, -1, 1, kRoot3f, -kRoot3f, 0, 0, 2, -2, 0, 0, 0, 0 ) );
+
+            multiplier *= FS::FMulAdd( fY, gY, fX * gX ); // Scale by the gradient dot product
+
+            valueX = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexOuterVector, FS::Constant<float>( kRoot3f, -kRoot3f, 1, -1, kRoot3f, -kRoot3f, -1, 1, 2, -2, 0, 0, 0, 0, 0, 0 ) ), valueX ); // Same tables as above, looked up with the second index
+            valueY = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexOuterVector, FS::Constant<float>( 1, -1, kRoot3f, -kRoot3f, -1, 1, kRoot3f, -kRoot3f, 0, 0, 2, -2, 0, 0, 0, 0 ) ), valueY );
+        }
+        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        {
+            float32v finalSign = FS::Cast<float>( ( ( indexGradient >> 28 ) ^ indexOuterVector ) << 31 ); // Combined sign: indexGradient bit 28 XOR indexOuterVector bit 0
+            indexGradient >>= 29; // The remaining three index bits select one of the eight table lanes
+            indexOuterVector = ( indexOuterVector >> 1 ) | ( indexOuterVector >> 29 ); // Bit 1 -> bit 0 and bits 30,31 -> bits 1,2, forming the three selector bits
+
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( kRoot3f, 1, kRoot3f, -1, 2, 0, 0, 0 ), indexGradient );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, kRoot3f, -1, kRoot3f, 0, 2, 0, 0 ), indexGradient );
+
+            multiplier *= FS::FMulAdd( fY, gY, fX * gX ) ^ finalSign; // Both sign flips are folded into the multiplier
+
+            valueX = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( kRoot3f, 1, kRoot3f, -1, 2, 0, 0, 0 ), indexOuterVector ), valueX );
+            valueY = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( 1, kRoot3f, -1, kRoot3f, 0, 2, 0, 0 ), indexOuterVector ), valueY );
+        }
+        else
+        {
+            {
+                float32v u = FS::SelectHighBit( indexGradient << 2, fY, fX ); // indexGradient bit 29 chooses between fY and fX
+                float32v v = FS::SelectHighBit( indexGradient << 2, fX, fY ); // Same bit, opposite choice
+
+                float32v a = u * FS::SelectHighBit( indexGradient, float32v( 2 ), float32v( kRoot3f ) ); // Top bit picks the scale: 2 or kRoot3f
+                float32v b = v ^ FS::Cast<float>( ( indexGradient >> 30 ) << 31 ); // indexGradient bit 30 flips the sign of v
+
+                multiplier *= FS::MaskedAdd( indexGradient >= int32v( 0 ), a, b ) ^ FS::Cast<float>( ( ( indexGradient >> 28 ) ^ indexOuterVector ) << 31 ); // Sign: indexGradient bit 28 XOR indexOuterVector bit 0
+            }
 
-            if constexpr( !( SIMD & FastSIMD::FeatureFlag::SSE41 ) )
             {
-                bit4 >>= 31;
+                float32v a = multiplier * FS::SelectHighBit( indexOuterVector, float32v( 2 ), float32v( kRoot3f ) ); // Top bit picks the scale: 2 or kRoot3f
+                float32v b = FS::Masked( indexOuterVector >= int32v( 0 ), multiplier ) ^ FS::Cast<float>( ( indexOuterVector >> 30 ) << 31 ); // Bit 30 flips the sign; presumably zeroed where the top bit is set (verify Masked)
+
+                valueX += FS::SelectHighBit( indexOuterVector << 30, b, a ); // indexOuterVector bit 1 decides which of a/b goes to X and which to Y
+                valueY += FS::SelectHighBit( indexOuterVector << 30, a, b );
             }
+        }
+    }
 
-            auto bit4Mask = FS::Cast<FS::Mask<32, false>>( bit4 );
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
+    FS_FORCEINLINE static void ApplyGradientOuterProductVectorProductCommon( int32v hash31, float32v fX, float32v fY, float32v fZ, float32v multiplier, float32v& valueX, float32v& valueY, float32v& valueZ ) // Accumulates multiplier * dot( gradient, ( fX, fY, fZ ) ) * outerVector into ( valueX, valueY, valueZ )
+    {
+        int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 1 );
+        int32v indexGradient = FS::BitShiftRightZeroExtend( hashShifted * int32v( 12 >> 2 ), 28 ); // [0,12)
+        int32v indexOuterVector = ( hashShifted * int32v( ( -4LL << 30 ) / 3 ) ) & int32v( 0xC0000003 ); // [0,12) in bits 0,1,30,31 - NOTE(review): left-shifting a negative value is undefined before C++20; confirm the targeted standard
+        indexOuterVector |= indexOuterVector >> 28; // Fold bits 30,31 down onto bits 2,3 so the low four bits hold the index
 
-            fX ^= FS::Cast<float>( bit1 );
-            fY ^= FS::Cast<float>( bit2 );
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexGradient, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexGradient, FS::Constant<float>( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 0, 0, 0 ) );
+            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexGradient, FS::Constant<float>( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 0, 0, 0 ) );
 
-            float32v a = FS::Select( bit4Mask, fY, fX );
-            float32v b = FS::Select( bit4Mask, fX, fY );
+            multiplier *= FS::FMulAdd( gZ, fZ, FS::FMulAdd( fY, gY, fX * gX ) ); // Scale by the gradient dot product
 
-            return FS::FMulAdd( float32v( 1.0f + kRoot2 ), a, b );
+            valueX = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexOuterVector, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) ), valueX ); // Same tables as above, looked up with the second index
+            valueY = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexOuterVector, FS::Constant<float>( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 0, 0, 0 ) ), valueY );
+            valueZ = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexOuterVector, FS::Constant<float>( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 0, 0, 0 ) ), valueZ );
+        }
+        else
+        {
+            {
+                float32v sign0 = FS::Cast<float>( indexGradient << 31 ); // indexGradient bit 0 moved into the sign-bit position
+                float32v sign1 = FS::Cast<float>( ( indexGradient >> 1 ) << 31 ); // indexGradient bit 1 moved into the sign-bit position
+
+                mask32v thirdCombo = constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 ) ? // NOTE(review): `constexpr( ... ) ?` is not a standard C++ expression form - confirm this compiles on all target toolchains
+                    FS::Cast<FS::Mask<32>>( indexGradient << ( 31 - 3 ) ) : // indexGradient bit 3 moved to the high bit and reinterpreted as a mask
+                    indexGradient >= int32v( 8 );
+
+                float32v u = FS::Select( thirdCombo, fY, fX ); // First term: chosen between fY and fX by thirdCombo
+                float32v v = FS::Select( indexGradient >= int32v( 4 ), fZ, fY ); // Second term: chosen between fZ and fY by indexGradient >= 4
+
+                multiplier *= ( u ^ sign0 ) + ( v ^ sign1 );
+            }
+
+            {
+                indexOuterVector &= int32v( 0xF ); // Keep only the folded four-bit index
+
+                float32v signed0 = multiplier ^ FS::Cast<float>( indexOuterVector << 31 ); // multiplier with its sign flipped by index bit 0
+                float32v signed1 = multiplier ^ FS::Cast<float>( ( indexOuterVector >> 1 ) << 31 ); // multiplier with its sign flipped by index bit 1
+
+                mask32v notYZ = indexOuterVector < int32v( 8 );
+                mask32v notXY = indexOuterVector >= int32v( 4 );
+
+                valueX = FS::MaskedAdd( notYZ, valueX, signed0 ); // X receives signed0 for indices below 8
+                valueZ = FS::MaskedAdd( notXY, valueZ, signed1 ); // Z receives signed1 for indices 4 and above
+                valueY = FS::InvMaskedAdd( notYZ & notXY, valueY, FS::Select( notXY, signed0, signed1 ) ); // Presumably Y is skipped only for indices 4..7 (verify InvMaskedAdd)
+            }
         }
     }
-    
+
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
-    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
-    {        
+    FS_FORCEINLINE static void ApplyGradientOuterProductVectorProductSimplex( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW, float32v multiplier, float32v& valueX, float32v& valueY, float32v& valueZ, float32v& valueW )
+    {
+        int32v hashShifted = FS::BitShiftRightZeroExtend( hash, 2 );
+        int32v indexGradient = hashShifted * int32v( 20 >> 2 ); // [0,20) in the upper five bits
+        int32v indexOuterVector = hashShifted * int32v( ( -8LL << 29 ) / 5 );
+        indexOuterVector = ( indexOuterVector & int32v( 0xE0000003 ) ) * int32v( 3 | ( 1 << 27 ) ); // [0,20) in the upper five bits, independently of the above
+
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
-            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
+            indexGradient = FS::BitShiftRightZeroExtend( indexGradient, 27 );
+            indexOuterVector = FS::BitShiftRightZeroExtend( indexOuterVector, 27 );
+
+            const auto tableX = FS::Constant<float>( kSkew4f + 1, kSkew4f, kSkew4f, kSkew4f, -1, 1, 0, 0, -1, 0, 1, 0, -1, 0, 0, 1 );
+            const auto tableY = FS::Constant<float>( kSkew4f, kSkew4f + 1, kSkew4f, kSkew4f, 1, -1, 0, 0, 0, -1, 0, 1, 0, -1, 1, 0 );
+            const auto tableZ = FS::Constant<float>( kSkew4f, kSkew4f, kSkew4f + 1, kSkew4f, 0, 0, -1, 1, 1, 0, -1, 0, 0, 1, -1, 0 );
+            const auto tableW = FS::Constant<float>( kSkew4f, kSkew4f, kSkew4f, kSkew4f + 1, 0, 0, 1, -1, 0, 1, 0, -1, 1, 0, 0, -1 );
 
-            return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, fZ * gZ ));
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableX, indexGradient, -tableX );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableY, indexGradient, -tableY );
+            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableZ, indexGradient, -tableZ );
+            float32v gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableW, indexGradient, -tableW );
+
+            multiplier *= FS::FMulAdd( gW, fW, FS::FMulAdd( gZ, fZ, FS::FMulAdd( gY, fY, gX * fX ) ) );
+
+            valueX = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableX, indexOuterVector, -tableX ), valueX );
+            valueY = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableY, indexOuterVector, -tableY ), valueY );
+            valueZ = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableZ, indexOuterVector, -tableZ ), valueZ );
+            valueW = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableW, indexOuterVector, -tableW ), valueW );
         }
         else
         {
-            int32v hasha13 = hash & int32v( 13 );
-
-            // if h > 7 then y, else x
-            mask32v gt7;
-            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
             {
-                gt7 = FS::Cast<FS::Mask<32>>( hash << 28 );
+                int32v indexA = indexGradient & int32v( 0x03 << 27 );
+                int32v indexB = ( indexGradient >> 2 ) & int32v( 0x07 << 27 );
+                indexB ^= indexA; // Simplifies the AVX512_F case.
+
+                mask32v extra = indexB >= int32v( 0x04 << 27 );
+                mask32v equal = ( indexA == indexB );
+                indexA |= FS::Cast<int32_t>( equal ); // Forces decrement conditions to fail.
+
+                float32v neutral = FS::Masked( equal | extra, FS::MaskedMul( extra, float32v( kSkew4f ), float32v( -1.0f ) ) );
+
+                float32v gX = FS::MaskedIncrement( indexB == int32v( 0 << 27 ), FS::MaskedDecrement( indexA == int32v( 0 << 27 ), neutral ) );
+                float32v gY = FS::MaskedIncrement( indexB == int32v( 1 << 27 ), FS::MaskedDecrement( indexA == int32v( 1 << 27 ), neutral ) );
+                float32v gZ = FS::MaskedIncrement( indexB == int32v( 2 << 27 ), FS::MaskedDecrement( indexA == int32v( 2 << 27 ), neutral ) );
+                float32v gW = FS::MaskedIncrement( indexB == int32v( 3 << 27 ), FS::MaskedDecrement( indexA == int32v( 3 << 27 ), neutral ) );
+
+                multiplier *= FS::FMulAdd( gW, fW, FS::FMulAdd( gZ, fZ, FS::FMulAdd( gY, fY, gX * fX ) ) );
             }
-            else
+
             {
-                gt7 = hasha13 > int32v( 7 );
+                int32v indexA = indexOuterVector & int32v( 0x03 << 27 );
+                int32v indexB = ( indexOuterVector >> 2 ) & int32v( 0x07 << 27 );
+                indexB ^= indexA; // Simplifies the AVX512_F case.
+
+                mask32v extra = indexB >= int32v( 0x04 << 27 );
+                mask32v equal = ( indexA == indexB );
+                indexA |= FS::Cast<int32_t>( equal ); // Forces decrement conditions to fail.
+
+                float32v neutral = FS::Masked( equal | extra, FS::MaskedMul( extra, float32v( kSkew4f ), float32v( -1.0f ) ) );
+
+                float32v gX = FS::MaskedIncrement( indexB == int32v( 0 << 27 ), FS::MaskedDecrement( indexA == int32v( 0 << 27 ), neutral ) );
+                float32v gY = FS::MaskedIncrement( indexB == int32v( 1 << 27 ), FS::MaskedDecrement( indexA == int32v( 1 << 27 ), neutral ) );
+                float32v gZ = FS::MaskedIncrement( indexB == int32v( 2 << 27 ), FS::MaskedDecrement( indexA == int32v( 2 << 27 ), neutral ) );
+                float32v gW = FS::MaskedIncrement( indexB == int32v( 3 << 27 ), FS::MaskedDecrement( indexA == int32v( 3 << 27 ), neutral ) );
+
+                valueX = FS::FMulAdd( multiplier, gX, valueX );
+                valueY = FS::FMulAdd( multiplier, gY, valueY );
+                valueZ = FS::FMulAdd( multiplier, gZ, valueZ );
+                valueW = FS::FMulAdd( multiplier, gW, valueW );
             }
-            float32v u = FS::Select( gt7, fY, fX );
-
-            // if h < 4 then y else if h is 12 or 14 then x else z
-            float32v v = FS::Select( hasha13 == int32v( 12 ), fX, fZ );
-            v = FS::Select( hasha13 < int32v( 2 ), fY, v );
-
-            // if h1 then -u else u
-            // if h2 then -v else v
-            float32v h1 = FS::Cast<float>( hash << 31 );
-            float32v h2 = FS::Cast<float>( ( hash >> 1 ) << 31 );
-            // then add them
-            return ( u ^ h1 ) + ( v ^ h2 );
         }
     }
-    
+
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
-    FS_FORCEINLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
+    FS_FORCEINLINE static void ApplyOrthogonalGradientMatrixVectorProductSimplex( int32v hash31, float32v fX, float32v fY, float32v multiplier, float32v& valueX, float32v& valueY ) // 2D overload: picks a hash-selected gradient (gX, gY) and accumulates multiplier-scaled products of it with (fX, fY) into valueX and valueY
     {
+        int32v index = FS::BitShiftRightZeroExtend( hash31, 1 ) * int32v( 12 >> 2 ); // [0,12) in the upper four bits
+
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, FS::Constant<float>( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
-            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, FS::Constant<float>( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
-            float32v gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), FS::Constant<float>( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, FS::Constant<float>( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
+            index = FS::BitShiftRightZeroExtend( index, 28 ); // bring the four index bits down to the low bits so they can act as the permute lane selector
 
-            return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, FS::FMulAdd( fZ, gZ, fW * gW ) ));
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( kSkew2f, -kSkew2f, kSkew2f, -kSkew2f, kSkew2f + 1, -kSkew2f - 1, kSkew2f + 1, -kSkew2f - 1, 1, -1, 1, -1, 0, 0, 0, 0 ) ); // table entries 12-15 are 0; given the [0,12) range noted above they are not expected to be selected
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( kSkew2f + 1, kSkew2f + 1, -kSkew2f - 1, -kSkew2f - 1, kSkew2f, kSkew2f, -kSkew2f, -kSkew2f, 1, 1, -1, -1, 0, 0, 0, 0 ) ); // same lane layout as gX, but the sign alternates every two entries instead of every entry
+
+            valueX = FS::FMulAdd( multiplier, FS::FMulAdd( fY, gY, fX * gX ), valueX ); // presumably valueX += multiplier * ( fX*gX + fY*gY ), assuming FMulAdd( a, b, c ) = a*b + c
+            multiplier ^= FS::Cast<float>( hash31 << 31 ); // lowest bit of hash31 moved to the float sign position and XORed in: conditionally negates multiplier for the Y output
+            valueY = FS::FMulAdd( multiplier, FS::FMulSub( fY, gX, fX * gY ), valueY ); // presumably valueY += multiplier * ( fY*gX - fX*gY ), assuming FMulSub( a, b, c ) = a*b - c - TODO confirm
+        }
+        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        {
+            float32v signX = FS::Cast<float>( ( index >> 28 ) << 31 ); // bit 28 of index (lowest of the four index bits) moved to the sign position; applied to gX below
+            index = FS::BitShiftRightZeroExtend( index, 29 ); // keep only the top three index bits as the 8-entry lane selector
+
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( kSkew2f, kSkew2f, kSkew2f + 1, kSkew2f + 1, 1, 1, 0, 0 ), index ) ^ signX; // magnitude comes from the table, sign from signX
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( kSkew2f + 1, -kSkew2f - 1, kSkew2f, -kSkew2f, 1, -1, 0, 0 ), index ); // gY keeps its sign in the table: odd entries are negated
+
+            valueX = FS::FMulAdd( multiplier, FS::FMulAdd( fY, gY, fX * gX ), valueX ); // same accumulation as the AVX512 branch
+            multiplier ^= FS::Cast<float>( hash31 << 31 ); // conditional negation of multiplier driven by the lowest bit of hash31
+            valueY = FS::FMulAdd( multiplier, FS::FMulSub( fY, gX, fX * gY ), valueY );
         }
         else
         {
-            int32v p = hash & int32v( 3 << 3 );
+            int32v ofThree = FS::BitShiftRightZeroExtend( index, 30 ); // top two bits of index: selects one of three magnitude patterns
+            float32v signX = FS::Cast<float>( ( index >> 28 ) << 31 ); // bit 28 of index -> sign of gX
+            float32v signY = FS::Cast<float>( ( index >> 29 ) << 31 ); // bit 29 of index -> sign of gY
+
+            float32v masked = FS::Masked( index >= int32v( 0 ), float32v( kSkew2f ) ); // index >= 0 holds where the top bit is clear; presumably yields kSkew2f there and 0 elsewhere - TODO confirm FS::Masked semantics
+            float32v gX = FS::MaskedIncrement( ofThree != int32v( 0 ), masked ) ^ signX; // presumably adds 1 where ofThree != 0, then applies the sign - TODO confirm MaskedIncrement semantics
+            float32v gY = FS::MaskedIncrement( ofThree != int32v( 1 ), masked ) ^ signY; // presumably adds 1 where ofThree != 1, then applies the sign
+
+            valueX = FS::FMulAdd( multiplier, FS::FMulAdd( fY, gY, fX * gX ), valueX ); // same accumulation as the table-driven branches
+            multiplier ^= FS::Cast<float>( hash31 << 31 ); // conditional negation of multiplier driven by the lowest bit of hash31
+            valueY = FS::FMulAdd( multiplier, FS::FMulSub( fY, gX, fX * gY ), valueY );
+        }
+    }
+
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
+    FS_FORCEINLINE static void ApplyOrthogonalGradientMatrixVectorProductCommon( int32v hash31, float32v fX, float32v fY, float32v fZ, float32v multiplier, float32v& valueX, float32v& valueY, float32v& valueZ ) // 3D: builds three hash-selected dot products with (fX, fY, fZ) and accumulates them, scaled by the sign-adjusted multiplier, into valueX/valueY/valueZ
+    {
+        const float kComponentA = 2.224744871391589f; // numerically ~= 1 + sqrt(1.5)
+        const float kComponentB = -0.224744871391589f; // numerically ~= 1 - sqrt(1.5)
+        const float kComponentC = -1.0f;
+        const float kComponentsDE = 1.0f;
+        const float kComponentF = 2.0f;
+        
+        int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 1 ); // drop the lowest hash bit; it is consumed separately as finalSign below
+        int32v indexFacetBasisWithPermute2 = hashShifted * int32v( ( -4LL << 30 ) / 3 ); // [0,3) in the highest two bits, [0,8) in the lowest three bits. NOTE(review): left-shifting a negative value is undefined before C++20; -( 4LL << 30 ) / 3 yields the same constant
+        int32v indexPermutation2HighBit = ( indexFacetBasisWithPermute2 << 29 ); // & int32v( 1 << 31 ); // [0,1) in the most significant bit
+        int32v indexPermutation3 = FS::BitShiftRightZeroExtend( hashShifted * int32v( 3 ), 30 ); // [0,3): decides which output receives valueC at the end of the function
+        float32v finalSign = FS::Cast<float>( hash31 << 31 ); // lowest hash bit moved to the float sign position
+
+        float32v valueAB, valueBA, valueC; // valueAB / valueBA: the A and B dot products, exchanged when the permute-2 bit is set; valueC: the third dot product
+
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            //indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC( _mm512_rol_epi32 ), indexFacetBasisWithPermute2, 2 );
+            indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC( _mm512_rolv_epi32 ), indexFacetBasisWithPermute2, int32v( 2 ) ); // rotate left by 2: former bits 30-31 land in bits 0-1, former bits 0-1 in bits 2-3, and former bit 2 in bit 4 (the table-select bit of _mm512_permutex2var_ps)
+
+            const auto tableA_gX = FS::Constant<float>( kComponentA, kComponentA, kComponentC, kComponentC, -kComponentA, -kComponentA, kComponentC, kComponentC, kComponentA, kComponentA, kComponentC, kComponentC, -kComponentA, -kComponentA, kComponentC, kComponentC );
+            const auto tableA_gY = FS::Constant<float>( kComponentC, kComponentB, kComponentA, kComponentA, kComponentC, kComponentB, -kComponentA, -kComponentA, kComponentC, -kComponentB, kComponentA, kComponentA, kComponentC, -kComponentB, -kComponentA, -kComponentA );
+            const auto tableA_gZ = FS::Constant<float>( kComponentB, kComponentC, kComponentB, kComponentB, kComponentB, kComponentC, kComponentB, kComponentB, -kComponentB, kComponentC, -kComponentB, -kComponentB, -kComponentB, kComponentC, -kComponentB, -kComponentB );
+
+            const auto tableB_gX = FS::Constant<float>( kComponentB, kComponentB, kComponentC, kComponentC, -kComponentB, -kComponentB, kComponentC, kComponentC, kComponentB, kComponentB, kComponentC, kComponentC, -kComponentB, -kComponentB, kComponentC, kComponentC );
+            const auto tableB_gY = FS::Constant<float>( kComponentC, kComponentA, kComponentB, kComponentB, kComponentC, kComponentA, -kComponentB, -kComponentB, kComponentC, -kComponentA, kComponentB, kComponentB, kComponentC, -kComponentA, -kComponentB, -kComponentB );
+            const auto tableB_gZ = FS::Constant<float>( kComponentA, kComponentC, kComponentA, kComponentA, kComponentA, kComponentC, kComponentA, kComponentA, -kComponentA, kComponentC, -kComponentA, -kComponentA, -kComponentA, kComponentC, -kComponentA, -kComponentA );
+
+            const auto tableC_gX = FS::Constant<float>( kComponentsDE, kComponentsDE, kComponentF, kComponentF, kComponentC, kComponentC, kComponentF, kComponentF, kComponentsDE, kComponentsDE, kComponentF, kComponentF, kComponentC, kComponentC, kComponentF, kComponentF );
+            const auto tableC_gY = FS::Constant<float>( kComponentF, kComponentsDE, kComponentsDE, kComponentsDE, kComponentF, kComponentsDE, kComponentC, kComponentC, kComponentF, kComponentC, kComponentsDE, kComponentsDE, kComponentF, kComponentC, kComponentC, kComponentC );
+            const auto tableC_gZ = FS::Constant<float>( kComponentsDE, kComponentF, kComponentsDE, kComponentsDE, kComponentsDE, kComponentF, kComponentsDE, kComponentsDE, kComponentC, kComponentF, kComponentC, kComponentC, kComponentC, kComponentF, kComponentC, kComponentC );
+
+            float32v valueAB_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableA_gX, indexFacetBasisWithPermute2, tableB_gX ); // index bit 4 clear reads the A table, set reads the B table
+            float32v valueAB_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableA_gY, indexFacetBasisWithPermute2, tableB_gY );
+            float32v valueAB_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableA_gZ, indexFacetBasisWithPermute2, tableB_gZ );
+            valueAB = FS::FMulAdd( valueAB_gZ, fZ, FS::FMulAdd( fY, valueAB_gY, fX * valueAB_gX ) ); // dot product of the looked-up row with (fX, fY, fZ)
+
+            float32v valueBA_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableB_gX, indexFacetBasisWithPermute2, tableA_gX ); // same lookup with the two tables exchanged
+            float32v valueBA_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableB_gY, indexFacetBasisWithPermute2, tableA_gY );
+            float32v valueBA_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ), tableB_gZ, indexFacetBasisWithPermute2, tableA_gZ );
+            valueBA = FS::FMulAdd( valueBA_gZ, fZ, FS::FMulAdd( fY, valueBA_gY, fX * valueBA_gX ) );
+
+            float32v valueC_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexFacetBasisWithPermute2, tableC_gX ); // single-table permute: only the low four index bits select the entry, so the A/B swap bit has no effect here
+            float32v valueC_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexFacetBasisWithPermute2, tableC_gY );
+            float32v valueC_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexFacetBasisWithPermute2, tableC_gZ );
+            valueC = FS::FMulAdd( valueC_gZ, fZ, FS::FMulAdd( fY, valueC_gY, fX * valueC_gX ) );
+        }
+        else
+        {
+            float32v sign0 = FS::Cast<float>( indexFacetBasisWithPermute2 << 31 ); // bit 0 of the index moved to the float sign position
+            float32v sign1 = FS::Cast<float>( ( indexFacetBasisWithPermute2 << 30 ) & int32v( 1 << 31 ) ); // bit 1 of the index moved to the float sign position; the mask discards bit 0, which lands just below it
+
+            auto notYZ = indexFacetBasisWithPermute2 >= int32v( 0 ); // true where bit 31 is clear
+            auto notXY = ( indexFacetBasisWithPermute2 << 1 ) >= int32v( 0 ); // true where bit 30 is clear
+
+            float32v valueA_gX = FS::Select( notYZ, float32v( kComponentA ) ^ sign0, float32v( kComponentC ) ); // the Select chains in this branch reproduce the AVX512 table entries for the reachable index values
+            float32v valueA_gY = FS::Select( notYZ & notXY, float32v( kComponentC ), FS::Select( notXY, float32v( kComponentA ) ^ sign0, float32v( kComponentB ) ^ sign1 ) );
+            float32v valueA_gZ = FS::Select( notXY, float32v( kComponentB ) ^ sign1, float32v( kComponentC ) );
+            float32v valueA = FS::FMulAdd( valueA_gZ, fZ, FS::FMulAdd( fY, valueA_gY, fX * valueA_gX ) );
+
+            float32v valueB_gX = FS::Select( notYZ, float32v( kComponentB ) ^ sign0, float32v( kComponentC ) ); // same shape as the A row with kComponentA and kComponentB exchanged
+            float32v valueB_gY = FS::Select( notYZ & notXY, float32v( kComponentC ), FS::Select( notXY, float32v( kComponentB ) ^ sign0, float32v( kComponentA ) ^ sign1 ) );
+            float32v valueB_gZ = FS::Select( notXY, float32v( kComponentA ) ^ sign1, float32v( kComponentC ) );
+            float32v valueB = FS::FMulAdd( valueB_gZ, fZ, FS::FMulAdd( fY, valueB_gY, fX * valueB_gX ) );
+
+            float32v valueC_gX = FS::Select( notYZ, float32v( kComponentsDE ) ^ sign0, float32v( kComponentF ) ); // C row: kComponentsDE with a hash-driven sign, or kComponentF
+            float32v valueC_gY = FS::Select( notYZ & notXY, float32v( kComponentF ), FS::Select( notXY, float32v( kComponentsDE ) ^ sign0, float32v( kComponentsDE ) ^ sign1 ) );
+            float32v valueC_gZ = FS::Select( notXY, float32v( kComponentsDE ) ^ sign1, float32v( kComponentF ) );
+            valueC = FS::FMulAdd( valueC_gZ, fZ, FS::FMulAdd( fY, valueC_gY, fX * valueC_gX ) );
+
+            valueAB = FS::SelectHighBit( indexPermutation2HighBit, valueB, valueA ); // presumably picks valueB where the high bit is set, matching the table swap in the AVX512 branch - TODO confirm SelectHighBit argument order
+            valueBA = FS::SelectHighBit( indexPermutation2HighBit, valueA, valueB ); // the complementary choice
+        }
+        
+        multiplier ^= finalSign; // conditionally negate multiplier using the lowest hash bit
+        valueX = FS::FMulAdd( multiplier, FS::Select( indexPermutation3 == int32v( 0 ), valueC, valueAB ), valueX ); // X accumulates valueC when indexPermutation3 == 0, otherwise valueAB
+        valueY = FS::FMulAdd( multiplier, FS::Select( indexPermutation3 == int32v( 1 ), valueC, FS::Select( indexPermutation3 == int32v( 2 ), valueBA, valueAB ) ), valueY ); // Y accumulates valueC when == 1, valueBA when == 2, otherwise valueAB
+        valueZ = FS::FMulAdd( multiplier, FS::Select( indexPermutation3 == int32v( 2 ), valueC, valueBA ), valueZ ); // Z accumulates valueC when == 2, otherwise valueBA
+    }
+
+    template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
+    static void FS_VECTORCALL ApplyOrthogonalGradientMatrixVectorProductSimplex( int32v hash31, float32v fX, float32v fY, float32v fZ, float32v fW, float32v multiplier, float32v& valueX, float32v& valueY, float32v& valueZ, float32v& valueW )
+    {
+        const float kComponentPairwiseIndexedNegativeAB = -0.375999676691291f;
+        const float kComponentPairwiseUnindexedFillerAB = 0.222726847849776f;
+        const float kComponentPairwiseIndexedPositiveD = -kSkew4f;
+        const float kComponentPairwiseUnindexedD = kSkew4f;
+
+        const float kDeltaPairwiseToSingleAB = -0.124000323308709f;
+        const float kDeltaPairwiseToSingleD = 0.190983005625053f;
+        const float kDeltaSingleToExtra = kSkew4f;
+        const float kDeltaPairwiseABToC = 0.437016024448821f;
+        const float kDeltaUnindexedFillerToDiagonal = -kRoot2f;
+
+        const float kDeltaPairwiseToSingleExtraAB = kDeltaPairwiseToSingleAB + kDeltaSingleToExtra;
+        const float kDeltaPairwiseToSingleExtraD = kDeltaPairwiseToSingleD + kDeltaSingleToExtra;
+
+        const float sIdxABC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseToSingleAB;
+        const float sDiagABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleAB + kDeltaUnindexedFillerToDiagonal;
+        const float sFillABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleAB;
+        const float sIdxD = kComponentPairwiseIndexedPositiveD + kDeltaPairwiseToSingleD - 1;
+        const float sFillD = kComponentPairwiseUnindexedD + kDeltaPairwiseToSingleD;
+
+        const float pIdxPosAB = kComponentPairwiseIndexedNegativeAB + 1;
+        const float pIdxNegAB = kComponentPairwiseIndexedNegativeAB;
+        const float pFillAB = kComponentPairwiseUnindexedFillerAB;
+        const float pDiagAB = kComponentPairwiseUnindexedFillerAB + kDeltaUnindexedFillerToDiagonal;
+        const float pIdxPosC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseABToC + 1;
+        const float pIdxNegC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseABToC;
+        const float pFillC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseABToC;
+        const float pIdxPosD = kComponentPairwiseIndexedPositiveD;
+        const float pIdxNegD = kComponentPairwiseIndexedPositiveD - 1;
+        const float pFillD = kComponentPairwiseUnindexedD;
+
+        const float eIdxABC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseToSingleExtraAB + 1;
+        const float eDiagABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleExtraAB + kDeltaUnindexedFillerToDiagonal;
+        const float eFillABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleExtraAB;
+        const float eIdxD = kComponentPairwiseIndexedPositiveD + kDeltaPairwiseToSingleExtraD;
+        const float eFillD = kComponentPairwiseUnindexedD + kDeltaPairwiseToSingleExtraD;
+
+        int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 2 );
+        int32v indexBasis = hashShifted * int32v( 20 >> 2 ); // [0,20) << 27
+        int32v indexPermutation3 = ( hashShifted * int32v( ( -4LL << 29 ) / 3 ) ) >> 29; // [0,3)
+        int32v indexPermutation8 = indexBasis >> 24; // & int32v( 0x07 );
+        float32v finalSign = FS::Cast<float>( hash31 << 31 );
+
+        float32v valueA, valueB, valueC, valueD;
+        float32v valueA_gX, valueB_gX, valueC_gX, valueD_gX;
+        float32v valueA_gY, valueB_gY, valueC_gY, valueD_gY;
+        float32v valueA_gZ, valueB_gZ, valueC_gZ, valueD_gZ;
+        float32v valueA_gW, valueB_gW, valueC_gW, valueD_gW;
+
+        if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
+        {
+            indexBasis >>= 27;
+
+            valueA_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sIdxABC, sDiagABC, sDiagABC, sDiagABC, pIdxPosAB, pIdxNegAB, pDiagAB, pDiagAB, pIdxPosAB, pDiagAB, pIdxNegAB, pDiagAB, pIdxPosAB, pDiagAB, pDiagAB, pIdxNegAB ), indexBasis,
+                FS::Constant<float>( eIdxABC, eDiagABC, eDiagABC, eDiagABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueB_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sIdxABC, sFillABC, sFillABC, sFillABC, pIdxPosAB, pIdxNegAB, pFillAB, pFillAB, pIdxPosAB, pFillAB, pIdxNegAB, pFillAB, pIdxPosAB, pFillAB, pFillAB, pIdxNegAB ), indexBasis,
+                FS::Constant<float>( eIdxABC, eFillABC, eFillABC, eFillABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueC_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sIdxABC, sFillABC, sFillABC, sFillABC, pIdxPosC, pIdxNegC, pFillC, pFillC, pIdxPosC, pFillC, pIdxNegC, pFillC, pIdxPosC, pFillC, pFillC, pIdxNegC ), indexBasis,
+                FS::Constant<float>( eIdxABC, eFillABC, eFillABC, eFillABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueD_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sIdxD, sFillD, sFillD, sFillD, pIdxPosD, pIdxNegD, pFillD, pFillD, pIdxPosD, pFillD, pIdxNegD, pFillD, pIdxPosD, pFillD, pFillD, pIdxNegD ), indexBasis,
+                FS::Constant<float>( eIdxD, eFillD, eFillD, eFillD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+
+            valueA = valueA_gX * fX;
+            valueB = valueB_gX * fX;
+            valueC = valueC_gX * fX;
+            valueD = valueD_gX * fX;
+
+            valueA_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sDiagABC, sIdxABC, sFillABC, sFillABC, pIdxNegAB, pIdxPosAB, pFillAB, pFillAB, pDiagAB, pIdxPosAB, pDiagAB, pIdxNegAB, pDiagAB, pIdxPosAB, pIdxNegAB, pDiagAB ), indexBasis,
+                FS::Constant<float>( eDiagABC, eIdxABC, eFillABC, eFillABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueB_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillABC, sIdxABC, sDiagABC, sDiagABC, pIdxNegAB, pIdxPosAB, pDiagAB, pDiagAB, pFillAB, pIdxPosAB, pFillAB, pIdxNegAB, pFillAB, pIdxPosAB, pIdxNegAB, pFillAB ), indexBasis,
+                FS::Constant<float>( eFillABC, eIdxABC, eDiagABC, eDiagABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueC_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillABC, sIdxABC, sFillABC, sFillABC, pIdxNegC, pIdxPosC, pFillC, pFillC, pFillC, pIdxPosC, pFillC, pIdxNegC, pFillC, pIdxPosC, pIdxNegC, pFillC ), indexBasis,
+                FS::Constant<float>( eFillABC, eIdxABC, eFillABC, eFillABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueD_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillD, sIdxD, sFillD, sFillD, pIdxNegD, pIdxPosD, pFillD, pFillD, pFillD, pIdxPosD, pFillD, pIdxNegD, pFillD, pIdxPosD, pIdxNegD, pFillD ), indexBasis,
+                FS::Constant<float>( eFillD, eIdxD, eFillD, eFillD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+
+            valueA = FS::FMulAdd( valueA_gY, fY, valueA );
+            valueB = FS::FMulAdd( valueB_gY, fY, valueB );
+            valueC = FS::FMulAdd( valueC_gY, fY, valueC );
+            valueD = FS::FMulAdd( valueD_gY, fY, valueD );
+
+            valueA_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillABC, sFillABC, sIdxABC, sFillABC, pDiagAB, pDiagAB, pIdxPosAB, pIdxNegAB, pIdxNegAB, pFillAB, pIdxPosAB, pFillAB, pFillAB, pIdxNegAB, pIdxPosAB, pFillAB ), indexBasis,
+                FS::Constant<float>( eFillABC, eFillABC, eIdxABC, eFillABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueB_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sDiagABC, sDiagABC, sIdxABC, sFillABC, pFillAB, pFillAB, pIdxPosAB, pIdxNegAB, pIdxNegAB, pDiagAB, pIdxPosAB, pDiagAB, pDiagAB, pIdxNegAB, pIdxPosAB, pDiagAB ), indexBasis,
+                FS::Constant<float>( eDiagABC, eDiagABC, eIdxABC, eFillABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueC_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillABC, sFillABC, sIdxABC, sDiagABC, pFillC, pFillC, pIdxPosC, pIdxNegC, pIdxNegC, pFillC, pIdxPosC, pFillC, pFillC, pIdxNegC, pIdxPosC, pFillC ), indexBasis,
+                FS::Constant<float>( eFillABC, eFillABC, eIdxABC, eDiagABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueD_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillD, sFillD, sIdxD, sFillD, pFillD, pFillD, pIdxPosD, pIdxNegD, pIdxNegD, pFillD, pIdxPosD, pFillD, pFillD, pIdxNegD, pIdxPosD, pFillD ), indexBasis,
+                FS::Constant<float>( eFillD, eFillD, eIdxD, eFillD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+
+            valueA = FS::FMulAdd( valueA_gZ, fZ, valueA );
+            valueB = FS::FMulAdd( valueB_gZ, fZ, valueB );
+            valueC = FS::FMulAdd( valueC_gZ, fZ, valueC );
+            valueD = FS::FMulAdd( valueD_gZ, fZ, valueD );
+
+            valueA_gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillABC, sFillABC, sFillABC, sIdxABC, pFillAB, pFillAB, pIdxNegAB, pIdxPosAB, pFillAB, pIdxNegAB, pFillAB, pIdxPosAB, pIdxNegAB, pFillAB, pFillAB, pIdxPosAB ), indexBasis,
+                FS::Constant<float>( eFillABC, eFillABC, eFillABC, eIdxABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueB_gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillABC, sFillABC, sFillABC, sIdxABC, pDiagAB, pDiagAB, pIdxNegAB, pIdxPosAB, pDiagAB, pIdxNegAB, pDiagAB, pIdxPosAB, pIdxNegAB, pDiagAB, pDiagAB, pIdxPosAB ), indexBasis,
+                FS::Constant<float>( eFillABC, eFillABC, eFillABC, eIdxABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueC_gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sDiagABC, sDiagABC, sDiagABC, sIdxABC, pFillC, pFillC, pIdxNegC, pIdxPosC, pFillC, pIdxNegC, pFillC, pIdxPosC, pIdxNegC, pFillC, pFillC, pIdxPosC ), indexBasis,
+                FS::Constant<float>( eDiagABC, eDiagABC, eDiagABC, eIdxABC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+            valueD_gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutex2var_ps ),
+                FS::Constant<float>( sFillD, sFillD, sFillD, sIdxD, pFillD, pFillD, pIdxNegD, pIdxPosD, pFillD, pIdxNegD, pFillD, pIdxPosD, pIdxNegD, pFillD, pFillD, pIdxPosD ), indexBasis,
+                FS::Constant<float>( eFillD, eFillD, eFillD, eIdxD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+            );
+
+            valueA = FS::FMulAdd( valueA_gW, fW, valueA );
+            valueB = FS::FMulAdd( valueB_gW, fW, valueB );
+            valueC = FS::FMulAdd( valueC_gW, fW, valueC );
+            valueD = FS::FMulAdd( valueD_gW, fW, valueD );
+        }
+        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        {
+            const auto tableAB = FS::Constant<float>( pFillAB, pIdxNegAB, pFillAB, pIdxPosAB, sFillABC, sIdxABC, eFillABC, eIdxABC );
+            const auto tableC  = FS::Constant<float>( pFillC,  pIdxNegC,  pFillC,  pIdxPosC,  sFillABC, sIdxABC, eFillABC, eIdxABC );
+            const auto tableD  = FS::Constant<float>( pFillD,  pIdxNegD,  pFillD,  pIdxPosD,  sFillD,   sIdxD,   eFillD,   eIdxD   );
+
+            int32v indexPositive = indexBasis & int32v( 0x03 << 27 );
+            int32v indexNegative = ( indexBasis >> 2 ) & int32v( 0x03 << 27 );
+            indexNegative ^= indexPositive;
+
+            auto extraCase = ( indexBasis >= int32v( 0x10 << 27 ) );
+            auto singleCase = ( indexPositive == indexNegative );
+            indexPositive |= FS::Cast<int32_t>( singleCase ); // Force indexPositive checks to fail
+
+            int32v indexSelectBase = FS::Masked( singleCase, int32v( 4 ) ) | FS::Masked( extraCase, int32v( 2 ) );
+
+            int32v indexedCounter( -1 );
 
-            float32v a = FS::Select( p > int32v( 0 ), fX, fY );
-            float32v b;
-            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
             {
-                b = FS::Select( FS::Cast<FS::Mask<32>>( hash << 27 ), fY, fZ );
+                auto indexedPositive = ( indexPositive == int32v( 0 << 27 ) );
+                auto indexed = indexedPositive | ( indexNegative == int32v( 0 << 27 ) );
+                int32v indexSelect = FS::MaskedIncrement( indexed, indexSelectBase | FS::Masked( indexedPositive, int32v( 2 ) ) );
+
+                valueA_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), FS::Constant<float>( pDiagAB, pIdxNegAB, pDiagAB, pIdxPosAB, sDiagABC, sIdxABC, eDiagABC, eIdxABC ), indexSelect );
+                valueB_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableAB, indexSelect );
+                valueC_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableC,  indexSelect );
+                valueD_gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableD,  indexSelect );
+
+                indexedCounter = FS::MaskedDecrement( indexed, indexedCounter );
             }
-            else
+
+            valueA = valueA_gX * fX;
+            valueB = valueB_gX * fX;
+            valueC = valueC_gX * fX;
+            valueD = valueD_gX * fX;
+
             {
-                b = FS::Select( p > int32v( 1 << 3 ), fY, fZ );
+                auto indexedPositive = ( indexPositive == int32v( 1 << 27 ) );
+                auto indexed = indexedPositive | ( indexNegative == int32v( 1 << 27 ) );
+                int32v indexSelect = FS::MaskedIncrement( indexed, indexSelectBase | FS::Masked( indexedPositive, int32v( 2 ) ) );
+
+                valueA_gY = valueB_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableAB, indexSelect );
+                valueC_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableC, indexSelect );
+                valueD_gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableD, indexSelect );
+
+                int32v maskedIndexedCounter = FS::InvMasked( indexed, indexedCounter );
+                valueA_gY = FS::MaskedAdd( maskedIndexedCounter == int32v( -2 ), valueA_gY, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                valueB_gY = FS::MaskedAdd( maskedIndexedCounter == int32v( -1 ), valueB_gY, float32v( kDeltaUnindexedFillerToDiagonal ) );
+
+                indexedCounter = FS::MaskedDecrement( indexed, indexedCounter );
             }
-            float32v c = FS::Select( p > int32v( 2 << 3 ), fZ, fW );
 
-            float32v aSign = FS::Cast<float>( hash << 31 );
-            float32v bSign = FS::Cast<float>( ( hash >> 1 ) << 31 );
-            float32v cSign = FS::Cast<float>( ( hash >> 2 ) << 31 );
+            valueA = FS::FMulAdd( valueA_gY, fY, valueA );
+            valueB = FS::FMulAdd( valueB_gY, fY, valueB );
+            valueC = FS::FMulAdd( valueC_gY, fY, valueC );
+            valueD = FS::FMulAdd( valueD_gY, fY, valueD );
 
-            return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
+            {
+                auto indexedPositive = ( indexPositive == int32v( 2 << 27 ) );
+                auto indexed = indexedPositive | ( indexNegative == int32v( 2 << 27 ) );
+                int32v indexSelect = FS::MaskedIncrement( indexed, indexSelectBase | FS::Masked( indexedPositive, int32v( 2 ) ) );
+
+                valueA_gZ = valueB_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableAB, indexSelect );
+                valueC_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableC, indexSelect );
+                valueD_gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableD, indexSelect );
+
+                int32v maskedIndexedCounter = FS::InvMasked( indexed, indexedCounter );
+                valueA_gZ = FS::MaskedAdd( maskedIndexedCounter == int32v( -3 ), valueA_gZ, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                valueB_gZ = FS::MaskedAdd( maskedIndexedCounter == int32v( -2 ), valueB_gZ, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                valueC_gZ = FS::MaskedAdd( maskedIndexedCounter == int32v( -1 ), valueC_gZ, float32v( kDeltaUnindexedFillerToDiagonal ) );
+
+                indexedCounter = FS::MaskedDecrement( indexed, indexedCounter );
+            }
+
+            valueA = FS::FMulAdd( valueA_gZ, fZ, valueA );
+            valueB = FS::FMulAdd( valueB_gZ, fZ, valueB );
+            valueC = FS::FMulAdd( valueC_gZ, fZ, valueC );
+            valueD = FS::FMulAdd( valueD_gZ, fZ, valueD );
+
+            {
+                auto indexedPositive = ( indexPositive == int32v( 3 << 27 ) );
+                auto indexed = indexedPositive | ( indexNegative == int32v( 3 << 27 ) );
+                int32v indexSelect = FS::MaskedIncrement( indexed, indexSelectBase | FS::Masked( indexedPositive, int32v( 2 ) ) );
+
+                valueA_gW = valueB_gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableAB, indexSelect );
+                valueC_gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableC, indexSelect );
+                valueD_gW = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm256_permutevar8x32_ps ), tableD, indexSelect );
+
+                int32v maskedIndexedCounter = FS::InvMasked( indexed, indexedCounter );
+                valueB_gW = FS::MaskedAdd( maskedIndexedCounter == int32v( -3 ), valueB_gW, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                valueC_gW = FS::MaskedAdd( maskedIndexedCounter == int32v( -2 ), valueC_gW, float32v( kDeltaUnindexedFillerToDiagonal ) );
+            }
+
+            valueA = FS::FMulAdd( valueA_gW, fW, valueA );
+            valueB = FS::FMulAdd( valueB_gW, fW, valueB );
+            valueC = FS::FMulAdd( valueC_gW, fW, valueC );
+            valueD = FS::FMulAdd( valueD_gW, fW, valueD );
+        }
+        else
+        {
+            int32v indexPositive = indexBasis & int32v( 0x03 << 27 );
+            int32v indexNegative = ( indexBasis >> 2 ) & int32v( 0x03 << 27 );
+            indexNegative ^= indexPositive;
+
+            auto extraCase = ( indexBasis >= int32v( 0x10 << 27 ) );
+            auto singleCase = ( indexPositive == indexNegative );
+            auto singleNonExtraCase = indexBasis < int32v( 0x04 << 27 );
+            indexPositive |= FS::Cast<int32_t>( singleNonExtraCase ); // Force indexPositive checks to fail
+
+            float32v singleOffsetAB = FS::MaskedAdd( extraCase, float32v( kDeltaPairwiseToSingleAB ), float32v( kDeltaSingleToExtra ) );
+            float32v componentIndexedNegativeAB = FS::MaskedAdd( singleCase, float32v( kComponentPairwiseIndexedNegativeAB ), singleOffsetAB );
+            float32v componentUnindexedFillerAB = FS::MaskedAdd( singleCase, float32v( kComponentPairwiseUnindexedFillerAB ), singleOffsetAB );
+
+            float32v componentIndexedNegativeC = FS::InvMaskedAdd( singleCase, componentIndexedNegativeAB, float32v( kDeltaPairwiseABToC ) );
+            float32v componentUnindexedFillerC = FS::InvMaskedAdd( singleCase, componentUnindexedFillerAB, float32v( kDeltaPairwiseABToC ) );
+
+            float32v singleOffsetD = FS::MaskedAdd( extraCase, float32v( kDeltaPairwiseToSingleD ), float32v( kDeltaSingleToExtra ) );
+            float32v componentIndexedPositiveD = FS::MaskedAdd( singleCase, float32v( kComponentPairwiseIndexedPositiveD ), singleOffsetD );
+            float32v componentUnindexedD = FS::MaskedAdd( singleCase, float32v( kComponentPairwiseUnindexedD ), singleOffsetD );
+
+            int32v indexedCounter( -1 );
+
+            {
+                auto indexedPositive = ( indexPositive == int32v( 0 << 27 ) );
+                auto indexed = indexedPositive | ( indexNegative == int32v( 0 << 27 ) );
+
+                float32v indexedComponentAB = FS::MaskedIncrement( indexedPositive, componentIndexedNegativeAB );
+                float32v indexedComponentC = FS::MaskedIncrement( indexedPositive, componentIndexedNegativeC );
+                float32v indexedComponentD = FS::MaskedDecrement( ~indexedPositive, componentIndexedPositiveD );
+
+                float32v unindexedComponentA = componentUnindexedFillerAB + float32v( kDeltaUnindexedFillerToDiagonal );
+                float32v unindexedComponentB = componentUnindexedFillerAB;
+                float32v unindexedComponentC = componentUnindexedFillerC;
+
+                valueA_gX = FS::Select( indexed, indexedComponentAB, unindexedComponentA );
+                valueB_gX = FS::Select( indexed, indexedComponentAB, unindexedComponentB );
+                valueC_gX = FS::Select( indexed, indexedComponentC,  unindexedComponentC );
+                valueD_gX = FS::Select( indexed, indexedComponentD,  componentUnindexedD );
+
+                indexedCounter = FS::MaskedDecrement( indexed, indexedCounter );
+            }
+
+            valueA = valueA_gX * fX;
+            valueB = valueB_gX * fX;
+            valueC = valueC_gX * fX;
+            valueD = valueD_gX * fX;
+
+            {
+                auto indexedPositive = ( indexPositive == int32v( 1 << 27 ) );
+                auto indexed = indexedPositive | ( indexNegative == int32v( 1 << 27 ) );
+
+                float32v indexedComponentAB = FS::MaskedIncrement( indexedPositive, componentIndexedNegativeAB );
+                float32v indexedComponentC = FS::MaskedIncrement( indexedPositive, componentIndexedNegativeC );
+                float32v indexedComponentD = FS::MaskedDecrement( ~indexedPositive, componentIndexedPositiveD );
+
+                float32v unindexedComponentA = FS::MaskedAdd( indexedCounter == int32v( -2 ), componentUnindexedFillerAB, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                float32v unindexedComponentB = FS::MaskedAdd( indexedCounter == int32v( -1 ), componentUnindexedFillerAB, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                float32v unindexedComponentC = componentUnindexedFillerC;
+
+                valueA_gY = FS::Select( indexed, indexedComponentAB, unindexedComponentA );
+                valueB_gY = FS::Select( indexed, indexedComponentAB, unindexedComponentB );
+                valueC_gY = FS::Select( indexed, indexedComponentC,  unindexedComponentC );
+                valueD_gY = FS::Select( indexed, indexedComponentD,  componentUnindexedD );
+
+                indexedCounter = FS::MaskedDecrement( indexed, indexedCounter );
+            }
+
+            valueA = FS::FMulAdd( valueA_gY, fY, valueA );
+            valueB = FS::FMulAdd( valueB_gY, fY, valueB );
+            valueC = FS::FMulAdd( valueC_gY, fY, valueC );
+            valueD = FS::FMulAdd( valueD_gY, fY, valueD );
+
+            {
+                auto indexedPositive = ( indexPositive == int32v( 2 << 27 ) );
+                auto indexed = indexedPositive | ( indexNegative == int32v( 2 << 27 ) );
+
+                float32v indexedComponentAB = FS::MaskedIncrement( indexedPositive, componentIndexedNegativeAB );
+                float32v indexedComponentC = FS::MaskedIncrement( indexedPositive, componentIndexedNegativeC );
+                float32v indexedComponentD = FS::MaskedDecrement( ~indexedPositive, componentIndexedPositiveD );
+
+                float32v unindexedComponentA = FS::MaskedAdd( indexedCounter == int32v( -3 ), componentUnindexedFillerAB, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                float32v unindexedComponentB = FS::MaskedAdd( indexedCounter == int32v( -2 ), componentUnindexedFillerAB, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                float32v unindexedComponentC = FS::MaskedAdd( indexedCounter == int32v( -1 ), componentUnindexedFillerC,  float32v( kDeltaUnindexedFillerToDiagonal ) );
+
+                valueA_gZ = FS::Select( indexed, indexedComponentAB, unindexedComponentA );
+                valueB_gZ = FS::Select( indexed, indexedComponentAB, unindexedComponentB );
+                valueC_gZ = FS::Select( indexed, indexedComponentC,  unindexedComponentC );
+                valueD_gZ = FS::Select( indexed, indexedComponentD,  componentUnindexedD );
+
+                indexedCounter = FS::MaskedDecrement( indexed, indexedCounter );
+            }
+
+            valueA = FS::FMulAdd( valueA_gZ, fZ, valueA );
+            valueB = FS::FMulAdd( valueB_gZ, fZ, valueB );
+            valueC = FS::FMulAdd( valueC_gZ, fZ, valueC );
+            valueD = FS::FMulAdd( valueD_gZ, fZ, valueD );
+
+            {
+                auto indexedPositive = ( indexPositive == int32v( 3 << 27 ) );
+                auto indexed = indexedPositive | ( indexNegative == int32v( 3 << 27 ) );
+
+                float32v indexedComponentAB = FS::MaskedIncrement( indexedPositive, componentIndexedNegativeAB );
+                float32v indexedComponentC = FS::MaskedIncrement( indexedPositive, componentIndexedNegativeC );
+                float32v indexedComponentD = FS::MaskedDecrement( ~indexedPositive, componentIndexedPositiveD );
+
+                float32v unindexedComponentA = componentUnindexedFillerAB;
+                float32v unindexedComponentB = FS::MaskedAdd( indexedCounter == int32v( -3 ), componentUnindexedFillerAB, float32v( kDeltaUnindexedFillerToDiagonal ) );
+                float32v unindexedComponentC = FS::MaskedAdd( indexedCounter == int32v( -2 ), componentUnindexedFillerC,  float32v( kDeltaUnindexedFillerToDiagonal ) );
+
+                valueA_gW = FS::Select( indexed, indexedComponentAB, unindexedComponentA );
+                valueB_gW = FS::Select( indexed, indexedComponentAB, unindexedComponentB );
+                valueC_gW = FS::Select( indexed, indexedComponentC,  unindexedComponentC );
+                valueD_gW = FS::Select( indexed, indexedComponentD,  componentUnindexedD );
+            }
+
+            valueA = FS::FMulAdd( valueA_gW, fW, valueA );
+            valueB = FS::FMulAdd( valueB_gW, fW, valueB );
+            valueC = FS::FMulAdd( valueC_gW, fW, valueC );
+            valueD = FS::FMulAdd( valueD_gW, fW, valueD );
         }
+
+        int32v valueIndexX = ( indexPermutation8 >> 1 ); // & int32v( 0x3 );
+        int32v valueIndexY = ( FS::Increment( valueIndexX ) + indexPermutation3 ); // & int32v( 0x3 );
+        int32v valueIndexZ = indexPermutation8 & int32v( 0x1 );
+        valueIndexZ = ( FS::Increment( valueIndexX ) + FS::MaskedIncrement( valueIndexZ >= indexPermutation3, valueIndexZ ) ); // & int32v( 0x3 );
+        int32v valueIndexSumXYZ = valueIndexX + valueIndexY + valueIndexZ;
+
+        multiplier ^= finalSign;
+        valueX = FS::FMulAdd( multiplier, FS::SelectHighBit( valueIndexX << 31, FS::SelectHighBit( valueIndexX << 30, valueD, valueB ), FS::SelectHighBit( valueIndexX << 30, valueC, valueA ) ), valueX );
+        valueY = FS::FMulAdd( multiplier, FS::SelectHighBit( valueIndexY << 31, FS::SelectHighBit( valueIndexY << 30, valueD, valueB ), FS::SelectHighBit( valueIndexY << 30, valueC, valueA ) ), valueY );
+        valueZ = FS::FMulAdd( multiplier, FS::SelectHighBit( valueIndexZ << 31, FS::SelectHighBit( valueIndexZ << 30, valueD, valueB ), FS::SelectHighBit( valueIndexZ << 30, valueC, valueA ) ), valueZ );
+        valueW = FS::FMulAdd( multiplier, FS::SelectHighBit( valueIndexSumXYZ << 31, FS::SelectHighBit( valueIndexSumXYZ << 30, valueD, valueB ), FS::SelectHighBit( valueIndexSumXYZ << 30, valueA, valueC ) ), valueW );
     }
 
-    template<typename... P>
+    template<VectorizationScheme Scheme>
+    FS_FORCEINLINE static void ApplyVectorContributionSimplex( int32v hash, float32v fX, float32v fY, float32v multiplier, float32v& valueX, float32v& valueY ) {
+        switch( Scheme ) {
+            case VectorizationScheme::OrthogonalGradientMatrix:
+                return ApplyOrthogonalGradientMatrixVectorProductSimplex( hash, fX, fY, multiplier, valueX, valueY );
+            case VectorizationScheme::GradientOuterProduct:
+                return ApplyGradientOuterProductVectorProductSimplex( hash, fX, fY, multiplier, valueX, valueY );
+        }
+    }
+
+    template<VectorizationScheme Scheme>
+    FS_FORCEINLINE static void ApplyVectorContributionCommon( int32v hash, float32v fX, float32v fY, float32v fZ, float32v multiplier, float32v& valueX, float32v& valueY, float32v& valueZ ) {
+        switch( Scheme ) {
+            case VectorizationScheme::OrthogonalGradientMatrix:
+                return ApplyOrthogonalGradientMatrixVectorProductCommon( hash, fX, fY, fZ, multiplier, valueX, valueY, valueZ );
+            case VectorizationScheme::GradientOuterProduct:
+                return ApplyGradientOuterProductVectorProductCommon( hash, fX, fY, fZ, multiplier, valueX, valueY, valueZ );
+        }
+    }
+
+    template<VectorizationScheme Scheme>
+    FS_FORCEINLINE static void ApplyVectorContributionSimplex( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW, float32v multiplier, float32v& valueX, float32v& valueY, float32v& valueZ, float32v& valueW ) {
+        switch( Scheme ) {
+            case VectorizationScheme::OrthogonalGradientMatrix:
+                return ApplyOrthogonalGradientMatrixVectorProductSimplex( hash, fX, fY, fZ, fW, multiplier, valueX, valueY, valueZ, valueW );
+            case VectorizationScheme::GradientOuterProduct:
+                return ApplyGradientOuterProductVectorProductSimplex( hash, fX, fY, fZ, fW, multiplier, valueX, valueY, valueZ, valueW );
+        }
+    }
+
+    enum HashMultiplier
+    {
+        A = 0x27D4EB2D
+    };
+
+    template<HashMultiplier Multiplier = A, typename... P>
     FS_FORCEINLINE static int32v HashPrimes( int32v seed, P... primedPos )
     {
         int32v hash = seed;
-        hash ^= (primedPos ^ ...);
+        hash ^= ( primedPos ^ ... );
 
-        hash *= int32v( 0x27d4eb2d );
-        return (hash >> 15) ^ hash;
+        hash *= int32v( Multiplier );
+
+        return ( hash >> 15 ) ^ hash;
     }
 
     template<typename... P>
     FS_FORCEINLINE static int32v HashPrimesHB( int32v seed, P... primedPos )
     {
         int32v hash = seed;
-        hash ^= (primedPos ^ ...);
+        hash ^= ( primedPos ^ ... );
         
         hash *= int32v( 0x27d4eb2d );
         return hash;
-    }  
+    }
 
     template<typename... P>
-     FS_FORCEINLINE static float32v GetValueCoord( int32v seed, P... primedPos )
+    FS_FORCEINLINE static float32v GetValueCoord( int32v seed, P... primedPos )
     {
         int32v hash = seed;
         hash ^= (primedPos ^ ...);
diff --git a/src/FastNoise/FastSIMD_Build.inl b/src/FastNoise/FastSIMD_Build.inl
index 97330116..26a477f9 100644
--- a/src/FastNoise/FastSIMD_Build.inl
+++ b/src/FastNoise/FastSIMD_Build.inl
@@ -91,7 +91,6 @@ FASTNOISE_REGISTER_NODE( PositionOutput );
 FASTNOISE_REGISTER_NODE( DistanceToPoint );
 
 FASTNOISE_REGISTER_NODE( Simplex );
-FASTNOISE_REGISTER_NODE( OpenSimplex2 );
 FASTNOISE_REGISTER_NODE( Perlin );
 FASTNOISE_REGISTER_NODE( Value );
                        
@@ -103,8 +102,7 @@ FASTNOISE_REGISTER_NODE( FractalFBm );
 FASTNOISE_REGISTER_NODE( FractalPingPong );
 FASTNOISE_REGISTER_NODE( FractalRidged );
 
-FASTNOISE_REGISTER_NODE( DomainWarpOpenSimplex );
-FASTNOISE_REGISTER_NODE( OpenSimplex2S );
+FASTNOISE_REGISTER_NODE( DomainWarpSimplex );
 FASTNOISE_REGISTER_NODE( DomainWarpGradient );
 
 FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );

From 93395f28f6b1217dca208322029961698b53bfcf Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 3 Dec 2024 23:36:32 +0000
Subject: [PATCH 115/139] Fix compiler errors from simplex merge #135

---
 .../Generators/DomainWarpSimplex.inl          |  24 ++--
 include/FastNoise/Generators/Utils.inl        | 130 ++++++++++--------
 src/FastNoise/Metadata.cpp                    |   6 +-
 3 files changed, 84 insertions(+), 76 deletions(-)

diff --git a/include/FastNoise/Generators/DomainWarpSimplex.inl b/include/FastNoise/Generators/DomainWarpSimplex.inl
index d40c2d08..f7a0842d 100644
--- a/include/FastNoise/Generators/DomainWarpSimplex.inl
+++ b/include/FastNoise/Generators/DomainWarpSimplex.inl
@@ -118,9 +118,9 @@ protected:
         ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, FS::MaskedAdd( xGreaterEqualY, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( xGreaterEqualY, yPrimedBase, int32v( Primes::Y ) ) ), dx1, dy1, falloff1, valueX, valueY );
         ApplyVectorContributionSimplex<Scheme>( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) ), dx2, dy2, falloff2, valueX, valueY );
 
-        constexpr double kBounding = constexpr( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ?
             49.918426513671875 / 2.0 :
-            70.1480577066486;
+            70.1480577066486 );
 
         warpAmp *= float32v( kBounding );
         xOut = FS::FMulAdd( valueX, warpAmp, xOut );
@@ -214,9 +214,9 @@ protected:
             valueZ += valueTransformDelta;
         }
 
-        constexpr double kBounding = constexpr( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ?
             32.69428253173828125 / 1.4142135623730951 :
-            16.281631889139874;
+            16.281631889139874 );
 
         warpAmp *= float32v( kBounding );
         xOut = FS::FMulAdd( valueX, warpAmp, xOut );
@@ -367,9 +367,9 @@ protected:
             xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ), wPrimedBase + int32v( Primes::W ) ),
             dx4, dy4, dz4, dw4, falloff4, valueX, valueY, valueZ, valueW );
 
-        constexpr double kBounding = constexpr( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ?
             33.653125584827855 / 1.4142135623730951 :
-            30.88161777516092;
+            30.88161777516092 );
 
         warpAmp *= float32v( kBounding );
         xOut = FS::FMulAdd( valueX, warpAmp, xOut );
@@ -457,9 +457,9 @@ protected:
             ApplyVectorContributionSimplex<Scheme>( hash, dx, dy, falloff, valueX, valueY );
         }
 
-        constexpr double kBounding = constexpr( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ?
             9.28993664146183 / 2.0 :
-            12.814453124999995;
+            12.814453124999995 );
 
         warpAmp *= float32v( kBounding );
         xOut = FS::FMulAdd( valueX, warpAmp, xOut );
@@ -684,9 +684,9 @@ protected:
             valueZ += valueTransformDelta;
         }
 
-        constexpr double kBounding = constexpr( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ?
             144.736422163332608 / 1.4142135623730951 :
-            37.63698669623629;
+            37.63698669623629 );
 
         warpAmp *= float32v( kBounding );
         xOut = FS::FMulAdd( valueX, warpAmp, xOut );
@@ -1070,9 +1070,9 @@ protected:
                 ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), valueX, valueY, valueZ, valueW );
         }
 
-        constexpr double kBounding = constexpr( Scheme == VectorizationScheme::GradientOuterProduct ) ?
+        constexpr double kBounding = ( Scheme == VectorizationScheme::GradientOuterProduct ?
             115.21625311930542 / 1.4142135623730951 :
-            48.80058117543753;
+            48.80058117543753 );
 
         warpAmp *= float32v( kBounding );
         xOut = FS::FMulAdd( valueX, warpAmp, xOut );
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 62acc447..f23a9215 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -123,11 +123,15 @@ namespace FastNoise
             float32v sign0 = FS::Cast<float>( index << 31 );
             float32v sign1 = FS::Cast<float>( ( index >> 1 ) << 31 );
 
-            mask32v thirdCombo = constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 ) ?
-                FS::Cast<FS::Mask<32>>( index << ( 31 - 3 ) ) :
-                index >= int32v( 8 );
-
-            float32v u = FS::Select( thirdCombo, fY, fX );
+            float32v u;
+            if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+            {
+                u = FS::SelectHighBit( index << ( 31 - 3 ), fY, fX );
+            }
+            else
+            {
+                u = FS::Select( index >= int32v( 8 ), fY, fX );
+            }
             float32v v = FS::Select( index >= int32v( 4 ), fZ, fY );
 
             return ( u ^ sign0 ) + ( v ^ sign1 );
@@ -199,7 +203,7 @@ namespace FastNoise
     {
         int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 1 );
         int32v indexGradient = hashShifted * int32v( 12 >> 2 ); // [0,12) in the upper four bits
-        int32v indexOuterVector = ( hashShifted * int32v( ( -4LL << 30 ) / 3 ) ) & int32v( 0xC0000003 ); // [0,12) in bits 0,1,30,31
+        int32v indexOuterVector = ( hashShifted * int32v( 0xAAAAAAAB ) ) & int32v( 0xC0000003 ); // [0,12) in bits 0,1,30,31 // ( -4LL << 30 ) / 3 )
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
@@ -255,7 +259,7 @@ namespace FastNoise
     {
         int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 1 );
         int32v indexGradient = FS::BitShiftRightZeroExtend( hashShifted * int32v( 12 >> 2 ), 28 ); // [0,12)
-        int32v indexOuterVector = ( hashShifted * int32v( ( -4LL << 30 ) / 3 ) ) & int32v( 0xC0000003 ); // [0,12) in bits 0,1,30,31
+        int32v indexOuterVector = ( hashShifted * int32v( 0xAAAAAAAB ) ) & int32v( 0xC0000003 ); // [0,12) in bits 0,1,30,31 // ( -4LL << 30 ) / 3 )
         indexOuterVector |= indexOuterVector >> 28;
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
@@ -276,11 +280,15 @@ namespace FastNoise
                 float32v sign0 = FS::Cast<float>( indexGradient << 31 );
                 float32v sign1 = FS::Cast<float>( ( indexGradient >> 1 ) << 31 );
 
-                mask32v thirdCombo = constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 ) ?
-                    FS::Cast<FS::Mask<32>>( indexGradient << ( 31 - 3 ) ) :
-                    indexGradient >= int32v( 8 );
-
-                float32v u = FS::Select( thirdCombo, fY, fX );
+                float32v u;
+                if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
+                {
+                    u = FS::SelectHighBit( indexGradient << ( 31 - 3 ), fY, fX );
+                }
+                else
+                {
+                    u = FS::Select( indexGradient >= int32v( 8 ), fY, fX );
+                }
                 float32v v = FS::Select( indexGradient >= int32v( 4 ), fZ, fY );
 
                 multiplier *= ( u ^ sign0 ) + ( v ^ sign1 );
@@ -307,7 +315,7 @@ namespace FastNoise
     {
         int32v hashShifted = FS::BitShiftRightZeroExtend( hash, 2 );
         int32v indexGradient = hashShifted * int32v( 20 >> 2 ); // [0,20) in the upper five bits
-        int32v indexOuterVector = hashShifted * int32v( ( -8LL << 29 ) / 5 );
+        int32v indexOuterVector = hashShifted * int32v( 0xCCCCCCCD ); // ( -8LL << 29 ) / 5
         indexOuterVector = ( indexOuterVector & int32v( 0xE0000003 ) ) * int32v( 3 | ( 1 << 27 ) ); // [0,20) in the upper five bits, independently of the above
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
@@ -424,14 +432,14 @@ namespace FastNoise
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     FS_FORCEINLINE static void ApplyOrthogonalGradientMatrixVectorProductCommon( int32v hash31, float32v fX, float32v fY, float32v fZ, float32v multiplier, float32v& valueX, float32v& valueY, float32v& valueZ )
     {
-        const float kComponentA = 2.224744871391589f;
-        const float kComponentB = -0.224744871391589f;
-        const float kComponentC = -1.0f;
-        const float kComponentsDE = 1.0f;
-        const float kComponentF = 2.0f;
+        constexpr float kComponentA = 2.224744871391589f;
+        constexpr float kComponentB = -0.224744871391589f;
+        constexpr float kComponentC = -1.0f;
+        constexpr float kComponentsDE = 1.0f;
+        constexpr float kComponentF = 2.0f;
         
         int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 1 );
-        int32v indexFacetBasisWithPermute2 = hashShifted * int32v( ( -4LL << 30 ) / 3 ); // [0,3) in the highest two bits, [0,8) in the lowest three bits
+        int32v indexFacetBasisWithPermute2 = hashShifted * int32v( 0xAAAAAAAB ); // [0,3) in the highest two bits, [0,8) in the lowest three bits // ( -4LL << 30 ) / 3
         int32v indexPermutation2HighBit = ( indexFacetBasisWithPermute2 << 29 ); // & int32v( 1 << 31 ); // [0,1) in the most significant bit
         int32v indexPermutation3 = FS::BitShiftRightZeroExtend( hashShifted * int32v( 3 ), 30 ); // [0,3)
         float32v finalSign = FS::Cast<float>( hash31 << 31 );
@@ -506,46 +514,46 @@ namespace FastNoise
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
     static void FS_VECTORCALL ApplyOrthogonalGradientMatrixVectorProductSimplex( int32v hash31, float32v fX, float32v fY, float32v fZ, float32v fW, float32v multiplier, float32v& valueX, float32v& valueY, float32v& valueZ, float32v& valueW )
     {
-        const float kComponentPairwiseIndexedNegativeAB = -0.375999676691291f;
-        const float kComponentPairwiseUnindexedFillerAB = 0.222726847849776f;
-        const float kComponentPairwiseIndexedPositiveD = -kSkew4f;
-        const float kComponentPairwiseUnindexedD = kSkew4f;
-
-        const float kDeltaPairwiseToSingleAB = -0.124000323308709f;
-        const float kDeltaPairwiseToSingleD = 0.190983005625053f;
-        const float kDeltaSingleToExtra = kSkew4f;
-        const float kDeltaPairwiseABToC = 0.437016024448821f;
-        const float kDeltaUnindexedFillerToDiagonal = -kRoot2f;
-
-        const float kDeltaPairwiseToSingleExtraAB = kDeltaPairwiseToSingleAB + kDeltaSingleToExtra;
-        const float kDeltaPairwiseToSingleExtraD = kDeltaPairwiseToSingleD + kDeltaSingleToExtra;
-
-        const float sIdxABC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseToSingleAB;
-        const float sDiagABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleAB + kDeltaUnindexedFillerToDiagonal;
-        const float sFillABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleAB;
-        const float sIdxD = kComponentPairwiseIndexedPositiveD + kDeltaPairwiseToSingleD - 1;
-        const float sFillD = kComponentPairwiseUnindexedD + kDeltaPairwiseToSingleD;
-
-        const float pIdxPosAB = kComponentPairwiseIndexedNegativeAB + 1;
-        const float pIdxNegAB = kComponentPairwiseIndexedNegativeAB;
-        const float pFillAB = kComponentPairwiseUnindexedFillerAB;
-        const float pDiagAB = kComponentPairwiseUnindexedFillerAB + kDeltaUnindexedFillerToDiagonal;
-        const float pIdxPosC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseABToC + 1;
-        const float pIdxNegC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseABToC;
-        const float pFillC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseABToC;
-        const float pIdxPosD = kComponentPairwiseIndexedPositiveD;
-        const float pIdxNegD = kComponentPairwiseIndexedPositiveD - 1;
-        const float pFillD = kComponentPairwiseUnindexedD;
-
-        const float eIdxABC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseToSingleExtraAB + 1;
-        const float eDiagABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleExtraAB + kDeltaUnindexedFillerToDiagonal;
-        const float eFillABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleExtraAB;
-        const float eIdxD = kComponentPairwiseIndexedPositiveD + kDeltaPairwiseToSingleExtraD;
-        const float eFillD = kComponentPairwiseUnindexedD + kDeltaPairwiseToSingleExtraD;
+        constexpr float kComponentPairwiseIndexedNegativeAB = -0.375999676691291f;
+        constexpr float kComponentPairwiseUnindexedFillerAB = 0.222726847849776f;
+        constexpr float kComponentPairwiseIndexedPositiveD = -kSkew4f;
+        constexpr float kComponentPairwiseUnindexedD = kSkew4f;
+
+        constexpr float kDeltaPairwiseToSingleAB = -0.124000323308709f;
+        constexpr float kDeltaPairwiseToSingleD = 0.190983005625053f;
+        constexpr float kDeltaSingleToExtra = kSkew4f;
+        constexpr float kDeltaPairwiseABToC = 0.437016024448821f;
+        constexpr float kDeltaUnindexedFillerToDiagonal = -kRoot2f;
+
+        constexpr float kDeltaPairwiseToSingleExtraAB = kDeltaPairwiseToSingleAB + kDeltaSingleToExtra;
+        constexpr float kDeltaPairwiseToSingleExtraD = kDeltaPairwiseToSingleD + kDeltaSingleToExtra;
+
+        constexpr float sIdxABC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseToSingleAB;
+        constexpr float sDiagABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleAB + kDeltaUnindexedFillerToDiagonal;
+        constexpr float sFillABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleAB;
+        constexpr float sIdxD = kComponentPairwiseIndexedPositiveD + kDeltaPairwiseToSingleD - 1;
+        constexpr float sFillD = kComponentPairwiseUnindexedD + kDeltaPairwiseToSingleD;
+
+        constexpr float pIdxPosAB = kComponentPairwiseIndexedNegativeAB + 1;
+        constexpr float pIdxNegAB = kComponentPairwiseIndexedNegativeAB;
+        constexpr float pFillAB = kComponentPairwiseUnindexedFillerAB;
+        constexpr float pDiagAB = kComponentPairwiseUnindexedFillerAB + kDeltaUnindexedFillerToDiagonal;
+        constexpr float pIdxPosC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseABToC + 1;
+        constexpr float pIdxNegC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseABToC;
+        constexpr float pFillC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseABToC;
+        constexpr float pIdxPosD = kComponentPairwiseIndexedPositiveD;
+        constexpr float pIdxNegD = kComponentPairwiseIndexedPositiveD - 1;
+        constexpr float pFillD = kComponentPairwiseUnindexedD;
+
+        constexpr float eIdxABC = kComponentPairwiseIndexedNegativeAB + kDeltaPairwiseToSingleExtraAB + 1;
+        constexpr float eDiagABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleExtraAB + kDeltaUnindexedFillerToDiagonal;
+        constexpr float eFillABC = kComponentPairwiseUnindexedFillerAB + kDeltaPairwiseToSingleExtraAB;
+        constexpr float eIdxD = kComponentPairwiseIndexedPositiveD + kDeltaPairwiseToSingleExtraD;
+        constexpr float eFillD = kComponentPairwiseUnindexedD + kDeltaPairwiseToSingleExtraD;
 
         int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 2 );
         int32v indexBasis = hashShifted * int32v( 20 >> 2 ); // [0,20) << 27
-        int32v indexPermutation3 = ( hashShifted * int32v( ( -4LL << 29 ) / 3 ) ) >> 29; // [0,3)
+        int32v indexPermutation3 = ( hashShifted * int32v( 0xD5555556 ) ) >> 29; // [0,3] // ( -4LL << 29 ) / 3
         int32v indexPermutation8 = indexBasis >> 24; // & int32v( 0x07 );
         float32v finalSign = FS::Cast<float>( hash31 << 31 );
 
@@ -911,18 +919,18 @@ namespace FastNoise
         }
     }
 
-    enum HashMultiplier
+    enum class HashMultiplier : int32_t
     {
         A = 0x27D4EB2D
     };
 
-    template<HashMultiplier Multiplier = A, typename... P>
+    template<HashMultiplier Multiplier = HashMultiplier::A, typename... P>
     FS_FORCEINLINE static int32v HashPrimes( int32v seed, P... primedPos )
     {
         int32v hash = seed;
         hash ^= ( primedPos ^ ... );
 
-        hash *= int32v( Multiplier );
+        hash *= int32v( (int32_t)Multiplier );
 
         return ( hash >> 15 ) ^ hash;
     }
@@ -933,7 +941,7 @@ namespace FastNoise
         int32v hash = seed;
         hash ^= ( primedPos ^ ... );
         
-        hash *= int32v( 0x27d4eb2d );
+        hash *= int32v( (int32_t)HashMultiplier::A );
         return hash;
     }
 
@@ -943,7 +951,7 @@ namespace FastNoise
         int32v hash = seed;
         hash ^= (primedPos ^ ...);
 
-        hash *= hash * int32v( 0x27d4eb2d );
+        hash *= hash * int32v( (int32_t)HashMultiplier::A );
         return FS::Convert<float>( hash );
     }
      
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index 87db80b8..f4022a97 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -21,11 +21,11 @@ constexpr static std::nullptr_t gMetadataVectorSize = nullptr; // Invalid
 // Setting these values avoids needless vector resizing and oversizing on startup
 // Sadly there is no way to automate this as they fill up as part of static init
 template<>
-constexpr size_t gMetadataVectorSize<const Metadata*> = 46;
+constexpr size_t gMetadataVectorSize<const Metadata*> = 44;
 template<>
-constexpr size_t gMetadataVectorSize<const char*> = 84;
+constexpr size_t gMetadataVectorSize<const char*> = 88;
 template<>
-constexpr size_t gMetadataVectorSize<Metadata::MemberVariable> = 71;
+constexpr size_t gMetadataVectorSize<Metadata::MemberVariable> = 68;
 template<>
 constexpr size_t gMetadataVectorSize<Metadata::MemberNodeLookup> = 30;
 template<>

From 04e3287849499380ac6caf54aac6d76ede2e8faf Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 14 Dec 2024 14:13:31 +0000
Subject: [PATCH 116/139] Remove unused variable

---
 include/FastNoise/Generators/DomainWarpSimplex.inl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/FastNoise/Generators/DomainWarpSimplex.inl b/include/FastNoise/Generators/DomainWarpSimplex.inl
index f7a0842d..121964e6 100644
--- a/include/FastNoise/Generators/DomainWarpSimplex.inl
+++ b/include/FastNoise/Generators/DomainWarpSimplex.inl
@@ -622,7 +622,6 @@ protected:
             float32v sign = FS::Masked( signMask, float32v( FS::Cast<float>( int32v( 1 << 31 ) ) ) );
             float32v offset0 = float32v( 2 * kReflectUnskew3 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotCommon( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed ), dxBase - offset0, dyBase, dzBase );
             float32v falloffBase = FS::Min( ( sign ^ dxBase ) - falloffBaseStemB, float32v( 0.0f ) );
 
             ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed ), dxBase - offset0, dyBase, dzBase,

From 19a8184777140801b889d213df7adcb2d631c21f Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 14 Dec 2024 14:36:54 +0000
Subject: [PATCH 117/139] update to upload-artifact@v4

---
 .github/workflows/benchmark.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 8a2f7219..b6cfc79d 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -43,7 +43,7 @@ jobs:
       run: cmake --build ${{ github.workspace }}/build --config Release --target FastNoiseBenchmark --parallel 4
     
     - name: 'Upload artifact'
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         name: ${{ matrix.name }}-benchmark-bin
         path: ${{ github.workspace }}/build/Release/bin/

From 57973f68208ca48c0a22da89ca448ab1d462db30 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sat, 14 Dec 2024 14:40:17 +0000
Subject: [PATCH 118/139] update benchmark

---
 .github/workflows/benchmark.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index b6cfc79d..360cbf6d 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -74,9 +74,3 @@ jobs:
         repository: Auburn/FastNoise2Benchmarking
         event-type: benchmark
         client-payload: '{"ref": "${{ github.ref }}", "sha": "${{ github.sha }}", "runid": "${{ github.run_id }}", "name": "${{ matrix.name }}", "msg": "${{ steps.message-format.outputs.value }}"}'
-
-  benchmarkbin-complete:
-    runs-on: ubuntu-latest
-    needs: benchmark-matrix
-    steps: 
-      - run: echo benchmarkbin-complete

From e257d4d7a07d5dd76c3ef7ded4547d53b2b81f11 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 15 Dec 2024 23:34:56 +0000
Subject: [PATCH 119/139] Don't zoom node editor when scrolling enums

---
 tools/NodeEditor/FastNoiseNodeEditor.cpp | 11 +++++++----
 tools/NodeEditor/util/ImGuiExtra.h       |  2 ++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/NodeEditor/FastNoiseNodeEditor.cpp b/tools/NodeEditor/FastNoiseNodeEditor.cpp
index a31db346..66e4be44 100644
--- a/tools/NodeEditor/FastNoiseNodeEditor.cpp
+++ b/tools/NodeEditor/FastNoiseNodeEditor.cpp
@@ -656,7 +656,7 @@ FastNoiseNodeEditor::FastNoiseNodeEditor( NodeEditorApp& nodeEditorApp ) :
         state = FastNoise::Metadata::DebugCheckVectorStorageSize( debugMetadataVectorCheckIdx++ );
         if( state.first > 0 )
         {
-            Error{} << "Non-optimal metadata vector, in FastNoise Metadata.cpp adjust gMetadataVectorSize " << state.second << " to: " << state.first;
+            Error{} << "Non-optimal metadata vector, in FastNoise Metadata.cpp adjust gMetadataVectorSize<" << state.second << "> to: " << state.first;
         }
 
     } while( state.second );
@@ -838,13 +838,16 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
 
         ImNodes::MiniMap( 0.2f, ImNodesMiniMapLocation_BottomLeft );
 
+        // Capture in the editor imgui context
+        float editorMouseWheel = ImGui::GetIO().MouseWheel;
+
         ImNodes::EndNodeEditor();
 
         // Zoom
-        if( ImNodes::IsEditorHovered() && ImGui::GetIO().MouseWheel != 0 )
+        if( ImNodes::IsEditorHovered() && editorMouseWheel != 0 )
         {
             float zoom = ImNodes::EditorContextGetZoom();
-            if( ImGui::GetIO().MouseWheel > 0 )
+            if( editorMouseWheel > 0 )
             {
                 zoom *= 1.5f;
                 if( zoom > 0.9f )
@@ -1111,7 +1114,7 @@ void FastNoiseNodeEditor::DoNodes()
         }
         ImGui::PopStyleVar();
 
-        ImGui::PushItemWidth( 60.0f );
+        ImGui::PushItemWidth( 90.0f );
 
         ImNodes::PushAttributeFlag( ImNodesAttributeFlags_EnableLinkCreationOnSnap );
         ImNodes::PushAttributeFlag( ImNodesAttributeFlags_EnableLinkDetachWithDragClick );
diff --git a/tools/NodeEditor/util/ImGuiExtra.h b/tools/NodeEditor/util/ImGuiExtra.h
index da3e41af..ca76bed7 100644
--- a/tools/NodeEditor/util/ImGuiExtra.h
+++ b/tools/NodeEditor/util/ImGuiExtra.h
@@ -34,12 +34,14 @@ namespace ImGuiExtra
             if( ImGui::GetIO().MouseWheel < 0 && *comboIndex < comboCount - 1 )
             {
                 (*comboIndex)++;
+                ImGui::GetIO().MouseWheel = 0;
                 return true;
             }
 
             if( ImGui::GetIO().MouseWheel > 0 && *comboIndex > 0 )
             {
                 (*comboIndex)--;
+                ImGui::GetIO().MouseWheel = 0;
                 return true;
             }
         }

From 9b0708552bc241e7572382a6c195b5addd69f04b Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 15 Dec 2024 23:35:40 +0000
Subject: [PATCH 120/139] More node descriptions, fancier wiki generator

---
 .../FastNoise/Generators/BasicGenerators.h    |  8 ++++--
 util/WikiGenerator/main.cpp                   | 27 ++++++++++++++-----
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 7004a92d..8a943823 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -81,6 +81,9 @@ namespace FastNoise
         {
             groups.push_back( "Basic Generators" );
             this->AddVariable( { "Value", "Constant output" }, 1.0f, &Constant::SetValue );
+
+            description =
+                "Outputs a constant value";
         }
     };
 #endif
@@ -123,7 +126,8 @@ namespace FastNoise
         {
             groups.push_back( "Basic Generators" );
             description =
-                "Outputs checkerboard pattern";
+                "Outputs a checkerboard pattern\n"
+                "Each checkerboard cell is \"Feature Scale\" sized in each dimension";
         }
     };
 #endif
@@ -184,7 +188,7 @@ namespace FastNoise
 
             description =
                 "Takes the input position and does the following per dimension\n"
-                "(input + offset) * multiplier\n"
+                "`(input + offset) * multiplier`\n"
                 "The output is the sum of all results";
         }
     };
diff --git a/util/WikiGenerator/main.cpp b/util/WikiGenerator/main.cpp
index 16cada6f..7711b7cd 100644
--- a/util/WikiGenerator/main.cpp
+++ b/util/WikiGenerator/main.cpp
@@ -5,6 +5,7 @@
 #include <fstream>
 #include <iostream>
 #include <sstream>
+#include <regex>
 #include <unordered_map>
 
 static constexpr int imageSizeX = 256;
@@ -136,12 +137,26 @@ bool CreateImage( const FastNoise::Metadata* metadata, const std::string& outDir
     return false;
 }
 
+std::string FormatDescription( const char* description )
+{
+    std::string formatted = description;
+    size_t pos = 0;
+    // Returns a copy of description with "<br/>" inserted immediately before every '\n'
+    while( (pos = formatted.find( '\n', pos )) != std::string::npos )
+    {
+        formatted.insert( pos, "<br/>" );
+        pos += 6; // Skip past the inserted "<br/>" (5 chars) and the '\n' that now follows it
+    }
+
+    return formatted;
+}
+
 void DoNode( std::stringstream& output, const FastNoise::Metadata* metadata, const std::string& outDir )
 {
     std::string nodeName = FastNoise::Metadata::FormatMetadataNodeName( metadata, false );
 
     output << "## " << nodeName << '\n';
-    output << metadata->description << "\n\n";
+    output << FormatDescription( metadata->description ) << "\n\n";
 
     if( CreateImage( metadata, outDir, nodeName ) )
     {
@@ -150,12 +165,12 @@ void DoNode( std::stringstream& output, const FastNoise::Metadata* metadata, con
 
     for( auto& node_lookup : metadata->memberNodeLookups )
     {
-        output << "### " << node_lookup.name << " - Node Lookup\n" << node_lookup.description << '\n';
+        output << "### " << node_lookup.name << " _- Node Lookup_\n" << FormatDescription( node_lookup.description ) << '\n';
     }
 
     for( auto& hybrid_lookup : metadata->memberHybrids )
     {
-        output << "### " << hybrid_lookup.name << " - Hybrid Lookup '= " << hybrid_lookup.valueDefault << "f`\n" << hybrid_lookup.description << '\n';
+        output << "### " << hybrid_lookup.name << " `= " << hybrid_lookup.valueDefault << "f` _- Hybrid Lookup_\n" << FormatDescription( hybrid_lookup.description ) << '\n';
     }
 
     for( auto& variable : metadata->memberVariables )
@@ -163,13 +178,13 @@ void DoNode( std::stringstream& output, const FastNoise::Metadata* metadata, con
         switch( variable.type )
         {
         case FastNoise::Metadata::MemberVariable::EFloat:
-            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.valueDefault.f << "f`\n" << variable.description << '\n';
+            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.valueDefault.f << "f`\n" << FormatDescription( variable.description ) << '\n';
             break;
         case FastNoise::Metadata::MemberVariable::EInt:
-            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.valueDefault.i << "`\n" << variable.description << '\n';
+            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.valueDefault.i << "`\n" << FormatDescription( variable.description ) << '\n';
             break;
         case FastNoise::Metadata::MemberVariable::EEnum:
-            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.enumNames[variable.valueDefault.i] << "`\n" << variable.description << '\n';
+            output << "### " << FastNoise::Metadata::FormatMetadataMemberName( variable ) << " `= " << variable.enumNames[variable.valueDefault.i] << "` _- Enum_\n" << FormatDescription( variable.description ) << '\n';
             for( size_t i = 0; i < variable.enumNames.size(); i++ )
             {
                 output << "* " << variable.enumNames[i] << (variable.valueDefault.i == i ? " (Default)\n" : "\n");

From 97fc7a05ffdee662e319f5b8037fbb74b79aa9d1 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 16 Dec 2024 00:16:21 +0000
Subject: [PATCH 121/139] Benchmark test previous gradient functions

---
 include/FastNoise/Generators/Simplex.inl | 56 ++++++++++++------------
 include/FastNoise/Generators/Utils.inl   | 38 +++++++++-------
 2 files changed, 50 insertions(+), 44 deletions(-)

diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index d35fc151..7489b236 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -80,9 +80,9 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         falloff1 *= falloff1; falloff1 *= falloff1;
         falloff2 *= falloff2; falloff2 *= falloff2;
 
-        float32v gradientRampValue0 = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase ), dx0, dy0 );
-        float32v gradientRampValue1 = GetGradientDotSimplex( HashPrimes( seed, FS::MaskedAdd( xGreaterEqualY, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( xGreaterEqualY, yPrimedBase, int32v( Primes::Y ) ) ), dx1, dy1 );
-        float32v gradientRampValue2 = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) ), dx2, dy2 );
+        float32v gradientRampValue0 = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimedBase ), dx0, dy0 );
+        float32v gradientRampValue1 = GetGradientDotPerlin( HashPrimes( seed, FS::MaskedAdd( xGreaterEqualY, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( xGreaterEqualY, yPrimedBase, int32v( Primes::Y ) ) ), dx1, dy1 );
+        float32v gradientRampValue2 = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) ), dx2, dy2 );
 
         constexpr double kBounding = 49.918426513671875;
 
@@ -283,23 +283,23 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         falloff3 *= falloff3; falloff3 *= falloff3;
         falloff4 *= falloff4; falloff4 *= falloff4;
 
-        float32v gradientRampValue0 = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimedBase ), dx0, dy0, dz0, dw0 );
-        float32v gradientRampValue1 = GetGradientDotSimplex( HashPrimes( seed,
+        float32v gradientRampValue0 = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimedBase ), dx0, dy0, dz0, dw0 );
+        float32v gradientRampValue1 = GetGradientDotPerlin( HashPrimes( seed,
             FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ),
             FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ),
             FS::MaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ),
             FS::MaskedAdd( maskW1, wPrimedBase, int32v( Primes::W ) ) ), dx1, dy1, dz1, dw1 );
-        float32v gradientRampValue2 = GetGradientDotSimplex( HashPrimes( seed,
+        float32v gradientRampValue2 = GetGradientDotPerlin( HashPrimes( seed,
             FS::MaskedAdd( maskX2, xPrimedBase, int32v( Primes::X ) ),
             FS::MaskedAdd( maskY2, yPrimedBase, int32v( Primes::Y ) ),
             FS::MaskedAdd( maskZ2, zPrimedBase, int32v( Primes::Z ) ),
             FS::MaskedAdd( maskW2, wPrimedBase, int32v( Primes::W ) ) ), dx2, dy2, dz2, dw2 );
-        float32v gradientRampValue3 = GetGradientDotSimplex( HashPrimes( seed,
+        float32v gradientRampValue3 = GetGradientDotPerlin( HashPrimes( seed,
             FS::MaskedAdd( maskX3, xPrimedBase, int32v( Primes::X ) ),
             FS::MaskedAdd( maskY3, yPrimedBase, int32v( Primes::Y ) ),
             FS::MaskedAdd( maskZ3, zPrimedBase, int32v( Primes::Z ) ),
             FS::MaskedAdd( maskW3, wPrimedBase, int32v( Primes::W ) ) ), dx3, dy3, dz3, dw3 );
-        float32v gradientRampValue4 = GetGradientDotSimplex( HashPrimes( seed,
+        float32v gradientRampValue4 = GetGradientDotPerlin( HashPrimes( seed,
             xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ), wPrimedBase + int32v( Primes::W ) ),
             dx4, dy4, dz4, dw4 );
 
@@ -342,7 +342,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         // Vertex <0, 0>
         {
             int32v hash = HashPrimes( seed, xPrimedBase, yPrimedBase );
-            float32v gradientRampValue = GetGradientDotSimplex( hash, dxBase, dyBase );
+            float32v gradientRampValue = GetGradientDotPerlin( hash, dxBase, dyBase );
             falloffBase0 = FS::FNMulAdd( dxBase, dxBase, FS::FNMulAdd( dyBase, dyBase, float32v( kFalloffRadiusSquared ) ) );
             float32v falloff = falloffBase0; falloff *= falloff; falloff *= falloff;
             value = falloff * gradientRampValue;
@@ -351,7 +351,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         // Vertex <1, 1>
         {
             int32v hash = HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) );
-            float32v gradientRampValue = GetGradientDotSimplex( hash, dxBase - float32v( 2 * kUnskew2 + 1 ), dyBase - float32v( 2 * kUnskew2 + 1 ) );
+            float32v gradientRampValue = GetGradientDotPerlin( hash, dxBase - float32v( 2 * kUnskew2 + 1 ), dyBase - float32v( 2 * kUnskew2 + 1 ) );
             float32v falloff = FS::FMulAdd( unskewDelta,
                 float32v( -4.0 * ( kRoot3 + 2.0 ) / ( kRoot3 + 3.0 ) ),
                 falloffBase0 - float32v( kFalloffRadiusSquared ) );
@@ -370,7 +370,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
                 FS::MaskedAdd( forwardXY, yPrimedBase, int32v( Primes::Y ) ) );
             float32v dx = dxBase - FS::Select( forwardX, float32v( 1 + 2 * kUnskew2 ), float32v( -1 ) );
             float32v dy = FS::MaskedSub( forwardX, dyBase, float32v( 2 * kUnskew2 ) );
-            float32v gradientRampValue = GetGradientDotSimplex( hash, dx, dy );
+            float32v gradientRampValue = GetGradientDotPerlin( hash, dx, dy );
             float32v falloff = FS::Max( FS::FNMulAdd( dx, dx, FS::FNMulAdd( dy, dy, float32v( kFalloffRadiusSquared ) ) ), float32v( 0 ) );
             falloff *= falloff; falloff *= falloff;
             value = FS::FMulAdd( falloff, gradientRampValue, value );
@@ -383,7 +383,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
                 FS::InvMaskedSub( forwardXY, FS::MaskedAdd( forwardY, yPrimedBase, int32v( (int32_t)( Primes::Y * 2LL ) ) ), int32v( Primes::Y ) ) );
             float32v dx = FS::MaskedSub( forwardY, dxBase, float32v( 2 * kUnskew2 ) );
             float32v dy = dyBase - FS::Select( forwardY, float32v( 1 + 2 * kUnskew2 ), float32v( -1 ) );
-            float32v gradientRampValue = GetGradientDotSimplex( hash, dx, dy );
+            float32v gradientRampValue = GetGradientDotPerlin( hash, dx, dy );
             float32v falloff = FS::Max( FS::FNMulAdd( dx, dx, FS::FNMulAdd( dy, dy, float32v( kFalloffRadiusSquared ) ) ), float32v( 0 ) );
             falloff *= falloff; falloff *= falloff;
             value = FS::FMulAdd( falloff, gradientRampValue, value );
@@ -699,7 +699,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         // Vertex <0, 0, 0, 0>
         float32v value, falloffBaseStemA, falloffBaseStemB;
         {
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimedBase ), dxBase, dyBase, dzBase, dwBase );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimedBase ), dxBase, dyBase, dzBase, dwBase );
             float32v falloffBase = FS::FNMulAdd( dwBase, dwBase, FS::FNMulAdd( dzBase, dzBase, FS::FNMulAdd( dyBase, dyBase, FS::FNMulAdd( dxBase, dxBase, float32v( kFalloffRadiusSquared ) ) ) ) ) * float32v( 0.5f );
             falloffBaseStemA = falloffBase - float32v( kDistanceSquaredA * 0.5 );
             falloffBaseStemB = falloffBase - float32v( kDistanceSquaredB * 0.5 );
@@ -718,7 +718,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
 
             float32v offset = float32v( 4 * kUnskew4 + 1 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimed, zPrimed, wPrimed ), dxBase - offset, dyBase - offset, dzBase - offset, dwBase - offset );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimed, yPrimed, zPrimed, wPrimed ), dxBase - offset, dyBase - offset, dzBase - offset, dwBase - offset );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset, coordinateSum, falloffBaseStemA ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -735,7 +735,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimed, zPrimed, wPrimedBase ), dxBase - offset1, dyBase - offset1, dzBase - offset1, dwBase - offset0 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimed, yPrimed, zPrimed, wPrimedBase ), dxBase - offset1, dyBase - offset1, dzBase - offset1, dwBase - offset0 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dwBase ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -752,7 +752,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase, wPrimed ), dxBase - offset1, dyBase - offset1, dzBase - offset0, dwBase - offset1 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase, wPrimed ), dxBase - offset1, dyBase - offset1, dzBase - offset0, dwBase - offset1 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dzBase ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -769,7 +769,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed, wPrimed ), dxBase - offset1, dyBase - offset0, dzBase - offset1, dwBase - offset1 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed, wPrimed ), dxBase - offset1, dyBase - offset0, dzBase - offset1, dwBase - offset1 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dyBase ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -786,7 +786,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 3 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 3 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed, wPrimed ), dxBase - offset0, dyBase - offset1, dzBase - offset1, dwBase - offset1 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed, wPrimed ), dxBase - offset0, dyBase - offset1, dzBase - offset1, dwBase - offset1 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset1, coordinateSum, falloffBaseStemB ) - ( sign ^ dxBase ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -801,7 +801,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase, wPrimedBase ), dxBase - offset1, dyBase - offset0, dzBase - offset0, dwBase - offset0 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase, wPrimedBase ), dxBase - offset1, dyBase - offset0, dzBase - offset0, dwBase - offset0 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dxBase ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -817,7 +817,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase, wPrimedBase ), dxBase - offset1, dyBase - offset1, dzBase - offset0, dwBase - offset0 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimed, yPrimed, zPrimedBase, wPrimedBase ), dxBase - offset1, dyBase - offset1, dzBase - offset0, dwBase - offset0 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dyBase ) ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -833,7 +833,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed, wPrimedBase ), dxBase - offset1, dyBase - offset0, dzBase - offset1, dwBase - offset0 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimed, yPrimedBase, zPrimed, wPrimedBase ), dxBase - offset1, dyBase - offset0, dzBase - offset1, dwBase - offset0 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dzBase ) ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -849,7 +849,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase, wPrimed ), dxBase - offset1, dyBase - offset0, dzBase - offset0, dwBase - offset1 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimed, yPrimedBase, zPrimedBase, wPrimed ), dxBase - offset1, dyBase - offset0, dzBase - offset0, dwBase - offset1 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dxBase + dwBase ) ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -864,7 +864,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase, wPrimedBase ), dxBase - offset0, dyBase - offset1, dzBase - offset0, dwBase - offset0 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase, wPrimedBase ), dxBase - offset0, dyBase - offset1, dzBase - offset0, dwBase - offset0 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dyBase ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -880,7 +880,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed, wPrimedBase ), dxBase - offset0, dyBase - offset1, dzBase - offset1, dwBase - offset0 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimed, zPrimed, wPrimedBase ), dxBase - offset0, dyBase - offset1, dzBase - offset1, dwBase - offset0 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dyBase + dzBase ) ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -896,7 +896,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase, wPrimed ), dxBase - offset0, dyBase - offset1, dzBase - offset0, dwBase - offset1 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimed, zPrimedBase, wPrimed ), dxBase - offset0, dyBase - offset1, dzBase - offset0, dwBase - offset1 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dyBase + dwBase ) ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -911,7 +911,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed, wPrimedBase ), dxBase - offset0, dyBase - offset0, dzBase - offset1, dwBase - offset0 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed, wPrimedBase ), dxBase - offset0, dyBase - offset0, dzBase - offset1, dwBase - offset0 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dzBase ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -927,7 +927,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( 2 * kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( 2 * kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed, wPrimed ), dxBase - offset0, dyBase - offset0, dzBase - offset1, dwBase - offset1 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimed, wPrimed ), dxBase - offset0, dyBase - offset0, dzBase - offset1, dwBase - offset1 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemB ) + ( sign ^ ( dzBase + dwBase ) ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
@@ -942,7 +942,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v offset1 = float32v( kUnskew4 + 1 ) ^ sign;
             float32v offset0 = float32v( kUnskew4 ) ^ sign;
 
-            float32v gradientRampValue = GetGradientDotSimplex( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimed ), dxBase - offset0, dyBase - offset0, dzBase - offset0, dwBase - offset1 );
+            float32v gradientRampValue = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase, wPrimed ), dxBase - offset0, dyBase - offset0, dzBase - offset0, dwBase - offset1 );
             float32v falloffBase = FS::Max( FS::FMulAdd( offset0, coordinateSum, falloffBaseStemA ) + ( sign ^ dwBase ), float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index f23a9215..f9f19545 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -105,36 +105,42 @@ namespace FastNoise
     }
 
     template<FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
-    FS_FORCEINLINE static float32v GetGradientDotCommon( int32v hash31, float32v fX, float32v fY, float32v fZ )
+    FS_FORCEINLINE static float32v GetGradientDotCommon( int32v hash, float32v fX, float32v fY, float32v fZ )
     {
-        int32v hashShifted = FS::BitShiftRightZeroExtend( hash31, 1 );
-        int32v index = FS::BitShiftRightZeroExtend( hashShifted * int32v( 12 >> 2 ), 28 ); // [0,12)
-
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );
-            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 0, 0, 0 ) );
-            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), index, FS::Constant<float>( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 0, 0, 0 ) );
+            float32v gX = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
+            float32v gY = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
+            float32v gZ = FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), hash, FS::Constant<float>( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );
 
-            return FS::FMulAdd( gZ, fZ, FS::FMulAdd( fY, gY, fX * gX ) );
+            return FS::FMulAdd( gX, fX, FS::FMulAdd( fY, gY, fZ * gZ ));
         }
         else
         {
-            float32v sign0 = FS::Cast<float>( index << 31 );
-            float32v sign1 = FS::Cast<float>( ( index >> 1 ) << 31 );
+            int32v hasha13 = hash & int32v( 13 );
 
-            float32v u;
+            // if h > 7 then y, else x
+            mask32v gt7;
             if constexpr( SIMD & FastSIMD::FeatureFlag::SSE41 )
             {
-                u = FS::SelectHighBit( index << ( 31 - 3 ), fY, fX );
+                gt7 = FS::Cast<FS::Mask<32>>( hash << 28 );
             }
             else
             {
-                u = FS::Select( index >= int32v( 8 ), fY, fX );
+                gt7 = hasha13 > int32v( 7 );
             }
-            float32v v = FS::Select( index >= int32v( 4 ), fZ, fY );
-
-            return ( u ^ sign0 ) + ( v ^ sign1 );
+            float32v u = FS::Select( gt7, fY, fX );
+
+            // if h < 4 then y else if h is 12 or 14 then x else z
+            float32v v = FS::Select( hasha13 == int32v( 12 ), fX, fZ );
+            v = FS::Select( hasha13 < int32v( 2 ), fY, v );
+
+            // if h1 then -u else u
+            // if h2 then -v else v
+            float32v h1 = FS::Cast<float>( hash << 31 );
+            float32v h2 = FS::Cast<float>( ( hash >> 1 ) << 31 );
+            // then add them
+            return ( u ^ h1 ) + ( v ^ h2 );
         }
     }
 

From 4dda0e2930d3de8e4698825dc62eca7273914230 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 17 Dec 2024 00:17:39 +0000
Subject: [PATCH 122/139] Small util function perf bumps

---
 include/FastNoise/Generators/Utils.inl | 38 +++++++++++++++++---------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index f9f19545..7968bfd7 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -57,7 +57,16 @@ namespace FastNoise
             float32v a = u * FS::SelectHighBit( index, float32v( 2 ), float32v( kRoot3f ) );
             float32v b = v ^ FS::Cast<float>( ( index >> 30 ) << 31 );
 
-            return FS::MaskedAdd( index >= int32v( 0 ), a, b ) ^ FS::Cast<float>( ( index >> 28 ) << 31 );
+            if constexpr( SIMD & FastSIMD::FeatureFlag::x86 )
+            {
+                auto indexNegativeMask = FS::Cast<FS::Mask<32, false>>( index >> 31 );
+
+                return FS::InvMaskedAdd( indexNegativeMask, a, b ) ^ FS::Cast<float>( ( index >> 28 ) << 31 );
+            }
+            else
+            {
+                return FS::MaskedAdd( index >= int32v( 0 ), a, b ) ^ FS::Cast<float>( ( index >> 28 ) << 31 );
+            }
         }
     }
 
@@ -454,8 +463,11 @@ namespace FastNoise
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-            //indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC( _mm512_rol_epi32 ), indexFacetBasisWithPermute2, 2 );
+#if defined( _MSC_VER ) && !defined( __clang__ )
+            indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC( _mm512_rol_epi32 ), indexFacetBasisWithPermute2, std::integral_constant<int, 2>() );
+#else
             indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC( _mm512_rolv_epi32 ), indexFacetBasisWithPermute2, int32v( 2 ) );
+#endif
 
             const auto tableA_gX = FS::Constant<float>( kComponentA, kComponentA, kComponentC, kComponentC, -kComponentA, -kComponentA, kComponentC, kComponentC, kComponentA, kComponentA, kComponentC, kComponentC, -kComponentA, -kComponentA, kComponentC, kComponentC );
             const auto tableA_gY = FS::Constant<float>( kComponentC, kComponentB, kComponentA, kComponentA, kComponentC, kComponentB, -kComponentA, -kComponentA, kComponentC, -kComponentB, kComponentA, kComponentA, kComponentC, -kComponentB, -kComponentA, -kComponentA );
@@ -489,22 +501,22 @@ namespace FastNoise
             float32v sign0 = FS::Cast<float>( indexFacetBasisWithPermute2 << 31 );
             float32v sign1 = FS::Cast<float>( ( indexFacetBasisWithPermute2 << 30 ) & int32v( 1 << 31 ) );
 
-            auto notYZ = indexFacetBasisWithPermute2 >= int32v( 0 );
-            auto notXY = ( indexFacetBasisWithPermute2 << 1 ) >= int32v( 0 );
+            auto notYZ = indexFacetBasisWithPermute2;
+            auto notXY = indexFacetBasisWithPermute2 << 1;
 
-            float32v valueA_gX = FS::Select( notYZ, float32v( kComponentA ) ^ sign0, float32v( kComponentC ) );
-            float32v valueA_gY = FS::Select( notYZ & notXY, float32v( kComponentC ), FS::Select( notXY, float32v( kComponentA ) ^ sign0, float32v( kComponentB ) ^ sign1 ) );
-            float32v valueA_gZ = FS::Select( notXY, float32v( kComponentB ) ^ sign1, float32v( kComponentC ) );
+            float32v valueA_gX = FS::SelectHighBit( notYZ, float32v( kComponentC ), float32v( kComponentA ) ^ sign0 );
+            float32v valueA_gY = FS::SelectHighBit( notYZ | notXY, FS::SelectHighBit( notXY, float32v( kComponentB ) ^ sign1, float32v( kComponentA ) ^ sign0 ), float32v( kComponentC ) );
+            float32v valueA_gZ = FS::SelectHighBit( notXY, float32v( kComponentC ), float32v( kComponentB ) ^ sign1 );
             float32v valueA = FS::FMulAdd( valueA_gZ, fZ, FS::FMulAdd( fY, valueA_gY, fX * valueA_gX ) );
 
-            float32v valueB_gX = FS::Select( notYZ, float32v( kComponentB ) ^ sign0, float32v( kComponentC ) );
-            float32v valueB_gY = FS::Select( notYZ & notXY, float32v( kComponentC ), FS::Select( notXY, float32v( kComponentB ) ^ sign0, float32v( kComponentA ) ^ sign1 ) );
-            float32v valueB_gZ = FS::Select( notXY, float32v( kComponentA ) ^ sign1, float32v( kComponentC ) );
+            float32v valueB_gX = FS::SelectHighBit( notYZ, float32v( kComponentC ), float32v( kComponentB ) ^ sign0 );
+            float32v valueB_gY = FS::SelectHighBit( notYZ | notXY, FS::SelectHighBit( notXY, float32v( kComponentA ) ^ sign1, float32v( kComponentB ) ^ sign0 ), float32v( kComponentC ) );
+            float32v valueB_gZ = FS::SelectHighBit( notXY, float32v( kComponentC ), float32v( kComponentA ) ^ sign1 );
             float32v valueB = FS::FMulAdd( valueB_gZ, fZ, FS::FMulAdd( fY, valueB_gY, fX * valueB_gX ) );
 
-            float32v valueC_gX = FS::Select( notYZ, float32v( kComponentsDE ) ^ sign0, float32v( kComponentF ) );
-            float32v valueC_gY = FS::Select( notYZ & notXY, float32v( kComponentF ), FS::Select( notXY, float32v( kComponentsDE ) ^ sign0, float32v( kComponentsDE ) ^ sign1 ) );
-            float32v valueC_gZ = FS::Select( notXY, float32v( kComponentsDE ) ^ sign1, float32v( kComponentF ) );
+            float32v valueC_gX = FS::SelectHighBit( notYZ, float32v( kComponentF ), float32v( kComponentsDE ) ^ sign0 );
+            float32v valueC_gY = FS::SelectHighBit( notYZ | notXY, FS::SelectHighBit( notXY, float32v( kComponentsDE ) ^ sign1, float32v( kComponentsDE ) ^ sign0 ), float32v( kComponentF ) );
+            float32v valueC_gZ = FS::SelectHighBit( notXY, float32v( kComponentF ), float32v( kComponentsDE ) ^ sign1 );
             valueC = FS::FMulAdd( valueC_gZ, fZ, FS::FMulAdd( fY, valueC_gY, fX * valueC_gX ) );
 
             valueAB = FS::SelectHighBit( indexPermutation2HighBit, valueB, valueA );

From 24f89770bbc48da9a75947a16846437f9e326523 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 17 Dec 2024 00:57:07 +0000
Subject: [PATCH 123/139] Small bump for OrthogonalGradient 3D

---
 include/FastNoise/Generators/Utils.inl | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index 7968bfd7..a7e10f35 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -463,11 +463,7 @@ namespace FastNoise
 
         if constexpr( SIMD & FastSIMD::FeatureFlag::AVX512_F )
         {
-#if defined( _MSC_VER ) && !defined( __clang__ )
-            indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC( _mm512_rol_epi32 ), indexFacetBasisWithPermute2, std::integral_constant<int, 2>() );
-#else
-            indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( FS_BIND_INTRINSIC( _mm512_rolv_epi32 ), indexFacetBasisWithPermute2, int32v( 2 ) );
-#endif
+            indexFacetBasisWithPermute2 = FS::NativeExec<int32v>( []( auto a ){ return _mm512_rol_epi32( a, 2 ); }, indexFacetBasisWithPermute2 );
 
             const auto tableA_gX = FS::Constant<float>( kComponentA, kComponentA, kComponentC, kComponentC, -kComponentA, -kComponentA, kComponentC, kComponentC, kComponentA, kComponentA, kComponentC, kComponentC, -kComponentA, -kComponentA, kComponentC, kComponentC );
             const auto tableA_gY = FS::Constant<float>( kComponentC, kComponentB, kComponentA, kComponentA, kComponentC, kComponentB, -kComponentA, -kComponentA, kComponentC, -kComponentB, kComponentA, kComponentA, kComponentC, -kComponentB, -kComponentA, -kComponentA );

From 619739c7d55ad0cee49d4b5a6448de28bc930167 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Tue, 24 Dec 2024 16:01:15 +0000
Subject: [PATCH 124/139] Simplex 3D inv masked perf bump

---
 include/FastNoise/Generators/Simplex.inl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index 7489b236..de3fe709 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -125,10 +125,10 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
 
         mask32v maskX1 = xGreaterEqualY & xGreaterEqualZ;
         mask32v maskY1 = FS::BitwiseAndNot( yGreaterEqualZ, xGreaterEqualY );
-        mask32v maskZ1 = FS::BitwiseAndNot( ~xGreaterEqualZ, yGreaterEqualZ );
+        mask32v maskZ1 = xGreaterEqualZ | yGreaterEqualZ; // Inv masked
 
-        mask32v nMaskX2 = ~( xGreaterEqualY | xGreaterEqualZ );
-        mask32v nMaskY2 = xGreaterEqualY & ~yGreaterEqualZ;
+        mask32v nMaskX2 = xGreaterEqualY | xGreaterEqualZ; // Inv masked
+        mask32v nMaskY2 = FS::BitwiseAndNot( xGreaterEqualY, yGreaterEqualZ );
         mask32v nMaskZ2 = xGreaterEqualZ & yGreaterEqualZ;
 
         float32v dx3 = dx0 - float32v( kReflectUnskew3 * 3 + 1 );
@@ -136,8 +136,8 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         float32v dz3 = dz0 - float32v( kReflectUnskew3 * 3 + 1 );
         float32v dx1 = FS::MaskedSub( maskX1, dx3, float32v( 1 ) ); // kReflectUnskew3 * 3 + 1 = kReflectUnskew3, so dx0 - kReflectUnskew3 = dx3
         float32v dy1 = FS::MaskedSub( maskY1, dy3, float32v( 1 ) );
-        float32v dz1 = FS::MaskedSub( maskZ1, dz3, float32v( 1 ) );
-        float32v dx2 = FS::MaskedIncrement( nMaskX2, dx0 ); // kReflectUnskew3 * 2 - 1 = 0, so dx0 + ( kReflectUnskew3 * 2 - 1 ) = dx0
+        float32v dz1 = FS::InvMaskedSub( maskZ1, dz3, float32v( 1 ) );
+        float32v dx2 = FS::MaskedIncrement( ~nMaskX2, dx0 ); // kReflectUnskew3 * 2 - 1 = 0, so dx0 + ( kReflectUnskew3 * 2 - 1 ) = dx0
         float32v dy2 = FS::MaskedIncrement( nMaskY2, dy0 );
         float32v dz2 = FS::MaskedIncrement( nMaskZ2, dz0 );
 
@@ -157,8 +157,8 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         falloff3 *= falloff3; falloff3 *= falloff3;
 
         float32v gradientRampValue0 = GetGradientDotCommon( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase ), dx0, dy0, dz0 );
-        float32v gradientRampValue1 = GetGradientDotCommon( HashPrimes( seed, FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ), FS::MaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ) ), dx1, dy1, dz1 );
-        float32v gradientRampValue2 = GetGradientDotCommon( HashPrimes( seed, FS::InvMaskedAdd( nMaskX2, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( nMaskY2, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( nMaskZ2, zPrimedBase, int32v( Primes::Z ) ) ), dx2, dy2, dz2 );
+        float32v gradientRampValue1 = GetGradientDotCommon( HashPrimes( seed, FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ) ), dx1, dy1, dz1 );
+        float32v gradientRampValue2 = GetGradientDotCommon( HashPrimes( seed, FS::MaskedAdd( nMaskX2, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( nMaskY2, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( nMaskZ2, zPrimedBase, int32v( Primes::Z ) ) ), dx2, dy2, dz2 );
         float32v gradientRampValue3 = GetGradientDotCommon( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ) ), dx3, dy3, dz3 );
 
         constexpr double kBounding = 32.69428253173828125;

From 6792ff99e727330c24de084fc251884990e63eee Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Tue, 24 Dec 2024 23:16:36 +0000
Subject: [PATCH 125/139] SimplexSmooth own node type

---
 include/FastNoise/Generators/Simplex.h   | 32 ++++++++++----
 include/FastNoise/Generators/Simplex.inl | 55 +++++++-----------------
 src/FastNoise/FastSIMD_Build.inl         |  2 +
 3 files changed, 40 insertions(+), 49 deletions(-)

diff --git a/include/FastNoise/Generators/Simplex.h b/include/FastNoise/Generators/Simplex.h
index f56949bf..ac7fd46d 100644
--- a/include/FastNoise/Generators/Simplex.h
+++ b/include/FastNoise/Generators/Simplex.h
@@ -6,11 +6,7 @@ namespace FastNoise
     class Simplex : public virtual VariableRange<ScalableGenerator>
     {
     public:
-        void SetType( SimplexType value ) { mType = value; }
         const Metadata& GetMetadata() const override;
-
-    protected:
-        SimplexType mType = SimplexType::Standard;
     };
 
 #ifdef FASTNOISE_METADATA
@@ -26,12 +22,30 @@ namespace FastNoise
             description = 
                 "Smooth gradient noise from an N dimensional simplex grid\n"
                 "Developed by Ken Perlin in 2001";
+        }
+    };
+#endif
+
+    class SimplexSmooth : public virtual VariableRange<ScalableGenerator>
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<SimplexSmooth> : MetadataT<VariableRange<ScalableGenerator>>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+            groups.push_back( "Coherent Noise" );
 
-            this->AddVariableEnum(
-                { "Type", "Noise character style" },
-                SimplexType::Standard, &Simplex::SetType,
-                kSimplexType_Strings
-            );
+            description =
+                "Extra smooth gradient noise from an N dimensional simplex grid\n"
+                "Slower to generate than Simplex noise\n"
+                "Developed by K.jpg";
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index de3fe709..96fea1f1 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -4,37 +4,7 @@
 template<FastSIMD::FeatureSet SIMD>
 class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual FastNoise::Simplex, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
-    {
-        switch( mType ) {
-        case SimplexType::Standard:
-            return Gen_Standard( seed, x, y );
-        case SimplexType::Smooth:
-            return Gen_Smooth( seed, x, y );
-        }
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
-    {
-        switch( mType ) {
-        case SimplexType::Standard:
-            return Gen_Standard( seed, x, y, z );
-        case SimplexType::Smooth:
-            return Gen_Smooth( seed, x, y, z );
-        }
-    }
-
-    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
-    {
-        switch( mType ) {
-        case SimplexType::Standard:
-            return Gen_Standard( seed, x, y, z, w );
-        case SimplexType::Smooth:
-            return Gen_Smooth( seed, x, y, z, w );
-        }
-    }
-
-    float32v FS_VECTORCALL Gen_Standard( int32v seed, float32v x, float32v y ) const
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
         this->ScalePositions( x, y );
 
@@ -90,7 +60,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen_Standard( int32v seed, float32v x, float32v y, float32v z ) const
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
         this->ScalePositions( x, y, z );
 
@@ -167,7 +137,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen_Standard( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
         this->ScalePositions( x, y, z, w );
 
@@ -309,7 +279,12 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen_Smooth( int32v seed, float32v x, float32v y ) const
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::SimplexSmooth, SIMD> final : public virtual FastNoise::SimplexSmooth, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
+{
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
         this->ScalePositions( x, y );
 
@@ -394,7 +369,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         return this->ScaleOutput( value, -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen_Smooth( int32v seed, float32v x, float32v y, float32v z ) const
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const
     {
         this->ScalePositions( x, y, z );
 
@@ -545,7 +520,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             float32v falloffBase = FS::Min( ( sign ^ dxBase ) - falloffBaseStemB, float32v( 0.0f ) );
             value = FS::FMulAdd( ( falloffBase * falloffBase ) * ( falloffBase * falloffBase ), gradientRampValue, value );
         }
-        
+
         // Vertex <1, 0, 0> or <-1, 0, 0>
         {
             mask32v signMask = xNormal < float32v( 0 );
@@ -593,10 +568,10 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         return this->ScaleOutput( value, -1 / kBounding, 1 / kBounding );
     }
 
-    float32v FS_VECTORCALL Gen_Smooth( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
+    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const
     {
         this->ScalePositions( x, y, z, w );
-        
+
         constexpr double kRoot5 = 2.2360679774997896964091736687313;
         constexpr double kSkew4 = 1.0 / ( kRoot5 + 1.0 );
         constexpr double kUnskew4 = -1.0 / ( kRoot5 + 5.0 );
@@ -658,7 +633,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
             maxScore -= wNormal;
             considerVertex( maxScore, moveMaskBits, yNormal, 0b1010 );
             considerVertex( maxScore, moveMaskBits, zNormal, 0b1100 );
-            
+
             mask32v moveX = ( moveMaskBits & int32v( 0b0001 ) ) != int32v( 0 );
             mask32v moveY = ( moveMaskBits & int32v( 0b0010 ) ) != int32v( 0 );
             mask32v moveZ = ( moveMaskBits & int32v( 0b0100 ) ) != int32v( 0 );
@@ -679,7 +654,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         int32v yPrimedBase = FS::Convert<int32_t>( ySkewedBase ) * int32v( Primes::Y );
         int32v zPrimedBase = FS::Convert<int32_t>( zSkewedBase ) * int32v( Primes::Z );
         int32v wPrimedBase = FS::Convert<int32_t>( wSkewedBase ) * int32v( Primes::W );
-        
+
         float32v skewedCoordinateSum = dxSkewed + dySkewed + dzSkewed + dwSkewed;
         float32v twiceUnskewDelta = float32v( kTwiceUnskew4 ) * skewedCoordinateSum;
         float32v xNormal = dxSkewed + twiceUnskewDelta;
diff --git a/src/FastNoise/FastSIMD_Build.inl b/src/FastNoise/FastSIMD_Build.inl
index 26a477f9..feca4a79 100644
--- a/src/FastNoise/FastSIMD_Build.inl
+++ b/src/FastNoise/FastSIMD_Build.inl
@@ -91,6 +91,7 @@ FASTNOISE_REGISTER_NODE( PositionOutput );
 FASTNOISE_REGISTER_NODE( DistanceToPoint );
 
 FASTNOISE_REGISTER_NODE( Simplex );
+FASTNOISE_REGISTER_NODE( SimplexSmooth );
 FASTNOISE_REGISTER_NODE( Perlin );
 FASTNOISE_REGISTER_NODE( Value );
                        
@@ -103,6 +104,7 @@ FASTNOISE_REGISTER_NODE( FractalPingPong );
 FASTNOISE_REGISTER_NODE( FractalRidged );
 
 FASTNOISE_REGISTER_NODE( DomainWarpSimplex );
+//FASTNOISE_REGISTER_NODE( DomainWarpSimplexSmooth );
 FASTNOISE_REGISTER_NODE( DomainWarpGradient );
 
 FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );

From d25e043341583b0b551236f23d888f9d0dffe389 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Thu, 26 Dec 2024 21:25:25 +0000
Subject: [PATCH 126/139] Move node editor context menus to main imgui context
 to avoid scaling

---
 tools/NodeEditor/FastNoiseNodeEditor.cpp | 26 ++++++++++++++++--------
 tools/NodeEditor/FastNoiseNodeEditor.h   |  1 +
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/tools/NodeEditor/FastNoiseNodeEditor.cpp b/tools/NodeEditor/FastNoiseNodeEditor.cpp
index 66e4be44..c7fef631 100644
--- a/tools/NodeEditor/FastNoiseNodeEditor.cpp
+++ b/tools/NodeEditor/FastNoiseNodeEditor.cpp
@@ -199,8 +199,6 @@ void FastNoiseNodeEditor::Node::GeneratePreview( bool nodeTreeChanged, bool benc
         genRGB->SetSource( scale );
         scale->SetSource( generator );
         scale->SetScaling( editor.mNodeScale );
-
-        FastNoise::SmartNode<FastNoise::ConvertRGBA8> l(nullptr);
         
         auto startTime = std::chrono::high_resolution_clock::now();
 
@@ -335,7 +333,7 @@ bool FastNoiseNodeEditor::MetadataMenuItem::CanDraw( std::function<bool( const F
 
 const FastNoise::Metadata* FastNoiseNodeEditor::MetadataMenuItem::DrawUI( std::function<bool( const FastNoise::Metadata* )> isValid, bool drawGroups ) const
 {
-    std::string format = FastNoise::Metadata::FormatMetadataNodeName( metadata, true );
+    std::string format = FastNoise::Metadata::FormatMetadataNodeName( metadata, drawGroups );
     
     if( ImGui::MenuItem( format.c_str() ) )
     {
@@ -589,6 +587,7 @@ void FastNoiseNodeEditor::SetupSettingsHandlers()
 
 FastNoiseNodeEditor::FastNoiseNodeEditor( NodeEditorApp& nodeEditorApp ) :
     mNodeEditorApp( nodeEditorApp ),
+    mMainContext( ImGui::GetCurrentContext() ),
     mOverheadNode( *this, new FastNoise::NodeData( &FastNoise::Metadata::Get<FastNoise::Constant>() ), false )
 {
     if( !mNodeEditorApp.IsDetachedNodeGraph() )
@@ -830,9 +829,10 @@ void FastNoiseNodeEditor::Draw( const Matrix4& transformation, const Matrix4& pr
             OpenStandaloneNodeGraph();
         }
 
+        ImGui::SetCurrentContext( mMainContext );
         DoHelp();
-
         DoContextMenu();
+        ImGui::SetCurrentContext( ImNodes::GetNodeEditorImGuiContext() );
 
         DoNodes();
 
@@ -1049,9 +1049,17 @@ void FastNoiseNodeEditor::DoNodes()
 
         ImNodes::EndNodeTitleBar();
 
+        if( ImGui::IsMouseReleased( ImGuiMouseButton_Right ) && ImGui::IsItemHovered( ImGuiHoveredFlags_AllowWhenBlockedByPopup ) )
+        {
+            ImGui::SetCurrentContext( mMainContext );
+            ImGui::OpenPopup( "node_title" );
+        }
+
+        ImGui::SetCurrentContext( mMainContext );
         // Right click node title to change node type
         ImGui::PushStyleVar( ImGuiStyleVar_WindowPadding, ImVec2( 4, 4 ) );
-        if( ImGui::BeginPopupContextItem() )
+
+        if( ImGui::BeginPopup( "node_title" ) )
         {
             if( ImGui::MenuItem( "Copy Encoded Node Tree" ) )
             {
@@ -1074,7 +1082,7 @@ void FastNoiseNodeEditor::DoNodes()
                     MatchingMembers( newMetadata->memberNodeLookups, nodeMetadata->memberNodeLookups ) &&
                     MatchingMembers( newMetadata->memberHybrids, nodeMetadata->memberHybrids ) )
                 {
-                    nodeMetadata = newMetadata;                    
+                    nodeMetadata = newMetadata;
                 }
                 else
                 {
@@ -1104,7 +1112,7 @@ void FastNoiseNodeEditor::DoNodes()
                         links.pop();
                     }
 
-                    *node.second.data = std::move( newData );                  
+                    *node.second.data = std::move( newData );
                 }
 
                 node.second.GeneratePreview();
@@ -1114,6 +1122,8 @@ void FastNoiseNodeEditor::DoNodes()
         }
         ImGui::PopStyleVar();
 
+        ImGui::SetCurrentContext( ImNodes::GetNodeEditorImGuiContext() );
+
         ImGui::PushItemWidth( 90.0f );
 
         ImNodes::PushAttributeFlag( ImNodesAttributeFlags_EnableLinkCreationOnSnap );
@@ -1286,7 +1296,7 @@ void FastNoiseNodeEditor::DoHelp()
     ImGui::Text( " Help" );
     if( ImGui::IsItemHovered() )
     {
-        ImGui::PushStyleVar( ImGuiStyleVar_WindowPadding, ImVec2( 4.f, 4.f ) );
+        ImGui::PushStyleVar( ImGuiStyleVar_WindowPadding, ImVec2( 6.f, 6.f ) );
         ImGui::BeginTooltip();
         constexpr float alignPx = 110;
 
diff --git a/tools/NodeEditor/FastNoiseNodeEditor.h b/tools/NodeEditor/FastNoiseNodeEditor.h
index ed2db70e..cbb4b0b7 100644
--- a/tools/NodeEditor/FastNoiseNodeEditor.h
+++ b/tools/NodeEditor/FastNoiseNodeEditor.h
@@ -126,6 +126,7 @@ namespace Magnum
         void UpdateSelected();
 
         NodeEditorApp& mNodeEditorApp;
+        ImGuiContext* mMainContext;
 
         std::unordered_map<FastNoise::NodeData*, Node> mNodes;
         FastNoise::NodeData* mDroppedLinkNode = nullptr;

From 16dd2effc402641fa4503c5b7b20582372d96ed7 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Thu, 26 Dec 2024 22:58:57 +0000
Subject: [PATCH 127/139] Node editor fix node title context menus

---
 tools/NodeEditor/FastNoiseNodeEditor.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/NodeEditor/FastNoiseNodeEditor.cpp b/tools/NodeEditor/FastNoiseNodeEditor.cpp
index c7fef631..86065919 100644
--- a/tools/NodeEditor/FastNoiseNodeEditor.cpp
+++ b/tools/NodeEditor/FastNoiseNodeEditor.cpp
@@ -1048,18 +1048,19 @@ void FastNoiseNodeEditor::DoNodes()
         }
 
         ImNodes::EndNodeTitleBar();
+        ImGuiID popupId = ImGui::GetItemID();
 
         if( ImGui::IsMouseReleased( ImGuiMouseButton_Right ) && ImGui::IsItemHovered( ImGuiHoveredFlags_AllowWhenBlockedByPopup ) )
         {
             ImGui::SetCurrentContext( mMainContext );
-            ImGui::OpenPopup( "node_title" );
+            ImGui::OpenPopup( popupId );
         }
 
         ImGui::SetCurrentContext( mMainContext );
         // Right click node title to change node type
         ImGui::PushStyleVar( ImGuiStyleVar_WindowPadding, ImVec2( 4, 4 ) );
 
-        if( ImGui::BeginPopup( "node_title" ) )
+        if( ImGui::BeginPopupEx( popupId, ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings ) )
         {
             if( ImGui::MenuItem( "Copy Encoded Node Tree" ) )
             {

From ef2a6044b780b8ed4ec915be75a8ed3ad506cbc4 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 26 Feb 2025 00:06:53 +0000
Subject: [PATCH 128/139] Fix Fractal PingPong math, now matches FastNoiseLite

---
 include/FastNoise/Generators/Fractal.h   |  2 +-
 include/FastNoise/Generators/Fractal.inl | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/FastNoise/Generators/Fractal.h b/include/FastNoise/Generators/Fractal.h
index 6d767491..8e6e8534 100644
--- a/include/FastNoise/Generators/Fractal.h
+++ b/include/FastNoise/Generators/Fractal.h
@@ -94,7 +94,7 @@ namespace FastNoise
         void SetPingPongStrength( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPingPongStrength, gen ); }
 
     protected:
-        HybridSource mPingPongStrength = 0.0f;
+        HybridSource mPingPongStrength = 2.0f;
     };
 
 #ifdef FASTNOISE_METADATA
diff --git a/include/FastNoise/Generators/Fractal.inl b/include/FastNoise/Generators/Fractal.inl
index a877a900..35b991f3 100644
--- a/include/FastNoise/Generators/Fractal.inl
+++ b/include/FastNoise/Generators/Fractal.inl
@@ -73,30 +73,30 @@ class FastSIMD::DispatchClass<FastNoise::FractalPingPong, SIMD> final : public v
 
     static float32v PingPong( float32v t )
     {
-        t -= FS::Round( t * float32v( 0.5f ) ) * float32v( 2 );
+        t -= FS::Floor( t * float32v( 0.5f ) ) * float32v( 2 );
         return FS::Select( t < float32v( 1 ), t, float32v( 2 ) - t );
     }
 
     template<typename... P>
     FS_FORCEINLINE float32v GenT( int32v seed, P... pos ) const
     {
-        float32v gain = this->GetSourceValue( mGain  , seed, pos... );
+        float32v gain = this->GetSourceValue( mGain, seed, pos... );
         float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
         float32v pingPongStrength = this->GetSourceValue( mPingPongStrength, seed, pos... );
         float32v lacunarity( mLacunarity );
         float32v amp( mFractalBounding );
         float32v noise = PingPong( (this->GetSourceValue( mSource, seed, pos... ) + float32v( 1 )) * pingPongStrength );
 
-        float32v sum = noise * amp;
+        float32v sum = (noise - float32v( 0.5f )) * float32v( 2 ) * amp;
 
         for( int i = 1; i < mOctaves; i++ )
         {
             seed -= int32v( -1 );
-            amp *= Lerp( float32v( 1 ), (noise + float32v( 1 )) * float32v( 0.5f ), weightedStrength );
+            amp *= Lerp( float32v( 1 ), noise, weightedStrength );
             amp *= gain;
 
             noise = PingPong( (this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) + float32v( 1 )) * pingPongStrength );
-            sum += noise * amp;
+            sum += (noise - float32v( 0.5f )) * float32v( 2 ) * amp;
         }
 
         return sum;

From c6abac283a35fdc8c29e30313c8cd1276298ea60 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Wed, 26 Feb 2025 22:45:50 +0000
Subject: [PATCH 129/139] Fix deprecated action in CI

---
 .github/workflows/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f4db4b15..3a490188 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -66,9 +66,9 @@ jobs:
 
     - if: runner.os != 'Windows'
       run: chmod +x ${{ github.workspace }}/install/FastNoise2/bin/NodeEditor
-    
+      
     - name: 'Upload artifact'
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         name: ${{ matrix.name }}
         path: ${{ github.workspace }}/install/

From 6fd5b43845984bd6b77956af928df6397726c677 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Sun, 2 Mar 2025 11:12:36 +0000
Subject: [PATCH 130/139] Fractal PingPong, simplified math

---
 include/FastNoise/Generators/Fractal.inl | 10 +++++-----
 src/FastNoise/Metadata.cpp               |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/FastNoise/Generators/Fractal.inl b/include/FastNoise/Generators/Fractal.inl
index 35b991f3..3dc10a68 100644
--- a/include/FastNoise/Generators/Fractal.inl
+++ b/include/FastNoise/Generators/Fractal.inl
@@ -84,10 +84,10 @@ class FastSIMD::DispatchClass<FastNoise::FractalPingPong, SIMD> final : public v
         float32v weightedStrength = this->GetSourceValue( mWeightedStrength, seed, pos... );
         float32v pingPongStrength = this->GetSourceValue( mPingPongStrength, seed, pos... );
         float32v lacunarity( mLacunarity );
-        float32v amp( mFractalBounding );
-        float32v noise = PingPong( (this->GetSourceValue( mSource, seed, pos... ) + float32v( 1 )) * pingPongStrength );
+        float32v amp( mFractalBounding * 2 );
+        float32v noise = PingPong( this->GetSourceValue( mSource, seed, pos... ) * pingPongStrength );
 
-        float32v sum = (noise - float32v( 0.5f )) * float32v( 2 ) * amp;
+        float32v sum = (noise - float32v( 0.5f )) * amp;
 
         for( int i = 1; i < mOctaves; i++ )
         {
@@ -95,8 +95,8 @@ class FastSIMD::DispatchClass<FastNoise::FractalPingPong, SIMD> final : public v
             amp *= Lerp( float32v( 1 ), noise, weightedStrength );
             amp *= gain;
 
-            noise = PingPong( (this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) + float32v( 1 )) * pingPongStrength );
-            sum += (noise - float32v( 0.5f )) * float32v( 2 ) * amp;
+            noise = PingPong( (this->GetSourceValue( mSource, seed, (pos *= lacunarity)... )) * pingPongStrength );
+            sum += (noise - float32v( 0.5f )) * amp;
         }
 
         return sum;
diff --git a/src/FastNoise/Metadata.cpp b/src/FastNoise/Metadata.cpp
index f4022a97..ff50993b 100644
--- a/src/FastNoise/Metadata.cpp
+++ b/src/FastNoise/Metadata.cpp
@@ -21,11 +21,11 @@ constexpr static std::nullptr_t gMetadataVectorSize = nullptr; // Invalid
 // Setting these values avoids needless vector resizing and oversizing on startup
 // Sadly there is no way to automate this as they fill up as part of static init
 template<>
-constexpr size_t gMetadataVectorSize<const Metadata*> = 44;
+constexpr size_t gMetadataVectorSize<const Metadata*> = 45;
 template<>
-constexpr size_t gMetadataVectorSize<const char*> = 88;
+constexpr size_t gMetadataVectorSize<const char*> = 87;
 template<>
-constexpr size_t gMetadataVectorSize<Metadata::MemberVariable> = 68;
+constexpr size_t gMetadataVectorSize<Metadata::MemberVariable> = 70;
 template<>
 constexpr size_t gMetadataVectorSize<Metadata::MemberNodeLookup> = 30;
 template<>

From ec3e7b8ae52abb6e81b4eae7e865baada8b36186 Mon Sep 17 00:00:00 2001
From: Jordan Peck <jordan.me2@gmail.com>
Date: Mon, 24 Mar 2025 21:30:54 +0000
Subject: [PATCH 131/139] Fixup: check FeatureFlag::AVX instead of AVX2 in Utils.inl

---
 include/FastNoise/Generators/Utils.inl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/FastNoise/Generators/Utils.inl b/include/FastNoise/Generators/Utils.inl
index a7e10f35..650daee7 100644
--- a/include/FastNoise/Generators/Utils.inl
+++ b/include/FastNoise/Generators/Utils.inl
@@ -233,7 +233,7 @@ namespace FastNoise
             valueX = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexOuterVector, FS::Constant<float>( kRoot3f, -kRoot3f, 1, -1, kRoot3f, -kRoot3f, -1, 1, 2, -2, 0, 0, 0, 0, 0, 0 ) ), valueX );
             valueY = FS::FMulAdd( multiplier, FS::NativeExec<float32v>( FS_BIND_INTRINSIC( _mm512_permutexvar_ps ), indexOuterVector, FS::Constant<float>( 1, -1, kRoot3f, -kRoot3f, -1, 1, kRoot3f, -kRoot3f, 0, 0, 2, -2, 0, 0, 0, 0 ) ), valueY );
         }
-        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX2 )
+        else if constexpr( SIMD & FastSIMD::FeatureFlag::AVX )
         {
             float32v finalSign = FS::Cast<float>( ( ( indexGradient >> 28 ) ^ indexOuterVector ) << 31 );
             indexGradient >>= 29;

From 73bb40a6aa28dee692d8c1ce94114a87ddbb5656 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 24 Mar 2025 21:40:27 +0000
Subject: [PATCH 132/139] Rename to SuperSimplex

---
 include/FastNoise/Generators/Simplex.h   | 4 ++--
 include/FastNoise/Generators/Simplex.inl | 2 +-
 src/FastNoise/FastSIMD_Build.inl         | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/FastNoise/Generators/Simplex.h b/include/FastNoise/Generators/Simplex.h
index ac7fd46d..240c0212 100644
--- a/include/FastNoise/Generators/Simplex.h
+++ b/include/FastNoise/Generators/Simplex.h
@@ -26,7 +26,7 @@ namespace FastNoise
     };
 #endif
 
-    class SimplexSmooth : public virtual VariableRange<ScalableGenerator>
+    class SuperSimplex : public virtual VariableRange<ScalableGenerator>
     {
     public:
         const Metadata& GetMetadata() const override;
@@ -34,7 +34,7 @@ namespace FastNoise
 
 #ifdef FASTNOISE_METADATA
     template<>
-    struct MetadataT<SimplexSmooth> : MetadataT<VariableRange<ScalableGenerator>>
+    struct MetadataT<SuperSimplex> : MetadataT<VariableRange<ScalableGenerator>>
     {
         SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
 
diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index 96fea1f1..2a4534a6 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -282,7 +282,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
 };
 
 template<FastSIMD::FeatureSet SIMD>
-class FastSIMD::DispatchClass<FastNoise::SimplexSmooth, SIMD> final : public virtual FastNoise::SimplexSmooth, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
+class FastSIMD::DispatchClass<FastNoise::SuperSimplex, SIMD> final : public virtual FastNoise::SuperSimplex, public FastSIMD::DispatchClass<FastNoise::VariableRange<ScalableGenerator>, SIMD>
 {
     float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const
     {
diff --git a/src/FastNoise/FastSIMD_Build.inl b/src/FastNoise/FastSIMD_Build.inl
index feca4a79..2a857dbe 100644
--- a/src/FastNoise/FastSIMD_Build.inl
+++ b/src/FastNoise/FastSIMD_Build.inl
@@ -91,7 +91,7 @@ FASTNOISE_REGISTER_NODE( PositionOutput );
 FASTNOISE_REGISTER_NODE( DistanceToPoint );
 
 FASTNOISE_REGISTER_NODE( Simplex );
-FASTNOISE_REGISTER_NODE( SimplexSmooth );
+FASTNOISE_REGISTER_NODE( SuperSimplex );
 FASTNOISE_REGISTER_NODE( Perlin );
 FASTNOISE_REGISTER_NODE( Value );
                        

From 1084e0debc91b30535106a55d222ef4774d10bc5 Mon Sep 17 00:00:00 2001
From: Chad Franklin <55667412+chadefranklin@users.noreply.github.com>
Date: Mon, 24 Mar 2025 18:27:14 -0400
Subject: [PATCH 133/139] CellularLookup/Distance Precision Fix (#153)

---
 include/FastNoise/Generators/Cellular.inl | 105 ++++++++++++----------
 1 file changed, 60 insertions(+), 45 deletions(-)

diff --git a/include/FastNoise/Generators/Cellular.inl b/include/FastNoise/Generators/Cellular.inl
index dc708ffb..077898d3 100644
--- a/include/FastNoise/Generators/Cellular.inl
+++ b/include/FastNoise/Generators/Cellular.inl
@@ -264,14 +264,15 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
 
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v xcf = FS::Convert<float>( xc );
+        float32v ycfBase = FS::Convert<float>( ycBase );
 
         xc *= int32v( Primes::X );
         ycBase *= int32v( Primes::Y );
 
         for( int xi = 0; xi < 3; xi++ )
         {
+            float32v xcfOffset = xcf - x;
             float32v ycf = ycfBase;
             int32v yc = ycBase;
             for ( int yi = 0; yi < 3; yi++ )
@@ -281,8 +282,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                 float32v yd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 16 ) ) - float32v( 0xffff / 2.0f );
 
                 float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, yd * yd ) );
-                xd = FS::FMulAdd( xd, invMag, xcf );
-                yd = FS::FMulAdd( yd, invMag, ycf );
+                xd = FS::FMulAdd( xd, invMag, xcfOffset );
+                yd = FS::FMulAdd( yd, invMag, ycf - y );
 
                 float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd );
 
@@ -316,9 +317,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
         int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
 
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+        float32v xcf = FS::Convert<float>( xc );
+        float32v ycfBase = FS::Convert<float>( ycBase );
+        float32v zcfBase = FS::Convert<float>( zcBase );
 
         xc *= int32v( Primes::X );
         ycBase *= int32v( Primes::Y );
@@ -326,10 +327,12 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
 
         for( int xi = 0; xi < 3; xi++ )
         {
+            float32v xcfOffset = xcf - x;
             float32v ycf = ycfBase;
             int32v yc = ycBase;
             for( int yi = 0; yi < 3; yi++ )
             {
+                float32v ycfOffset = ycf - y;
                 float32v zcf = zcfBase;
                 int32v zc = zcBase;
                 for( int zi = 0; zi < 3; zi++ )
@@ -340,9 +343,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                     float32v zd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 22 ) ) - float32v( 0x3ff / 2.0f );
 
                     float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
-                    xd = FS::FMulAdd( xd, invMag, xcf );
-                    yd = FS::FMulAdd( yd, invMag, ycf );
-                    zd = FS::FMulAdd( zd, invMag, zcf );
+                    xd = FS::FMulAdd( xd, invMag, xcfOffset );
+                    yd = FS::FMulAdd( yd, invMag, ycfOffset );
+                    zd = FS::FMulAdd( zd, invMag, zcf - z );
 
                     float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd );
 
@@ -380,10 +383,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
         int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
         int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
 
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
-        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
+        float32v xcf = FS::Convert<float>( xc );
+        float32v ycfBase = FS::Convert<float>( ycBase );
+        float32v zcfBase = FS::Convert<float>( zcBase );
+        float32v wcfBase = FS::Convert<float>( wcBase );
 
         xc *= int32v( Primes::X );
         ycBase *= int32v( Primes::Y );
@@ -392,14 +395,17 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
 
         for( int xi = 0; xi < 3; xi++ )
         {
+            float32v xcfOffset = xcf - x;
             float32v ycf = ycfBase;
             int32v yc = ycBase;
             for( int yi = 0; yi < 3; yi++ )
             {
+                float32v ycfOffset = ycf - y;
                 float32v zcf = zcfBase;
                 int32v zc = zcBase;
                 for( int zi = 0; zi < 3; zi++ )
                 {
+                    float32v zcfOffset = zcf - z;
                     float32v wcf = wcfBase;
                     int32v wc = wcBase;
                     for( int wi = 0; wi < 3; wi++ )
@@ -411,10 +417,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularDistance, SIMD> final : public
                         float32v wd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 24 ) ) - float32v( 0xff / 2.0f );
 
                         float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
-                        xd = FS::FMulAdd( xd, invMag, xcf );
-                        yd = FS::FMulAdd( yd, invMag, ycf );
-                        zd = FS::FMulAdd( zd, invMag, zcf );
-                        wd = FS::FMulAdd( wd, invMag, wcf );
+                        xd = FS::FMulAdd( xd, invMag, xcfOffset );
+                        yd = FS::FMulAdd( yd, invMag, ycfOffset );
+                        zd = FS::FMulAdd( zd, invMag, zcfOffset );
+                        wd = FS::FMulAdd( wd, invMag, wcf - w );
 
                         float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd, wd );
 
@@ -492,8 +498,8 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
         int32v xc = FS::Convert<int32_t>( x ) + int32v( -1 );
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
 
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
+        float32v xcf = FS::Convert<float>( xc );
+        float32v ycfBase = FS::Convert<float>( ycBase );
 
         xc *= int32v( Primes::X );
         ycBase *= int32v( Primes::Y );
@@ -509,16 +515,18 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                 float32v yd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 16 ) ) - float32v( 0xffff / 2.0f );
 
                 float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, yd * yd ) );
-                xd = FS::FMulAdd( xd, invMag, xcf );
-                yd = FS::FMulAdd( yd, invMag, ycf );
+                float32v localCellX = FS::FMulAdd( xd, invMag, xcf );
+                float32v localCellY = FS::FMulAdd( yd, invMag, ycf );
+                xd = localCellX - x;
+                yd = localCellY - y;
 
                 float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd );
 
                 mask32v closer = newDistance < distance;
                 distance = FS::Min( newDistance, distance );
 
-                cellX = FS::Select( closer, xd + x, cellX );
-                cellY = FS::Select( closer, yd + y, cellY );
+                cellX = FS::Select( closer, localCellX, cellX );
+                cellY = FS::Select( closer, localCellY, cellY );
 
                 ycf += float32v( 1 );
                 yc += int32v( Primes::Y );
@@ -542,9 +550,9 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
         int32v ycBase = FS::Convert<int32_t>( y ) + int32v( -1 );
         int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
 
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
+        float32v xcf = FS::Convert<float>( xc );
+        float32v ycfBase = FS::Convert<float>( ycBase );
+        float32v zcfBase = FS::Convert<float>( zcBase );
 
         xc *= int32v( Primes::X );
         ycBase *= int32v( Primes::Y );
@@ -566,18 +574,21 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                     float32v zd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 22 ) ) - float32v( 0x3ff / 2.0f );
 
                     float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, zd * zd ) ) );
-                    xd = FS::FMulAdd( xd, invMag, xcf );
-                    yd = FS::FMulAdd( yd, invMag, ycf );
-                    zd = FS::FMulAdd( zd, invMag, zcf );
+                    float32v localCellX = FS::FMulAdd( xd, invMag, xcf );
+                    float32v localCellY = FS::FMulAdd( yd, invMag, ycf );
+                    float32v localCellZ = FS::FMulAdd( zd, invMag, zcf );
+                    xd = localCellX - x;
+                    yd = localCellY - y;
+                    zd = localCellZ - z;
 
                     float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd );
 
                     mask32v closer = newDistance < distance;
                     distance = FS::Min( newDistance, distance );
 
-                    cellX = FS::Select( closer, xd + x, cellX );
-                    cellY = FS::Select( closer, yd + y, cellY );
-                    cellZ = FS::Select( closer, zd + z, cellZ );
+                    cellX = FS::Select( closer, localCellX, cellX );
+                    cellY = FS::Select( closer, localCellY, cellY );
+                    cellZ = FS::Select( closer, localCellZ, cellZ );
 
                     zcf += float32v( 1 );
                     zc += int32v( Primes::Z );
@@ -605,10 +616,10 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
         int32v zcBase = FS::Convert<int32_t>( z ) + int32v( -1 );
         int32v wcBase = FS::Convert<int32_t>( w ) + int32v( -1 );
 
-        float32v xcf = FS::Convert<float>( xc ) - x;
-        float32v ycfBase = FS::Convert<float>( ycBase ) - y;
-        float32v zcfBase = FS::Convert<float>( zcBase ) - z;
-        float32v wcfBase = FS::Convert<float>( wcBase ) - w;
+        float32v xcf = FS::Convert<float>( xc );
+        float32v ycfBase = FS::Convert<float>( ycBase );
+        float32v zcfBase = FS::Convert<float>( zcBase );
+        float32v wcfBase = FS::Convert<float>( wcBase );
 
         xc *= int32v( Primes::X );
         ycBase *= int32v( Primes::Y );
@@ -636,20 +647,24 @@ class FastSIMD::DispatchClass<FastNoise::CellularLookup, SIMD> final : public vi
                         float32v wd = FS::Convert<float>( FS::BitShiftRightZeroExtend( hash, 24 ) ) - float32v( 0xff / 2.0f );
 
                         float32v invMag = jitter * FS::InvSqrt( FS::FMulAdd( xd, xd, FS::FMulAdd( yd, yd, FS::FMulAdd( zd, zd, wd * wd ) ) ) );
-                        xd = FS::FMulAdd( xd, invMag, xcf );
-                        yd = FS::FMulAdd( yd, invMag, ycf );
-                        zd = FS::FMulAdd( zd, invMag, zcf );
-                        wd = FS::FMulAdd( wd, invMag, wcf );
+                        float32v localCellX = FS::FMulAdd( xd, invMag, xcf );
+                        float32v localCellY = FS::FMulAdd( yd, invMag, ycf );
+                        float32v localCellZ = FS::FMulAdd( zd, invMag, zcf );
+                        float32v localCellW = FS::FMulAdd( wd, invMag, wcf );
+                        xd = localCellX - x;
+                        yd = localCellY - y;
+                        zd = localCellZ - z;
+                        wd = localCellW - w;
 
                         float32v newDistance = CalcDistance<false>( mDistanceFunction, mMinkowskiP, seed, xd, yd, zd, wd );
 
                         mask32v closer = newDistance < distance;
                         distance = FS::Min( newDistance, distance );
 
-                        cellX = FS::Select( closer, xd + x, cellX );
-                        cellY = FS::Select( closer, yd + y, cellY );
-                        cellZ = FS::Select( closer, zd + z, cellZ );
-                        cellW = FS::Select( closer, wd + w, cellW );
+                        cellX = FS::Select( closer, localCellX, cellX );
+                        cellY = FS::Select( closer, localCellY, cellY );
+                        cellZ = FS::Select( closer, localCellZ, cellZ );
+                        cellW = FS::Select( closer, localCellW, cellW );
 
                         wcf += float32v( 1 );
                         wc += int32v( Primes::W );

From c55d5559bece6f9c4e593da825b46e6f2589b39d Mon Sep 17 00:00:00 2001
From: Chad Franklin <55667412+chadefranklin@users.noreply.github.com>
Date: Wed, 26 Mar 2025 20:01:38 -0400
Subject: [PATCH 134/139] DomainWarpSimplex 3D inv masked perf bump (#154)

---
 include/FastNoise/Generators/DomainWarpSimplex.inl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/FastNoise/Generators/DomainWarpSimplex.inl b/include/FastNoise/Generators/DomainWarpSimplex.inl
index 121964e6..c20bfee1 100644
--- a/include/FastNoise/Generators/DomainWarpSimplex.inl
+++ b/include/FastNoise/Generators/DomainWarpSimplex.inl
@@ -164,10 +164,10 @@ protected:
 
         mask32v maskX1 = xGreaterEqualY & xGreaterEqualZ;
         mask32v maskY1 = FS::BitwiseAndNot( yGreaterEqualZ, xGreaterEqualY );
-        mask32v maskZ1 = FS::BitwiseAndNot( ~xGreaterEqualZ, yGreaterEqualZ );
+        mask32v maskZ1 = xGreaterEqualZ | yGreaterEqualZ; // Inv masked
 
-        mask32v nMaskX2 = ~( xGreaterEqualY | xGreaterEqualZ );
-        mask32v nMaskY2 = xGreaterEqualY & ~yGreaterEqualZ;
+        mask32v nMaskX2 = xGreaterEqualY | xGreaterEqualZ; // Inv masked
+        mask32v nMaskY2 = FS::BitwiseAndNot( xGreaterEqualY, yGreaterEqualZ );
         mask32v nMaskZ2 = xGreaterEqualZ & yGreaterEqualZ;
 
         float32v dx3 = dx0 - float32v( kReflectUnskew3 * 3 + 1 );
@@ -175,8 +175,8 @@ protected:
         float32v dz3 = dz0 - float32v( kReflectUnskew3 * 3 + 1 );
         float32v dx1 = FS::MaskedSub( maskX1, dx3, float32v( 1 ) ); // kReflectUnskew3 * 3 + 1 = kReflectUnskew3, so dx0 - kReflectUnskew3 = dx3
         float32v dy1 = FS::MaskedSub( maskY1, dy3, float32v( 1 ) );
-        float32v dz1 = FS::MaskedSub( maskZ1, dz3, float32v( 1 ) );
-        float32v dx2 = FS::MaskedIncrement( nMaskX2, dx0 ); // kReflectUnskew3 * 2 - 1 = 0, so dx0 + ( kReflectUnskew3 * 2 - 1 ) = dx0
+        float32v dz1 = FS::InvMaskedSub( maskZ1, dz3, float32v( 1 ) );
+        float32v dx2 = FS::MaskedIncrement( ~nMaskX2, dx0 ); // kReflectUnskew3 * 2 - 1 = 0, so dx0 + ( kReflectUnskew3 * 2 - 1 ) = dx0
         float32v dy2 = FS::MaskedIncrement( nMaskY2, dy0 );
         float32v dz2 = FS::MaskedIncrement( nMaskZ2, dz0 );
 
@@ -200,8 +200,8 @@ protected:
         float32v valueZ( 0 );
 
         ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase, yPrimedBase, zPrimedBase ), dx0, dy0, dz0, falloff0, valueX, valueY, valueZ );
-        ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ), FS::MaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ) ), dx1, dy1, dz1, falloff1, valueX, valueY, valueZ );
-        ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::InvMaskedAdd( nMaskX2, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( nMaskY2, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( nMaskZ2, zPrimedBase, int32v( Primes::Z ) ) ), dx2, dy2, dz2, falloff2, valueX, valueY, valueZ );
+        ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::MaskedAdd( maskX1, xPrimedBase, int32v( Primes::X ) ), FS::MaskedAdd( maskY1, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( maskZ1, zPrimedBase, int32v( Primes::Z ) ) ), dx1, dy1, dz1, falloff1, valueX, valueY, valueZ );
+        ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, FS::MaskedAdd( nMaskX2, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( nMaskY2, yPrimedBase, int32v( Primes::Y ) ), FS::InvMaskedAdd( nMaskZ2, zPrimedBase, int32v( Primes::Z ) ) ), dx2, dy2, dz2, falloff2, valueX, valueY, valueZ );
         ApplyVectorContributionCommon<Scheme>( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ), zPrimedBase + int32v( Primes::Z ) ), dx3, dy3, dz3, falloff3, valueX, valueY, valueZ );
 
         if constexpr( Scheme != VectorizationScheme::OrthogonalGradientMatrix )

From 5f5ef764432578a297ef22c33aadfd159af2921d Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Mon, 31 Mar 2025 00:29:59 +0100
Subject: [PATCH 135/139] Fix bounding for Simplex 2D, Super Simplex not yet
 fixed

---
 include/FastNoise/Generators/Simplex.inl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/FastNoise/Generators/Simplex.inl b/include/FastNoise/Generators/Simplex.inl
index 2a4534a6..7af0d194 100644
--- a/include/FastNoise/Generators/Simplex.inl
+++ b/include/FastNoise/Generators/Simplex.inl
@@ -54,7 +54,7 @@ class FastSIMD::DispatchClass<FastNoise::Simplex, SIMD> final : public virtual F
         float32v gradientRampValue1 = GetGradientDotPerlin( HashPrimes( seed, FS::MaskedAdd( xGreaterEqualY, xPrimedBase, int32v( Primes::X ) ), FS::InvMaskedAdd( xGreaterEqualY, yPrimedBase, int32v( Primes::Y ) ) ), dx1, dy1 );
         float32v gradientRampValue2 = GetGradientDotPerlin( HashPrimes( seed, xPrimedBase + int32v( Primes::X ), yPrimedBase + int32v( Primes::Y ) ), dx2, dy2 );
 
-        constexpr double kBounding = 49.918426513671875;
+        constexpr double kBounding = 38.283687591552734375;
 
         return this->ScaleOutput( FS::FMulAdd( gradientRampValue0, falloff0, FS::FMulAdd( gradientRampValue1, falloff1, gradientRampValue2 * falloff2 ) ),
             -1 / kBounding, 1 / kBounding );

From 6462d44870f570d1305df06032925f84ee844893 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Thu, 10 Apr 2025 00:04:26 +0100
Subject: [PATCH 136/139] Fix new nodes spawning at incorrect locations

---
 tools/NodeEditor/CMakeLists.txt          | 2 +-
 tools/NodeEditor/FastNoiseNodeEditor.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/NodeEditor/CMakeLists.txt b/tools/NodeEditor/CMakeLists.txt
index 9f681d2a..58df8f5c 100644
--- a/tools/NodeEditor/CMakeLists.txt
+++ b/tools/NodeEditor/CMakeLists.txt
@@ -82,7 +82,7 @@ find_package(ImGui REQUIRED SourcesMiscCpp)
 CPMAddPackage(
     NAME imnodes
     GITHUB_REPOSITORY Auburn/imnodes
-    GIT_TAG 26b70c528d48beeb839035f3da71550f8b0adfa7
+    GIT_TAG db2ef1192a4ddff32a838094de7127142a731ef0
     GIT_SUBMODULES ".github"
     EXCLUDE_FROM_ALL YES
     OPTIONS
diff --git a/tools/NodeEditor/FastNoiseNodeEditor.cpp b/tools/NodeEditor/FastNoiseNodeEditor.cpp
index 86065919..c31e105a 100644
--- a/tools/NodeEditor/FastNoiseNodeEditor.cpp
+++ b/tools/NodeEditor/FastNoiseNodeEditor.cpp
@@ -1336,7 +1336,7 @@ void FastNoiseNodeEditor::DoContextMenu()
     ImGui::PushStyleVar( ImGuiStyleVar_WindowPadding, ImVec2( 4, 4 ) );
     if( distance < 5.0f && ImGui::BeginPopupContextWindow( "new_node", 1 ) )
     {
-        mContextStartPos = ImGui::GetMousePosOnOpeningCurrentPopup();
+        mContextStartPos = ImNodes::ConvertToEditorContextSpace( ImGui::GetMousePosOnOpeningCurrentPopup() );
 
         if( auto newMetadata = mContextMetadata.front()->DrawUI() )
         {
@@ -1406,7 +1406,7 @@ void FastNoiseNodeEditor::DoContextMenu()
     }
     if( ImGui::BeginPopup( "new_node_drop", ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings ) )
     {
-        ImVec2 startPos = ImGui::GetMousePosOnOpeningCurrentPopup();
+        ImVec2 startPos = ImNodes::ConvertToEditorContextSpace( ImGui::GetMousePosOnOpeningCurrentPopup() );
 
         auto newMetadata = mContextMetadata.front()->DrawUI( []( const FastNoise::Metadata* metadata )
         {

From f708e68e73b58eeea71530b9e2810a298077e15e Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Fri, 18 Apr 2025 22:28:18 +0100
Subject: [PATCH 137/139] More generator descriptions

---
 .../FastNoise/Generators/BasicGenerators.h    |  4 +-
 include/FastNoise/Generators/Modifiers.h      | 54 +++++++++++++++++--
 2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/include/FastNoise/Generators/BasicGenerators.h b/include/FastNoise/Generators/BasicGenerators.h
index 8a943823..bb3001d5 100644
--- a/include/FastNoise/Generators/BasicGenerators.h
+++ b/include/FastNoise/Generators/BasicGenerators.h
@@ -244,7 +244,9 @@ namespace FastNoise
             this->AddHybridSource( { "Minkowski P", "Only affects Minkowski distance function\n1 = Manhattan\n2 = Euclidean" }, 1.5f, &DistanceToPoint::SetMinkowskiP, &DistanceToPoint::SetMinkowskiP );
 
             description =
-                "Outputs distance between point and input position";
+                "Outputs distance between point and input position\n"
+                "Distance is calculated in current domain space,\n"
+                "ie affected by Domain Modifiers/Warping";
         }
     };
 #endif
diff --git a/include/FastNoise/Generators/Modifiers.h b/include/FastNoise/Generators/Modifiers.h
index 9e3df51c..ec998393 100644
--- a/include/FastNoise/Generators/Modifiers.h
+++ b/include/FastNoise/Generators/Modifiers.h
@@ -27,6 +27,9 @@ namespace FastNoise
             groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &DomainScale::SetSource );
             this->AddVariable( "Scaling", 1.0f, &DomainScale::SetScaling );
+
+            description =
+                "Scales the input coordinates uniformly before passing them to the source generator.";
         }
     };
 #endif
@@ -63,6 +66,9 @@ namespace FastNoise
             groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &DomainOffset::SetSource );
             this->AddPerDimensionHybridSource( "Offset", 0.0f, []( DomainOffset* p ) { return std::ref( p->mOffset ); }, 0.25f );
+
+            description =
+                "Adds an offset to the input coordinates before passing them to the source generator";
         }
     };
 #endif
@@ -126,6 +132,12 @@ namespace FastNoise
             this->AddVariable( "Yaw", 0.0f, &DomainRotate::SetYaw );
             this->AddVariable( "Pitch", 0.0f, &DomainRotate::SetPitch );
             this->AddVariable( "Roll", 0.0f, &DomainRotate::SetRoll ); 
+
+            description =
+                "Rotates the input coordinates around the origin before passing them to the source generator\n"
+                "For 2D input coordinates a 2D rotation with Yaw is performed if Pitch and Roll are 0, otherwise a 3D rotation is performed\n"
+                "For 3D input coordinates a 3D rotation is performed\n"
+                "For 4D input coordinates no rotation is applied";
         }
     };
 #endif
@@ -154,6 +166,9 @@ namespace FastNoise
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &SeedOffset::SetSource );
             this->AddVariable( "Seed Offset", 1, &SeedOffset::SetOffset );
+
+            description =
+                "Offsets the input seed before passing it to the source generator.";
         }
     };
 #endif
@@ -203,6 +218,10 @@ namespace FastNoise
             this->AddHybridSource( "From Max", 1.0f, &Remap::SetFromMax, &Remap::SetFromMax );
             this->AddHybridSource( "To Min", 0.0f, &Remap::SetToMin, &Remap::SetToMin );
             this->AddHybridSource( "To Max", 1.0f, &Remap::SetToMax, &Remap::SetToMax );            
+
+            description =
+                "Remaps the output value of the source generator from one range to another\n"
+                "Does not clamp values";
         }
     };
 #endif
@@ -246,6 +265,11 @@ namespace FastNoise
                 {
                     p->mMax = f;
                 } );
+
+            description =
+                "Used for converting a float into a greyscale RGBA8 texture format output\n"
+                "Clamps the source output between Min/Max, scales it to 0-255, and packs the result\n"
+                "into an RGBA8 color stored in a float. RGB will be the same value, Alpha is always 255";
         }
     };
 #endif
@@ -277,8 +301,11 @@ namespace FastNoise
         {
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &Terrace::SetSource );
-            this->AddVariable( "Multiplier", 1.0f, &Terrace::SetMultiplier );
-            this->AddVariable( "Smoothness", 0.0f, &Terrace::SetSmoothness );
+            this->AddVariable( { "Multiplier", "The size of the steps" }, 1.0f, &Terrace::SetMultiplier );
+            this->AddVariable( { "Smoothness", "How smooth the transitions between levels are" }, 0.0f, &Terrace::SetSmoothness );
+
+            description =
+                "Maps the source output onto specified terrace levels (steps).\n";
         }
     };
 #endif
@@ -312,6 +339,9 @@ namespace FastNoise
             groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &DomainAxisScale::SetSource );
             this->AddPerDimensionVariable( "Scaling", 1.0f, []( DomainAxisScale* p ) { return std::ref( p->mScale ); } );
+
+            description =
+                "Scales each axis of the input coordinates independently before passing them to the source generator.";
         }
     };
 #endif
@@ -340,7 +370,11 @@ namespace FastNoise
         {
             groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &AddDimension::SetSource );
-            this->AddHybridSource( "New Dimension Position", 0.0f, &AddDimension::SetNewDimensionPosition, &AddDimension::SetNewDimensionPosition );
+            this->AddHybridSource( { "New Dimension Position", "The position of the new dimension" }, 0.0f, &AddDimension::SetNewDimensionPosition, &AddDimension::SetNewDimensionPosition );
+
+            description =
+                "Adds a dimension to the input coordinates, new dimension is always the last dimension\n"
+                "The coordinates with the new dimension are passed to the source generator";
         }
     };
 #endif
@@ -369,6 +403,9 @@ namespace FastNoise
             groups.push_back( "Domain Modifiers" );
             this->AddGeneratorSource( "Source", &RemoveDimension::SetSource );
             this->AddVariableEnum( "Remove Dimension", Dim::Y, &RemoveDimension::SetRemoveDimension, kDim_Strings );
+
+            description =
+                "Removes the specified dimension from the input coordinates before passing them to the source generator";
         }
     };
 #endif
@@ -394,6 +431,10 @@ namespace FastNoise
         {
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &GeneratorCache::SetSource );
+
+            description =
+                "Caches the output of the source generator. If the same input coordinates and seed are\n"
+                "requested again, the cached value is returned, improving performance for complex source generators";
         }
     };
 #endif
@@ -419,6 +460,10 @@ namespace FastNoise
         {
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &SquareRoot::SetSource );
+
+            description =
+                "Returns the square root of the absolute value of the source output,\n"
+                "preserving the original sign (signed square root).";
         }
     };
 #endif
@@ -444,6 +489,9 @@ namespace FastNoise
         {
             groups.push_back( "Modifiers" );
             this->AddGeneratorSource( "Source", &Abs::SetSource );
+
+            description =
+                "Returns the absolute value of the source output.";
         }
     };
 #endif

From 6f9f1da08f09d03579533882b4a15ebd87fbd3c5 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 22 Apr 2025 22:06:44 +0100
Subject: [PATCH 138/139] Split DomainWarpSuperSimplex into its own node

---
 .../FastNoise/Generators/DomainWarpSimplex.h  |  27 ++--
 .../Generators/DomainWarpSimplex.inl          | 122 ++++++++++--------
 include/FastNoise/Generators/Generator.h      |   2 +-
 src/FastNoise/FastSIMD_Build.inl              |   2 +-
 4 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/include/FastNoise/Generators/DomainWarpSimplex.h b/include/FastNoise/Generators/DomainWarpSimplex.h
index 6f1dfb8f..669056fa 100644
--- a/include/FastNoise/Generators/DomainWarpSimplex.h
+++ b/include/FastNoise/Generators/DomainWarpSimplex.h
@@ -8,12 +8,10 @@ namespace FastNoise
     {
     public:
         const Metadata& GetMetadata() const override;
-
-        void SetType( SimplexType value ) { mType = value; }
+        
         void SetVectorizationScheme( VectorizationScheme value ) { mVectorizationScheme = value; }
 
     protected:
-        SimplexType mType = SimplexType::Standard;
         VectorizationScheme mVectorizationScheme = VectorizationScheme::OrthogonalGradientMatrix;
     };
 
@@ -25,11 +23,6 @@ namespace FastNoise
 
         MetadataT()
         {
-            this->AddVariableEnum(
-                { "Type", "Noise character style" },
-                SimplexType::Standard, &DomainWarpSimplex::SetType,
-                kSimplexType_Strings
-            );
             this->AddVariableEnum(
                 { "Vectorization Scheme", "Construction used by the noise to produce a vector output" },
                 VectorizationScheme::OrthogonalGradientMatrix, &DomainWarpSimplex::SetVectorizationScheme,
@@ -38,4 +31,22 @@ namespace FastNoise
         }
     };
 #endif
+
+    class DomainWarpSuperSimplex : public virtual DomainWarpSimplex
+    {
+    public:
+        const Metadata& GetMetadata() const override;
+    };
+
+#ifdef FASTNOISE_METADATA
+    template<>
+    struct MetadataT<DomainWarpSuperSimplex> : MetadataT<DomainWarpSimplex>
+    {
+        SmartNode<> CreateNode( FastSIMD::FeatureSet ) const override;
+
+        MetadataT()
+        {
+        }
+    };
+#endif
 }
diff --git a/include/FastNoise/Generators/DomainWarpSimplex.inl b/include/FastNoise/Generators/DomainWarpSimplex.inl
index c20bfee1..5eaa2774 100644
--- a/include/FastNoise/Generators/DomainWarpSimplex.inl
+++ b/include/FastNoise/Generators/DomainWarpSimplex.inl
@@ -7,67 +7,43 @@ class FastSIMD::DispatchClass<FastNoise::DomainWarpSimplex, SIMD> final : public
 public:
     float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
     {
-        switch( mType ) {
-        case SimplexType::Standard:
-            switch( mVectorizationScheme ) {
-            case VectorizationScheme::OrthogonalGradientMatrix:
-                return Warp_Standard<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, xOut, yOut );
-            case VectorizationScheme::GradientOuterProduct:
-                return Warp_Standard<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, xOut, yOut );
-            }
-        case SimplexType::Smooth:
-            switch( mVectorizationScheme ) {
-            case VectorizationScheme::OrthogonalGradientMatrix:
-                return Warp_Smooth<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, xOut, yOut );
-            case VectorizationScheme::GradientOuterProduct:
-                return Warp_Smooth<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, xOut, yOut );
-            }
+        switch( mVectorizationScheme )
+        {
+        default:
+        case VectorizationScheme::OrthogonalGradientMatrix:
+            return Warp_2D<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, xOut, yOut );
+        case VectorizationScheme::GradientOuterProduct:
+            return Warp_2D<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, xOut, yOut );
         }
     }
 
     float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
     {
-        switch( mType ) {
-        case SimplexType::Standard:
-            switch( mVectorizationScheme ) {
-            case VectorizationScheme::OrthogonalGradientMatrix:
-                return Warp_Standard<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
-            case VectorizationScheme::GradientOuterProduct:
-                return Warp_Standard<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
-            }
-        case SimplexType::Smooth:
-            switch( mVectorizationScheme ) {
-            case VectorizationScheme::OrthogonalGradientMatrix:
-                return Warp_Smooth<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
-            case VectorizationScheme::GradientOuterProduct:
-                return Warp_Smooth<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
-            }
-        }
+        switch( mVectorizationScheme ) 
+        {
+        default:
+        case VectorizationScheme::OrthogonalGradientMatrix:
+            return Warp_3D<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
+        case VectorizationScheme::GradientOuterProduct:
+            return Warp_3D<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
+        }        
     }
 
     float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
     {
-        switch( mType ) {
-        case SimplexType::Standard:
-            switch( mVectorizationScheme ) {
-            case VectorizationScheme::OrthogonalGradientMatrix:
-                return Warp_Standard<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
-            case VectorizationScheme::GradientOuterProduct:
-                return Warp_Standard<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
-            }
-        case SimplexType::Smooth:
-            switch( mVectorizationScheme ) {
-            case VectorizationScheme::OrthogonalGradientMatrix:
-                return Warp_Smooth<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
-            case VectorizationScheme::GradientOuterProduct:
-                return Warp_Smooth<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
-            }
+        switch( mVectorizationScheme )
+        {
+        default:
+        case VectorizationScheme::OrthogonalGradientMatrix:
+            return Warp_4D<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
+        case VectorizationScheme::GradientOuterProduct:
+            return Warp_4D<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
         }
     }
 
 protected:
     template<VectorizationScheme Scheme>
-    float32v FS_VECTORCALL Warp_Standard( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const
+    float32v FS_VECTORCALL Warp_2D( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const
     {
         constexpr double kRoot3 = 1.7320508075688772935274463415059;
         constexpr double kSkew2 = 1.0 / ( kRoot3 + 1.0 );
@@ -131,7 +107,7 @@ protected:
     }
 
     template<VectorizationScheme Scheme>
-    float32v FS_VECTORCALL Warp_Standard( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const
+    float32v FS_VECTORCALL Warp_3D( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const
     {
         constexpr double kSkew3 = 1.0 / 3.0;
         constexpr double kReflectUnskew3 = -1.0 / 2.0;
@@ -228,7 +204,7 @@ protected:
     }
 
     template<VectorizationScheme Scheme>
-    float32v FS_VECTORCALL Warp_Standard( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
+    float32v FS_VECTORCALL Warp_4D( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
     {
         constexpr double kRoot5 = 2.2360679774997896964091736687313;
         constexpr double kSkew4 = 1.0 / ( kRoot5 + 1.0 );
@@ -379,9 +355,51 @@ protected:
         float32v warpLengthSq = FS::FMulAdd( valueW, valueW, FS::FMulAdd( valueZ, valueZ, FS::FMulAdd( valueY, valueY, valueX * valueX ) ) );
         return warpLengthSq * FS::InvSqrt( warpLengthSq ) * warpAmp;
     }
+};
+
+template<FastSIMD::FeatureSet SIMD>
+class FastSIMD::DispatchClass<FastNoise::DomainWarpSuperSimplex, SIMD> final : public virtual FastNoise::DomainWarpSuperSimplex, public FastSIMD::DispatchClass<FastNoise::DomainWarp, SIMD>
+{
+public:
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
+    {
+        switch( mVectorizationScheme )
+        {
+        default:
+        case VectorizationScheme::OrthogonalGradientMatrix:
+            return Warp_2D<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, xOut, yOut );
+        case VectorizationScheme::GradientOuterProduct:
+            return Warp_2D<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, xOut, yOut );
+        }
+    }
 
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
+    {
+        switch( mVectorizationScheme ) 
+        {
+        default:
+        case VectorizationScheme::OrthogonalGradientMatrix:
+            return Warp_3D<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
+        case VectorizationScheme::GradientOuterProduct:
+            return Warp_3D<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, xOut, yOut, zOut );
+        }        
+    }
+
+    float32v FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
+    {
+        switch( mVectorizationScheme )
+        {
+        default:
+        case VectorizationScheme::OrthogonalGradientMatrix:
+            return Warp_4D<VectorizationScheme::OrthogonalGradientMatrix>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
+        case VectorizationScheme::GradientOuterProduct:
+            return Warp_4D<VectorizationScheme::GradientOuterProduct>( seed, warpAmp, x, y, z, w, xOut, yOut, zOut, wOut );
+        }
+    }
+
+protected:
     template<VectorizationScheme Scheme>
-    float32v FS_VECTORCALL Warp_Smooth( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const
+    float32v FS_VECTORCALL Warp_2D( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const
     {
         constexpr double kRoot3 = 1.7320508075688772935274463415059;
         constexpr double kSkew2 = 1.0 / ( kRoot3 + 1.0 );
@@ -470,7 +488,7 @@ protected:
     }
 
     template<VectorizationScheme Scheme>
-    float32v FS_VECTORCALL Warp_Smooth( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const
+    float32v FS_VECTORCALL Warp_3D( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const
     {
         constexpr double kSkew3 = 1.0 / 3.0;
         constexpr double kReflectUnskew3 = -1.0 / 2.0;
@@ -697,7 +715,7 @@ protected:
     }
 
     template<VectorizationScheme Scheme>
-    float32v FS_VECTORCALL Warp_Smooth( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
+    float32v FS_VECTORCALL Warp_4D( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const
     {
         constexpr double kRoot5 = 2.2360679774997896964091736687313;
         constexpr double kSkew4 = 1.0 / ( kRoot5 + 1.0 );
diff --git a/include/FastNoise/Generators/Generator.h b/include/FastNoise/Generators/Generator.h
index fc651526..648b72e5 100644
--- a/include/FastNoise/Generators/Generator.h
+++ b/include/FastNoise/Generators/Generator.h
@@ -51,7 +51,7 @@ namespace FastNoise
     enum class SimplexType
     {
         Standard,
-        Smooth
+        Super
     };
 
     constexpr static const char* kSimplexType_Strings[] =
diff --git a/src/FastNoise/FastSIMD_Build.inl b/src/FastNoise/FastSIMD_Build.inl
index 2a857dbe..3f7b26f4 100644
--- a/src/FastNoise/FastSIMD_Build.inl
+++ b/src/FastNoise/FastSIMD_Build.inl
@@ -104,7 +104,7 @@ FASTNOISE_REGISTER_NODE( FractalPingPong );
 FASTNOISE_REGISTER_NODE( FractalRidged );
 
 FASTNOISE_REGISTER_NODE( DomainWarpSimplex );
-//FASTNOISE_REGISTER_NODE( DomainWarpSimplexSmooth );
+FASTNOISE_REGISTER_NODE( DomainWarpSuperSimplex );
 FASTNOISE_REGISTER_NODE( DomainWarpGradient );
 
 FASTNOISE_REGISTER_NODE( DomainWarpFractalProgressive );

From 6efed6e506f4d7217037c1faa348cfa15b469391 Mon Sep 17 00:00:00 2001
From: Auburn <jordan.me2@gmail.com>
Date: Tue, 22 Apr 2025 22:07:31 +0100
Subject: [PATCH 139/139] Always consume combo scroll to avoid zooming

---
 tools/NodeEditor/util/ImGuiExtra.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/NodeEditor/util/ImGuiExtra.h b/tools/NodeEditor/util/ImGuiExtra.h
index ca76bed7..3978ad46 100644
--- a/tools/NodeEditor/util/ImGuiExtra.h
+++ b/tools/NodeEditor/util/ImGuiExtra.h
@@ -44,6 +44,8 @@ namespace ImGuiExtra
                 ImGui::GetIO().MouseWheel = 0;
                 return true;
             }
+
+            ImGui::GetIO().MouseWheel = 0;
         }
         return false;
     }