DirectXMath 3.06

walbourn · walbourn · commit aee6e900f09d · 2016-05-23T14:29:47.000-07:00
diff --git a/Inc/DirectXMath.h b/Inc/DirectXMath.h
@@ -17,7 +17,7 @@
 #error DirectX Math requires C++
 #endif
 
-#define DIRECTX_MATH_VERSION 305
+#define DIRECTX_MATH_VERSION 306
 
 #if !defined(_XM_BIGENDIAN_) && !defined(_XM_LITTLEENDIAN_)
 #if defined(_M_X64) || defined(_M_IX86) || defined(_M_ARM)
@@ -29,6 +29,7 @@
 #endif
 #endif // !_XM_BIGENDIAN_ && !_XM_LITTLEENDIAN_
 
+
 #if defined(_MSC_VER) && !defined(_M_ARM) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_)
 #if ((_MSC_FULL_VER >= 170065501) && (_MSC_VER < 1800)) || (_MSC_FULL_VER >= 180020418)
 #define _XM_VECTORCALL_ 1
@@ -279,8 +280,8 @@ typedef const XMVECTOR FXMVECTOR;
 typedef const XMVECTOR& FXMVECTOR;
 #endif
 
-// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, Xbox 360, and vector call; by reference otherwise
-#if ( defined(_M_ARM) || defined(_XM_VMX128_INTRINSICS_) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
+// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, Xbox 360, and x64 vector call; by reference otherwise
+#if ( defined(_M_ARM) || defined(_XM_VMX128_INTRINSICS_) || (_XM_VECTORCALL_ && !defined(_M_IX86) ) ) && !defined(_XM_NO_INTRINSICS_)
 typedef const XMVECTOR GXMVECTOR;
 #else
 typedef const XMVECTOR& GXMVECTOR;
@@ -1392,6 +1393,10 @@ XMVECTOR    XM_CALLCONV     XMColorXYZToRGB( FXMVECTOR xyz );
 XMVECTOR    XM_CALLCONV     XMColorXYZToSRGB( FXMVECTOR xyz );
 XMVECTOR    XM_CALLCONV     XMColorSRGBToXYZ( FXMVECTOR srgb );
 
+XMVECTOR    XM_CALLCONV     XMColorRGBToSRGB( FXMVECTOR rgb );
+XMVECTOR    XM_CALLCONV     XMColorSRGBToRGB( FXMVECTOR srgb );
+
+
 /****************************************************************************
  *
  * Miscellaneous operations
diff --git a/Inc/DirectXMathMisc.inl b/Inc/DirectXMathMisc.inl
@@ -1985,6 +1985,42 @@ inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ( FXMVECTOR srgb )
     return XMVectorSelect( srgb, clr, g_XMSelect1110 );
 }
 
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB( FXMVECTOR rgb )
+{
+    // Linear -> sRGB on x/y/z after saturating: 12.92*c below the cutoff, else 1.055*c^(1/2.4) - 0.055; w is taken from 'rgb'.
+    static const XMVECTORF32 Threshold = { 0.0031308f, 0.0031308f, 0.0031308f, 1.f };
+    static const XMVECTORF32 LinearSlope = { 12.92f, 12.92f, 12.92f, 1.f };
+    static const XMVECTORF32 CurveScale = { 1.055f, 1.055f, 1.055f, 1.f };
+    static const XMVECTORF32 CurveBias = { 0.055f, 0.055f, 0.055f, 0.f };
+    static const XMVECTORF32 Exponent = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.f };
+    const XMVECTOR clamped = XMVectorSaturate( rgb );
+    const XMVECTOR linearSeg = XMVectorMultiply( clamped, LinearSlope );
+    const XMVECTOR curveSeg = XMVectorSubtract( XMVectorMultiply( CurveScale, XMVectorPow( clamped, Exponent ) ), CurveBias );
+    const XMVECTOR useLinear = XMVectorLess( clamped, Threshold );
+    const XMVECTOR encoded = XMVectorSelect( curveSeg, linearSeg, useLinear );
+    return XMVectorSelect( rgb, encoded, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB( FXMVECTOR srgb )
+{
+    // sRGB -> linear on x/y/z after saturating: c/12.92 up to the cutoff, else ((c + 0.055)/1.055)^2.4; w is taken from 'srgb'.
+    static const XMVECTORF32 Threshold = { 0.04045f, 0.04045f, 0.04045f, 1.f };
+    static const XMVECTORF32 InvLinearSlope = { 1.f/12.92f, 1.f/12.92f, 1.f/12.92f, 1.f };
+    static const XMVECTORF32 InvCurveScale = { 1.f/1.055f, 1.f/1.055f, 1.f/1.055f, 1.f };
+    static const XMVECTORF32 CurveBias = { 0.055f, 0.055f, 0.055f, 0.f };
+    static const XMVECTORF32 Exponent = { 2.4f, 2.4f, 2.4f, 1.f };
+    const XMVECTOR clamped = XMVectorSaturate( srgb );
+    const XMVECTOR linearSeg = XMVectorMultiply( clamped, InvLinearSlope );
+    const XMVECTOR curveSeg = XMVectorPow( XMVectorMultiply( XMVectorAdd( clamped, CurveBias ), InvCurveScale ), Exponent );
+    const XMVECTOR useCurve = XMVectorGreater( clamped, Threshold );
+    const XMVECTOR decoded = XMVectorSelect( linearSeg, curveSeg, useCurve );
+    return XMVectorSelect( srgb, decoded, g_XMSelect1110 );
+}
+
 /****************************************************************************
  *
  * Miscellaneous
diff --git a/Inc/DirectXMathVector.inl b/Inc/DirectXMathVector.inl
@@ -2323,7 +2323,6 @@ inline XMVECTOR XM_CALLCONV XMVectorMax
 
 //------------------------------------------------------------------------------
 
-#if defined(_XM_NO_INTRINSICS_)
 namespace Internal
 {
     inline float round_to_nearest( float x )
@@ -2345,7 +2344,8 @@ namespace Internal
         return i + 1.f;
     }
 };
-#else
+
+#if !defined(_XM_NO_INTRINSICS_)
 #pragma float_control(push)
 #pragma float_control(precise, on)
 #endif
diff --git a/Inc/DirectXPackedVector.h b/Inc/DirectXPackedVector.h
@@ -921,6 +921,7 @@ XMVECTOR    XM_CALLCONV     XMLoadXDec4(_In_ const XMXDEC4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadDecN4(_In_ const XMDECN4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadDec4(_In_ const XMDEC4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadUDecN4(_In_ const XMUDECN4* pSource);
+XMVECTOR    XM_CALLCONV     XMLoadUDecN4_XR(_In_ const XMUDECN4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadUDec4(_In_ const XMUDEC4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadByteN4(_In_ const XMBYTEN4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadByte4(_In_ const XMBYTE4* pSource);
@@ -962,6 +963,7 @@ void    XM_CALLCONV     XMStoreXDec4(_Out_ XMXDEC4* pDestination, _In_ FXMVECTOR
 void    XM_CALLCONV     XMStoreDecN4(_Out_ XMDECN4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreDec4(_Out_ XMDEC4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreUDecN4(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V);
+void    XM_CALLCONV     XMStoreUDecN4_XR(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreUDec4(_Out_ XMUDEC4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreByteN4(_Out_ XMBYTEN4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreByte4(_Out_ XMBYTE4* pDestination, _In_ FXMVECTOR V);
diff --git a/Inc/DirectXPackedVector.inl b/Inc/DirectXPackedVector.inl
@@ -605,104 +605,16 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadFloat3SE
 {
     assert(pSource);
 
-    __declspec(align(16)) uint32_t Result[4];
-    uint32_t Mantissa;
-    uint32_t Exponent, ExpBits;
-
-    if ( pSource->e == 0x1f ) // INF or NAN
-    {
-        Result[0] = 0x7f800000 | (pSource->xm << 14);
-        Result[1] = 0x7f800000 | (pSource->ym << 14);
-        Result[2] = 0x7f800000 | (pSource->zm << 14);
-    }
-    else if ( pSource->e != 0 ) // The values are all normalized
-    {
-        Exponent = pSource->e;
-
-        ExpBits = (Exponent + 112) << 23;
-
-        Mantissa = pSource->xm;
-        Result[0] = ExpBits | (Mantissa << 14);
-
-        Mantissa = pSource->ym;
-        Result[1] = ExpBits | (Mantissa << 14);
-
-        Mantissa = pSource->zm;
-        Result[2] = ExpBits | (Mantissa << 14);
-    }
-    else
-    {
-        // X Channel
-        Mantissa = pSource->xm;
-
-        if (Mantissa != 0) // The value is denormalized
-        {
-            // Normalize the value in the resulting float
-            Exponent = 1;
-
-            do
-            {
-                Exponent--;
-                Mantissa <<= 1;
-            } while ((Mantissa & 0x200) == 0);
-
-            Mantissa &= 0x1FF;
-        }
-        else // The value is zero
-        {
-            Exponent = (uint32_t)-112;
-        }
-
-        Result[0] = ((Exponent + 112) << 23) | (Mantissa << 14);
-
-        // Y Channel
-        Mantissa = pSource->ym;
-
-        if (Mantissa != 0) // The value is denormalized
-        {
-            // Normalize the value in the resulting float
-            Exponent = 1;
+    union { float f; int32_t i; } fi;
+    fi.i = 0x33800000 + (pSource->e << 23);
+    float Scale = fi.f;
 
-            do
-            {
-                Exponent--;
-                Mantissa <<= 1;
-            } while ((Mantissa & 0x200) == 0);
-
-            Mantissa &= 0x1FF;
-        }
-        else // The value is zero
-        {
-            Exponent = (uint32_t)-112;
-        }
-
-        Result[1] = ((Exponent + 112) << 23) | (Mantissa << 14);
-
-        // Z Channel
-        Mantissa = pSource->zm;
-
-        if (Mantissa != 0) // The value is denormalized
-        {
-            // Normalize the value in the resulting float
-            Exponent = 1;
-
-            do
-            {
-                Exponent--;
-                Mantissa <<= 1;
-            } while ((Mantissa & 0x200) == 0);
-
-            Mantissa &= 0x1FF;
-        }
-        else // The value is zero
-        {
-            Exponent = (uint32_t)-112;
-        }
-
-        Result[2] = ((Exponent + 112) << 23) | (Mantissa << 14);
-    }
-
-    return XMLoadFloat3A( reinterpret_cast<const XMFLOAT3A*>(&Result) );
+    XMVECTORF32 v = {
+        Scale * float( pSource->xm ),
+        Scale * float( pSource->ym ),
+        Scale * float( pSource->zm ),
+        1.0f };
+    return v;
 }
 
 //------------------------------------------------------------------------------
@@ -1012,6 +924,31 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDecN4
 #endif // _XM_VMX128_INTRINSICS_
 }
 
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDecN4_XR
+(
+    const XMUDECN4* pSource
+)
+{
+    assert(pSource);
+    // Pull the three 10-bit fields out of the packed word (bit offsets 0, 10 and 20).
+    const int32_t rawX = static_cast<int32_t>( pSource->v & 0x3FF );
+    const int32_t rawY = static_cast<int32_t>( ( pSource->v >> 10 ) & 0x3FF );
+    const int32_t rawZ = static_cast<int32_t>( ( pSource->v >> 20 ) & 0x3FF );
+    // x/y/z: remove the 0x180 bias and divide by 510; w: bits 30 and up divided by 3.
+    XMVECTORF32 unpacked = {
+        static_cast<float>( rawX - 0x180 ) / 510.0f,
+        static_cast<float>( rawY - 0x180 ) / 510.0f,
+        static_cast<float>( rawZ - 0x180 ) / 510.0f,
+        static_cast<float>( pSource->v >> 30 ) / 3.0f
+    };
+
+    return unpacked.v;
+}
+
+
 //------------------------------------------------------------------------------
 _Use_decl_annotations_
 inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDec4
@@ -1814,77 +1751,38 @@ inline void XM_CALLCONV PackedVector::XMStoreFloat3SE
 {
     assert(pDestination);
 
-    __declspec(align(16)) uint32_t IValue[4];
-    XMStoreFloat3A( reinterpret_cast<XMFLOAT3A*>(&IValue), V );
+    // Packs x/y/z of V as three 9-bit mantissas (xm/ym/zm) that share one exponent field (e).
+    XMFLOAT3A tmp;
+    XMStoreFloat3A( &tmp, V );
 
-    uint32_t Exp[3];
-    uint32_t Frac[3];
+    static const float maxf9 = float(0x1FF << 7);
+    static const float minf9 = float(1.f / (1 << 16));
 
-    // X, Y, Z Channels (5-bit exponent, 9-bit mantissa)
-    for(uint32_t j=0; j < 3; ++j)
-    {
-        uint32_t Sign = IValue[j] & 0x80000000;
-        uint32_t I = IValue[j] & 0x7FFFFFFF;
+    // Clamp each channel to [0, maxf9]; anything that is not >= 0 is stored as 0.
+    float x = (tmp.x >= 0.f) ? ( (tmp.x > maxf9) ? maxf9 : tmp.x ) : 0.f;
+    float y = (tmp.y >= 0.f) ? ( (tmp.y > maxf9) ? maxf9 : tmp.y ) : 0.f;
+    float z = (tmp.z >= 0.f) ? ( (tmp.z > maxf9) ? maxf9 : tmp.z ) : 0.f;
 
-        if ((I & 0x7F800000) == 0x7F800000)
-        {
-            // INF or NAN
-            Exp[j] = 0x1f;
-            if (( I & 0x7FFFFF ) != 0)
-            {
-                Frac[j] = ((I>>14)|(I>>5)|(I))&0x1ff;
-            }
-            else if ( Sign )
-            {
-                // -INF is clamped to 0 since 3SE is positive only
-                Exp[j] = Frac[j] = 0;
-            }
-        }
-        else if ( Sign )
-        {
-            // 3SE is positive only, so clamp to zero
-            Exp[j] = Frac[j] = 0;
-        }
-        else if (I > 0x477FC000U)
-        {
-            // The number is too large, set to max
-            Exp[j] = 0x1e;
-            Frac[j] = 0x1ff;
-        }
-        else
-        {
-            if (I < 0x38800000U)
-            {
-                // The number is too small to be represented as a normalized float9
-                // Convert it to a denormalized value.
-                uint32_t Shift = 113U - (I >> 23U);
-                I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
-            }
-            else
-            {
-                // Rebias the exponent to represent the value as a normalized float9
-                I += 0xC8000000U;
-            }
-     
-            uint32_t T = ((I + 0x1FFFU + ((I >> 14U) & 1U)) >> 14U)&0x3fffU;
+    const float max_xy = (x > y) ? x : y;
+    const float max_xyz = (max_xy > z) ? max_xy : z;
 
-            Exp[j] = (T & 0x3E00) >> 9;
-            Frac[j] = T & 0x1ff;
-        }
-    }
+    const float maxColor = (max_xyz > minf9) ? max_xyz : minf9;
+
+    union { float f; int32_t i; } fi;
+    fi.f = maxColor;
+    // Bump the exponent when the largest channel would otherwise round up to 512 and overflow its 9-bit mantissa.
+    fi.i += 0x00004000;
+    fi.i &= 0xFF800000; // cut off fraction
 
-    // Adjust to a shared exponent
-    uint32_t T = XMMax( Exp[0], XMMax( Exp[1], Exp[2] ) );
+    pDestination->e = (fi.i - 0x37800000) >> 23;
 
-    Frac[0] = Frac[0] >> (T - Exp[0]);
-    Frac[1] = Frac[1] >> (T - Exp[1]);
-    Frac[2] = Frac[2] >> (T - Exp[2]);
+    // Build the power-of-two factor that maps the clamped channels onto the 9-bit mantissa range, straight from exponent bits.
+    fi.i = 0x83000000 - fi.i;
+    float ScaleR = fi.f;
 
-    // Store packed into memory
-    pDestination->xm = Frac[0];
-    pDestination->ym = Frac[1];
-    pDestination->zm = Frac[2];
-    pDestination->e = T;
+    pDestination->xm = static_cast<uint32_t>( Internal::round_to_nearest(x * ScaleR) );
+    pDestination->ym = static_cast<uint32_t>( Internal::round_to_nearest(y * ScaleR) );
+    pDestination->zm = static_cast<uint32_t>( Internal::round_to_nearest(z * ScaleR) );
 }
 
 //------------------------------------------------------------------------------
@@ -2260,6 +2153,32 @@ inline void XM_CALLCONV PackedVector::XMStoreUDecN4
 #endif // _XM_VMX128_INTRINSICS_
 }
 
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XM_CALLCONV PackedVector::XMStoreUDecN4_XR
+(
+    XMUDECN4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+    // x/y/z are scaled by 510 and biased by 384 into 10-bit fields; w is scaled by 3 into the bits from 30 up.
+    static const XMVECTORF32  PackScale = { 510.0f, 510.0f, 510.0f, 3.0f };
+    static const XMVECTORF32  PackBias  = { 384.0f, 384.0f, 384.0f, 0.0f };
+    static const XMVECTORF32  PackMax   = { 1023.f, 1023.f, 1023.f, 3.f };
+
+    // Apply scale and bias, then clamp every lane to [0, PackMax] before converting to integers.
+    const XMVECTOR clamped = XMVectorClamp( XMVectorMultiplyAdd( V, PackScale, PackBias ), g_XMZero, PackMax );
+
+    XMFLOAT4A lanes;
+    XMStoreFloat4A( &lanes, clamped );
+
+    const uint32_t packedX = static_cast<uint32_t>( lanes.x ) & 0x3FF;
+    const uint32_t packedY = static_cast<uint32_t>( lanes.y ) & 0x3FF;
+    const uint32_t packedZ = static_cast<uint32_t>( lanes.z ) & 0x3FF;
+    pDestination->v = ( static_cast<uint32_t>( lanes.w ) << 30 ) | ( packedZ << 20 ) | ( packedY << 10 ) | packedX;
+}
+
 //------------------------------------------------------------------------------
 _Use_decl_annotations_
 inline void XM_CALLCONV PackedVector::XMStoreUDec4