@@ -605,104 +605,16 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadFloat3SE
605
605
{
606
606
assert (pSource);
607
607
608
- __declspec (align (16 )) uint32_t Result[4 ];
609
- uint32_t Mantissa;
610
- uint32_t Exponent, ExpBits;
611
-
612
- if ( pSource->e == 0x1f ) // INF or NAN
613
- {
614
- Result[0 ] = 0x7f800000 | (pSource->xm << 14 );
615
- Result[1 ] = 0x7f800000 | (pSource->ym << 14 );
616
- Result[2 ] = 0x7f800000 | (pSource->zm << 14 );
617
- }
618
- else if ( pSource->e != 0 ) // The values are all normalized
619
- {
620
- Exponent = pSource->e ;
621
-
622
- ExpBits = (Exponent + 112 ) << 23 ;
623
-
624
- Mantissa = pSource->xm ;
625
- Result[0 ] = ExpBits | (Mantissa << 14 );
626
-
627
- Mantissa = pSource->ym ;
628
- Result[1 ] = ExpBits | (Mantissa << 14 );
629
-
630
- Mantissa = pSource->zm ;
631
- Result[2 ] = ExpBits | (Mantissa << 14 );
632
- }
633
- else
634
- {
635
- // X Channel
636
- Mantissa = pSource->xm ;
637
-
638
- if (Mantissa != 0 ) // The value is denormalized
639
- {
640
- // Normalize the value in the resulting float
641
- Exponent = 1 ;
642
-
643
- do
644
- {
645
- Exponent--;
646
- Mantissa <<= 1 ;
647
- } while ((Mantissa & 0x200 ) == 0 );
648
-
649
- Mantissa &= 0x1FF ;
650
- }
651
- else // The value is zero
652
- {
653
- Exponent = (uint32_t )-112 ;
654
- }
655
-
656
- Result[0 ] = ((Exponent + 112 ) << 23 ) | (Mantissa << 14 );
657
-
658
- // Y Channel
659
- Mantissa = pSource->ym ;
660
-
661
- if (Mantissa != 0 ) // The value is denormalized
662
- {
663
- // Normalize the value in the resulting float
664
- Exponent = 1 ;
608
+ union { float f; int32_t i; } fi;
609
+ fi.i = 0x33800000 + (pSource->e << 23 );
610
+ float Scale = fi.f ;
665
611
666
- do
667
- {
668
- Exponent--;
669
- Mantissa <<= 1 ;
670
- } while ((Mantissa & 0x200 ) == 0 );
671
-
672
- Mantissa &= 0x1FF ;
673
- }
674
- else // The value is zero
675
- {
676
- Exponent = (uint32_t )-112 ;
677
- }
678
-
679
- Result[1 ] = ((Exponent + 112 ) << 23 ) | (Mantissa << 14 );
680
-
681
- // Z Channel
682
- Mantissa = pSource->zm ;
683
-
684
- if (Mantissa != 0 ) // The value is denormalized
685
- {
686
- // Normalize the value in the resulting float
687
- Exponent = 1 ;
688
-
689
- do
690
- {
691
- Exponent--;
692
- Mantissa <<= 1 ;
693
- } while ((Mantissa & 0x200 ) == 0 );
694
-
695
- Mantissa &= 0x1FF ;
696
- }
697
- else // The value is zero
698
- {
699
- Exponent = (uint32_t )-112 ;
700
- }
701
-
702
- Result[2 ] = ((Exponent + 112 ) << 23 ) | (Mantissa << 14 );
703
- }
704
-
705
- return XMLoadFloat3A ( reinterpret_cast <const XMFLOAT3A*>(&Result) );
612
+ XMVECTORF32 v = {
613
+ Scale * float ( pSource->xm ),
614
+ Scale * float ( pSource->ym ),
615
+ Scale * float ( pSource->zm ),
616
+ 1 .0f };
617
+ return v;
706
618
}
707
619
708
620
// ------------------------------------------------------------------------------
@@ -1012,6 +924,31 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDecN4
1012
924
#endif // _XM_VMX128_INTRINSICS_
1013
925
}
1014
926
927
+
928
+ // ------------------------------------------------------------------------------
929
+ _Use_decl_annotations_
930
+ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDecN4_XR
931
+ (
932
+ const XMUDECN4* pSource
933
+ )
934
+ {
935
+ assert (pSource);
936
+
937
+ int32_t ElementX = pSource->v & 0x3FF ;
938
+ int32_t ElementY = (pSource->v >> 10 ) & 0x3FF ;
939
+ int32_t ElementZ = (pSource->v >> 20 ) & 0x3FF ;
940
+
941
+ XMVECTORF32 vResult = {
942
+ (float )(ElementX - 0x180 ) / 510 .0f ,
943
+ (float )(ElementY - 0x180 ) / 510 .0f ,
944
+ (float )(ElementZ - 0x180 ) / 510 .0f ,
945
+ (float )(pSource->v >> 30 ) / 3 .0f
946
+ };
947
+
948
+ return vResult.v ;
949
+ }
950
+
951
+
1015
952
// ------------------------------------------------------------------------------
1016
953
_Use_decl_annotations_
1017
954
inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDec4
@@ -1814,77 +1751,33 @@ inline void XM_CALLCONV PackedVector::XMStoreFloat3SE
1814
1751
{
1815
1752
assert (pDestination);
1816
1753
1817
- __declspec ( align ( 16 )) uint32_t IValue[ 4 ] ;
1818
- XMStoreFloat3A ( reinterpret_cast <XMFLOAT3A*>(&IValue) , V );
1754
+ XMFLOAT3A tmp ;
1755
+ XMStoreFloat3A ( &tmp , V );
1819
1756
1820
- uint32_t Exp[ 3 ] ;
1821
- uint32_t Frac[ 3 ] ;
1757
+ static const float maxf9 = float ( 0x1FF << 7 ) ;
1758
+ static const float minf9 = float ( 1 . f / ( 1 << 16 )) ;
1822
1759
1823
- // X, Y, Z Channels (5-bit exponent, 9-bit mantissa)
1824
- for (uint32_t j=0 ; j < 3 ; ++j)
1825
- {
1826
- uint32_t Sign = IValue[j] & 0x80000000 ;
1827
- uint32_t I = IValue[j] & 0x7FFFFFFF ;
1760
+ float x = (tmp.x >= 0 .f ) ? ( (tmp.x > maxf9) ? maxf9 : tmp.x ) : 0 .f ;
1761
+ float y = (tmp.y >= 0 .f ) ? ( (tmp.y > maxf9) ? maxf9 : tmp.y ) : 0 .f ;
1762
+ float z = (tmp.z >= 0 .f ) ? ( (tmp.z > maxf9) ? maxf9 : tmp.z ) : 0 .f ;
1828
1763
1829
- if ((I & 0x7F800000 ) == 0x7F800000 )
1830
- {
1831
- // INF or NAN
1832
- Exp[j] = 0x1f ;
1833
- if (( I & 0x7FFFFF ) != 0 )
1834
- {
1835
- Frac[j] = ((I>>14 )|(I>>5 )|(I))&0x1ff ;
1836
- }
1837
- else if ( Sign )
1838
- {
1839
- // -INF is clamped to 0 since 3SE is positive only
1840
- Exp[j] = Frac[j] = 0 ;
1841
- }
1842
- }
1843
- else if ( Sign )
1844
- {
1845
- // 3SE is positive only, so clamp to zero
1846
- Exp[j] = Frac[j] = 0 ;
1847
- }
1848
- else if (I > 0x477FC000U )
1849
- {
1850
- // The number is too large, set to max
1851
- Exp[j] = 0x1e ;
1852
- Frac[j] = 0x1ff ;
1853
- }
1854
- else
1855
- {
1856
- if (I < 0x38800000U )
1857
- {
1858
- // The number is too small to be represented as a normalized float9
1859
- // Convert it to a denormalized value.
1860
- uint32_t Shift = 113U - (I >> 23U );
1861
- I = (0x800000U | (I & 0x7FFFFFU )) >> Shift;
1862
- }
1863
- else
1864
- {
1865
- // Rebias the exponent to represent the value as a normalized float9
1866
- I += 0xC8000000U ;
1867
- }
1868
-
1869
- uint32_t T = ((I + 0x1FFFU + ((I >> 14U ) & 1U )) >> 14U )&0x3fffU ;
1764
+ const float max_xy = (x > y) ? x : y;
1765
+ const float max_xyz = (max_xy > z) ? max_xy : z;
1870
1766
1871
- Exp[j] = (T & 0x3E00 ) >> 9 ;
1872
- Frac[j] = T & 0x1ff ;
1873
- }
1874
- }
1767
+ const float maxColor = (max_xyz > minf9) ? max_xyz : minf9;
1768
+
1769
+ union { float f; int32_t i; } fi;
1770
+ fi.f = maxColor;
1771
+ fi.i &= 0xFF800000 ; // cut off fraction
1875
1772
1876
- // Adjust to a shared exponent
1877
- uint32_t T = XMMax ( Exp[0 ], XMMax ( Exp[1 ], Exp[2 ] ) );
1773
+ pDestination->e = (fi.i - 0x37800000 ) >> 23 ;
1878
1774
1879
- Frac[0 ] = Frac[0 ] >> (T - Exp[0 ]);
1880
- Frac[1 ] = Frac[1 ] >> (T - Exp[1 ]);
1881
- Frac[2 ] = Frac[2 ] >> (T - Exp[2 ]);
1775
+ fi.i = 0x83000000 - fi.i ;
1776
+ float ScaleR = fi.f ;
1882
1777
1883
- // Store packed into memory
1884
- pDestination->xm = Frac[0 ];
1885
- pDestination->ym = Frac[1 ];
1886
- pDestination->zm = Frac[2 ];
1887
- pDestination->e = T;
1778
+ pDestination->xm = static_cast <uint32_t >( Internal::round_to_nearest (x * ScaleR) );
1779
+ pDestination->ym = static_cast <uint32_t >( Internal::round_to_nearest (y * ScaleR) );
1780
+ pDestination->zm = static_cast <uint32_t >( Internal::round_to_nearest (z * ScaleR) );
1888
1781
}
1889
1782
1890
1783
// ------------------------------------------------------------------------------
@@ -2260,6 +2153,32 @@ inline void XM_CALLCONV PackedVector::XMStoreUDecN4
2260
2153
#endif // _XM_VMX128_INTRINSICS_
2261
2154
}
2262
2155
2156
+ // ------------------------------------------------------------------------------
2157
+ _Use_decl_annotations_
2158
+ inline void XM_CALLCONV PackedVector::XMStoreUDecN4_XR
2159
+ (
2160
+ XMUDECN4* pDestination,
2161
+ FXMVECTOR V
2162
+ )
2163
+ {
2164
+ assert (pDestination);
2165
+
2166
+ static const XMVECTORF32 Scale = { 510 .0f , 510 .0f , 510 .0f , 3 .0f };
2167
+ static const XMVECTORF32 Bias = { 384 .0f , 384 .0f , 384 .0f , 0 .0f };
2168
+ static const XMVECTORF32 C = { 1023 .f , 1023 .f , 1023 .f , 3 .f };
2169
+
2170
+ XMVECTOR N = XMVectorMultiplyAdd ( V, Scale, Bias );
2171
+ N = XMVectorClamp ( N, g_XMZero, C );
2172
+
2173
+ XMFLOAT4A tmp;
2174
+ XMStoreFloat4A (&tmp, N );
2175
+
2176
+ pDestination->v = ((uint32_t )tmp.w << 30 )
2177
+ | (((uint32_t )tmp.z & 0x3FF ) << 20 )
2178
+ | (((uint32_t )tmp.y & 0x3FF ) << 10 )
2179
+ | (((uint32_t )tmp.x & 0x3FF ));
2180
+ }
2181
+
2263
2182
// ------------------------------------------------------------------------------
2264
2183
_Use_decl_annotations_
2265
2184
inline void XM_CALLCONV PackedVector::XMStoreUDec4
0 commit comments