From 5365a1c048b9070e572394670383e35dc4e555b2 Mon Sep 17 00:00:00 2001 From: Dmitry Arkhipov Date: Thu, 16 Nov 2023 21:39:07 +0300 Subject: [PATCH 1/5] different implementation of parse_unsigned --- include/boost/json/detail/sse2.hpp | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/include/boost/json/detail/sse2.hpp b/include/boost/json/detail/sse2.hpp index 06657bc18..8da1ddede 100644 --- a/include/boost/json/detail/sse2.hpp +++ b/include/boost/json/detail/sse2.hpp @@ -317,27 +317,14 @@ inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noex { while( n >= 4 ) { - // faster on on clang for x86, - // slower on gcc -#ifdef __clang__ - r = r * 10 + p[0] - '0'; - r = r * 10 + p[1] - '0'; - r = r * 10 + p[2] - '0'; - r = r * 10 + p[3] - '0'; -#else uint32_t v; std::memcpy( &v, p, 4 ); endian::native_to_little_inplace(v); + v = (v & 0x0F0F0F0F) * 2561 >> 8; + v = (v & 0x00FF00FF) * 6553601 >> 16; - v -= 0x30303030; + r = r * 10000 + v; - unsigned w0 = v & 0xFF; - unsigned w1 = (v >> 8) & 0xFF; - unsigned w2 = (v >> 16) & 0xFF; - unsigned w3 = (v >> 24); - - r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3; -#endif p += 4; n -= 4; } @@ -359,6 +346,7 @@ inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noex r = r * 10 + p[2] - '0'; break; } + return r; } From c40fe040c79dc78341ea61ed94a839f1a1700901 Mon Sep 17 00:00:00 2001 From: Dmitry Arkhipov Date: Wed, 13 Mar 2024 14:32:53 +0300 Subject: [PATCH 2/5] parse by 8 digits --- include/boost/json/detail/sse2.hpp | 42 +++++++++++++++++------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/include/boost/json/detail/sse2.hpp b/include/boost/json/detail/sse2.hpp index 8da1ddede..0bee0cd99 100644 --- a/include/boost/json/detail/sse2.hpp +++ b/include/boost/json/detail/sse2.hpp @@ -315,7 +315,20 @@ inline int count_digits( char const* p ) noexcept inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept { - while( n >= 4 ) + char const* e = p + n; + + if( n & 1 ) + { + r = r * 10 + p[0] - '0'; + ++p; + } + if( n & 2 ) + { + r = r * 10 + p[0] - '0'; + r = r * 10 + p[1] - '0'; + p += 2; + } + if( n & 4 ) { uint32_t v; std::memcpy( &v, p, 4 ); @@ -324,27 +337,20 @@ inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noex v = (v & 0x00FF00FF) * 6553601 >> 16; r = r * 10000 + v; - p += 4; - n -= 4; } - switch( n ) + while( p != e ) { - case 0: - break; - case 1: - r = r * 10 + p[0] - '0'; - break; - case 2: - r = r * 10 + p[0] - '0'; - r = r * 10 + p[1] - '0'; - break; - case 3: - r = r * 10 + p[0] - '0'; - r = r * 10 + p[1] - '0'; - r = r * 10 + p[2] - '0'; - break; + uint64_t v; + std::memcpy( &v, p, 8 ); + endian::native_to_little_inplace(v); + v = (v & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + v = (v & 0x00FF00FF00FF00FF) * 6553601 >> 16; + v = (v & 0x0000FFFF0000FFFF) * 42949672960001 >> 32; + + r = r * 100000000 + v; + p += 8; } return r; From 6cb8c490939d280fa637aca6ea6c2d196f86fa10 Mon Sep 17 00:00:00 2001 From: Dmitry Arkhipov Date: Wed, 13 Mar 2024 17:27:46 +0300 Subject: [PATCH 3/5] temp --- include/boost/json/detail/sse2.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/boost/json/detail/sse2.hpp b/include/boost/json/detail/sse2.hpp index 0bee0cd99..106f8485a 100644 --- a/include/boost/json/detail/sse2.hpp +++ b/include/boost/json/detail/sse2.hpp @@ -328,7 +328,11 @@ inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noex r = r * 10 + p[1] - '0'; p += 2; } +#if BOOST_JSON_ARCH == 64 if( n & 4 ) +#else + while( p != e ) +#endif { uint32_t v; std::memcpy( &v, p, 4 ); @@ -340,6 +344,7 @@ inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noex p += 4; } +#if BOOST_JSON_ARCH == 64 while( p != e ) { uint64_t v; @@ -352,6 +357,7 @@ inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noex r = r * 100000000 + v; p += 8; } +#endif return r; } From f867efaf864c2145a169e3223a57c9121eea22eb Mon Sep 17 00:00:00 2001 From: Dmitry Arkhipov Date: Mon, 25 Mar 2024 18:25:11 +0300 Subject: [PATCH 4/5] parse_unsigned uses SSE2 --- include/boost/json/detail/sse2.hpp | 62 +++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/include/boost/json/detail/sse2.hpp b/include/boost/json/detail/sse2.hpp index 106f8485a..7f6e84cc3 100644 --- a/include/boost/json/detail/sse2.hpp +++ b/include/boost/json/detail/sse2.hpp @@ -313,39 +313,63 @@ inline int count_digits( char const* p ) noexcept // parse_unsigned +inline +uint64_t +parse_four_digits(void const* p) noexcept +{ +#ifdef BOOST_JSON_USE_SSE2 + auto const c0 = _mm_cvtsi32_si128(0x0F0F0F0F); + auto v0 = _mm_and_si128(_mm_loadu_si32(p), c0); + + // auto const c1 = _mm_cvtsi64_si128(0x0001'000A'0064'03E8); + auto const c1 = _mm_setr_epi16(1000, 100, 10, 1, 0, 0, 0, 0); + + auto v1 = _mm_unpacklo_epi8( v0, _mm_setzero_si128() ); + auto v2 = _mm_madd_epi16(v1, c1); + auto v3 = _mm_srli_epi64(v2, 32); + auto v4 = _mm_add_epi32(v2, v3); + return static_cast( _mm_cvtsi128_si32(v4) ); +#else // BOOST_JSON_USE_SSE2 + uint32_t v; + std::memcpy( &v, p, 4 ); + endian::native_to_little_inplace(v); + v = (v & 0x0F0F0F0F) * 2561 >> 8; + v = (v & 0x00FF00FF) * 6553601 >> 16; + return v; +#endif // BOOST_JSON_USE_SSE2 +} + inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept { - char const* e = p + n; + auto const e = p + n; - if( n & 1 ) + if(n & 2) { - r = r * 10 + p[0] - '0'; - ++p; + uint32_t v0 = 0; + std::memcpy( &v0, p, 2 ); + endian::native_to_little_inplace(v0); + v0 = v0 & 0x0F0F; + r = (r * 100) + ((v0 & 0xFF) * 10) + (v0 >> 8); + p += 2; } - if( n & 2 ) + if(n & 1) { - r = r * 10 + p[0] - '0'; - r = r * 10 + p[1] - '0'; - p += 2; + r = (r * 10) + (p[0] & 0x0F); + p += 1; } -#if BOOST_JSON_ARCH == 64 + +#if defined(BOOST_JSON_USE_SSE2) || (BOOST_JSON_ARCH == 64) + while(p < e) +#else // !defined(BOOST_JSON_USE_SSE2) && (BOOST_JSON_ARCH == 32) if( n & 4 ) -#else - while( p != e ) #endif { - uint32_t v; - std::memcpy( &v, p, 4 ); - endian::native_to_little_inplace(v); - v = (v & 0x0F0F0F0F) * 2561 >> 8; - v = (v & 0x00FF00FF) * 6553601 >> 16; - - r = r * 10000 + v; + r = r * 10000 + parse_four_digits(p); p += 4; } #if BOOST_JSON_ARCH == 64 - while( p != e ) + while( p < e ) { uint64_t v; std::memcpy( &v, p, 8 ); From 23eba6de9b0cee43bbb3ed4cf4b4aafd25a1b188 Mon Sep 17 00:00:00 2001 From: Dmitry Arkhipov Date: Mon, 25 Mar 2024 20:46:15 +0300 Subject: [PATCH 5/5] intrinsics are used only where available --- include/boost/json/detail/sse2.hpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/boost/json/detail/sse2.hpp b/include/boost/json/detail/sse2.hpp index 7f6e84cc3..f3a465cc4 100644 --- a/include/boost/json/detail/sse2.hpp +++ b/include/boost/json/detail/sse2.hpp @@ -318,6 +318,17 @@ uint64_t parse_four_digits(void const* p) noexcept { #ifdef BOOST_JSON_USE_SSE2 +# if defined(__GNUC__) && (__GNUC__ >= 11 ) +# define BOOST_JSON_USE_INTRINSICS +# elif defined(__clang__) && (BOOST_CLANG_VERSION >= 80000 ) +# define BOOST_JSON_USE_INTRINSICS +# elif defined(_MSC_VER) +# define BOOST_JSON_USE_INTRINSICS +# endif +#endif + +#ifdef BOOST_JSON_USE_INTRINSICS +# undef BOOST_JSON_USE_INTRINSICS auto const c0 = _mm_cvtsi32_si128(0x0F0F0F0F); auto v0 = _mm_and_si128(_mm_loadu_si32(p), c0); @@ -329,14 +340,14 @@ parse_four_digits(void const* p) noexcept auto v3 = _mm_srli_epi64(v2, 32); auto v4 = _mm_add_epi32(v2, v3); return static_cast( _mm_cvtsi128_si32(v4) ); -#else // BOOST_JSON_USE_SSE2 +#else // BOOST_JSON_USE_INTRINSICS uint32_t v; std::memcpy( &v, p, 4 ); endian::native_to_little_inplace(v); v = (v & 0x0F0F0F0F) * 2561 >> 8; v = (v & 0x00FF00FF) * 6553601 >> 16; return v; -#endif // BOOST_JSON_USE_SSE2 +#endif // BOOST_JSON_USE_INTRINSICS } inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept