|
26 | 26 | #include <sys/socket.h>
|
27 | 27 |
|
28 | 28 | #if defined(__x86_64__)
|
29 |
| -#include <emmintrin.h> |
| 29 | +#include <immintrin.h> |
30 | 30 | #endif
|
31 | 31 |
|
32 | 32 | #include "lwan-io-wrappers.h"
|
@@ -156,19 +156,39 @@ static void unmask(char *msg, size_t msg_len, char mask[static 4])
|
156 | 156 | if (sizeof(void *) == 8) {
|
157 | 157 | const uint64_t mask64 = (uint64_t)mask32 << 32 | mask32;
|
158 | 158 |
|
159 |
| -#if defined(__x86_64__) |
| 159 | +#if defined(__AVX2__) |
| 160 | + const size_t len256 = msg_len / 32; |
| 161 | + if (len256) { |
| 162 | + const __m256i mask256 = |
| 163 | + _mm256_setr_epi64x((int64_t)mask64, (int64_t)mask64, |
| 164 | + (int64_t)mask64, (int64_t)mask64); |
| 165 | + for (size_t i = 0; i < len256; i++) { |
| 166 | + __m256i v = _mm256_loadu_si256((__m256i *)msg); |
| 167 | + _mm256_storeu_si256((__m256i *)msg, |
| 168 | + _mm256_xor_si256(v, mask256)); |
| 169 | + msg += 32; |
| 170 | + } |
| 171 | + |
| 172 | + msg_len = (size_t)(msg_end - msg); |
| 173 | + } |
| 174 | +#endif |
| 175 | + |
| 176 | +#if defined(__SSE2__) |
160 | 177 | const size_t len128 = msg_len / 16;
|
161 | 178 | if (len128) {
|
162 |
| - const __m128i mask128 = _mm_setr_epi64((__m64)mask64, (__m64)mask64); |
| 179 | + const __m128i mask128 = |
| 180 | + _mm_setr_epi64((__m64)mask64, (__m64)mask64); |
163 | 181 | for (size_t i = 0; i < len128; i++) {
|
164 | 182 | __m128i v = _mm_loadu_si128((__m128i *)msg);
|
165 | 183 | _mm_storeu_si128((__m128i *)msg, _mm_xor_si128(v, mask128));
|
166 | 184 | msg += 16;
|
167 | 185 | }
|
| 186 | + |
| 187 | + msg_len = (size_t)(msg_end - msg); |
168 | 188 | }
|
169 | 189 | #endif
|
170 | 190 |
|
171 |
| - const size_t len64 = (size_t)((msg_end - msg) / 8); |
| 191 | + const size_t len64 = msg_len / 8; |
172 | 192 | for (size_t i = 0; i < len64; i++) {
|
173 | 193 | uint64_t v = string_as_uint64(msg);
|
174 | 194 | v ^= mask64;
|
|
0 commit comments