Skip to content

Commit a842cb2

Browse files
committed
Use AVX2 when unmasking WebSocket frames
The unmasking code already used SSE2, but let's use a wider vector (AVX2) when it is available so that unmasking finishes more quickly.
1 parent e68709f commit a842cb2

File tree

1 file changed

+24
-4
lines changed

1 file changed

+24
-4
lines changed

src/lib/lwan-websocket.c

+24-4
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include <sys/socket.h>
2727

2828
#if defined(__x86_64__)
29-
#include <emmintrin.h>
29+
#include <immintrin.h>
3030
#endif
3131

3232
#include "lwan-io-wrappers.h"
@@ -156,19 +156,39 @@ static void unmask(char *msg, size_t msg_len, char mask[static 4])
156156
if (sizeof(void *) == 8) {
157157
const uint64_t mask64 = (uint64_t)mask32 << 32 | mask32;
158158

159-
#if defined(__x86_64__)
159+
#if defined(__AVX2__)
160+
const size_t len256 = msg_len / 32;
161+
if (len256) {
162+
const __m256i mask256 =
163+
_mm256_setr_epi64x((int64_t)mask64, (int64_t)mask64,
164+
(int64_t)mask64, (int64_t)mask64);
165+
for (size_t i = 0; i < len256; i++) {
166+
__m256i v = _mm256_loadu_si256((__m256i *)msg);
167+
_mm256_storeu_si256((__m256i *)msg,
168+
_mm256_xor_si256(v, mask256));
169+
msg += 32;
170+
}
171+
172+
msg_len = (size_t)(msg_end - msg);
173+
}
174+
#endif
175+
176+
#if defined(__SSE2__)
160177
const size_t len128 = msg_len / 16;
161178
if (len128) {
162-
const __m128i mask128 = _mm_setr_epi64((__m64)mask64, (__m64)mask64);
179+
const __m128i mask128 =
180+
_mm_setr_epi64((__m64)mask64, (__m64)mask64);
163181
for (size_t i = 0; i < len128; i++) {
164182
__m128i v = _mm_loadu_si128((__m128i *)msg);
165183
_mm_storeu_si128((__m128i *)msg, _mm_xor_si128(v, mask128));
166184
msg += 16;
167185
}
186+
187+
msg_len = (size_t)(msg_end - msg);
168188
}
169189
#endif
170190

171-
const size_t len64 = (size_t)((msg_end - msg) / 8);
191+
const size_t len64 = msg_len / 8;
172192
for (size_t i = 0; i < len64; i++) {
173193
uint64_t v = string_as_uint64(msg);
174194
v ^= mask64;

0 commit comments

Comments
 (0)