Skip to content

Commit 00a4c53

Browse files
committed
More simplifications to the websocket unmasking code
1 parent f3e4036 commit 00a4c53

File tree

1 file changed

+20
-32
lines changed

1 file changed

+20
-32
lines changed

src/lib/lwan-websocket.c

+20-32
Original file line number | Diff line number | Diff line change
@@ -157,14 +157,9 @@ static void unmask(char *msg, size_t msg_len, char mask[static 4])
157157
#if defined(__AVX2__)
158158
const __m256i mask256 =
159159
_mm256_castps_si256(_mm256_broadcast_ss((const float *)mask));
160-
if (msg_len >= 32) {
161-
do {
162-
__m256i v = _mm256_lddqu_si256((const __m256i *)msg);
163-
_mm256_storeu_si256((__m256i *)msg, _mm256_xor_si256(v, mask256));
164-
165-
msg += 32;
166-
msg_len -= 32;
167-
} while (msg_len >= 32);
160+
for (; msg_len >= 32; msg_len -= 32, msg += 32) {
161+
__m256i v = _mm256_lddqu_si256((const __m256i *)msg);
162+
_mm256_storeu_si256((__m256i *)msg, _mm256_xor_si256(v, mask256));
168163
}
169164
#endif
170165

@@ -176,40 +171,33 @@ static void unmask(char *msg, size_t msg_len, char mask[static 4])
176171
#else
177172
const __m128i mask128 = _mm_loadu_si128((const __m128i *)mask);
178173
#endif
179-
if (msg_len >= 16) {
180-
do {
174+
for (; msg_len >= 16; msg_len -= 16, msg += 16) {
181175
#if defined(__SSE3__)
182-
__m128i v = _mm_lddqu_si128((const __m128i *)msg);
176+
__m128i v = _mm_lddqu_si128((const __m128i *)msg);
183177
#else
184-
__m128i v = _mm_loadu_si128((const __m128i *)msg);
178+
__m128i v = _mm_loadu_si128((const __m128i *)msg);
185179
#endif
186180

187-
_mm_storeu_si128((__m128i *)msg, _mm_xor_si128(v, mask128));
188-
189-
msg += 16;
190-
msg_len -= 16;
191-
} while (msg_len >= 16);
181+
_mm_storeu_si128((__m128i *)msg, _mm_xor_si128(v, mask128));
192182
}
193183
#endif
194184

195-
if (sizeof(void *) == 8) {
196-
if (msg_len >= 8) {
185+
if (sizeof(void *) == 8 && msg_len >= 8) {
197186
#if defined(__SSE_4_1__)
198-
/* We're far away enough from the AVX2 path that it's
199-
* probably better to use mask128 instead of mask256
200-
* here. */
201-
const __int64 mask64 = _mm_extract_epi64(mask128, 0);
187+
/* We're far away enough from the AVX2 path that it's
188+
* probably better to use mask128 instead of mask256
189+
* here. */
190+
const __int64 mask64 = _mm_extract_epi64(mask128, 0);
202191
#else
203-
const uint32_t mask32 = string_as_uint32(mask);
204-
const uint64_t mask64 = (uint64_t)mask32 << 32 | (uint64_t)mask32;
192+
const uint32_t mask32 = string_as_uint32(mask);
193+
const uint64_t mask64 = (uint64_t)mask32 << 32 | (uint64_t)mask32;
205194
#endif
206-
do {
207-
uint64_t v = string_as_uint64(msg);
208-
v ^= (uint64_t)mask64;
209-
msg = mempcpy(msg, &v, sizeof(v));
210-
msg_len -= 8;
211-
} while (msg_len >= 8);
212-
}
195+
do {
196+
uint64_t v = string_as_uint64(msg);
197+
v ^= (uint64_t)mask64;
198+
msg = mempcpy(msg, &v, sizeof(v));
199+
msg_len -= 8;
200+
} while (msg_len >= 8);
213201
}
214202

215203
if (msg_len >= 4) {

0 commit comments

Comments
 (0)