@@ -157,14 +157,9 @@ static void unmask(char *msg, size_t msg_len, char mask[static 4])
157
157
#if defined(__AVX2__ )
158
158
const __m256i mask256 =
159
159
_mm256_castps_si256 (_mm256_broadcast_ss ((const float * )mask ));
160
- if (msg_len >= 32 ) {
161
- do {
162
- __m256i v = _mm256_lddqu_si256 ((const __m256i * )msg );
163
- _mm256_storeu_si256 ((__m256i * )msg , _mm256_xor_si256 (v , mask256 ));
164
-
165
- msg += 32 ;
166
- msg_len -= 32 ;
167
- } while (msg_len >= 32 );
160
+ for (; msg_len >= 32 ; msg_len -= 32 , msg += 32 ) {
161
+ __m256i v = _mm256_lddqu_si256 ((const __m256i * )msg );
162
+ _mm256_storeu_si256 ((__m256i * )msg , _mm256_xor_si256 (v , mask256 ));
168
163
}
169
164
#endif
170
165
@@ -176,40 +171,33 @@ static void unmask(char *msg, size_t msg_len, char mask[static 4])
176
171
#else
177
172
const __m128i mask128 = _mm_loadu_si128 ((const __m128i * )mask );
178
173
#endif
179
- if (msg_len >= 16 ) {
180
- do {
174
+ for (; msg_len >= 16 ; msg_len -= 16 , msg += 16 ) {
181
175
#if defined(__SSE3__ )
182
- __m128i v = _mm_lddqu_si128 ((const __m128i * )msg );
176
+ __m128i v = _mm_lddqu_si128 ((const __m128i * )msg );
183
177
#else
184
- __m128i v = _mm_loadu_si128 ((const __m128i * )msg );
178
+ __m128i v = _mm_loadu_si128 ((const __m128i * )msg );
185
179
#endif
186
180
187
- _mm_storeu_si128 ((__m128i * )msg , _mm_xor_si128 (v , mask128 ));
188
-
189
- msg += 16 ;
190
- msg_len -= 16 ;
191
- } while (msg_len >= 16 );
181
+ _mm_storeu_si128 ((__m128i * )msg , _mm_xor_si128 (v , mask128 ));
192
182
}
193
183
#endif
194
184
195
- if (sizeof (void * ) == 8 ) {
196
- if (msg_len >= 8 ) {
185
+ if (sizeof (void * ) == 8 && msg_len >= 8 ) {
197
186
#if defined(__SSE_4_1__ )
198
- /* We're far away enough from the AVX2 path that it's
199
- * probably better to use mask128 instead of mask256
200
- * here. */
201
- const __int64 mask64 = _mm_extract_epi64 (mask128 , 0 );
187
+ /* We're far away enough from the AVX2 path that it's
188
+ * probably better to use mask128 instead of mask256
189
+ * here. */
190
+ const __int64 mask64 = _mm_extract_epi64 (mask128 , 0 );
202
191
#else
203
- const uint32_t mask32 = string_as_uint32 (mask );
204
- const uint64_t mask64 = (uint64_t )mask32 << 32 | (uint64_t )mask32 ;
192
+ const uint32_t mask32 = string_as_uint32 (mask );
193
+ const uint64_t mask64 = (uint64_t )mask32 << 32 | (uint64_t )mask32 ;
205
194
#endif
206
- do {
207
- uint64_t v = string_as_uint64 (msg );
208
- v ^= (uint64_t )mask64 ;
209
- msg = mempcpy (msg , & v , sizeof (v ));
210
- msg_len -= 8 ;
211
- } while (msg_len >= 8 );
212
- }
195
+ do {
196
+ uint64_t v = string_as_uint64 (msg );
197
+ v ^= (uint64_t )mask64 ;
198
+ msg = mempcpy (msg , & v , sizeof (v ));
199
+ msg_len -= 8 ;
200
+ } while (msg_len >= 8 );
213
201
}
214
202
215
203
if (msg_len >= 4 ) {
0 commit comments