Skip to content

Commit 284b34f

Browse files
committed
Use a homegrown strpbrk() alternative in url_decode()
I haven't fully tested and/or benchmarked this. I'm just committing this before I end up losing this code; hopefully the fuzzer will find something. I need to write a proper benchmark soon, though. Maybe even make an AVX2 version, too.
1 parent 8685940 commit 284b34f

File tree

1 file changed

+73
-1
lines changed

1 file changed

+73
-1
lines changed

src/lib/lwan-request.c

+73-1
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,78 @@ static ALWAYS_INLINE char *identify_http_method(struct lwan_request *request,
266266
return NULL;
267267
}
268268

269+
/* has_zero() routines stolen from the Bit Twiddling Hacks page */
270+
/*
 * Tests whether any of the eight bytes packed in `v` is zero.
 *
 * Returns 0 when no byte of `v` is zero.  Otherwise the result has 0x80 set
 * in the byte lane of the lowest-order zero byte; lanes above that one may
 * also be flagged spuriously (a 0x01 byte directly above a zero byte picks up
 * the subtraction's borrow), so only the lowest set flag is exact.
 */
static ALWAYS_INLINE uint64_t has_zero64(uint64_t v)
{
    const uint64_t ones = 0x0101010101010101ull;
    const uint64_t high_bits = 0x8080808080808080ull;
    const uint64_t borrowed = v - ones;

    return borrowed & ~v & high_bits;
}
274+
275+
/*
 * 32-bit counterpart of the zero-byte test: checks whether any of the four
 * bytes packed in `v` is zero.
 *
 * Returns 0 when no byte of `v` is zero.  Otherwise the result has 0x80 set
 * in the byte lane of the lowest-order zero byte; higher lanes may be flagged
 * spuriously, so only the lowest set flag is exact.
 *
 * The parameter is a 32-bit word (it used to be declared uint64_t), so the
 * whole computation stays in 32 bits and nothing is implicitly narrowed on
 * return.
 */
static ALWAYS_INLINE uint32_t has_zero32(uint32_t v)
{
    return (v - 0x01010101u) & ~v & 0x80808080u;
}
279+
280+
/*
 * Finds the first '%' or '+' in the NUL-terminated string `input`, scanning
 * eight bytes per iteration (a stand-in for strpbrk(input, "+%")).
 *
 * Returns a pointer to the first such character, or NULL if the terminator
 * is reached before either character is seen.
 *
 * NOTE(review): every iteration loads a full 8-byte word through
 * string_as_uint64(), so up to 7 bytes past the terminator may be read;
 * presumably the caller's buffer has that much readable slack -- confirm.
 * The byte-index computation (count-trailing-zeros / 8) also assumes the
 * load places the first byte in the least significant lane (little-endian)
 * -- confirm for other targets.
 */
static char *find_pct_or_plus(const char *input)
{
    const uint64_t mask_plus = '+' * 0x0101010101010101ull;
    const uint64_t mask_pct = '%' * 0x0101010101010101ull;
    char *str = (char *)input;

    while (true) {
        const uint64_t v = string_as_uint64(str);
        /* XOR turns bytes equal to the wanted character into zero bytes. */
        const uint64_t match =
            has_zero64(v ^ mask_plus) | has_zero64(v ^ mask_pct);
        const uint64_t zero = has_zero64(v);

        if (match | zero) {
            /*
             * Only the lowest set flag of each mask is exact, so the order
             * of "first match" versus "terminator" must be decided from the
             * lowest flags.  Comparing the masks by magnitude looks at the
             * highest flags instead, and would report a '+'/'%' that lies
             * after the NUL inside the same word.
             */
            if (match &&
                (!zero || __builtin_ctzll(match) < __builtin_ctzll(zero))) {
                return str + __builtin_ctzll(match) / 8;
            }

            /* Terminator comes first: nothing to decode in this string. */
            return NULL;
        }

        str += 8;
    }
}
340+
269341
__attribute__((nonnull(1))) static ssize_t url_decode(char *str)
270342
{
271343
static const unsigned char tbl1[256] = {
@@ -286,7 +358,7 @@ __attribute__((nonnull(1))) static ssize_t url_decode(char *str)
286358
const char *inptr = str;
287359
char *outptr = str;
288360

289-
for (char *p = strpbrk(inptr, "+%"); p; p = strpbrk(inptr, "+%")) {
361+
for (char *p = find_pct_or_plus(inptr); p; p = find_pct_or_plus(inptr)) {
290362
const ptrdiff_t diff = p - inptr;
291363
if (diff)
292364
outptr = mempmove(outptr, inptr, (size_t)diff);

0 commit comments

Comments
 (0)