Skip to content

Commit 292d7ac

Browse files
committed
Reduce memory reads when looking up MIME type from extension
Since, by construction, the extensions are stored sorted and sequentially in memory, when both strings being compared are found in the blob generated by mimegen we can compare just their pointers; otherwise, we fall back to two 64-bit reads, which replace the indirect call to strncmp(). The extensions are now stored in big-endian rather than host-endian byte order, making it possible to compare them correctly as 64-bit integers. As a side effect, Brotli liked the input data a bit more and was able to save a bit over 50 bytes; it's not much, but, hey, savings are savings.
1 parent f8b41ed commit 292d7ac

File tree

2 files changed

+37
-27
lines changed

2 files changed

+37
-27
lines changed

src/bin/tools/mimegen.c

+12-18
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <assert.h>
2121
#include <ctype.h>
2222
#include <errno.h>
23+
#include <endian.h>
2324
#include <fcntl.h>
2425
#include <stdint.h>
2526
#include <stdio.h>
@@ -67,20 +68,9 @@ output_append_full(struct output *output, const char *str, size_t str_len)
6768
return 0;
6869
}
6970

70-
static int output_append_padded(struct output *output, const char *str)
71+
static int output_append_u64(struct output *output, uint64_t value)
7172
{
72-
size_t str_len = strlen(str);
73-
74-
assert(str_len <= 8);
75-
76-
int r = output_append_full(output, str, str_len);
77-
if (r < 0)
78-
return r;
79-
80-
if (str_len != 8)
81-
return output_append_full(output, "\0\0\0\0\0\0\0\0", 8 - str_len);
82-
83-
return 0;
73+
return output_append_full(output, (char *)&value, 8);
8474
}
8575

8676
static int output_append(struct output *output, const char *str)
@@ -316,14 +306,18 @@ int main(int argc, char *argv[])
316306
return 1;
317307
}
318308
for (i = 0; i < hash_get_count(ext_mime); i++) {
319-
char ext_lower[9] = {0};
309+
uint64_t ext_lower = 0;
320310

321-
strncpy(ext_lower, exts[i], 8);
311+
#pragma GCC diagnostic push
312+
#pragma GCC diagnostic ignored "-Wstringop-truncation"
313+
/* See lwan_determine_mime_type_for_file_name() in lwan-tables.c */
314+
strncpy((char *)&ext_lower, exts[i], 8);
315+
#pragma GCC diagnostic pop
322316

323-
for (char *p = ext_lower; *p; p++)
324-
*p &= ~0x20;
317+
ext_lower &= ~0x2020202020202020ull;
318+
ext_lower = htobe64(ext_lower);
325319

326-
if (output_append_padded(&output, ext_lower) < 0) {
320+
if (output_append_u64(&output, ext_lower) < 0) {
327321
fprintf(stderr, "Could not append to output\n");
328322
fclose(fp);
329323
return 1;

src/lib/lwan-tables.c

+25-9
Original file line numberDiff line numberDiff line change
@@ -120,13 +120,29 @@ LWAN_SELF_TEST(status_codes)
120120
#undef ASSERT_STATUS
121121
}
122122

123-
static int
124-
compare_mime_entry(const void *a, const void *b)
123+
static int compare_mime_entry(const void *a, const void *b)
125124
{
126-
const char *exta = (const char *)a;
127-
const char *extb = (const char *)b;
125+
static const uintptr_t begin = (uintptr_t)uncompressed_mime_entries;
126+
static const uintptr_t end = begin + 8 * MIME_ENTRIES;
127+
const uintptr_t pa = (uintptr_t)a;
128+
const uintptr_t pb = (uintptr_t)b;
129+
uint64_t exta;
130+
uint64_t extb;
131+
132+
if (end - pa >= begin && end - pb >= begin) {
133+
/* If both keys are within the uncompressed mime entries range, then
134+
* we don't need to load from memory, just compare the pointers: they're
135+
* all stored sequentially in memory by construction. */
136+
exta = pa;
137+
extb = pb;
138+
} else {
139+
/* These are stored in big-endian so the comparison below works
140+
* as expected. */
141+
exta = string_as_uint64((const char *)a);
142+
extb = string_as_uint64((const char *)b);
143+
}
128144

129-
return strncmp(exta, extb, 8);
145+
return (exta > extb) - (exta < extb);
130146
}
131147

132148
const char *
@@ -147,19 +163,19 @@ lwan_determine_mime_type_for_file_name(const char *file_name)
147163
}
148164

149165
if (LIKELY(*last_dot)) {
150-
uint64_t key;
166+
uint64_t key = 0;
151167
const unsigned char *extension;
152168

153169
#pragma GCC diagnostic push
154170
#pragma GCC diagnostic ignored "-Wstringop-truncation"
155171
/* Data is stored with NULs on strings up to 7 chars, and no NULs
156172
* for 8-char strings, because that's implicit. So truncation is
157-
* intentional here: comparison in compare_mime_entry() uses
158-
* strncmp(..., 8), so even if NUL isn't present, it'll stop at the
159-
* right place. */
173+
* intentional here: comparison in compare_mime_entry() always loads
174+
* 8 bytes per extension. */
160175
strncpy((char *)&key, last_dot + 1, 8);
161176
#pragma GCC diagnostic pop
162177
key &= ~0x2020202020202020ull;
178+
key = htobe64(key);
163179

164180
extension = bsearch(&key, uncompressed_mime_entries, MIME_ENTRIES, 8,
165181
compare_mime_entry);

0 commit comments

Comments
 (0)