Skip to content
This repository was archived by the owner on Mar 22, 2024. It is now read-only.

Commit b537640

Browse files
committed
Hot-path acceleration: one-pass scan and vectorization
1 parent 63eb62a commit b537640

File tree

4 files changed

+173
-18
lines changed

4 files changed

+173
-18
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ MISC_PATH = $(PREFIX)/share/afl
2727
PROGS = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
2828
SH_PROGS = afl-plot afl-cmin afl-whatsup
2929

30-
CFLAGS ?= -O3 -funroll-loops
30+
CFLAGS ?= -O3 -march=native
3131
CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
3232
-DAFL_PATH=\"$(HELPER_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\" \
3333
-DBIN_PATH=\"$(BIN_PATH)\"

afl-fuzz.c

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,22 +1085,6 @@ static inline u8 has_new_bits(u8* virgin_map) {
10851085
}
10861086

10871087

1088-
/* A combination of classify_counts and has_new_bits. If 0 is returned, then the
1089-
* trace bits are kept as-is. Otherwise, the trace bits are overwritten with
1090-
* classified values.
1091-
*
1092-
* This accelerates the processing: in most cases, no interesting behavior
1093-
* happen, and the trace bits will be discarded soon. This function optimizes
1094-
* for such cases: one-pass scan on trace bits without modifying anything. Only
1095-
* on rare cases it fall backs to the slow path: classify_counts() first, then
1096-
* return has_new_bits(). */
1097-
1098-
static inline u8 has_new_bits_unclassified(u8* virgin_map) {
1099-
classify_counts(trace_bits); // TODO
1100-
return has_new_bits(virgin_map);
1101-
}
1102-
1103-
11041088
/* Get rid of shared memory (atexit handler). */
11051089

11061090
static void remove_shm(void) {
@@ -3044,7 +3028,19 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) {
30443028
/* Keep only if there are new bits in the map, add to queue for
30453029
future fuzzing, etc. */
30463030

3047-
if (!(hnb = has_new_bits_unclassified(virgin_bits))) {
3031+
3032+
/* A combination of classify_counts and has_new_bits. If 0 is returned, then
3033+
* the trace bits are kept as-is. Otherwise, the trace bits are overwritten
3034+
* with classified values.
3035+
*
3036+
* This accelerates the processing: in most cases, no interesting behavior
3037+
* happens, and the trace bits will be discarded soon. This function
3038+
* optimizes for such cases: one-pass scan on trace bits without modifying
3039+
* anything. Only in rare cases does it fall back to the slow path:
3040+
* classify_counts() first, then return has_new_bits(). */
3041+
hnb = has_new_bits_unclassified(virgin_bits);
3042+
3043+
if (!hnb) {
30483044
if (crash_mode) total_crashes++;
30493045
return 0;
30503046
}

coverage-32.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,50 @@ static inline void discover_word(u8* ret, u32* current, u32* virgin) {
8484
*virgin &= ~*current;
8585
}
8686
}
87+
88+
89+
#define PACK_SIZE 16
90+
static inline const u32* skim(const u32* virgin, const u32* current, const u32* current_end) {
91+
92+
for (; current != current_end; virgin += 4, current += 4) {
93+
94+
if (current[0] && classify_word(current[0]) & virgin[0]) return &current[0];
95+
if (current[1] && classify_word(current[1]) & virgin[1]) return &current[1];
96+
if (current[2] && classify_word(current[2]) & virgin[2]) return &current[2];
97+
if (current[3] && classify_word(current[3]) & virgin[3]) return &current[3];
98+
99+
}
100+
101+
return current_end;
102+
}
103+
104+
105+
/* A combination of classify_counts() and has_new_bits(). If 0 is returned,
   the trace bits are kept as-is (still unclassified); otherwise only the
   packs that skim() flagged are classified in place and merged into the
   virgin map via discover_word(). This optimizes the common case -- no
   interesting behavior -- into a single read-only scan. The return value
   follows has_new_bits() semantics as produced by discover_word(). */
static inline u8 has_new_bits_unclassified(u8* virgin_map) {
  u32* virgin = (u32*)virgin_map;
  u32* current = (u32*)trace_bits;
  u32* current_end = (u32*)(trace_bits + MAP_SIZE);

  u8 ret = 0;
  while ((current = (u32*)skim(virgin, current, current_end)) != current_end) {
    /* Compute the word offset inside current pack.
       NOTE(review): this uses the low address bits of `current`, so it
       assumes trace_bits is PACK_SIZE (16-byte) aligned -- confirm the
       shm buffer's alignment. */
    u32 offset = ((uintptr_t)current & (PACK_SIZE - 1)) / 4;
    /* Rebase onto the word of the virgin map that mirrors `current`. */
    virgin = (u32*)((u8*)current - trace_bits + virgin_map);

    /* Classify one trace word in place and fold it into the virgin map,
       then advance both cursors. No break: the cases deliberately fall
       through so the remainder of the pack is finished and the next
       skim() call resumes on a pack boundary. */
#define UNROLL(x) \
  case x: \
    if (*current) { \
      *current = classify_word(*current); \
      discover_word(&ret, current, virgin); \
    } \
    ++current, ++virgin;

    /* Ensure the alignment of the next iteration. */
    switch (offset) {
      UNROLL(0) UNROLL(1) UNROLL(2) UNROLL(3)
    }

#undef UNROLL
  }

  return ret;
}

coverage-64.h

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#include "config.h"
22
#include "types.h"
33

4+
#if (defined(__AVX512F__) && defined(__AVX512DQ__)) || defined(__AVX2__)
5+
# include <immintrin.h>
6+
#endif
47

58
static inline u64 classify_word(u64 word) {
69

@@ -94,3 +97,112 @@ static inline void discover_word(u8* ret, u64* current, u64* virgin) {
9497
}
9598

9699
}
100+
101+
102+
#if defined(__AVX512F__) && defined(__AVX512DQ__)
103+
#define PACK_SIZE 64
104+
static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) {
105+
106+
for (; current != current_end; virgin += 8, current += 8) {
107+
108+
__m512i value = *(__m512i*)current;
109+
__mmask8 mask = _mm512_testn_epi64_mask(value, value);
110+
111+
/* All bytes are zero. */
112+
if (mask == 0xff) continue;
113+
114+
/* Look for nonzero bytes and check for new bits. */
115+
#define UNROLL(x) \
116+
if (!(mask & (1 << x)) && classify_word(current[x]) & virgin[x]) return &current[x]
117+
UNROLL(0); UNROLL(1); UNROLL(2); UNROLL(3);
118+
UNROLL(4); UNROLL(5); UNROLL(6); UNROLL(7);
119+
#undef UNROLL
120+
121+
}
122+
123+
return current_end;
124+
125+
}
126+
#endif
127+
128+
129+
#if !defined(PACK_SIZE) && defined(__AVX2__)
130+
#define PACK_SIZE 32
131+
static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) {
132+
133+
__m256i zeroes = _mm256_setzero_si256();
134+
135+
for (; current != current_end; virgin += 4, current += 4) {
136+
137+
__m256i value = *(__m256i*)current;
138+
__m256i cmp = _mm256_cmpeq_epi64(value, zeroes);
139+
u32 mask = _mm256_movemask_epi8(cmp);
140+
141+
/* All bytes are zero. */
142+
if (mask == -1) continue;
143+
144+
/* Look for nonzero bytes and check for new bits. */
145+
if (!(mask & 0xff) && classify_word(current[0]) & virgin[0]) return &current[0];
146+
if (!(mask & 0xff00) && classify_word(current[1]) & virgin[1]) return &current[1];
147+
if (!(mask & 0xff0000) && classify_word(current[2]) & virgin[2]) return &current[2];
148+
if (!(mask & 0xff000000) && classify_word(current[3]) & virgin[3]) return &current[3];
149+
150+
}
151+
152+
return current_end;
153+
154+
}
155+
#endif
156+
157+
158+
#if !defined(PACK_SIZE)
159+
#define PACK_SIZE 32
160+
static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) {
161+
162+
for (; current != current_end; virgin += 4, current += 4) {
163+
164+
if (current[0] && classify_word(current[0]) & virgin[0]) return &current[0];
165+
if (current[1] && classify_word(current[1]) & virgin[1]) return &current[1];
166+
if (current[2] && classify_word(current[2]) & virgin[2]) return &current[2];
167+
if (current[3] && classify_word(current[3]) & virgin[3]) return &current[3];
168+
169+
}
170+
171+
return current_end;
172+
173+
}
174+
#endif
175+
176+
177+
/* A combination of classify_counts() and has_new_bits(). If 0 is returned,
   the trace bits are kept as-is (still unclassified); otherwise only the
   packs that skim() flagged are classified in place and merged into the
   virgin map via discover_word(). This optimizes the common case -- no
   interesting behavior -- into a single read-only scan. The return value
   follows has_new_bits() semantics as produced by discover_word(). */
static inline u8 has_new_bits_unclassified(u8* virgin_map) {
  u64* virgin = (u64*)virgin_map;
  u64* current = (u64*)trace_bits;
  u64* current_end = (u64*)(trace_bits + MAP_SIZE);

  u8 ret = 0;
  while ((current = (u64*)skim(virgin, current, current_end)) != current_end) {
    /* Compute the word offset inside current pack.
       NOTE(review): this uses the low address bits of `current`, so it
       assumes trace_bits is PACK_SIZE-aligned -- confirm the shm
       buffer's alignment. */
    u64 offset = ((uintptr_t)current & (PACK_SIZE - 1)) / 8;
    /* Rebase onto the word of the virgin map that mirrors `current`. */
    virgin = (u64*)((u8*)current - trace_bits + virgin_map);

    /* Classify one trace word in place and fold it into the virgin map,
       then advance both cursors. No break: the cases deliberately fall
       through so the remainder of the pack is finished and the next
       skim() call resumes on a pack boundary. */
#define UNROLL(x) \
  case x: \
    if (*current) { \
      *current = classify_word(*current); \
      discover_word(&ret, current, virgin); \
    } \
    ++current, ++virgin;

    /* Ensure the alignment of the next iteration. */
    switch (offset) {
      UNROLL(0) UNROLL(1) UNROLL(2) UNROLL(3)
#if PACK_SIZE == 64
      UNROLL(4) UNROLL(5) UNROLL(6) UNROLL(7)
#endif
    }

#undef UNROLL
  }

  return ret;
}

0 commit comments

Comments
 (0)