Skip to content

Commit c67f181

Browse files
xinhaoyuan authored and copybara-github committed
Use reservoir sampling in auto-dictionary tracing and collection.
This avoids a bias that favors later entries, which always overwrite the earlier ones. As side effects, auto-dictionary tracing tables are filled randomly only after they become full of entries, and memory copying is saved whenever an entry is randomly skipped. Also, the table cleanup is now lazy and cleans only the previously used entries. It should be much faster than the previous method of zero-filling all the entries. PiperOrigin-RevId: 845451974
1 parent 8359318 commit c67f181

File tree

6 files changed

+73
-19
lines changed

6 files changed

+73
-19
lines changed

centipede/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,7 @@ cc_library(
938938
name = "runner_cmp_trace",
939939
hdrs = ["runner_cmp_trace.h"],
940940
copts = DISABLE_SANCOV_COPTS,
941+
deps = ["@abseil-cpp//absl/base:core_headers"],
941942
)
942943

943944
# Library for manipulating centipede runner flags. This is not used by the

centipede/byte_array_mutator_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ TEST(CmpDictionary, CmpDictionary) {
158158
}
159159

160160
TEST(CmpDictionary, CmpDictionaryIsCompatibleWithCmpTrace) {
161-
CmpTrace<0, 13> traceN;
161+
CmpTrace<0, 13> traceN = {};
162162
traceN.Clear();
163163
constexpr uint8_t long_array[20] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
164164
10, 11, 12, 13, 14, 15, 16, 17, 18, 19};

centipede/fuzztest_mutator.cc

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,16 @@ namespace {
4242
using MutatorDomainBase =
4343
decltype(fuzztest::VectorOf(fuzztest::Arbitrary<uint8_t>()));
4444

45+
template <typename T>
46+
bool SampleInsert(const T& cmp_table, size_t& counter) {
47+
static thread_local absl::BitGen bitgen;
48+
counter++;
49+
if (counter <= cmp_table.kTableSize) {
50+
return true;
51+
}
52+
return absl::Uniform<size_t>(bitgen, 0, counter) < cmp_table.kTableSize;
53+
}
54+
4555
template <typename T>
4656
void InsertCmpEntryIntoIntegerDictionary(const uint8_t* a, const uint8_t* b,
4757
TablesOfRecentCompares& cmp_tables) {
@@ -59,25 +69,36 @@ void PopulateCmpEntries(const ExecutionMetadata& metadata,
5969
// Size limits on the cmp entries to be populated.
6070
static constexpr uint8_t kMaxCmpEntrySize = 15;
6171
static constexpr uint8_t kMinCmpEntrySize = 2;
72+
size_t uint16_sample_counter = 0;
73+
size_t uint32_sample_counter = 0;
74+
size_t uint64_sample_counter = 0;
75+
size_t mem_sample_counter = 0;
6276

63-
metadata.ForEachCmpEntry([&cmp_tables](fuzztest::internal::ByteSpan a,
64-
fuzztest::internal::ByteSpan b) {
77+
metadata.ForEachCmpEntry([&](fuzztest::internal::ByteSpan a,
78+
fuzztest::internal::ByteSpan b) {
6579
FUZZTEST_CHECK(a.size() == b.size())
6680
<< "cmp operands must have the same size";
6781
const size_t size = a.size();
6882
if (size < kMinCmpEntrySize) return;
6983
if (size > kMaxCmpEntrySize) return;
70-
if (size == 2) {
84+
if (size == 2 && SampleInsert(cmp_tables.GetMutable<sizeof(uint16_t)>(),
85+
uint16_sample_counter)) {
7186
InsertCmpEntryIntoIntegerDictionary<uint16_t>(a.data(), b.data(),
7287
cmp_tables);
73-
} else if (size == 4) {
88+
} else if (size == 4 &&
89+
SampleInsert(cmp_tables.GetMutable<sizeof(uint32_t)>(),
90+
uint32_sample_counter)) {
7491
InsertCmpEntryIntoIntegerDictionary<uint32_t>(a.data(), b.data(),
7592
cmp_tables);
76-
} else if (size == 8) {
93+
} else if (size == 8 &&
94+
SampleInsert(cmp_tables.GetMutable<sizeof(uint64_t)>(),
95+
uint64_sample_counter)) {
7796
InsertCmpEntryIntoIntegerDictionary<uint64_t>(a.data(), b.data(),
7897
cmp_tables);
7998
}
80-
cmp_tables.GetMutable<0>().Insert(a.data(), b.data(), size);
99+
if (SampleInsert(cmp_tables.GetMutable<0>(), mem_sample_counter)) {
100+
cmp_tables.GetMutable<0>().Insert(a.data(), b.data(), size);
101+
}
81102
});
82103
}
83104

centipede/runner_cmp_trace.h

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,14 @@
1818
// Capturing arguments of CMP instructions, memcmp, and similar.
1919
// WARNING: this code needs to have minimal dependencies.
2020

21+
#include <sys/time.h>
22+
2123
#include <cstddef>
2224
#include <cstdint>
2325
#include <cstring>
2426

27+
#include "absl/base/optimization.h"
28+
2529
namespace fuzztest::internal {
2630

2731
// Captures up to `kNumItems` different CMP argument pairs.
@@ -45,16 +49,38 @@ class CmpTrace {
4549
// No CTOR - objects will be created in TLS.
4650

4751
// Clears `this`.
48-
void Clear() { memset(this, 0, sizeof(*this)); }
52+
void Clear() { to_clear = true; }
4953

5054
// Captures one CMP argument pair, as two byte arrays, `size` bytes each.
5155
void Capture(uint8_t size, const uint8_t *value0, const uint8_t *value1) {
56+
if (ABSL_PREDICT_FALSE(to_clear)) {
57+
for (size_t i = 0; i < kNumItems; ++i) {
58+
if (sizes_[i] == 0) break;
59+
sizes_[i] = 0;
60+
}
61+
capture_count_ = 0;
62+
to_clear = false;
63+
}
5264
if (size > kNumBytesPerValue) size = kNumBytesPerValue;
5365
// We choose a pseudo-random slot each time.
5466
// This way after capturing many pairs we end up with up to `kNumItems`
5567
// pairs which are typically, but not always, the most recent.
56-
rand_seed_ = rand_seed_ * 1103515245 + 12345;
57-
const size_t index = rand_seed_ % kNumItems;
68+
size_t index = 0;
69+
if (capture_count_ < kNumItems) {
70+
index = capture_count_++;
71+
} else {
72+
if (rand_seed_ == 0) {
73+
// Initialize the random seed (likely) once.
74+
struct timeval tv = {};
75+
constexpr size_t kUsecInSec = 1000000;
76+
gettimeofday(&tv, nullptr);
77+
rand_seed_ = tv.tv_sec * kUsecInSec + tv.tv_usec;
78+
}
79+
capture_count_++;
80+
rand_seed_ = rand_seed_ * 1103515245 + 12345;
81+
index = rand_seed_ % capture_count_;
82+
if (index >= kNumItems) return;
83+
}
5884
Item& item = items_[index];
5985
sizes_[index] = size;
6086
__builtin_memcpy(item.value0, value0, size);
@@ -74,12 +100,14 @@ class CmpTrace {
74100
// Iterates non-zero CMP pairs.
75101
template <typename Callback>
76102
void ForEachNonZero(Callback callback) {
103+
if (ABSL_PREDICT_FALSE(to_clear)) return;
77104
for (size_t i = 0; i < kNumItems; ++i) {
78105
const auto size = sizes_[i];
79-
if (size == 0 || size > kNumBytesPerValue) continue;
80-
sizes_[i] = 0;
106+
if (size == 0) break;
107+
if (size > kNumBytesPerValue) continue;
81108
callback(size, items_[i].value0, items_[i].value1);
82109
}
110+
to_clear = true;
83111
}
84112

85113
private:
@@ -89,17 +117,22 @@ class CmpTrace {
89117
uint8_t value1[kNumBytesPerValue];
90118
};
91119

120+
volatile bool to_clear;
121+
92122
// Value sizes of argument pairs. zero-size indicates that the corresponding
93123
// entry is empty.
94124
//
95125
// Marked volatile because of the potential racing between the owning thread
96126
// and the main thread, which is tolerated gracefully.
97127
volatile uint8_t sizes_[kNumItems];
128+
129+
size_t capture_count_;
98130
// Values of argument pairs.
99131
Item items_[kNumItems];
100132

101-
// Pseudo-random seed.
102-
size_t rand_seed_;
133+
// Pseudo-random seed from glibc
134+
// (https://en.wikipedia.org/wiki/Linear_congruential_generator).
135+
uint32_t rand_seed_;
103136
};
104137

105138
} // namespace fuzztest::internal

centipede/runner_cmp_trace_test.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ TEST(CmpTrace, T1) {
5656
observed_pairs.push_back(cmp_pair);
5757
};
5858

59-
CmpTrace<2, 10> trace2;
60-
CmpTrace<4, 11> trace4;
61-
CmpTrace<8, 12> trace8;
62-
CmpTrace<0, 13> traceN;
59+
CmpTrace<2, 10> trace2 = {};
60+
CmpTrace<4, 11> trace4 = {};
61+
CmpTrace<8, 12> trace8 = {};
62+
CmpTrace<0, 13> traceN = {};
6363
trace2.Clear();
6464
trace4.Clear();
6565
trace8.Clear();

fuzztest/internal/table_of_recent_compares.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
// Copyright 2022 Google LLC
32
//
43
// Licensed under the Apache License, Version 2.0 (the "License");

0 commit comments

Comments
 (0)