Skip to content

Commit 23cb414

Browse files
committed
Cull non-favored corpus entries from memory
1 parent e170eef commit 23cb414

2 files changed

Lines changed: 138 additions & 37 deletions

File tree

crates/evm/evm/src/executors/corpus.rs

Lines changed: 124 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ pub(crate) struct GlobalCorpusMetrics {
200200
cumulative_edges_seen: AtomicUsize,
201201
// Number of features (new hitcount bin of previously hit edge) seen during the invariant run.
202202
cumulative_features_seen: AtomicUsize,
203-
// Number of corpus entries.
203+
// Number of persisted corpus entries discovered or written on disk.
204204
corpus_count: AtomicUsize,
205205
// Number of corpus entries that are favored.
206206
favored_items: AtomicUsize,
@@ -229,7 +229,7 @@ pub(crate) struct CorpusMetrics {
229229
cumulative_edges_seen: usize,
230230
// Number of features (new hitcount bin of previously hit edge) seen during the invariant run.
231231
cumulative_features_seen: usize,
232-
// Number of corpus entries.
232+
// Number of persisted corpus entries discovered or written on disk.
233233
corpus_count: usize,
234234
// Number of corpus entries that are favored.
235235
favored_items: usize,
@@ -1025,39 +1025,53 @@ impl WorkerCorpus {
10251025
Ok(sequence[depth].clone())
10261026
}
10271027

1028-
/// Flush one non-favored corpus entry when the corpus size exceeds the minimum.
1028+
/// Flush non-favored entries from memory when the corpus size exceeds the minimum.
1029+
///
1030+
/// Entries remain on disk and in `disk_corpus_entries` so they can still be used as crossover
1031+
/// donors. `metrics.corpus_count` intentionally tracks persisted on-disk entries, not live
1032+
/// in-memory entries.
10291033
fn cull_corpus(&mut self) -> Result<()> {
1030-
if self.in_memory_corpus.len() > self.config.corpus_min_size.max(1)
1031-
&& let Some(index) = self.in_memory_corpus.iter().position(|corpus| !corpus.is_favored)
1032-
{
1033-
let corpus = &self.in_memory_corpus[index];
1034-
let evicted_uuid = corpus.uuid;
1035-
1036-
trace!(target: "corpus", corpus=%serde_json::to_string(&corpus).unwrap(), "evict corpus");
1034+
let min_size = self.config.corpus_min_size;
1035+
if self.in_memory_corpus.len() <= min_size {
1036+
return Ok(());
1037+
}
10371038

1038-
// Remove corpus from memory.
1039-
self.in_memory_corpus.remove(index);
1039+
let mut remaining_removals = self.in_memory_corpus.len() - min_size;
1040+
let mut old_to_new = vec![None; self.in_memory_corpus.len()];
1041+
let mut retained = Vec::with_capacity(self.in_memory_corpus.len());
1042+
let mut evicted_uuids = HashSet::new();
10401043

1041-
// Adjust the tracked indices.
1042-
self.new_entry_indices.retain_mut(|i| {
1043-
if *i > index {
1044-
*i -= 1; // Shift indices down.
1045-
true // Keep this index.
1046-
} else {
1047-
*i != index // Remove if it's the deleted index, keep otherwise.
1048-
}
1049-
});
1050-
self.disk_corpus_entries.retain(|entry| entry.uuid != evicted_uuid);
1051-
1052-
let impacted_edges = self
1053-
.top_rated
1054-
.iter()
1055-
.filter_map(|(&edge_idx, &(uuid, _))| (uuid == evicted_uuid).then_some(edge_idx))
1056-
.collect::<Vec<_>>();
1057-
for edge_idx in impacted_edges {
1058-
self.recompute_top_rated_for_edge(edge_idx);
1044+
for (old_index, corpus) in self.in_memory_corpus.drain(..).enumerate() {
1045+
if !corpus.is_favored && remaining_removals > 0 {
1046+
trace!(target: "corpus", corpus=%serde_json::to_string(&corpus).unwrap(), "evict corpus from memory");
1047+
evicted_uuids.insert(corpus.uuid);
1048+
remaining_removals -= 1;
1049+
} else {
1050+
old_to_new[old_index] = Some(retained.len());
1051+
retained.push(corpus);
10591052
}
10601053
}
1054+
1055+
if evicted_uuids.is_empty() {
1056+
self.in_memory_corpus = retained;
1057+
return Ok(());
1058+
}
1059+
1060+
self.in_memory_corpus = retained;
1061+
self.new_entry_indices = self
1062+
.new_entry_indices
1063+
.iter()
1064+
.filter_map(|&i| old_to_new.get(i).copied().flatten())
1065+
.collect();
1066+
1067+
let impacted_edges = self
1068+
.top_rated
1069+
.iter()
1070+
.filter_map(|(&edge_idx, &(uuid, _))| evicted_uuids.contains(&uuid).then_some(edge_idx))
1071+
.collect::<Vec<_>>();
1072+
for edge_idx in impacted_edges {
1073+
self.recompute_top_rated_for_edge(edge_idx);
1074+
}
10611075
Ok(())
10621076
}
10631077

@@ -1786,4 +1800,84 @@ mod tests {
17861800

17871801
assert!(manager.in_memory_corpus.iter().all(|c| c.uuid != non_favored_uuid));
17881802
}
1803+
1804+
#[test]
1805+
fn culling_flushes_all_non_favored_from_memory_but_keeps_disk_entries() {
1806+
let tx_gen = Just(basic_tx()).boxed();
1807+
let config = FuzzCorpusConfig {
1808+
corpus_dir: Some(temp_corpus_dir()),
1809+
corpus_min_size: 0,
1810+
..Default::default()
1811+
};
1812+
1813+
let mut favored = CorpusEntry::new_with_cmp_and_edges(
1814+
vec![basic_tx()],
1815+
Vec::new(),
1816+
vec![1],
1817+
Uuid::new_v4(),
1818+
);
1819+
favored.is_favored = true;
1820+
let favored_uuid = favored.uuid;
1821+
let favored_cost = favored.tx_seq.len();
1822+
let non_favored = (0..3).map(|_| CorpusEntry::new(vec![basic_tx()])).collect::<Vec<_>>();
1823+
let non_favored_uuids =
1824+
non_favored.iter().map(|corpus| corpus.uuid).collect::<HashSet<_>>();
1825+
1826+
let corpus_root = temp_corpus_dir();
1827+
let worker_subdir = corpus_root.join("worker0");
1828+
fs::create_dir_all(&worker_subdir).unwrap();
1829+
let disk_corpus_entries = std::iter::once(favored_uuid)
1830+
.chain(non_favored_uuids.iter().copied())
1831+
.map(|uuid| CorpusDirEntry {
1832+
path: worker_subdir.join(format!("{uuid}-1.json")),
1833+
uuid,
1834+
timestamp: 1,
1835+
})
1836+
.collect::<Vec<_>>();
1837+
1838+
let mut in_memory_corpus = vec![favored];
1839+
in_memory_corpus.extend(non_favored);
1840+
let mut manager = WorkerCorpus {
1841+
id: 0,
1842+
tx_generator: tx_gen,
1843+
mutation_generator: Just(MutationType::Repeat).boxed(),
1844+
config: config.into(),
1845+
in_memory_corpus,
1846+
current_mutated: None,
1847+
failed_replays: 0,
1848+
history_map: Vec::new(),
1849+
edge_indices: EdgeIndexMap::default(),
1850+
sancov_history_map: Vec::new(),
1851+
top_rated: HashMap::from([(1, (favored_uuid, favored_cost))]),
1852+
disk_corpus_entries,
1853+
metrics: CorpusMetrics { corpus_count: 4, ..Default::default() },
1854+
new_entry_indices: vec![0, 1, 2, 3],
1855+
last_sync_timestamp: 0,
1856+
worker_dir: Some(corpus_root),
1857+
last_sync_metrics: CorpusMetrics::default(),
1858+
optimization_best_value: None,
1859+
optimization_best_sequence: vec![],
1860+
};
1861+
1862+
manager.cull_corpus().unwrap();
1863+
1864+
assert_eq!(manager.in_memory_corpus.len(), 1);
1865+
assert_eq!(manager.in_memory_corpus[0].uuid, favored_uuid);
1866+
assert_eq!(manager.disk_corpus_entries.len(), 4, "on-disk crossover donors stay cached");
1867+
assert_eq!(manager.metrics.corpus_count, 4, "corpus_count tracks persisted entries");
1868+
assert_eq!(manager.new_entry_indices, vec![0]);
1869+
assert!(
1870+
manager.in_memory_corpus.iter().all(|corpus| !non_favored_uuids.contains(&corpus.uuid))
1871+
);
1872+
}
1873+
1874+
#[test]
1875+
fn culling_can_empty_memory_when_no_entries_are_favored() {
1876+
let (mut manager, _) = new_manager_with_single_corpus();
1877+
1878+
manager.cull_corpus().unwrap();
1879+
1880+
assert!(manager.in_memory_corpus.is_empty());
1881+
assert!(manager.new_entry_indices.is_empty());
1882+
}
17891883
}

crates/evm/evm/src/executors/invariant/mod.rs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -528,11 +528,12 @@ impl<'a, FEN: FoundryEvmNetwork> InvariantExecutor<'a, FEN> {
528528
let failures_before_run = invariant_test.test_data.failures.invariant_count();
529529
let mut stop_after_run = false;
530530

531-
let initial_seq = corpus_manager.new_inputs(
531+
let mut initial_seq = corpus_manager.new_inputs(
532532
&mut invariant_test.test_data.branch_runner,
533533
&invariant_test.fuzz_state,
534534
&invariant_test.targeted_contracts,
535535
)?;
536+
initial_seq.truncate(self.config.depth.max(1) as usize);
536537

537538
// Create current invariant run data.
538539
let mut current_run = InvariantTestRun::new(
@@ -792,14 +793,20 @@ impl<'a, FEN: FoundryEvmNetwork> InvariantExecutor<'a, FEN> {
792793
current_run.depth += 1;
793794
}
794795

795-
current_run.inputs.push(corpus_manager.generate_next_input(
796-
&mut invariant_test.test_data.branch_runner,
797-
&initial_seq,
798-
discarded,
799-
current_run.depth as usize,
800-
)?);
796+
if current_run.depth < self.config.depth {
797+
current_run.inputs.push(corpus_manager.generate_next_input(
798+
&mut invariant_test.test_data.branch_runner,
799+
&initial_seq,
800+
discarded,
801+
current_run.depth as usize,
802+
)?);
803+
}
801804
}
802805

806+
let executed_len = current_run.inputs.len().min(self.config.depth as usize);
807+
current_run.inputs.truncate(executed_len);
808+
current_run.cmp_seq.truncate(executed_len);
809+
803810
// Extend corpus with current run data.
804811
// Materialize the optimization best prefix once at run end (avoids
805812
// cloning inputs on every new in-run max).

0 commit comments

Comments
 (0)