@@ -200,7 +200,7 @@ pub(crate) struct GlobalCorpusMetrics {
200200 cumulative_edges_seen : AtomicUsize ,
201201 // Number of features (new hitcount bin of previously hit edge) seen during the invariant run.
202202 cumulative_features_seen : AtomicUsize ,
203- // Number of corpus entries.
203+ // Number of persisted corpus entries, i.e. entries discovered on disk or written to disk.
204204 corpus_count : AtomicUsize ,
205205 // Number of corpus entries that are favored.
206206 favored_items : AtomicUsize ,
@@ -229,7 +229,7 @@ pub(crate) struct CorpusMetrics {
229229 cumulative_edges_seen : usize ,
230230 // Number of features (new hitcount bin of previously hit edge) seen during the invariant run.
231231 cumulative_features_seen : usize ,
232- // Number of corpus entries.
232+ // Number of persisted corpus entries, i.e. entries discovered on disk or written to disk.
233233 corpus_count : usize ,
234234 // Number of corpus entries that are favored.
235235 favored_items : usize ,
@@ -1025,39 +1025,53 @@ impl WorkerCorpus {
10251025 Ok ( sequence[ depth] . clone ( ) )
10261026 }
10271027
1028- /// Flush one non-favored corpus entry when the corpus size exceeds the minimum.
1028+ /// Flush non-favored entries from memory when the corpus size exceeds the minimum.
1029+ ///
1030+ /// Entries remain on disk and in `disk_corpus_entries` so they can still be used as crossover
1031+ /// donors. `metrics.corpus_count` intentionally tracks persisted on-disk entries, not live
1032+ /// in-memory entries.
10291033 fn cull_corpus ( & mut self ) -> Result < ( ) > {
1030- if self . in_memory_corpus . len ( ) > self . config . corpus_min_size . max ( 1 )
1031- && let Some ( index) = self . in_memory_corpus . iter ( ) . position ( |corpus| !corpus. is_favored )
1032- {
1033- let corpus = & self . in_memory_corpus [ index] ;
1034- let evicted_uuid = corpus. uuid ;
1035-
1036- trace ! ( target: "corpus" , corpus=%serde_json:: to_string( & corpus) . unwrap( ) , "evict corpus" ) ;
1034+ let min_size = self . config . corpus_min_size ;
1035+ if self . in_memory_corpus . len ( ) <= min_size {
1036+ return Ok ( ( ) ) ;
1037+ }
10371038
1038- // Remove corpus from memory.
1039- self . in_memory_corpus . remove ( index) ;
1039+ let mut remaining_removals = self . in_memory_corpus . len ( ) - min_size;
1040+ let mut old_to_new = vec ! [ None ; self . in_memory_corpus. len( ) ] ;
1041+ let mut retained = Vec :: with_capacity ( self . in_memory_corpus . len ( ) ) ;
1042+ let mut evicted_uuids = HashSet :: new ( ) ;
10401043
1041- // Adjust the tracked indices.
1042- self . new_entry_indices . retain_mut ( |i| {
1043- if * i > index {
1044- * i -= 1 ; // Shift indices down.
1045- true // Keep this index.
1046- } else {
1047- * i != index // Remove if it's the deleted index, keep otherwise.
1048- }
1049- } ) ;
1050- self . disk_corpus_entries . retain ( |entry| entry. uuid != evicted_uuid) ;
1051-
1052- let impacted_edges = self
1053- . top_rated
1054- . iter ( )
1055- . filter_map ( |( & edge_idx, & ( uuid, _) ) | ( uuid == evicted_uuid) . then_some ( edge_idx) )
1056- . collect :: < Vec < _ > > ( ) ;
1057- for edge_idx in impacted_edges {
1058- self . recompute_top_rated_for_edge ( edge_idx) ;
1044+ for ( old_index, corpus) in self . in_memory_corpus . drain ( ..) . enumerate ( ) {
1045+ if !corpus. is_favored && remaining_removals > 0 {
1046+ trace ! ( target: "corpus" , corpus=%serde_json:: to_string( & corpus) . unwrap( ) , "evict corpus from memory" ) ;
1047+ evicted_uuids. insert ( corpus. uuid ) ;
1048+ remaining_removals -= 1 ;
1049+ } else {
1050+ old_to_new[ old_index] = Some ( retained. len ( ) ) ;
1051+ retained. push ( corpus) ;
10591052 }
10601053 }
1054+
1055+ if evicted_uuids. is_empty ( ) {
1056+ self . in_memory_corpus = retained;
1057+ return Ok ( ( ) ) ;
1058+ }
1059+
1060+ self . in_memory_corpus = retained;
1061+ self . new_entry_indices = self
1062+ . new_entry_indices
1063+ . iter ( )
1064+ . filter_map ( |& i| old_to_new. get ( i) . copied ( ) . flatten ( ) )
1065+ . collect ( ) ;
1066+
1067+ let impacted_edges = self
1068+ . top_rated
1069+ . iter ( )
1070+ . filter_map ( |( & edge_idx, & ( uuid, _) ) | evicted_uuids. contains ( & uuid) . then_some ( edge_idx) )
1071+ . collect :: < Vec < _ > > ( ) ;
1072+ for edge_idx in impacted_edges {
1073+ self . recompute_top_rated_for_edge ( edge_idx) ;
1074+ }
10611075 Ok ( ( ) )
10621076 }
10631077
@@ -1786,4 +1800,84 @@ mod tests {
17861800
17871801 assert ! ( manager. in_memory_corpus. iter( ) . all( |c| c. uuid != non_favored_uuid) ) ;
17881802 }
1803+
/// With `corpus_min_size == 0`, culling must drop every non-favored entry from memory while
/// leaving the on-disk bookkeeping (`disk_corpus_entries`, `metrics.corpus_count`) untouched.
#[test]
fn culling_flushes_all_non_favored_from_memory_but_keeps_disk_entries() {
    let tx_generator = Just(basic_tx()).boxed();
    let corpus_config = FuzzCorpusConfig {
        corpus_dir: Some(temp_corpus_dir()),
        corpus_min_size: 0,
        ..Default::default()
    };

    // One favored entry that is the top-rated entry for edge 1 ...
    let mut keeper = CorpusEntry::new_with_cmp_and_edges(
        vec![basic_tx()],
        Vec::new(),
        vec![1],
        Uuid::new_v4(),
    );
    keeper.is_favored = true;
    let favored_uuid = keeper.uuid;
    let favored_cost = keeper.tx_seq.len();

    // ... followed by three non-favored entries that are expected to be evicted.
    let disposable: Vec<_> = (0..3).map(|_| CorpusEntry::new(vec![basic_tx()])).collect();
    let non_favored_uuids: HashSet<_> = disposable.iter().map(|entry| entry.uuid).collect();

    // Every entry, favored or not, is registered as persisted on disk.
    let corpus_root = temp_corpus_dir();
    let worker_subdir = corpus_root.join("worker0");
    fs::create_dir_all(&worker_subdir).unwrap();
    let as_disk_entry = |uuid| CorpusDirEntry {
        path: worker_subdir.join(format!("{uuid}-1.json")),
        uuid,
        timestamp: 1,
    };
    let disk_corpus_entries: Vec<_> = std::iter::once(favored_uuid)
        .chain(non_favored_uuids.iter().copied())
        .map(as_disk_entry)
        .collect();

    // The favored entry sits at index 0, so it should map to index 0 after culling.
    let in_memory_corpus: Vec<_> = std::iter::once(keeper).chain(disposable).collect();

    let mut manager = WorkerCorpus {
        id: 0,
        tx_generator,
        mutation_generator: Just(MutationType::Repeat).boxed(),
        config: corpus_config.into(),
        in_memory_corpus,
        current_mutated: None,
        failed_replays: 0,
        history_map: Vec::new(),
        edge_indices: EdgeIndexMap::default(),
        sancov_history_map: Vec::new(),
        top_rated: HashMap::from([(1, (favored_uuid, favored_cost))]),
        disk_corpus_entries,
        metrics: CorpusMetrics { corpus_count: 4, ..Default::default() },
        new_entry_indices: vec![0, 1, 2, 3],
        last_sync_timestamp: 0,
        worker_dir: Some(corpus_root),
        last_sync_metrics: CorpusMetrics::default(),
        optimization_best_value: None,
        optimization_best_sequence: vec![],
    };

    manager.cull_corpus().unwrap();

    // Only the favored entry survives in memory.
    assert_eq!(manager.in_memory_corpus.len(), 1);
    assert_eq!(manager.in_memory_corpus[0].uuid, favored_uuid);
    // Disk-side state is unaffected by the in-memory eviction.
    assert_eq!(manager.disk_corpus_entries.len(), 4, "on-disk crossover donors stay cached");
    assert_eq!(manager.metrics.corpus_count, 4, "corpus_count tracks persisted entries");
    // Indices of evicted entries are dropped; the survivor keeps index 0.
    assert_eq!(manager.new_entry_indices, vec![0]);
    assert!(
        !manager.in_memory_corpus.iter().any(|entry| non_favored_uuids.contains(&entry.uuid))
    );
}
1873+
/// Culling the manager built by `new_manager_with_single_corpus` must leave both the in-memory
/// corpus and the tracked new-entry indices empty.
#[test]
fn culling_can_empty_memory_when_no_entries_are_favored() {
    // Only the manager is needed; the helper's second return value is discarded.
    let mut manager = new_manager_with_single_corpus().0;

    manager.cull_corpus().unwrap();

    // Nothing is left in memory, and no index may point at an evicted entry.
    assert!(manager.in_memory_corpus.is_empty());
    assert!(manager.new_entry_indices.is_empty());
}
17891883}
0 commit comments