@@ -10,19 +10,18 @@ import (
1010 "os"
1111 "path/filepath"
1212 "sync"
13+ "sync/atomic"
1314 "time"
1415
1516 "github.com/crytic/medusa-geth/common"
1617 "github.com/crytic/medusa/chain"
1718 "github.com/crytic/medusa/fuzzing/calls"
19+ "github.com/crytic/medusa/fuzzing/contracts"
1820 "github.com/crytic/medusa/fuzzing/coverage"
1921 "github.com/crytic/medusa/logging"
20- "github.com/crytic/medusa/logging/colors"
2122 "github.com/crytic/medusa/utils"
2223 "github.com/crytic/medusa/utils/randomutils"
2324 "github.com/google/uuid"
24-
25- "github.com/crytic/medusa/fuzzing/contracts"
2625)
2726
2827// Corpus describes an archive of fuzzer-generated artifacts used to further fuzzing efforts. These artifacts are
@@ -55,12 +54,15 @@ type Corpus struct {
5554 // callSequences.
5655 callSequencesLock sync.Mutex
5756
57+ // validCallSequences tracks how many call sequences in the corpus are valid when the corpus is re-run.
58+ validCallSequences atomic.Uint64
59+
5860 // logger describes the Corpus's log object that can be used to log important events
5961 logger * logging.Logger
6062}
6163
62- // NewCorpus initializes a new Corpus object, reading artifacts from the provided directory. If the directory refers
63- // to an empty path, artifacts will not be persistently stored.
64+ // NewCorpus initializes a new Corpus object, reading artifacts from the provided directory and preparing in-memory
65+ // state required for fuzzing. If the directory refers to an empty path, artifacts will not be persistently stored.
6466func NewCorpus (corpusDirectory string ) (* Corpus , error ) {
6567 var err error
6668 corpus := & Corpus {
@@ -175,140 +177,10 @@ func (c *Corpus) migrateLegacyCorpus() error {
175177 return nil
176178}
177179
178- // CoverageMaps exposes coverage details for all call sequences known to the corpus.
179- func (c * Corpus ) CoverageMaps () * coverage.CoverageMaps {
180- return c .coverageMaps
181- }
182-
183- // CallSequenceEntryCount returns the total number of call sequences that increased coverage and also any test results
184- // that led to a failure.
185- func (c * Corpus ) CallSequenceEntryCount () (int , int ) {
186- return len (c .callSequenceFiles .files ), len (c .testResultSequenceFiles .files )
187- }
188-
189- // ActiveMutableSequenceCount returns the count of call sequences recorded in the corpus which have been validated
190- // after Corpus initialization and are ready for use in mutations.
191- func (c * Corpus ) ActiveMutableSequenceCount () int {
192- if c .mutationTargetSequenceChooser == nil {
193- return 0
194- }
195- return c .mutationTargetSequenceChooser .ChoiceCount ()
196- }
197-
198- // RandomMutationTargetSequence returns a weighted random call sequence from the Corpus, or an error if one occurs.
199- func (c * Corpus ) RandomMutationTargetSequence () (calls.CallSequence , error ) {
200- // If we didn't initialize a chooser, return an error
201- if c .mutationTargetSequenceChooser == nil {
202- return nil , fmt .Errorf ("corpus could not return a random call sequence because the corpus was not initialized" )
203- }
204-
205- // Pick a random call sequence, then clone it before returning it, so the original is untainted.
206- seq , err := c .mutationTargetSequenceChooser .Choose ()
207- if seq == nil || err != nil {
208- return nil , err
209- }
210- return seq .Clone ()
211- }
212-
213- // initializeSequences is a helper method for Initialize. It validates a list of call sequence files on a given
214- // chain, using the map of deployed contracts (e.g. to check for non-existent method called, due to code changes).
215- // Valid call sequences are added to the list of un-executed sequences the fuzzer should execute first.
216- // If this sequence list being initialized is for use with mutations, it is added to the mutationTargetSequenceChooser.
217- // Returns an error if one occurs.
218- func (c * Corpus ) initializeSequences (sequenceFiles * corpusDirectory [calls.CallSequence ], testChain * chain.TestChain , deployedContracts map [common.Address ]* contracts.Contract , useInMutations bool ) error {
219- // Cache the base block index so that you can reset back to it after every sequence
220- baseBlockIndex := uint64 (len (testChain .CommittedBlocks ()))
221-
222- // Loop for each sequence
223- var err error
224- for _ , sequenceFileData := range sequenceFiles .files {
225- // Unwrap the underlying sequence.
226- sequence := sequenceFileData .data
227-
228- // Define a variable to track whether we should disable this sequence (if it is no longer applicable in some
229- // way).
230- sequenceInvalidError := error (nil )
231- fetchElementFunc := func (currentIndex int ) (* calls.CallSequenceElement , error ) {
232- // If we are at the end of our sequence, return nil indicating we should stop executing.
233- if currentIndex >= len (sequence ) {
234- return nil , nil
235- }
236-
237- // If we are deploying a contract and not targeting one with this call, there should be no work to do.
238- currentSequenceElement := sequence [currentIndex ]
239- if currentSequenceElement .Call .To == nil {
240- return currentSequenceElement , nil
241- }
242-
243- // We are calling a contract with this call, ensure we can resolve the contract call is targeting.
244- resolvedContract , resolvedContractExists := deployedContracts [* currentSequenceElement .Call .To ]
245- if ! resolvedContractExists {
246- sequenceInvalidError = fmt .Errorf ("contract at address '%v' could not be resolved" , currentSequenceElement .Call .To .String ())
247- return nil , nil
248- }
249- currentSequenceElement .Contract = resolvedContract
250-
251- // Next, if our sequence element uses ABI values to produce call data, our deserialized data is not yet
252- // sufficient for runtime use, until we use it to resolve runtime references.
253- callAbiValues := currentSequenceElement .Call .DataAbiValues
254- if callAbiValues != nil {
255- sequenceInvalidError = callAbiValues .Resolve (currentSequenceElement .Contract .CompiledContract ().Abi )
256- if sequenceInvalidError != nil {
257- sequenceInvalidError = fmt .Errorf ("error resolving method in contract '%v': %v" , currentSequenceElement .Contract .Name (), sequenceInvalidError )
258- return nil , nil
259- }
260- }
261- return currentSequenceElement , nil
262- }
263-
264- // Define actions to perform after executing each call in the sequence.
265- executionCheckFunc := func (currentlyExecutedSequence calls.CallSequence ) (bool , error ) {
266- // Grab the coverage maps for the last executed sequence element
267- lastExecutedSequenceElement := currentlyExecutedSequence [len (currentlyExecutedSequence )- 1 ]
268- covMaps := coverage .GetCoverageTracerResults (lastExecutedSequenceElement .ChainReference .MessageResults ())
269-
270- // Memory optimization: Remove the coverage maps from the message results
271- coverage .RemoveCoverageTracerResults (lastExecutedSequenceElement .ChainReference .MessageResults ())
272-
273- // Update the global coverage maps
274- _ , covErr := c .coverageMaps .Update (covMaps )
275- if covErr != nil {
276- return true , covErr
277- }
278- return false , nil
279- }
280-
281- // Execute each call sequence, populating runtime data and collecting coverage data along the way.
282- _ , err = calls .ExecuteCallSequenceIteratively (testChain , fetchElementFunc , executionCheckFunc )
283-
284- // If we failed to replay a sequence and measure coverage due to an unexpected error, report it.
285- if err != nil {
286- return fmt .Errorf ("failed to initialize coverage maps from corpus, encountered an error while executing call sequence: %v" , err )
287- }
288-
289- // If the sequence was replayed successfully, we add it. If it was not, we exclude it with a warning.
290- if sequenceInvalidError == nil {
291- if useInMutations && c .mutationTargetSequenceChooser != nil {
292- c .mutationTargetSequenceChooser .AddChoices (randomutils .NewWeightedRandomChoice [calls.CallSequence ](sequence , big .NewInt (1 )))
293- }
294- c .unexecutedCallSequences = append (c .unexecutedCallSequences , sequence )
295- } else {
296- c .logger .Debug ("Corpus item " , colors .Bold , sequenceFileData .fileName , colors .Reset , " disabled due to error when replaying it" , sequenceInvalidError )
297- }
298-
299- // Revert chain state to our starting point to test the next sequence.
300- if err := testChain .RevertToBlockIndex (baseBlockIndex ); err != nil {
301- return fmt .Errorf ("failed to reset the chain while seeding coverage: %v" , err )
302- }
303- }
304- return nil
305- }
306-
307- // Initialize initializes any runtime data needed for a Corpus on startup. Call sequences are replayed on the post-setup
308- // (deployment) test chain to calculate coverage, while resolving references to compiled contracts.
309- // Returns the active number of corpus items, total number of corpus items, or an error if one occurred. If an error
310- // is returned, then the corpus counts returned will always be zero.
311- func (c * Corpus ) Initialize (baseTestChain * chain.TestChain , contractDefinitions contracts.Contracts ) (int , int , error ) {
180+ // Initialize initializes the in-memory corpus state but does not actually replay any of the sequences stored in the corpus.
181+ // It seeds coverage information from the post-setup chain while enqueueing all persisted sequences for execution. The fuzzer workers
182+ // will concurrently execute all the sequences stored in the corpus before actually starting the fuzzing campaign.
183+ func (c * Corpus ) Initialize (baseTestChain * chain.TestChain , contractDefinitions contracts.Contracts ) error {
312184 // Acquire our call sequences lock during the duration of this method.
313185 c .callSequencesLock .Lock ()
314186 defer c .callSequencesLock .Unlock ()
@@ -332,9 +204,11 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
332204 // We also track any contract deployments, so we can resolve contract/method definitions for corpus call
333205 // sequences.
334206 newChain .Events .ContractDeploymentAddedEventEmitter .Subscribe (func (event chain.ContractDeploymentsAddedEvent ) error {
335- matchedContract := contractDefinitions .MatchBytecode (event .Contract .InitBytecode , event .Contract .RuntimeBytecode )
336- if matchedContract != nil {
337- deployedContracts [event .Contract .Address ] = matchedContract
207+ if contractDefinitions != nil {
208+ matchedContract := contractDefinitions .MatchBytecode (event .Contract .InitBytecode , event .Contract .RuntimeBytecode )
209+ if matchedContract != nil {
210+ deployedContracts [event .Contract .Address ] = matchedContract
211+ }
338212 }
339213 return nil
340214 })
@@ -345,8 +219,9 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
345219 return nil
346220 })
347221 if err != nil {
348- return 0 , 0 , fmt .Errorf ("failed to initialize coverage maps, base test chain cloning encountered error: %v" , err )
222+ return fmt .Errorf ("failed to initialize coverage maps, base test chain cloning encountered error: %v" , err )
349223 }
224+ defer testChain .Close ()
350225
351226 // Freeze a set of deployedContracts's keys so that we have a set of addresses present in baseTestChain.
352227 // Feed this set to the coverage tracer.
@@ -369,32 +244,65 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
369244 // Update the global coverage maps
370245 _ , covErr := c .coverageMaps .Update (covMaps )
371246 if covErr != nil {
372- return 0 , 0 , covErr
247+ return covErr
373248 }
374249 }
375250 }
376251
377- // Next we replay every call sequence, checking its validity on this chain and measuring coverage. Valid sequences
378- // are added to the corpus for mutations, re-execution, etc.
379- //
380- // The order of initializations here is important, as it determines the order of "unexecuted sequences" to replay
381- // when the fuzzer's worker starts up. We want to replay test results first, so that other corpus items
382- // do not trigger the same test failures instead.
383- err = c .initializeSequences (c .testResultSequenceFiles , testChain , deployedContracts , false )
384- if err != nil {
385- return 0 , 0 , err
252+ // Add all test results and call sequences to the unexecuted call sequences list
253+ totalSequences := len (c .callSequenceFiles .files ) + len (c .testResultSequenceFiles .files )
254+ c .unexecutedCallSequences = make ([]calls.CallSequence , 0 , totalSequences )
255+ for _ , sequenceFileData := range c .testResultSequenceFiles .files {
256+ c .unexecutedCallSequences = append (c .unexecutedCallSequences , sequenceFileData .data )
257+ }
258+ for _ , sequenceFileData := range c .callSequenceFiles .files {
259+ c .unexecutedCallSequences = append (c .unexecutedCallSequences , sequenceFileData .data )
386260 }
387261
388- err = c .initializeSequences (c .callSequenceFiles , testChain , deployedContracts , true )
389- if err != nil {
390- return 0 , 0 , err
262+ // This value will increment as call sequences in the corpus are executed and marked as valid.
263+ c .validCallSequences .Store (0 )
264+
265+ return nil
266+ }
267+
268+ // CoverageMaps exposes coverage details for all call sequences known to the corpus.
269+ func (c * Corpus ) CoverageMaps () * coverage.CoverageMaps {
270+ return c .coverageMaps
271+ }
272+
273+ // CallSequenceEntryCount returns the total number of call sequences that increased coverage and also any test results
274+ // that led to a failure.
275+ func (c * Corpus ) CallSequenceEntryCount () (int , int ) {
276+ return len (c .callSequenceFiles .files ), len (c .testResultSequenceFiles .files )
277+ }
278+
279+ // InitializingCorpus returns true if the corpus is still initializing, false otherwise.
280+ func (c * Corpus ) InitializingCorpus () bool {
281+ return len (c .unexecutedCallSequences ) > 0
282+ }
283+
284+ // ActiveMutableSequenceCount returns the count of call sequences recorded in the corpus which have been validated
285+ // after Corpus initialization and are ready for use in mutations.
286+ func (c * Corpus ) ActiveMutableSequenceCount () int {
287+ if c .mutationTargetSequenceChooser == nil {
288+ return 0
391289 }
290+ return c .mutationTargetSequenceChooser .ChoiceCount ()
291+ }
392292
393- // Calculate corpus health metrics
394- corpusSequencesTotal := len (c .callSequenceFiles .files ) + len (c .testResultSequenceFiles .files )
395- corpusSequencesActive := len (c .unexecutedCallSequences )
293+ // RandomMutationTargetSequence returns a weighted random call sequence from the Corpus, or an error if one occurs.
294+ func (c * Corpus ) RandomMutationTargetSequence () (calls.CallSequence , error ) {
295+ // If we didn't initialize a chooser, return an error
296+ if c .mutationTargetSequenceChooser == nil {
297+ return nil , fmt .Errorf ("corpus could not return a random call sequence because the corpus was not initialized" )
298+ }
396299
397- return corpusSequencesActive , corpusSequencesTotal , nil
300+ // Pick a random call sequence, then clone it before returning it, so the original is untainted.
301+ seq , err := c .mutationTargetSequenceChooser .Choose ()
302+ if seq == nil || err != nil {
303+ return nil , err
304+ }
305+ return seq .Clone ()
398306}
399307
400308// addCallSequence adds a call sequence to the corpus in a given corpus directory.
@@ -437,7 +345,7 @@ func (c *Corpus) addCallSequence(sequenceFiles *corpusDirectory[calls.CallSequen
437345 if mutationChooserWeight == nil {
438346 mutationChooserWeight = big .NewInt (1 )
439347 }
440- c .mutationTargetSequenceChooser .AddChoices (randomutils .NewWeightedRandomChoice [calls. CallSequence ] (sequence , mutationChooserWeight ))
348+ c .mutationTargetSequenceChooser .AddChoices (randomutils .NewWeightedRandomChoice (sequence , mutationChooserWeight ))
441349 }
442350
443351 // Unlock now, as flushing will lock on its own.
@@ -506,6 +414,33 @@ func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence,
506414 return nil
507415}
508416
417+ // MarkCallSequenceForMutation records that a call sequence in the corpus has been successfully executed and can be used for mutations.
418+ func (c * Corpus ) MarkCallSequenceForMutation (sequence calls.CallSequence , mutationChooserWeight * big.Int ) error {
419+ // If no weight is provided, set it to 1.
420+ if mutationChooserWeight == nil {
421+ mutationChooserWeight = big .NewInt (1 )
422+ }
423+
424+ // Unclear whether a lock is needed but might as well be safe
425+ c .callSequencesLock .Lock ()
426+ defer c .callSequencesLock .Unlock ()
427+
428+ // Add the sequence to the mutation chooser
429+ c .mutationTargetSequenceChooser .AddChoices (randomutils .NewWeightedRandomChoice (sequence , mutationChooserWeight ))
430+ return nil
431+ }
432+
433+ // IncrementValid increments the valid call sequences counter.
434+ func (c * Corpus ) IncrementValid () {
435+ c .validCallSequences .Add (1 )
436+ }
437+
438+ // ValidCallSequences returns the number of valid call sequences in the corpus.
439+ // Note that this value is only accurate right after corpus initialization.
440+ func (c * Corpus ) ValidCallSequences () uint64 {
441+ return c .validCallSequences .Load ()
442+ }
443+
509444// UnexecutedCallSequence returns a call sequence loaded from disk which has not yet been returned by this method.
510445// It is intended to be used by the fuzzer to run all un-executed call sequences (without mutations) to check for test
511446// failures. If a call sequence is returned, it will not be returned by this method again.
@@ -574,6 +509,10 @@ func (c *Corpus) Flush() error {
574509// PruneSequences takes a chain.TestChain parameter used to run transactions.
575510// It returns an int indicating the number of sequences removed from the corpus, and an error if any occurred.
576511func (c * Corpus ) PruneSequences (ctx context.Context , chain * chain.TestChain ) (int , error ) {
512+ if c .mutationTargetSequenceChooser == nil {
513+ return 0 , nil
514+ }
515+
577516 chainOriginalIndex := uint64 (len (chain .CommittedBlocks ()))
578517 tmpMap := coverage .NewCoverageMaps ()
579518
0 commit comments