crytic · anishnaik · Oct 28, 2025 · Oct 23, 2025 · Oct 23, 2025 · Oct 27, 2025
@@ -10,19 +10,18 @@ import (
 	"os"
 	"path/filepath"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	"github.com/crytic/medusa-geth/common"
 	"github.com/crytic/medusa/chain"
 	"github.com/crytic/medusa/fuzzing/calls"
+	"github.com/crytic/medusa/fuzzing/contracts"
 	"github.com/crytic/medusa/fuzzing/coverage"
 	"github.com/crytic/medusa/logging"
-	"github.com/crytic/medusa/logging/colors"
 	"github.com/crytic/medusa/utils"
 	"github.com/crytic/medusa/utils/randomutils"
 	"github.com/google/uuid"
-
-	"github.com/crytic/medusa/fuzzing/contracts"
 )
 
 // Corpus describes an archive of fuzzer-generated artifacts used to further fuzzing efforts. These artifacts are
@@ -55,12 +54,21 @@ type Corpus struct {
 	// callSequences.
 	callSequencesLock sync.Mutex
 
+	// initializationTotal captures the total number of corpus sequences that need to be executed to initialize the fuzzer.
+	initializationTotal uint64
+	// initializationProcessed tracks how many initialization sequences have been executed.
+	initializationProcessed atomic.Uint64
+	// initializationSuccessful tracks how many initialization sequences were executed successfully.
+	initializationSuccessful atomic.Uint64
+	// initializationOnce ensures the initializationDoneCallback is invoked only once.
+	initializationOnce sync.Once
+	// initializationDoneCallback is invoked when all initialization sequences finish execution to notify the fuzzer that the corpus has been initialized.
+	initializationDoneCallback func(active uint64, total uint64)
+
 	// logger describes the Corpus's log object that can be used to log important events
 	logger *logging.Logger
 }
 
-// NewCorpus initializes a new Corpus object, reading artifacts from the provided directory. If the directory refers
-// to an empty path, artifacts will not be persistently stored.
 func NewCorpus(corpusDirectory string) (*Corpus, error) {
 	var err error
 	corpus := &Corpus{
@@ -175,140 +183,11 @@ func (c *Corpus) migrateLegacyCorpus() error {
 	return nil
 }
 
-// CoverageMaps exposes coverage details for all call sequences known to the corpus.
-func (c *Corpus) CoverageMaps() *coverage.CoverageMaps {
-	return c.coverageMaps
-}
-
-// CallSequenceEntryCount returns the total number of call sequences that increased coverage and also any test results
-// that led to a failure.
-func (c *Corpus) CallSequenceEntryCount() (int, int) {
-	return len(c.callSequenceFiles.files), len(c.testResultSequenceFiles.files)
-}
-
-// ActiveMutableSequenceCount returns the count of call sequences recorded in the corpus which have been validated
-// after Corpus initialization and are ready for use in mutations.
-func (c *Corpus) ActiveMutableSequenceCount() int {
-	if c.mutationTargetSequenceChooser == nil {
-		return 0
-	}
-	return c.mutationTargetSequenceChooser.ChoiceCount()
-}
-
-// RandomMutationTargetSequence returns a weighted random call sequence from the Corpus, or an error if one occurs.
-func (c *Corpus) RandomMutationTargetSequence() (calls.CallSequence, error) {
-	// If we didn't initialize a chooser, return an error
-	if c.mutationTargetSequenceChooser == nil {
-		return nil, fmt.Errorf("corpus could not return a random call sequence because the corpus was not initialized")
-	}
-
-	// Pick a random call sequence, then clone it before returning it, so the original is untainted.
-	seq, err := c.mutationTargetSequenceChooser.Choose()
-	if seq == nil || err != nil {
-		return nil, err
-	}
-	return seq.Clone()
-}
-
-// initializeSequences is a helper method for Initialize. It validates a list of call sequence files on a given
-// chain, using the map of deployed contracts (e.g. to check for non-existent method called, due to code changes).
-// Valid call sequences are added to the list of un-executed sequences the fuzzer should execute first.
-// If this sequence list being initialized is for use with mutations, it is added to the mutationTargetSequenceChooser.
-// Returns an error if one occurs.
-func (c *Corpus) initializeSequences(sequenceFiles *corpusDirectory[calls.CallSequence], testChain *chain.TestChain, deployedContracts map[common.Address]*contracts.Contract, useInMutations bool) error {
-	// Cache the base block index so that you can reset back to it after every sequence
-	baseBlockIndex := uint64(len(testChain.CommittedBlocks()))
-
-	// Loop for each sequence
-	var err error
-	for _, sequenceFileData := range sequenceFiles.files {
-		// Unwrap the underlying sequence.
-		sequence := sequenceFileData.data
-
-		// Define a variable to track whether we should disable this sequence (if it is no longer applicable in some
-		// way).
-		sequenceInvalidError := error(nil)
-		fetchElementFunc := func(currentIndex int) (*calls.CallSequenceElement, error) {
-			// If we are at the end of our sequence, return nil indicating we should stop executing.
-			if currentIndex >= len(sequence) {
-				return nil, nil
-			}
-
-			// If we are deploying a contract and not targeting one with this call, there should be no work to do.
-			currentSequenceElement := sequence[currentIndex]
-			if currentSequenceElement.Call.To == nil {
-				return currentSequenceElement, nil
-			}
-
-			// We are calling a contract with this call, ensure we can resolve the contract call is targeting.
-			resolvedContract, resolvedContractExists := deployedContracts[*currentSequenceElement.Call.To]
-			if !resolvedContractExists {
-				sequenceInvalidError = fmt.Errorf("contract at address '%v' could not be resolved", currentSequenceElement.Call.To.String())
-				return nil, nil
-			}
-			currentSequenceElement.Contract = resolvedContract
-
-			// Next, if our sequence element uses ABI values to produce call data, our deserialized data is not yet
-			// sufficient for runtime use, until we use it to resolve runtime references.
-			callAbiValues := currentSequenceElement.Call.DataAbiValues
-			if callAbiValues != nil {
-				sequenceInvalidError = callAbiValues.Resolve(currentSequenceElement.Contract.CompiledContract().Abi)
-				if sequenceInvalidError != nil {
-					sequenceInvalidError = fmt.Errorf("error resolving method in contract '%v': %v", currentSequenceElement.Contract.Name(), sequenceInvalidError)
-					return nil, nil
-				}
-			}
-			return currentSequenceElement, nil
-		}
-
-		// Define actions to perform after executing each call in the sequence.
-		executionCheckFunc := func(currentlyExecutedSequence calls.CallSequence) (bool, error) {
-			// Grab the coverage maps for the last executed sequence element
-			lastExecutedSequenceElement := currentlyExecutedSequence[len(currentlyExecutedSequence)-1]
-			covMaps := coverage.GetCoverageTracerResults(lastExecutedSequenceElement.ChainReference.MessageResults())
-
-			// Memory optimization: Remove the coverage maps from the message results
-			coverage.RemoveCoverageTracerResults(lastExecutedSequenceElement.ChainReference.MessageResults())
-
-			// Update the global coverage maps
-			_, covErr := c.coverageMaps.Update(covMaps)
-			if covErr != nil {
-				return true, covErr
-			}
-			return false, nil
-		}
-
-		// Execute each call sequence, populating runtime data and collecting coverage data along the way.
-		_, err = calls.ExecuteCallSequenceIteratively(testChain, fetchElementFunc, executionCheckFunc)
-
-		// If we failed to replay a sequence and measure coverage due to an unexpected error, report it.
-		if err != nil {
-			return fmt.Errorf("failed to initialize coverage maps from corpus, encountered an error while executing call sequence: %v", err)
-		}
-
-		// If the sequence was replayed successfully, we add it. If it was not, we exclude it with a warning.
-		if sequenceInvalidError == nil {
-			if useInMutations && c.mutationTargetSequenceChooser != nil {
-				c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice[calls.CallSequence](sequence, big.NewInt(1)))
-			}
-			c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequence)
-		} else {
-			c.logger.Debug("Corpus item ", colors.Bold, sequenceFileData.fileName, colors.Reset, " disabled due to error when replaying it", sequenceInvalidError)
-		}
-
-		// Revert chain state to our starting point to test the next sequence.
-		if err := testChain.RevertToBlockIndex(baseBlockIndex); err != nil {
-			return fmt.Errorf("failed to reset the chain while seeding coverage: %v", err)
-		}
-	}
-	return nil
-}
-
-// Initialize initializes any runtime data needed for a Corpus on startup. Call sequences are replayed on the post-setup
-// (deployment) test chain to calculate coverage, while resolving references to compiled contracts.
-// Returns the active number of corpus items, total number of corpus items, or an error if one occurred. If an error
-// is returned, then the corpus counts returned will always be zero.
-func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts) (int, int, error) {
+// Initialize initializes the in-memory corpus state but does not actually replay any of the sequences stored in the corpus.
+// It seeds coverage information from the post-setup chain while enqueueing all persisted sequences for execution. The fuzzer workers
+// will concurrently execute all the sequences stored in the corpus, and then the onComplete hook is invoked to notify the fuzzer that the corpus has been initialized.
+// Returns an error if seeding fails.
+func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts, onComplete func(active uint64, total uint64)) error {
 	// Acquire our call sequences lock during the duration of this method.
 	c.callSequencesLock.Lock()
 	defer c.callSequencesLock.Unlock()
@@ -332,9 +211,11 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
 		// We also track any contract deployments, so we can resolve contract/method definitions for corpus call
 		// sequences.
 		newChain.Events.ContractDeploymentAddedEventEmitter.Subscribe(func(event chain.ContractDeploymentsAddedEvent) error {
-			matchedContract := contractDefinitions.MatchBytecode(event.Contract.InitBytecode, event.Contract.RuntimeBytecode)
-			if matchedContract != nil {
-				deployedContracts[event.Contract.Address] = matchedContract
+			if contractDefinitions != nil {
+				matchedContract := contractDefinitions.MatchBytecode(event.Contract.InitBytecode, event.Contract.RuntimeBytecode)
+				if matchedContract != nil {
+					deployedContracts[event.Contract.Address] = matchedContract
+				}
 			}
 			return nil
 		})
@@ -345,8 +226,9 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
 		return nil
 	})
 	if err != nil {
-		return 0, 0, fmt.Errorf("failed to initialize coverage maps, base test chain cloning encountered error: %v", err)
+		return fmt.Errorf("failed to seed coverage maps during warmup preparation: %v", err)
 	}
+	defer testChain.Close()
 
 	// Freeze a set of deployedContracts's keys so that we have a set of addresses present in baseTestChain.
 	// Feed this set to the coverage tracer.
@@ -369,32 +251,70 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
 			// Update the global coverage maps
 			_, covErr := c.coverageMaps.Update(covMaps)
 			if covErr != nil {
-				return 0, 0, covErr
+				return covErr
 			}
 		}
 	}
 
-	// Next we replay every call sequence, checking its validity on this chain and measuring coverage. Valid sequences
-	// are added to the corpus for mutations, re-execution, etc.
-	//
-	// The order of initializations here is important, as it determines the order of "unexecuted sequences" to replay
-	// when the fuzzer's worker starts up. We want to replay test results first, so that other corpus items
-	// do not trigger the same test failures instead.
-	err = c.initializeSequences(c.testResultSequenceFiles, testChain, deployedContracts, false)
-	if err != nil {
-		return 0, 0, err
+	totalSequences := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files)
+	c.unexecutedCallSequences = make([]calls.CallSequence, 0, totalSequences)
+	for _, sequenceFileData := range c.testResultSequenceFiles.files {
+		c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequenceFileData.data)
+	}
+	for _, sequenceFileData := range c.callSequenceFiles.files {
+		c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequenceFileData.data)
 	}
 
-	err = c.initializeSequences(c.callSequenceFiles, testChain, deployedContracts, true)
-	if err != nil {
-		return 0, 0, err
+	// Reset warmup tracking counters.
+	c.initializationProcessed.Store(0)
+	c.initializationSuccessful.Store(0)
+	c.initializationOnce = sync.Once{}
+	c.initializationDoneCallback = onComplete
+	c.initializationTotal = uint64(len(c.unexecutedCallSequences))
+
+	// If there are no sequences to process, trigger the callback immediately.
+	if c.initializationTotal == 0 && c.initializationDoneCallback != nil {
+		c.initializationOnce.Do(func() {
+			c.initializationDoneCallback(0, 0)
+		})
 	}
 
-	// Calculate corpus health metrics
-	corpusSequencesTotal := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files)
-	corpusSequencesActive := len(c.unexecutedCallSequences)
+	return nil
+}
+
+// CoverageMaps exposes coverage details for all call sequences known to the corpus.
+func (c *Corpus) CoverageMaps() *coverage.CoverageMaps {
+	return c.coverageMaps
+}
+
+// CallSequenceEntryCount returns the total number of call sequences that increased coverage and also any test results
+// that led to a failure.
+func (c *Corpus) CallSequenceEntryCount() (int, int) {
+	return len(c.callSequenceFiles.files), len(c.testResultSequenceFiles.files)
+}
 
-	return corpusSequencesActive, corpusSequencesTotal, nil
+// ActiveMutableSequenceCount returns the count of call sequences recorded in the corpus which have been validated
+// after Corpus initialization and are ready for use in mutations.
+func (c *Corpus) ActiveMutableSequenceCount() int {
+	if c.mutationTargetSequenceChooser == nil {
+		return 0
+	}
+	return c.mutationTargetSequenceChooser.ChoiceCount()
+}
+
+// RandomMutationTargetSequence returns a weighted random call sequence from the Corpus, or an error if one occurs.
+func (c *Corpus) RandomMutationTargetSequence() (calls.CallSequence, error) {
+	// If we didn't initialize a chooser, return an error
+	if c.mutationTargetSequenceChooser == nil {
+		return nil, fmt.Errorf("corpus could not return a random call sequence because the corpus was not initialized")
+	}
+
+	// Pick a random call sequence, then clone it before returning it, so the original is untainted.
+	seq, err := c.mutationTargetSequenceChooser.Choose()
+	if seq == nil || err != nil {
+		return nil, err
+	}
+	return seq.Clone()
 }
 
 // addCallSequence adds a call sequence to the corpus in a given corpus directory.
@@ -437,7 +357,7 @@ func (c *Corpus) addCallSequence(sequenceFiles *corpusDirectory[calls.CallSequen
 		if mutationChooserWeight == nil {
 			mutationChooserWeight = big.NewInt(1)
 		}
-		c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice[calls.CallSequence](sequence, mutationChooserWeight))
+		c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice(sequence, mutationChooserWeight))
 	}
 
 	// Unlock now, as flushing will lock on its own.
@@ -488,24 +408,68 @@ func checkSequenceCoverageAndUpdate(callSequence calls.CallSequence, coverageMap
 // CheckSequenceCoverageAndUpdate checks if the most recent call executed in the provided call sequence achieved
 // coverage the Corpus did not with any of its call sequences. If it did, the call sequence is added to the corpus
 // and the Corpus coverage maps are updated accordingly.
-// Returns an error if one occurs.
-func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence, mutationChooserWeight *big.Int, flushImmediately bool) error {
+// Returns a boolean indicating whether coverage increased, and an error if one occurs.
+func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence, mutationChooserWeight *big.Int, flushImmediately bool) (bool, error) {
 	coverageUpdated, err := checkSequenceCoverageAndUpdate(callSequence, c.coverageMaps)
 	if err != nil {
-		return err
+		return false, err
 	}
 
 	// If we had an increase in coverage, we save the sequence.
 	if coverageUpdated {
 		// If we achieved new coverage, save this sequence for mutation purposes.
 		err = c.addCallSequence(c.callSequenceFiles, callSequence, true, mutationChooserWeight, flushImmediately)
 		if err != nil {
-			return err
+			return true, err
 		}
 	}
+	return coverageUpdated, nil
+}
+
+// MarkCorpusElementForMutation records that a corpus element has been successfully executed and can be used for mutations.
+// The sequence is registered with the mutation chooser exactly as given (weight 1 when mutationChooserWeight is nil); any cloning or
+// stripping of runtime metadata must be done by the caller before calling this method. It currently always returns nil.
+func (c *Corpus) MarkCorpusElementForMutation(sequence calls.CallSequence, mutationChooserWeight *big.Int) error {
+	// If no weight is provided, set it to 1.
+	if mutationChooserWeight == nil {
+		mutationChooserWeight = big.NewInt(1)
+	}
+
+	// Add the sequence to the mutation chooser
+	c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice(sequence, mutationChooserWeight))
 	return nil
 }
 
+// IncrementValid records that a previously unexecuted corpus element has finished executing.
+// The valid parameter should be true when the call sequence execution succeeded (even if it triggered a test failure),
+// and false if it was skipped due to incompatibility or other errors.
+func (c *Corpus) IncrementValid(valid bool) {
+	// If no initialization sequences were enqueued, there is nothing to track, so return early.
+	total := c.initializationTotal
+	if total == 0 {
+		return
+	}
+
+	// Increment the processed counter.
+	processed := c.initializationProcessed.Add(1)
+
+	// If the call sequence execution was successful, increment the successful counter.
+	if valid {
+		c.initializationSuccessful.Add(1)
+	}
+
+	// If we have processed all corpus elements, invoke the completion callback.
+	if processed == total {
+		c.initializationOnce.Do(func() {
+			// Invoke the completion callback if it is set.
+			if c.initializationDoneCallback != nil {
+				// Invoke the completion callback with the number of successful corpus elements and the total number of corpus elements.
+				c.initializationDoneCallback(c.initializationSuccessful.Load(), total)
+			}
+		})
+	}
+}
+
 // UnexecutedCallSequence returns a call sequence loaded from disk which has not yet been returned by this method.
 // It is intended to be used by the fuzzer to run all un-executed call sequences (without mutations) to check for test
 // failures. If a call sequence is returned, it will not be returned by this method again.
@@ -574,6 +538,10 @@ func (c *Corpus) Flush() error {
 // PruneSequences takes a chain.TestChain parameter used to run transactions.
 // It returns an int indicating the number of sequences removed from the corpus, and an error if any occurred.
 func (c *Corpus) PruneSequences(ctx context.Context, chain *chain.TestChain) (int, error) {
+	if c.mutationTargetSequenceChooser == nil {
+		return 0, nil
+	}
+
 	chainOriginalIndex := uint64(len(chain.CommittedBlocks()))
 	tmpMap := coverage.NewCoverageMaps()