Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
298 changes: 133 additions & 165 deletions fuzzing/corpus/corpus.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,18 @@ import (
"os"
"path/filepath"
"sync"
"sync/atomic"
"time"

"github.com/crytic/medusa-geth/common"
"github.com/crytic/medusa/chain"
"github.com/crytic/medusa/fuzzing/calls"
"github.com/crytic/medusa/fuzzing/contracts"
"github.com/crytic/medusa/fuzzing/coverage"
"github.com/crytic/medusa/logging"
"github.com/crytic/medusa/logging/colors"
"github.com/crytic/medusa/utils"
"github.com/crytic/medusa/utils/randomutils"
"github.com/google/uuid"

"github.com/crytic/medusa/fuzzing/contracts"
)

// Corpus describes an archive of fuzzer-generated artifacts used to further fuzzing efforts. These artifacts are
Expand Down Expand Up @@ -55,12 +54,21 @@ type Corpus struct {
// callSequences.
callSequencesLock sync.Mutex

// initializationTotal captures the total number of corpus sequences that need to be executed to initialize the fuzzer.
initializationTotal uint64
// initializationProcessed tracks how many initialization sequences have been executed.
initializationProcessed atomic.Uint64
// initializationSuccessful tracks how many initialization sequences were executed successfully.
initializationSuccessful atomic.Uint64
// initializationOnce ensures the initializationDoneCallback is invoked only once.
initializationOnce sync.Once
// initializationDoneCallback is invoked when all initialization sequences finish execution to notify the fuzzer that the corpus has been initialized.
initializationDoneCallback func(active uint64, total uint64)

// logger describes the Corpus's log object that can be used to log important events
logger *logging.Logger
}

// NewCorpus initializes a new Corpus object, reading artifacts from the provided directory. If the directory refers
// to an empty path, artifacts will not be persistently stored.
func NewCorpus(corpusDirectory string) (*Corpus, error) {
var err error
corpus := &Corpus{
Expand Down Expand Up @@ -175,140 +183,11 @@ func (c *Corpus) migrateLegacyCorpus() error {
return nil
}

// CoverageMaps returns the coverage maps held by the corpus, which accumulate coverage details for the call
// sequences known to it.
func (c *Corpus) CoverageMaps() *coverage.CoverageMaps {
	covMaps := c.coverageMaps
	return covMaps
}

// CallSequenceEntryCount reports how many entries the corpus holds. The first value is the number of call sequence
// files (sequences that increased coverage) and the second is the number of test result sequence files (sequences
// that led to a test failure).
func (c *Corpus) CallSequenceEntryCount() (int, int) {
	coverageSequenceCount := len(c.callSequenceFiles.files)
	testResultSequenceCount := len(c.testResultSequenceFiles.files)
	return coverageSequenceCount, testResultSequenceCount
}

// ActiveMutableSequenceCount reports how many call sequences are currently registered with the mutation target
// chooser, i.e. how many are available for use in mutations. It returns zero when no chooser has been set up.
func (c *Corpus) ActiveMutableSequenceCount() int {
	if chooser := c.mutationTargetSequenceChooser; chooser != nil {
		return chooser.ChoiceCount()
	}
	return 0
}

// RandomMutationTargetSequence asks the mutation target chooser for a call sequence and returns a clone of it, so
// that callers can freely modify the result without affecting the copy stored in the corpus.
// Returns an error if the chooser was never set up or if choosing fails. A nil sequence with a nil error is returned
// when the chooser yields nothing.
func (c *Corpus) RandomMutationTargetSequence() (calls.CallSequence, error) {
	// Without a chooser there is nothing to select from.
	chooser := c.mutationTargetSequenceChooser
	if chooser == nil {
		return nil, fmt.Errorf("corpus could not return a random call sequence because the corpus was not initialized")
	}

	// Select a sequence; bail out on failure or when nothing was selected.
	picked, err := chooser.Choose()
	if err != nil || picked == nil {
		return nil, err
	}

	// Hand back a clone so the stored original stays untouched.
	return picked.Clone()
}

// initializeSequences is a helper method for Initialize. It replays every call sequence in the provided corpus
// directory on the given chain, resolving each call's target against the map of deployed contracts (e.g. to detect
// calls to contracts or methods that no longer exist due to code changes) and merging the resulting coverage into the
// corpus coverage maps.
// Sequences that replay without a resolution error are appended to the list of un-executed sequences the fuzzer should
// execute first. If useInMutations is set and a mutation chooser exists, they are also added to the
// mutationTargetSequenceChooser. Sequences that fail resolution are skipped with a debug log entry.
// The chain is reverted to its starting block index after each sequence.
// Returns an error if replaying, coverage tracking, or reverting the chain fails. Returned errors wrap their cause.
func (c *Corpus) initializeSequences(sequenceFiles *corpusDirectory[calls.CallSequence], testChain *chain.TestChain, deployedContracts map[common.Address]*contracts.Contract, useInMutations bool) error {
	// Cache the base block index so that we can reset back to it after every sequence.
	baseBlockIndex := uint64(len(testChain.CommittedBlocks()))

	// Loop for each sequence
	for _, sequenceFileData := range sequenceFiles.files {
		// Unwrap the underlying sequence.
		sequence := sequenceFileData.data

		// Define a variable to track whether we should disable this sequence (if it is no longer applicable in some
		// way).
		var sequenceInvalidError error
		fetchElementFunc := func(currentIndex int) (*calls.CallSequenceElement, error) {
			// If we are at the end of our sequence, return nil indicating we should stop executing.
			if currentIndex >= len(sequence) {
				return nil, nil
			}

			// If we are deploying a contract and not targeting one with this call, there should be no work to do.
			currentSequenceElement := sequence[currentIndex]
			if currentSequenceElement.Call.To == nil {
				return currentSequenceElement, nil
			}

			// We are calling a contract with this call, ensure we can resolve the contract the call is targeting.
			// On failure we record the reason and stop the sequence early (nil element) rather than returning an
			// error, so that only this sequence is disabled.
			resolvedContract, resolvedContractExists := deployedContracts[*currentSequenceElement.Call.To]
			if !resolvedContractExists {
				sequenceInvalidError = fmt.Errorf("contract at address '%v' could not be resolved", currentSequenceElement.Call.To.String())
				return nil, nil
			}
			currentSequenceElement.Contract = resolvedContract

			// Next, if our sequence element uses ABI values to produce call data, our deserialized data is not yet
			// sufficient for runtime use, until we use it to resolve runtime references.
			callAbiValues := currentSequenceElement.Call.DataAbiValues
			if callAbiValues != nil {
				sequenceInvalidError = callAbiValues.Resolve(currentSequenceElement.Contract.CompiledContract().Abi)
				if sequenceInvalidError != nil {
					sequenceInvalidError = fmt.Errorf("error resolving method in contract '%v': %w", currentSequenceElement.Contract.Name(), sequenceInvalidError)
					return nil, nil
				}
			}
			return currentSequenceElement, nil
		}

		// Define actions to perform after executing each call in the sequence.
		executionCheckFunc := func(currentlyExecutedSequence calls.CallSequence) (bool, error) {
			// Grab the coverage maps for the last executed sequence element
			lastExecutedSequenceElement := currentlyExecutedSequence[len(currentlyExecutedSequence)-1]
			covMaps := coverage.GetCoverageTracerResults(lastExecutedSequenceElement.ChainReference.MessageResults())

			// Memory optimization: Remove the coverage maps from the message results
			coverage.RemoveCoverageTracerResults(lastExecutedSequenceElement.ChainReference.MessageResults())

			// Update the global coverage maps
			_, covErr := c.coverageMaps.Update(covMaps)
			if covErr != nil {
				return true, covErr
			}
			return false, nil
		}

		// Execute each call sequence, populating runtime data and collecting coverage data along the way.
		// If we failed to replay a sequence and measure coverage due to an unexpected error, report it.
		if _, err := calls.ExecuteCallSequenceIteratively(testChain, fetchElementFunc, executionCheckFunc); err != nil {
			return fmt.Errorf("failed to initialize coverage maps from corpus, encountered an error while executing call sequence: %w", err)
		}

		// If the sequence was replayed successfully, we add it. If it was not, we exclude it with a warning.
		if sequenceInvalidError == nil {
			if useInMutations && c.mutationTargetSequenceChooser != nil {
				c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice[calls.CallSequence](sequence, big.NewInt(1)))
			}
			c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequence)
		} else {
			c.logger.Debug("Corpus item ", colors.Bold, sequenceFileData.fileName, colors.Reset, " disabled due to error when replaying it", sequenceInvalidError)
		}

		// Revert chain state to our starting point to test the next sequence.
		if err := testChain.RevertToBlockIndex(baseBlockIndex); err != nil {
			return fmt.Errorf("failed to reset the chain while seeding coverage: %w", err)
		}
	}
	return nil
}

// Initialize initializes any runtime data needed for a Corpus on startup. Call sequences are replayed on the post-setup
// (deployment) test chain to calculate coverage, while resolving references to compiled contracts.
// Returns the active number of corpus items, total number of corpus items, or an error if one occurred. If an error
// is returned, then the corpus counts returned will always be zero.
func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts) (int, int, error) {
// Initialize initializes the in-memory corpus state but does not actually replay any of the sequences stored in the corpus.
// It seeds coverage information from the post-setup chain while enqueueing all persisted sequences for execution. The fuzzer workers
// will concurrently execute all the sequences stored in the corpus, and then the onComplete hook is invoked to notify the fuzzer that the corpus has been initialized.
// Returns an error if seeding fails.
func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts, onComplete func(active uint64, total uint64)) error {
// Acquire our call sequences lock during the duration of this method.
c.callSequencesLock.Lock()
defer c.callSequencesLock.Unlock()
Expand All @@ -332,9 +211,11 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
// We also track any contract deployments, so we can resolve contract/method definitions for corpus call
// sequences.
newChain.Events.ContractDeploymentAddedEventEmitter.Subscribe(func(event chain.ContractDeploymentsAddedEvent) error {
matchedContract := contractDefinitions.MatchBytecode(event.Contract.InitBytecode, event.Contract.RuntimeBytecode)
if matchedContract != nil {
deployedContracts[event.Contract.Address] = matchedContract
if contractDefinitions != nil {
matchedContract := contractDefinitions.MatchBytecode(event.Contract.InitBytecode, event.Contract.RuntimeBytecode)
if matchedContract != nil {
deployedContracts[event.Contract.Address] = matchedContract
}
}
return nil
})
Expand All @@ -345,8 +226,9 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
return nil
})
if err != nil {
return 0, 0, fmt.Errorf("failed to initialize coverage maps, base test chain cloning encountered error: %v", err)
return fmt.Errorf("failed to seed coverage maps during warmup preparation: %v", err)
}
defer testChain.Close()

// Freeze a set of deployedContracts's keys so that we have a set of addresses present in baseTestChain.
// Feed this set to the coverage tracer.
Expand All @@ -369,32 +251,70 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
// Update the global coverage maps
_, covErr := c.coverageMaps.Update(covMaps)
if covErr != nil {
return 0, 0, covErr
return covErr
}
}
}

// Next we replay every call sequence, checking its validity on this chain and measuring coverage. Valid sequences
// are added to the corpus for mutations, re-execution, etc.
//
// The order of initializations here is important, as it determines the order of "unexecuted sequences" to replay
// when the fuzzer's worker starts up. We want to replay test results first, so that other corpus items
// do not trigger the same test failures instead.
err = c.initializeSequences(c.testResultSequenceFiles, testChain, deployedContracts, false)
if err != nil {
return 0, 0, err
totalSequences := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files)
c.unexecutedCallSequences = make([]calls.CallSequence, 0, totalSequences)
for _, sequenceFileData := range c.testResultSequenceFiles.files {
c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequenceFileData.data)
}
for _, sequenceFileData := range c.callSequenceFiles.files {
c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequenceFileData.data)
}

err = c.initializeSequences(c.callSequenceFiles, testChain, deployedContracts, true)
if err != nil {
return 0, 0, err
// Reset warmup tracking counters.
c.initializationProcessed.Store(0)
c.initializationSuccessful.Store(0)
c.initializationOnce = sync.Once{}
c.initializationDoneCallback = onComplete
c.initializationTotal = uint64(len(c.unexecutedCallSequences))

// If there are no sequences to process, trigger the callback immediately.
if c.initializationTotal == 0 && c.initializationDoneCallback != nil {
c.initializationOnce.Do(func() {
c.initializationDoneCallback(0, 0)
})
}

// Calculate corpus health metrics
corpusSequencesTotal := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files)
corpusSequencesActive := len(c.unexecutedCallSequences)
return nil
}

// CoverageMaps returns the coverage maps held by the corpus, which accumulate coverage details for the call
// sequences known to it.
func (c *Corpus) CoverageMaps() *coverage.CoverageMaps {
	covMaps := c.coverageMaps
	return covMaps
}

// CallSequenceEntryCount reports how many entries the corpus holds. The first value is the number of call sequence
// files (sequences that increased coverage) and the second is the number of test result sequence files (sequences
// that led to a test failure).
func (c *Corpus) CallSequenceEntryCount() (int, int) {
	coverageSequenceCount := len(c.callSequenceFiles.files)
	testResultSequenceCount := len(c.testResultSequenceFiles.files)
	return coverageSequenceCount, testResultSequenceCount
}

return corpusSequencesActive, corpusSequencesTotal, nil
// ActiveMutableSequenceCount reports how many call sequences are currently registered with the mutation target
// chooser, i.e. how many are available for use in mutations. It returns zero when no chooser has been set up.
func (c *Corpus) ActiveMutableSequenceCount() int {
	if chooser := c.mutationTargetSequenceChooser; chooser != nil {
		return chooser.ChoiceCount()
	}
	return 0
}

// RandomMutationTargetSequence asks the mutation target chooser for a call sequence and returns a clone of it, so
// that callers can freely modify the result without affecting the copy stored in the corpus.
// Returns an error if the chooser was never set up or if choosing fails. A nil sequence with a nil error is returned
// when the chooser yields nothing.
func (c *Corpus) RandomMutationTargetSequence() (calls.CallSequence, error) {
	// Without a chooser there is nothing to select from.
	chooser := c.mutationTargetSequenceChooser
	if chooser == nil {
		return nil, fmt.Errorf("corpus could not return a random call sequence because the corpus was not initialized")
	}

	// Select a sequence; bail out on failure or when nothing was selected.
	picked, err := chooser.Choose()
	if err != nil || picked == nil {
		return nil, err
	}

	// Hand back a clone so the stored original stays untouched.
	return picked.Clone()
}

// addCallSequence adds a call sequence to the corpus in a given corpus directory.
Expand Down Expand Up @@ -437,7 +357,7 @@ func (c *Corpus) addCallSequence(sequenceFiles *corpusDirectory[calls.CallSequen
if mutationChooserWeight == nil {
mutationChooserWeight = big.NewInt(1)
}
c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice[calls.CallSequence](sequence, mutationChooserWeight))
c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice(sequence, mutationChooserWeight))
}

// Unlock now, as flushing will lock on its own.
Expand Down Expand Up @@ -488,24 +408,68 @@ func checkSequenceCoverageAndUpdate(callSequence calls.CallSequence, coverageMap
// CheckSequenceCoverageAndUpdate checks if the most recent call executed in the provided call sequence achieved
// coverage the Corpus did not with any of its call sequences. If it did, the call sequence is added to the corpus
// and the Corpus coverage maps are updated accordingly.
// Returns an error if one occurs.
func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence, mutationChooserWeight *big.Int, flushImmediately bool) error {
// Returns a boolean indicating whether coverage increased, and an error if one occurs.
func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence, mutationChooserWeight *big.Int, flushImmediately bool) (bool, error) {
coverageUpdated, err := checkSequenceCoverageAndUpdate(callSequence, c.coverageMaps)
if err != nil {
return err
return false, err
}

// If we had an increase in coverage, we save the sequence.
if coverageUpdated {
// If we achieved new coverage, save this sequence for mutation purposes.
err = c.addCallSequence(c.callSequenceFiles, callSequence, true, mutationChooserWeight, flushImmediately)
if err != nil {
return err
return true, err
}
}
return coverageUpdated, nil
}

// MarkCorpusElementForMutation records that a corpus element has been successfully executed and can be used for
// mutations by registering it with the mutation target chooser.
// The sequence is registered as provided: this method does not clone or otherwise modify it, so callers that intend
// to keep mutating the sequence should pass in a copy.
// If mutationChooserWeight is nil, a weight of 1 is used. If the corpus has no mutation target chooser, the call is
// a no-op, matching how the rest of the corpus treats a missing chooser.
// Returns an error if one occurs (currently always nil).
func (c *Corpus) MarkCorpusElementForMutation(sequence calls.CallSequence, mutationChooserWeight *big.Int) error {
	// Without a chooser there is nowhere to register the sequence; avoid dereferencing a nil chooser.
	if c.mutationTargetSequenceChooser == nil {
		return nil
	}

	// If no weight is provided, set it to 1.
	if mutationChooserWeight == nil {
		mutationChooserWeight = big.NewInt(1)
	}

	// Add the sequence to the mutation chooser
	c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice(sequence, mutationChooserWeight))
	return nil
}

// IncrementValid records that a previously unexecuted corpus element has finished executing.
// The valid parameter should be true when the call sequence execution succeeded (even if it triggered a test failure),
// and false if it was skipped due to incompatibility or other errors.
// Once the number of processed elements reaches the initialization total, the completion callback (if set) is invoked
// exactly once with the number of successful elements followed by the total. If the initialization total is zero,
// this method does nothing.
func (c *Corpus) IncrementValid(valid bool) {
	// Guard clause: nothing is being tracked when there are no initialization sequences.
	total := c.initializationTotal
	if total == 0 {
		return
	}

	// Record the success BEFORE bumping the processed counter. Whichever caller observes processed == total then
	// reads the successful counter; if the order were reversed, a concurrent caller could reach the total and fire
	// the callback before this call's success had been counted, under-reporting the number of valid sequences.
	if valid {
		c.initializationSuccessful.Add(1)
	}

	// Increment the processed counter.
	processed := c.initializationProcessed.Add(1)

	// Only the call that completes the final element proceeds to the callback.
	if processed != total {
		return
	}

	c.initializationOnce.Do(func() {
		// Invoke the completion callback, if set, with the number of successful corpus elements and the total
		// number of corpus elements.
		if c.initializationDoneCallback != nil {
			c.initializationDoneCallback(c.initializationSuccessful.Load(), total)
		}
	})
}

// UnexecutedCallSequence returns a call sequence loaded from disk which has not yet been returned by this method.
// It is intended to be used by the fuzzer to run all un-executed call sequences (without mutations) to check for test
// failures. If a call sequence is returned, it will not be returned by this method again.
Expand Down Expand Up @@ -574,6 +538,10 @@ func (c *Corpus) Flush() error {
// PruneSequences takes a chain.TestChain parameter used to run transactions.
// It returns an int indicating the number of sequences removed from the corpus, and an error if any occurred.
func (c *Corpus) PruneSequences(ctx context.Context, chain *chain.TestChain) (int, error) {
if c.mutationTargetSequenceChooser == nil {
return 0, nil
}

chainOriginalIndex := uint64(len(chain.CommittedBlocks()))
tmpMap := coverage.NewCoverageMaps()

Expand Down
Loading
Loading