Skip to content

Commit 1c9cdd9

Browse files
authored
Merge branch 'master' into dependabot/github_actions/actions/download-artifact-6
2 parents ce2bf1c + 77597e3 commit 1c9cdd9

File tree

6 files changed

+228
-192
lines changed

6 files changed

+228
-192
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
## medusa build process
2-
FROM golang:1.23 AS medusa
2+
FROM golang:1.24 AS medusa
33

44
WORKDIR /src
55
COPY . /src/medusa/

fuzzing/corpus/corpus.go

Lines changed: 100 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,18 @@ import (
1010
"os"
1111
"path/filepath"
1212
"sync"
13+
"sync/atomic"
1314
"time"
1415

1516
"github.com/crytic/medusa-geth/common"
1617
"github.com/crytic/medusa/chain"
1718
"github.com/crytic/medusa/fuzzing/calls"
19+
"github.com/crytic/medusa/fuzzing/contracts"
1820
"github.com/crytic/medusa/fuzzing/coverage"
1921
"github.com/crytic/medusa/logging"
20-
"github.com/crytic/medusa/logging/colors"
2122
"github.com/crytic/medusa/utils"
2223
"github.com/crytic/medusa/utils/randomutils"
2324
"github.com/google/uuid"
24-
25-
"github.com/crytic/medusa/fuzzing/contracts"
2625
)
2726

2827
// Corpus describes an archive of fuzzer-generated artifacts used to further fuzzing efforts. These artifacts are
@@ -55,12 +54,15 @@ type Corpus struct {
5554
// callSequences.
5655
callSequencesLock sync.Mutex
5756

57+
// validCallSequences tracks how many call sequences in the corpus are valid when the corpus is re-run.
58+
validCallSequences atomic.Uint64
59+
5860
// logger describes the Corpus's log object that can be used to log important events
5961
logger *logging.Logger
6062
}
6163

62-
// NewCorpus initializes a new Corpus object, reading artifacts from the provided directory. If the directory refers
63-
// to an empty path, artifacts will not be persistently stored.
64+
// NewCorpus initializes a new Corpus object, reading artifacts from the provided directory and preparing in-memory
65+
// state required for fuzzing. If the directory refers to an empty path, artifacts will not be persistently stored.
6466
func NewCorpus(corpusDirectory string) (*Corpus, error) {
6567
var err error
6668
corpus := &Corpus{
@@ -175,140 +177,10 @@ func (c *Corpus) migrateLegacyCorpus() error {
175177
return nil
176178
}
177179

178-
// CoverageMaps exposes coverage details for all call sequences known to the corpus.
179-
func (c *Corpus) CoverageMaps() *coverage.CoverageMaps {
180-
return c.coverageMaps
181-
}
182-
183-
// CallSequenceEntryCount returns the total number of call sequences that increased coverage and also any test results
184-
// that led to a failure.
185-
func (c *Corpus) CallSequenceEntryCount() (int, int) {
186-
return len(c.callSequenceFiles.files), len(c.testResultSequenceFiles.files)
187-
}
188-
189-
// ActiveMutableSequenceCount returns the count of call sequences recorded in the corpus which have been validated
190-
// after Corpus initialization and are ready for use in mutations.
191-
func (c *Corpus) ActiveMutableSequenceCount() int {
192-
if c.mutationTargetSequenceChooser == nil {
193-
return 0
194-
}
195-
return c.mutationTargetSequenceChooser.ChoiceCount()
196-
}
197-
198-
// RandomMutationTargetSequence returns a weighted random call sequence from the Corpus, or an error if one occurs.
199-
func (c *Corpus) RandomMutationTargetSequence() (calls.CallSequence, error) {
200-
// If we didn't initialize a chooser, return an error
201-
if c.mutationTargetSequenceChooser == nil {
202-
return nil, fmt.Errorf("corpus could not return a random call sequence because the corpus was not initialized")
203-
}
204-
205-
// Pick a random call sequence, then clone it before returning it, so the original is untainted.
206-
seq, err := c.mutationTargetSequenceChooser.Choose()
207-
if seq == nil || err != nil {
208-
return nil, err
209-
}
210-
return seq.Clone()
211-
}
212-
213-
// initializeSequences is a helper method for Initialize. It validates a list of call sequence files on a given
214-
// chain, using the map of deployed contracts (e.g. to check for calls to methods that no longer exist due to code changes).
215-
// Valid call sequences are added to the list of un-executed sequences the fuzzer should execute first.
216-
// If this sequence list being initialized is for use with mutations, it is added to the mutationTargetSequenceChooser.
217-
// Returns an error if one occurs.
218-
func (c *Corpus) initializeSequences(sequenceFiles *corpusDirectory[calls.CallSequence], testChain *chain.TestChain, deployedContracts map[common.Address]*contracts.Contract, useInMutations bool) error {
219-
// Cache the base block index so that you can reset back to it after every sequence
220-
baseBlockIndex := uint64(len(testChain.CommittedBlocks()))
221-
222-
// Loop for each sequence
223-
var err error
224-
for _, sequenceFileData := range sequenceFiles.files {
225-
// Unwrap the underlying sequence.
226-
sequence := sequenceFileData.data
227-
228-
// Define a variable to track whether we should disable this sequence (if it is no longer applicable in some
229-
// way).
230-
sequenceInvalidError := error(nil)
231-
fetchElementFunc := func(currentIndex int) (*calls.CallSequenceElement, error) {
232-
// If we are at the end of our sequence, return nil indicating we should stop executing.
233-
if currentIndex >= len(sequence) {
234-
return nil, nil
235-
}
236-
237-
// If we are deploying a contract and not targeting one with this call, there should be no work to do.
238-
currentSequenceElement := sequence[currentIndex]
239-
if currentSequenceElement.Call.To == nil {
240-
return currentSequenceElement, nil
241-
}
242-
243-
// We are calling a contract with this call, so ensure we can resolve the contract the call is targeting.
244-
resolvedContract, resolvedContractExists := deployedContracts[*currentSequenceElement.Call.To]
245-
if !resolvedContractExists {
246-
sequenceInvalidError = fmt.Errorf("contract at address '%v' could not be resolved", currentSequenceElement.Call.To.String())
247-
return nil, nil
248-
}
249-
currentSequenceElement.Contract = resolvedContract
250-
251-
// Next, if our sequence element uses ABI values to produce call data, our deserialized data is not yet
252-
// sufficient for runtime use, until we use it to resolve runtime references.
253-
callAbiValues := currentSequenceElement.Call.DataAbiValues
254-
if callAbiValues != nil {
255-
sequenceInvalidError = callAbiValues.Resolve(currentSequenceElement.Contract.CompiledContract().Abi)
256-
if sequenceInvalidError != nil {
257-
sequenceInvalidError = fmt.Errorf("error resolving method in contract '%v': %v", currentSequenceElement.Contract.Name(), sequenceInvalidError)
258-
return nil, nil
259-
}
260-
}
261-
return currentSequenceElement, nil
262-
}
263-
264-
// Define actions to perform after executing each call in the sequence.
265-
executionCheckFunc := func(currentlyExecutedSequence calls.CallSequence) (bool, error) {
266-
// Grab the coverage maps for the last executed sequence element
267-
lastExecutedSequenceElement := currentlyExecutedSequence[len(currentlyExecutedSequence)-1]
268-
covMaps := coverage.GetCoverageTracerResults(lastExecutedSequenceElement.ChainReference.MessageResults())
269-
270-
// Memory optimization: Remove the coverage maps from the message results
271-
coverage.RemoveCoverageTracerResults(lastExecutedSequenceElement.ChainReference.MessageResults())
272-
273-
// Update the global coverage maps
274-
_, covErr := c.coverageMaps.Update(covMaps)
275-
if covErr != nil {
276-
return true, covErr
277-
}
278-
return false, nil
279-
}
280-
281-
// Execute each call sequence, populating runtime data and collecting coverage data along the way.
282-
_, err = calls.ExecuteCallSequenceIteratively(testChain, fetchElementFunc, executionCheckFunc)
283-
284-
// If we failed to replay a sequence and measure coverage due to an unexpected error, report it.
285-
if err != nil {
286-
return fmt.Errorf("failed to initialize coverage maps from corpus, encountered an error while executing call sequence: %v", err)
287-
}
288-
289-
// If the sequence was replayed successfully, we add it. If it was not, we exclude it with a warning.
290-
if sequenceInvalidError == nil {
291-
if useInMutations && c.mutationTargetSequenceChooser != nil {
292-
c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice[calls.CallSequence](sequence, big.NewInt(1)))
293-
}
294-
c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequence)
295-
} else {
296-
c.logger.Debug("Corpus item ", colors.Bold, sequenceFileData.fileName, colors.Reset, " disabled due to error when replaying it", sequenceInvalidError)
297-
}
298-
299-
// Revert chain state to our starting point to test the next sequence.
300-
if err := testChain.RevertToBlockIndex(baseBlockIndex); err != nil {
301-
return fmt.Errorf("failed to reset the chain while seeding coverage: %v", err)
302-
}
303-
}
304-
return nil
305-
}
306-
307-
// Initialize initializes any runtime data needed for a Corpus on startup. Call sequences are replayed on the post-setup
308-
// (deployment) test chain to calculate coverage, while resolving references to compiled contracts.
309-
// Returns the active number of corpus items, total number of corpus items, or an error if one occurred. If an error
310-
// is returned, then the corpus counts returned will always be zero.
311-
func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts) (int, int, error) {
180+
// Initialize initializes the in-memory corpus state but does not actually replay any of the sequences stored in the corpus.
181+
// It seeds coverage information from the post-setup chain while enqueueing all persisted sequences for execution. The fuzzer workers
182+
// will concurrently execute all the sequences stored in the corpus before actually starting the fuzzing campaign.
183+
func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts) error {
312184
// Acquire our call sequences lock during the duration of this method.
313185
c.callSequencesLock.Lock()
314186
defer c.callSequencesLock.Unlock()
@@ -332,9 +204,11 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
332204
// We also track any contract deployments, so we can resolve contract/method definitions for corpus call
333205
// sequences.
334206
newChain.Events.ContractDeploymentAddedEventEmitter.Subscribe(func(event chain.ContractDeploymentsAddedEvent) error {
335-
matchedContract := contractDefinitions.MatchBytecode(event.Contract.InitBytecode, event.Contract.RuntimeBytecode)
336-
if matchedContract != nil {
337-
deployedContracts[event.Contract.Address] = matchedContract
207+
if contractDefinitions != nil {
208+
matchedContract := contractDefinitions.MatchBytecode(event.Contract.InitBytecode, event.Contract.RuntimeBytecode)
209+
if matchedContract != nil {
210+
deployedContracts[event.Contract.Address] = matchedContract
211+
}
338212
}
339213
return nil
340214
})
@@ -345,8 +219,9 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
345219
return nil
346220
})
347221
if err != nil {
348-
return 0, 0, fmt.Errorf("failed to initialize coverage maps, base test chain cloning encountered error: %v", err)
222+
return fmt.Errorf("failed to initialize coverage maps, base test chain cloning encountered error: %v", err)
349223
}
224+
defer testChain.Close()
350225

351226
// Freeze a set of deployedContracts's keys so that we have a set of addresses present in baseTestChain.
352227
// Feed this set to the coverage tracer.
@@ -369,32 +244,65 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
369244
// Update the global coverage maps
370245
_, covErr := c.coverageMaps.Update(covMaps)
371246
if covErr != nil {
372-
return 0, 0, covErr
247+
return covErr
373248
}
374249
}
375250
}
376251

377-
// Next we replay every call sequence, checking its validity on this chain and measuring coverage. Valid sequences
378-
// are added to the corpus for mutations, re-execution, etc.
379-
//
380-
// The order of initializations here is important, as it determines the order of "unexecuted sequences" to replay
381-
// when the fuzzer's worker starts up. We want to replay test results first, so that other corpus items
382-
// do not trigger the same test failures instead.
383-
err = c.initializeSequences(c.testResultSequenceFiles, testChain, deployedContracts, false)
384-
if err != nil {
385-
return 0, 0, err
252+
// Add all test results and call sequences to the unexecuted call sequences list
253+
totalSequences := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files)
254+
c.unexecutedCallSequences = make([]calls.CallSequence, 0, totalSequences)
255+
for _, sequenceFileData := range c.testResultSequenceFiles.files {
256+
c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequenceFileData.data)
257+
}
258+
for _, sequenceFileData := range c.callSequenceFiles.files {
259+
c.unexecutedCallSequences = append(c.unexecutedCallSequences, sequenceFileData.data)
386260
}
387261

388-
err = c.initializeSequences(c.callSequenceFiles, testChain, deployedContracts, true)
389-
if err != nil {
390-
return 0, 0, err
262+
// This value will increment as call sequences in the corpus are executed and marked as valid.
263+
c.validCallSequences.Store(0)
264+
265+
return nil
266+
}
267+
268+
// CoverageMaps exposes coverage details for all call sequences known to the corpus.
269+
func (c *Corpus) CoverageMaps() *coverage.CoverageMaps {
270+
return c.coverageMaps
271+
}
272+
273+
// CallSequenceEntryCount returns the total number of call sequences that increased coverage and also any test results
274+
// that led to a failure.
275+
func (c *Corpus) CallSequenceEntryCount() (int, int) {
276+
return len(c.callSequenceFiles.files), len(c.testResultSequenceFiles.files)
277+
}
278+
279+
// InitializingCorpus returns true if the corpus is still initializing, false otherwise.
280+
func (c *Corpus) InitializingCorpus() bool {
281+
return len(c.unexecutedCallSequences) > 0
282+
}
283+
284+
// ActiveMutableSequenceCount returns the count of call sequences recorded in the corpus which have been validated
285+
// after Corpus initialization and are ready for use in mutations.
286+
func (c *Corpus) ActiveMutableSequenceCount() int {
287+
if c.mutationTargetSequenceChooser == nil {
288+
return 0
391289
}
290+
return c.mutationTargetSequenceChooser.ChoiceCount()
291+
}
392292

393-
// Calculate corpus health metrics
394-
corpusSequencesTotal := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files)
395-
corpusSequencesActive := len(c.unexecutedCallSequences)
293+
// RandomMutationTargetSequence returns a weighted random call sequence from the Corpus, or an error if one occurs.
294+
func (c *Corpus) RandomMutationTargetSequence() (calls.CallSequence, error) {
295+
// If we didn't initialize a chooser, return an error
296+
if c.mutationTargetSequenceChooser == nil {
297+
return nil, fmt.Errorf("corpus could not return a random call sequence because the corpus was not initialized")
298+
}
396299

397-
return corpusSequencesActive, corpusSequencesTotal, nil
300+
// Pick a random call sequence, then clone it before returning it, so the original is untainted.
301+
seq, err := c.mutationTargetSequenceChooser.Choose()
302+
if seq == nil || err != nil {
303+
return nil, err
304+
}
305+
return seq.Clone()
398306
}
399307

400308
// addCallSequence adds a call sequence to the corpus in a given corpus directory.
@@ -437,7 +345,7 @@ func (c *Corpus) addCallSequence(sequenceFiles *corpusDirectory[calls.CallSequen
437345
if mutationChooserWeight == nil {
438346
mutationChooserWeight = big.NewInt(1)
439347
}
440-
c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice[calls.CallSequence](sequence, mutationChooserWeight))
348+
c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice(sequence, mutationChooserWeight))
441349
}
442350

443351
// Unlock now, as flushing will lock on its own.
@@ -506,6 +414,33 @@ func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence,
506414
return nil
507415
}
508416

417+
// MarkCallSequenceForMutation records that a call sequence in the corpus has been successfully executed and can be used for mutations.
418+
func (c *Corpus) MarkCallSequenceForMutation(sequence calls.CallSequence, mutationChooserWeight *big.Int) error {
419+
// If no weight is provided, set it to 1.
420+
if mutationChooserWeight == nil {
421+
mutationChooserWeight = big.NewInt(1)
422+
}
423+
424+
// Unclear whether a lock is needed but might as well be safe
425+
c.callSequencesLock.Lock()
426+
defer c.callSequencesLock.Unlock()
427+
428+
// Add the sequence to the mutation chooser
429+
c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice(sequence, mutationChooserWeight))
430+
return nil
431+
}
432+
433+
// IncrementValid increments the valid call sequences counter.
434+
func (c *Corpus) IncrementValid() {
435+
c.validCallSequences.Add(1)
436+
}
437+
438+
// ValidCallSequences returns the number of valid call sequences in the corpus.
439+
// Note that this value is only accurate right after corpus initialization.
440+
func (c *Corpus) ValidCallSequences() uint64 {
441+
return c.validCallSequences.Load()
442+
}
443+
509444
// UnexecutedCallSequence returns a call sequence loaded from disk which has not yet been returned by this method.
510445
// It is intended to be used by the fuzzer to run all un-executed call sequences (without mutations) to check for test
511446
// failures. If a call sequence is returned, it will not be returned by this method again.
@@ -574,6 +509,10 @@ func (c *Corpus) Flush() error {
574509
// PruneSequences takes a chain.TestChain parameter used to run transactions.
575510
// It returns an int indicating the number of sequences removed from the corpus, and an error if any occurred.
576511
func (c *Corpus) PruneSequences(ctx context.Context, chain *chain.TestChain) (int, error) {
512+
if c.mutationTargetSequenceChooser == nil {
513+
return 0, nil
514+
}
515+
577516
chainOriginalIndex := uint64(len(chain.CommittedBlocks()))
578517
tmpMap := coverage.NewCoverageMaps()
579518

fuzzing/corpus/corpus_files.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ package corpus
33
import (
44
"encoding/json"
55
"fmt"
6-
"github.com/crytic/medusa/utils"
76
"os"
87
"path/filepath"
98
"strings"
109
"sync"
10+
11+
"github.com/crytic/medusa/utils"
1112
)
1213

1314
// corpusFile represents corpus data and its state on the filesystem.
@@ -176,7 +177,7 @@ func (cd *corpusDirectory[T]) writeFiles() error {
176177
// Write the JSON encoded data.
177178
err = os.WriteFile(filePath, jsonEncodedData, os.ModePerm)
178179
if err != nil {
179-
return fmt.Errorf("An error occurred while writing corpus data to file: %v\n", err)
180+
return fmt.Errorf("an error occurred while writing corpus data to file: %v", err)
180181
}
181182

182183
// Update our written to disk status.

0 commit comments

Comments
 (0)