Merge branch 'master' into feat/rework-mutators

0xalpharush · web-flow · commit 2ed4855e1d5e · 2024-09-05T08:47:48.000-05:00
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 `medusa` is a cross-platform [go-ethereum](https://github.com/ethereum/go-ethereum/)-based smart contract fuzzer inspired by [Echidna](https://github.com/crytic/echidna).
 It provides parallelized fuzz testing of smart contracts through CLI, or its Go API that allows custom user-extended testing methodology.
 
-**Disclaimer**: Please note that `medusa` is an **experimental** smart contract fuzzer. Currently, it should _not_ be adopted into production systems. We intend for `medusa` to reach the same capabilities and maturity that Echidna has. Until then, be careful using `medusa` as your primary smart contract fuzz testing solution. Additionally, please be aware that the Go-level testing API is still **under development** and is subject to breaking changes.
+**Disclaimer**: The Go-level testing API is still **under development** and is subject to breaking changes.
 
 ## Features
 
@@ -29,6 +29,23 @@ cd docs
 mdbook serve
 ```
 
+## Install
+
+macOS users can install the latest release of `medusa` using Homebrew:
+
+```shell
+
+brew install medusa
+```
+
+The master branch can be installed using the following command:
+
+```shell
+brew install --HEAD medusa
+```
+
+For more information on building from source or obtaining binaries for Windows and Linux, please refer to the [installation guide](./docs/src/getting_started/installation.md).
+
 ## Contributing
 
 For information about how to contribute to this project, check out the [CONTRIBUTING](./CONTRIBUTING.md) guidelines.
diff --git a/fuzzing/corpus/corpus.go b/fuzzing/corpus/corpus.go
@@ -4,10 +4,13 @@ import (
 	"bytes"
 	"fmt"
 	"math/big"
+	"os"
 	"path/filepath"
 	"sync"
 	"time"
 
+	"github.com/crytic/medusa/utils"
+
 	"github.com/crytic/medusa/chain"
 	"github.com/crytic/medusa/fuzzing/calls"
 	"github.com/crytic/medusa/fuzzing/coverage"
@@ -30,13 +33,8 @@ type Corpus struct {
 	// coverageMaps describes the total code coverage known to be achieved across all corpus call sequences.
 	coverageMaps *coverage.CoverageMaps
 
-	// mutableSequenceFiles represents a corpus directory with files which describe call sequences that should
-	// be used for mutations.
-	mutableSequenceFiles *corpusDirectory[calls.CallSequence]
-
-	// immutableSequenceFiles represents a corpus directory with files which describe call sequences that should not be
-	// used for mutations.
-	immutableSequenceFiles *corpusDirectory[calls.CallSequence]
+	// callSequenceFiles represents a corpus directory with files that should be used for mutations.
+	callSequenceFiles *corpusDirectory[calls.CallSequence]
 
 	// testResultSequenceFiles represents a corpus directory with files which describe call sequences that were flagged
 	// to be saved by a test case provider. These are not used in mutations.
@@ -66,25 +64,25 @@ func NewCorpus(corpusDirectory string) (*Corpus, error) {
 	corpus := &Corpus{
 		storageDirectory:        corpusDirectory,
 		coverageMaps:            coverage.NewCoverageMaps(),
-		mutableSequenceFiles:    newCorpusDirectory[calls.CallSequence](""),
-		immutableSequenceFiles:  newCorpusDirectory[calls.CallSequence](""),
+		callSequenceFiles:       newCorpusDirectory[calls.CallSequence](""),
 		testResultSequenceFiles: newCorpusDirectory[calls.CallSequence](""),
 		unexecutedCallSequences: make([]calls.CallSequence, 0),
 		logger:                  logging.GlobalLogger.NewSubLogger("module", "corpus"),
 	}
 
 	// If we have a corpus directory set, parse our call sequences.
 	if corpus.storageDirectory != "" {
-		// Read mutable call sequences.
-		corpus.mutableSequenceFiles.path = filepath.Join(corpus.storageDirectory, "call_sequences", "mutable")
-		err = corpus.mutableSequenceFiles.readFiles("*.json")
+		// Migrate the legacy corpus structure
+		// Note that it is important to call this first since we want to move all the call sequence files before reading
+		// them into the corpus
+		err = corpus.migrateLegacyCorpus()
 		if err != nil {
 			return nil, err
 		}
 
-		// Read immutable call sequences.
-		corpus.immutableSequenceFiles.path = filepath.Join(corpus.storageDirectory, "call_sequences", "immutable")
-		err = corpus.immutableSequenceFiles.readFiles("*.json")
+		// Read call sequences.
+		corpus.callSequenceFiles.path = filepath.Join(corpus.storageDirectory, "call_sequences")
+		err = corpus.callSequenceFiles.readFiles("*.json")
 		if err != nil {
 			return nil, err
 		}
@@ -100,26 +98,90 @@ func NewCorpus(corpusDirectory string) (*Corpus, error) {
 	return corpus, nil
 }
 
+// migrateLegacyCorpus migrates the legacy corpus layout, where call sequences were stored in two separate directories
+// (mutable/immutable), by moving their files into the parent call_sequences directory and deleting both directories.
+func (c *Corpus) migrateLegacyCorpus() error {
+	// Check to see if the mutable and/or the immutable directories exist
+	callSequencePath := filepath.Join(c.storageDirectory, "call_sequences")
+	mutablePath := filepath.Join(c.storageDirectory, "call_sequences", "mutable")
+	immutablePath := filepath.Join(c.storageDirectory, "call_sequences", "immutable")
+
+	// Only return an error if the error is something other than "filepath does not exist"
+	mutableDirInfo, err := os.Stat(mutablePath)
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	immutableDirInfo, err := os.Stat(immutablePath)
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+
+	// Return early if neither legacy directory exists
+	if mutableDirInfo == nil && immutableDirInfo == nil {
+		return nil
+	}
+
+	// Now, we need to notify the user that we have detected a legacy structure
+	c.logger.Info("Migrating legacy corpus")
+
+	// If the mutable directory exists, move all of its call sequence files into the parent call_sequences directory
+	if mutableDirInfo != nil {
+		// Discover all corpus files in the given directory.
+		filePaths, err := filepath.Glob(filepath.Join(mutablePath, "*.json"))
+		if err != nil {
+			return err
+		}
+
+		// Move each file from the mutable directory to the parent call_sequences directory
+		for _, filePath := range filePaths {
+			err = utils.MoveFile(filePath, filepath.Join(callSequencePath, filepath.Base(filePath)))
+			if err != nil {
+				return err
+			}
+		}
+
+		// Delete the mutable directory
+		err = utils.DeleteDirectory(mutablePath)
+		if err != nil {
+			return err
+		}
+	}
+
+	// If the immutable directory exists, move all of its call sequence files into the parent call_sequences directory
+	if immutableDirInfo != nil {
+		// Discover all corpus files in the given directory.
+		filePaths, err := filepath.Glob(filepath.Join(immutablePath, "*.json"))
+		if err != nil {
+			return err
+		}
+
+		// Move each file from the immutable directory to the parent call_sequences directory
+		for _, filePath := range filePaths {
+			err = utils.MoveFile(filePath, filepath.Join(callSequencePath, filepath.Base(filePath)))
+			if err != nil {
+				return err
+			}
+		}
+
+		// Delete the immutable directory
+		err = utils.DeleteDirectory(immutablePath)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
 // CoverageMaps exposes coverage details for all call sequences known to the corpus.
 func (c *Corpus) CoverageMaps() *coverage.CoverageMaps {
 	return c.coverageMaps
 }
 
-// CallSequenceEntryCount returns the total number of call sequences entries in the corpus, based on the provided filter
-// flags. Some call sequences may not be valid for use if they fail validation when initializing the corpus.
-// Returns the count of the requested call sequence entries.
-func (c *Corpus) CallSequenceEntryCount(mutable bool, immutable bool, testResults bool) int {
-	count := 0
-	if mutable {
-		count += len(c.mutableSequenceFiles.files)
-	}
-	if immutable {
-		count += len(c.immutableSequenceFiles.files)
-	}
-	if testResults {
-		count += len(c.testResultSequenceFiles.files)
-	}
-	return count
+// CallSequenceEntryCount returns two counts, in order: the number of call sequences that increased coverage, and the
+// number of test result call sequences that led to a failure.
+func (c *Corpus) CallSequenceEntryCount() (int, int) {
+	return len(c.callSequenceFiles.files), len(c.testResultSequenceFiles.files)
 }
 
 // ActiveMutableSequenceCount returns the count of call sequences recorded in the corpus which have been validated
@@ -302,18 +364,13 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
 		return 0, 0, err
 	}
 
-	err = c.initializeSequences(c.mutableSequenceFiles, testChain, deployedContracts, true)
-	if err != nil {
-		return 0, 0, err
-	}
-
-	err = c.initializeSequences(c.immutableSequenceFiles, testChain, deployedContracts, false)
+	err = c.initializeSequences(c.callSequenceFiles, testChain, deployedContracts, true)
 	if err != nil {
 		return 0, 0, err
 	}
 
 	// Calculate corpus health metrics
-	corpusSequencesTotal := len(c.mutableSequenceFiles.files) + len(c.immutableSequenceFiles.files) + len(c.testResultSequenceFiles.files)
+	corpusSequencesTotal := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files)
 	corpusSequencesActive := len(c.unexecutedCallSequences)
 
 	return corpusSequencesActive, corpusSequencesTotal, nil
@@ -411,17 +468,9 @@ func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence,
 	}
 
 	// If we had an increase in non-reverted or reverted coverage, we save the sequence.
-	// Note: We only want to save the sequence once. We're most interested if it can be used for mutations first.
-	if coverageUpdated {
-		// If we achieved new non-reverting coverage, save this sequence for mutation purposes.
-		err = c.addCallSequence(c.mutableSequenceFiles, callSequence, true, mutationChooserWeight, flushImmediately)
-		if err != nil {
-			return err
-		}
-	} else if revertedCoverageUpdated {
-		// If we did not achieve new successful coverage, but achieved an increase in reverted coverage, save this
-		// sequence for non-mutation purposes.
-		err = c.addCallSequence(c.immutableSequenceFiles, callSequence, false, mutationChooserWeight, flushImmediately)
+	if coverageUpdated || revertedCoverageUpdated {
+		// If we achieved new coverage, save this sequence for mutation purposes.
+		err = c.addCallSequence(c.callSequenceFiles, callSequence, true, mutationChooserWeight, flushImmediately)
 		if err != nil {
 			return err
 		}
@@ -470,8 +519,8 @@ func (c *Corpus) Flush() error {
 	c.callSequencesLock.Lock()
 	defer c.callSequencesLock.Unlock()
 
-	// Write mutation target call sequences.
-	err := c.mutableSequenceFiles.writeFiles()
+	// Write all coverage-increasing call sequences.
+	err := c.callSequenceFiles.writeFiles()
 	if err != nil {
 		return err
 	}
@@ -482,11 +531,5 @@ func (c *Corpus) Flush() error {
 		return err
 	}
 
-	// Write other call sequences.
-	err = c.immutableSequenceFiles.writeFiles()
-	if err != nil {
-		return err
-	}
-
 	return nil
 }
diff --git a/fuzzing/corpus/corpus_test.go b/fuzzing/corpus/corpus_test.go
@@ -23,7 +23,7 @@ func getMockSimpleCorpus(minSequences int, maxSequences, minBlocks int, maxBlock
 	// Add the requested number of entries.
 	numSequences := minSequences + (rand.Int() % (maxSequences - minSequences))
 	for i := 0; i < numSequences; i++ {
-		err := corpus.addCallSequence(corpus.mutableSequenceFiles, getMockCallSequence(minBlocks+(rand.Int()%(maxBlocks-minBlocks))), true, nil, false)
+		err := corpus.addCallSequence(corpus.callSequenceFiles, getMockCallSequence(minBlocks+(rand.Int()%(maxBlocks-minBlocks))), true, nil, false)
 		if err != nil {
 			return nil, err
 		}
@@ -100,9 +100,9 @@ func TestCorpusReadWrite(t *testing.T) {
 		assert.NoError(t, err)
 
 		// Ensure that there are the correct number of call sequence files
-		matches, err := filepath.Glob(filepath.Join(corpus.mutableSequenceFiles.path, "*.json"))
+		matches, err := filepath.Glob(filepath.Join(corpus.callSequenceFiles.path, "*.json"))
 		assert.NoError(t, err)
-		assert.EqualValues(t, len(corpus.mutableSequenceFiles.files), len(matches))
+		assert.EqualValues(t, len(corpus.callSequenceFiles.files), len(matches))
 
 		// Wipe corpus clean so that you can now read it in from disk
 		corpus, err = NewCorpus("corpus")
@@ -124,7 +124,7 @@ func TestCorpusCallSequenceMarshaling(t *testing.T) {
 	// Run the test in our temporary test directory to avoid artifact pollution.
 	testutils.ExecuteInDirectory(t, t.TempDir(), func() {
 		// For each entry, marshal it and then unmarshal the byte array
-		for _, entryFile := range corpus.mutableSequenceFiles.files {
+		for _, entryFile := range corpus.callSequenceFiles.files {
 			// Marshal the entry
 			b, err := json.Marshal(entryFile.data)
 			assert.NoError(t, err)
@@ -139,9 +139,9 @@ func TestCorpusCallSequenceMarshaling(t *testing.T) {
 		}
 
 		// Remove all items
-		for i := 0; i < len(corpus.mutableSequenceFiles.files); {
-			corpus.mutableSequenceFiles.removeFile(corpus.mutableSequenceFiles.files[i].fileName)
+		for i := 0; i < len(corpus.callSequenceFiles.files); {
+			corpus.callSequenceFiles.removeFile(corpus.callSequenceFiles.files[i].fileName)
 		}
-		assert.Empty(t, corpus.mutableSequenceFiles.files)
+		assert.Empty(t, corpus.callSequenceFiles.files)
 	})
 }
diff --git a/fuzzing/executiontracer/execution_tracer.go b/fuzzing/executiontracer/execution_tracer.go
@@ -110,6 +110,12 @@ func (t *ExecutionTracer) GetTrace(txHash common.Hash) *ExecutionTrace {
 
 // OnTxEnd is called upon the end of transaction execution, as defined by tracers.Tracer.
 func (t *ExecutionTracer) OnTxEnd(receipt *coretypes.Receipt, err error) {
+	// If the transaction errored or produced no receipt, we avoid storing its trace. An error should realistically only
+	// occur if we hit a block gas limit error. In this case, the transaction will be retried in the next block and we
+	// can retrieve the trace at that time.
+	if err != nil || receipt == nil {
+		return
+	}
 	t.traceMap[receipt.TxHash] = t.trace
 }
 
diff --git a/fuzzing/fuzzer.go b/fuzzing/fuzzer.go
@@ -419,10 +419,14 @@ func chainSetupFromCompilations(fuzzer *Fuzzer, testChain *chain.TestChain) (*ex
 	// Ordering is important here (predeploys _then_ targets) so that you can have the same contract in both lists
 	// while still being able to use the contract address overrides
 	contractsToDeploy := make([]string, 0)
+	balances := make([]*big.Int, 0)
 	for contractName := range fuzzer.config.Fuzzing.PredeployedContracts {
 		contractsToDeploy = append(contractsToDeploy, contractName)
+		// Add a zero balance for each predeployed contract to preserve the index of target contract balances
+		balances = append(balances, big.NewInt(0))
 	}
 	contractsToDeploy = append(contractsToDeploy, fuzzer.config.Fuzzing.TargetContracts...)
+	balances = append(balances, fuzzer.config.Fuzzing.TargetContractsBalances...)
 
 	deployedContractAddr := make(map[string]common.Address)
 	// Loop for all contracts to deploy
@@ -460,8 +464,8 @@ func chainSetupFromCompilations(fuzzer *Fuzzer, testChain *chain.TestChain) (*ex
 
 				// If our project config has a non-zero balance for this target contract, retrieve it
 				contractBalance := big.NewInt(0)
-				if len(fuzzer.config.Fuzzing.TargetContractsBalances) > i {
-					contractBalance = new(big.Int).Set(fuzzer.config.Fuzzing.TargetContractsBalances[i])
+				if len(balances) > i {
+					contractBalance = new(big.Int).Set(balances[i])
 				}
 
 				// Create a message to represent our contract deployment (we let deployments consume the whole block
@@ -758,8 +762,8 @@ func (f *Fuzzer) Start() error {
 
 	// Initialize our coverage maps by measuring the coverage we get from the corpus.
 	var corpusActiveSequences, corpusTotalSequences int
-	if f.corpus.CallSequenceEntryCount(true, true, true) > 0 {
-		f.logger.Info("Running call sequences in the corpus...")
+	if totalCallSequences, testResults := f.corpus.CallSequenceEntryCount(); totalCallSequences > 0 || testResults > 0 {
+		f.logger.Info("Running call sequences in the corpus")
 	}
 	startTime := time.Now()
 	corpusActiveSequences, corpusTotalSequences, err = f.corpus.Initialize(baseTestChain, f.contractDefinitions)
diff --git a/fuzzing/fuzzer_test.go b/fuzzing/fuzzer_test.go
@@ -403,6 +403,7 @@ func TestDeploymentsWithPredeploy(t *testing.T) {
 		filePath: "testdata/contracts/deployments/predeploy_contract.sol",
 		configUpdates: func(config *config.ProjectConfig) {
 			config.Fuzzing.TargetContracts = []string{"TestContract"}
+			config.Fuzzing.TargetContractsBalances = []*big.Int{big.NewInt(1)}
 			config.Fuzzing.TestLimit = 1000 // this test should expose a failure immediately
 			config.Fuzzing.Testing.PropertyTesting.Enabled = false
 			config.Fuzzing.Testing.OptimizationTesting.Enabled = false
@@ -825,7 +826,8 @@ func TestCorpusReplayability(t *testing.T) {
 
 			// Cache current coverage maps
 			originalCoverage := f.fuzzer.corpus.CoverageMaps()
-			originalCorpusSequenceCount := f.fuzzer.corpus.CallSequenceEntryCount(true, true, true)
+			originalTotalCallSequences, originalTotalTestResults := f.fuzzer.corpus.CallSequenceEntryCount()
+			originalCorpusSequenceCount := originalTotalCallSequences + originalTotalTestResults
 
 			// Next, set the fuzzer worker count to one, this allows us to count the call sequences executed before
 			// solving a problem. We will verify the problem is solved with less or equal sequences tested, than
diff --git a/fuzzing/fuzzer_test_methods_test.go b/fuzzing/fuzzer_test_methods_test.go
@@ -81,7 +81,7 @@ func assertFailedTestsExpected(f *fuzzerTestContext, expectFailure bool) {
 // corpus. It asserts that the actual result matches the provided expected result.
 func assertCorpusCallSequencesCollected(f *fuzzerTestContext, expectCallSequences bool) {
 	// Obtain our count of mutable (often representing just non-reverted coverage increasing) sequences.
-	callSequenceCount := f.fuzzer.corpus.CallSequenceEntryCount(true, false, false)
+	callSequenceCount, _ := f.fuzzer.corpus.CallSequenceEntryCount()
 
 	// Ensure we captured some coverage-increasing call sequences.
 	if expectCallSequences {
diff --git a/fuzzing/testdata/contracts/deployments/predeploy_contract.sol b/fuzzing/testdata/contracts/deployments/predeploy_contract.sol
@@ -7,6 +7,8 @@ contract PredeployContract {
 contract TestContract {
     PredeployContract predeploy = PredeployContract(address(0x1234));
 
+    constructor() payable {}
+    
     function testPredeploy() public {
         predeploy.triggerFailure();
     }
diff --git a/utils/fs_utils.go b/utils/fs_utils.go

Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,8 @@ contract PredeployContract {`
`7`	`7`	`contract TestContract {`
`8`	`8`	`PredeployContract predeploy = PredeployContract(address(0x1234));`
`9`	`9`
	`10`	`+ constructor() payable {}`
	`11`	`+`
`10`	`12`	`function testPredeploy() public {`
`11`	`13`	`predeploy.triggerFailure();`
`12`	`14`	`}`