Skip to content

Commit a9c00cb

Browse files
Prune unnecessary transaction sequences from corpus (#625)
* prune sequences WIP * keep track of total pruning * do it only every 3 minutes * resolve TODOs, change logging format, add config option, move state into struct * comments and docs * Prune in parallel * add comments * Update fuzzing/fuzzer.go Co-authored-by: anishnaik <[email protected]> * suggestions from @anishnaik * go fmt * move corpus_pruner and remove dependency on fuzzer * 'values' -> 'corpus items' --------- Co-authored-by: anishnaik <[email protected]>
1 parent 7f61563 commit a9c00cb

File tree

9 files changed

+260
-15
lines changed

9 files changed

+260
-15
lines changed

docs/src/project_configuration/fuzzing_config.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ The fuzzing configuration defines the parameters for the fuzzing campaign.
5353
Enabling coverage allows for improved code exploration.
5454
- **Default**: `true`
5555

56+
### `pruneFrequency`
57+
58+
- **Type**: Integer
59+
- **Description**: Determines how often, in minutes, the corpus should be pruned to remove unnecessary members.
60+
Setting `pruneFrequency` to 0 disables pruning.
61+
`pruneFrequency` only matters if `coverageEnabled` is set to true; otherwise, no pruning will occur.
62+
- **Default**: `5`
63+
5664
### `corpusDirectory`
5765

5866
- **Type**: String

docs/src/static/function_level_testing_medusa.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"timeout": 0,
66
"testLimit": 1000,
77
"callSequenceLength": 1,
8+
"pruneFrequency": 5,
89
"corpusDirectory": "",
910
"coverageEnabled": true,
1011
"targetContracts": ["TestDepositContract"],

docs/src/static/medusa.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"testLimit": 0,
77
"shrinkLimit": 5000,
88
"callSequenceLength": 100,
9+
"pruneFrequency": 5,
910
"corpusDirectory": "",
1011
"coverageEnabled": true,
1112
"coverageFormats": ["html", "lcov"],

fuzzing/config/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ type FuzzingConfig struct {
5555
// CallSequenceLength describes the maximum length a transaction sequence can be generated as.
5656
CallSequenceLength int `json:"callSequenceLength"`
5757

58+
// PruneFrequency determines how often, in minutes, the corpus should be pruned to remove unnecessary members.
59+
// Setting PruneFrequency to 0 disables pruning.
60+
// PruneFrequency only matters if CoverageEnabled is set to true; otherwise, no pruning will occur.
61+
PruneFrequency uint64 `json:"pruneFrequency"`
62+
5863
// CorpusDirectory describes the name for the folder that will hold the corpus and the coverage files. If empty,
5964
// the in-memory corpus will be used, but not flushed to disk.
6065
CorpusDirectory string `json:"corpusDirectory"`

fuzzing/config/config_defaults.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ func GetDefaultProjectConfig(platform string) (*ProjectConfig, error) {
4545
TestLimit: 0,
4646
ShrinkLimit: 5_000,
4747
CallSequenceLength: 100,
48+
PruneFrequency: 5,
4849
TargetContracts: []string{},
4950
TargetContractsBalances: []*ContractBalance{},
5051
PredeployedContracts: map[string]string{},

fuzzing/corpus/corpus.go

Lines changed: 92 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package corpus
22

33
import (
4+
"math/rand"
5+
46
"bytes"
7+
"context"
58
"fmt"
69
"math/big"
710
"os"
@@ -455,14 +458,13 @@ func (c *Corpus) AddTestResultCallSequence(callSequence calls.CallSequence, muta
455458
return c.addCallSequence(c.testResultSequenceFiles, callSequence, false, mutationChooserWeight, flushImmediately)
456459
}
457460

458-
// CheckSequenceCoverageAndUpdate checks if the most recent call executed in the provided call sequence achieved
459-
// coverage the Corpus did not with any of its call sequences. If it did, the call sequence is added to the corpus
460-
// and the Corpus coverage maps are updated accordingly.
461-
// Returns an error if one occurs.
462-
func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence, mutationChooserWeight *big.Int, flushImmediately bool) error {
461+
// checkSequenceCoverageAndUpdate checks if the most recent call executed in the provided call sequence achieved
462+
// coverage not already included in coverageMaps. If it did, coverageMaps is updated accordingly.
463+
// Returns a boolean indicating whether any change happened, and an error if one occurs.
464+
func checkSequenceCoverageAndUpdate(callSequence calls.CallSequence, coverageMaps *coverage.CoverageMaps) (bool, error) {
463465
// If there are no calls in our sequence, there is nothing to do.
464466
if len(callSequence) == 0 {
465-
return nil
467+
return false, nil
466468
}
467469

468470
// Obtain our coverage maps for our last call.
@@ -473,14 +475,22 @@ func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence,
473475

474476
// If we have none, because a coverage tracer wasn't attached when processing this call, we can stop.
475477
if lastMessageCoverageMaps == nil {
476-
return nil
478+
return false, nil
477479
}
478480

479481
// Memory optimization: Remove them from the results now that we obtained them, to free memory later.
480482
coverage.RemoveCoverageTracerResults(lastMessageResult)
481483

482484
// Merge the coverage maps into our total coverage maps and check if we had an update.
483-
coverageUpdated, err := c.coverageMaps.Update(lastMessageCoverageMaps)
485+
return coverageMaps.Update(lastMessageCoverageMaps)
486+
}
487+
488+
// CheckSequenceCoverageAndUpdate checks if the most recent call executed in the provided call sequence achieved
489+
// coverage the Corpus did not with any of its call sequences. If it did, the call sequence is added to the corpus
490+
// and the Corpus coverage maps are updated accordingly.
491+
// Returns an error if one occurs.
492+
func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence, mutationChooserWeight *big.Int, flushImmediately bool) error {
493+
coverageUpdated, err := checkSequenceCoverageAndUpdate(callSequence, c.coverageMaps)
484494
if err != nil {
485495
return err
486496
}
@@ -551,3 +561,77 @@ func (c *Corpus) Flush() error {
551561

552562
return nil
553563
}
564+
565+
// PruneSequences removes unnecessary entries from the corpus. It does this by:
566+
// - Initialize a blank coverage map tmpMap
567+
// - Grab all sequences in the corpus
568+
// - Randomize the order
569+
// - For each sequence, execute it and see whether it adds anything new to tmpMap.
570+
// If it does, add the new coverage and continue.
571+
// If it doesn't, remove it from the corpus.
572+
//
573+
// By doing this, we hope to find a smaller set of txn sequences that still preserves our current coverage.
574+
// PruneSequences takes a chain.TestChain parameter used to run transactions.
575+
// It returns an int indicating the number of sequences removed from the corpus, and an error if any occurred.
576+
func (c *Corpus) PruneSequences(ctx context.Context, chain *chain.TestChain) (int, error) {
577+
chainOriginalIndex := uint64(len(chain.CommittedBlocks()))
578+
tmpMap := coverage.NewCoverageMaps()
579+
580+
c.callSequencesLock.Lock()
581+
seqs := make([]calls.CallSequence, len(c.mutationTargetSequenceChooser.Choices))
582+
for i, seq := range c.mutationTargetSequenceChooser.Choices {
583+
seqCloned, err := seq.Data.Clone()
584+
if err != nil {
585+
c.callSequencesLock.Unlock()
586+
return 0, err
587+
}
588+
seqs[i] = seqCloned
589+
}
590+
c.callSequencesLock.Unlock()
591+
// We don't need to lock during the next part as long as the ordering of Choices doesn't change.
592+
// New items could get added in the meantime, but older items won't be touched.
593+
594+
toRemove := map[int]bool{}
595+
596+
// Iterate seqs in a random order
597+
for _, i := range rand.Perm(len(seqs)) {
598+
if utils.CheckContextDone(ctx) {
599+
return 0, nil
600+
}
601+
602+
seq := seqs[i]
603+
604+
fetchElementFunc := func(currentIndex int) (*calls.CallSequenceElement, error) {
605+
if currentIndex >= len(seq) {
606+
return nil, nil
607+
}
608+
return seq[currentIndex], nil
609+
}
610+
611+
// Never quit early
612+
executionCheckFunc := func(currentlyExecutedSequence calls.CallSequence) (bool, error) { return false, nil }
613+
614+
seq, err := calls.ExecuteCallSequenceIteratively(chain, fetchElementFunc, executionCheckFunc)
615+
if err != nil {
616+
return 0, err
617+
}
618+
619+
coverageUpdated, err := checkSequenceCoverageAndUpdate(seq, tmpMap)
620+
if err != nil {
621+
return 0, err
622+
}
623+
624+
if !coverageUpdated {
625+
// No new coverage was added. We can remove this from the corpus.
626+
toRemove[i] = true
627+
}
628+
629+
err = chain.RevertToBlockIndex(chainOriginalIndex)
630+
if err != nil {
631+
return 0, err
632+
}
633+
}
634+
635+
c.mutationTargetSequenceChooser.RemoveChoices(toRemove)
636+
return len(toRemove), nil
637+
}

fuzzing/corpus/corpus_pruner.go

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package corpus
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"time"
7+
8+
"github.com/crytic/medusa/chain"
9+
"github.com/crytic/medusa/fuzzing/coverage"
10+
"github.com/crytic/medusa/logging"
11+
"github.com/crytic/medusa/utils"
12+
)
13+
14+
// CorpusPruner is a job that runs every `PruneFrequency` minutes.
15+
// It removes unnecessary items from the corpus by calling `Corpus.PruneSequences`.
16+
type CorpusPruner struct {
17+
// enabled determines if the pruner is enabled
18+
enabled bool
19+
20+
// corpus is the corpus to be pruned
21+
corpus *Corpus
22+
23+
// logger is used to log when pruning and on error
24+
logger *logging.Logger
25+
26+
// ctx is the CorpusPruner's context which can be used to cancel the pruner
27+
ctx context.Context
28+
29+
// pruneFrequency determines how often, in minutes, the pruning should occur
30+
pruneFrequency uint64
31+
32+
// totalCorpusPruned counts the total number of sequences pruned so far
33+
totalCorpusPruned int
34+
35+
// chain is the test chain used during pruning
36+
chain *chain.TestChain
37+
}
38+
39+
// NewCorpusPruner creates a new CorpusPruner.
40+
func NewCorpusPruner(enabled bool, pruneFrequency uint64, logger *logging.Logger) *CorpusPruner {
41+
if !enabled {
42+
return &CorpusPruner{}
43+
}
44+
return &CorpusPruner{
45+
enabled: enabled,
46+
pruneFrequency: pruneFrequency,
47+
logger: logger,
48+
}
49+
}
50+
51+
// pruneCorpus is a wrapper around Corpus.PruneSequences that adds timing, logging, and updating totalCorpusPruned.
52+
// It is used by mainLoop.
53+
func (cp *CorpusPruner) pruneCorpus() error {
54+
start := time.Now() // We'll track how long pruning takes
55+
n, err := cp.corpus.PruneSequences(cp.ctx, cp.chain)
56+
// PruneSequences takes a while, so ctx could've finished in the meantime.
57+
// If it did, we skip the log message.
58+
if err != nil || utils.CheckContextDone(cp.ctx) {
59+
return err
60+
}
61+
cp.totalCorpusPruned += n
62+
cp.logger.Info(fmt.Sprintf("Pruned %d corpus items in %v. Total pruned this run: %d", n, time.Since(start), cp.totalCorpusPruned))
63+
return nil
64+
}
65+
66+
// mainLoop calls pruneCorpus every `pruneFrequency` minutes.
67+
// It runs until ctx.Done is triggered or pruneCorpus returns an error.
68+
func (cp *CorpusPruner) mainLoop() {
69+
defer cp.chain.Close()
70+
ticker := time.NewTicker(time.Duration(cp.pruneFrequency) * time.Minute)
71+
defer ticker.Stop()
72+
for {
73+
select {
74+
case <-cp.ctx.Done():
75+
return
76+
case <-ticker.C:
77+
err := cp.pruneCorpus()
78+
if err != nil {
79+
cp.logger.Error("Corpus pruner encountered an error", err)
80+
return
81+
}
82+
}
83+
}
84+
}
85+
86+
// Start takes a context, a corpus to prune, and a base chain in a setup state ready for testing.
87+
// It clones the base chain, then prunes the corpus every `PruneFrequency` minutes.
88+
// This runs until ctx cancels the operation.
89+
// Returns an error if one occurred.
90+
func (cp *CorpusPruner) Start(ctx context.Context, corpus *Corpus, baseTestChain *chain.TestChain) error {
91+
if !cp.enabled {
92+
return nil
93+
}
94+
95+
// Clone our chain, attaching a tracer.
96+
clonedChain, err := baseTestChain.Clone(func(initializedChain *chain.TestChain) error {
97+
initializedChain.AddTracer(coverage.NewCoverageTracer().NativeTracer(), true, false)
98+
return nil
99+
})
100+
if err != nil {
101+
return err
102+
}
103+
cp.chain = clonedChain
104+
105+
// Write our params to the struct so we don't have to pass them all over the place as function args.
106+
cp.ctx = ctx
107+
cp.corpus = corpus
108+
109+
// Start up the main loop in a goroutine.
110+
go cp.mainLoop()
111+
112+
return nil
113+
}

fuzzing/fuzzer.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ type Fuzzer struct {
8585
// revertReporter tracks per-function reversion metrics, if enabled
8686
revertReporter *reverts.RevertReporter
8787

88+
// corpusPruner is a service that will prune the corpus at a given frequency to reduce corpus size and memory overhead.
89+
corpusPruner *corpus.CorpusPruner
90+
8891
// randomProvider describes the provider used to generate random values in the Fuzzer. All other random providers
8992
// used by the Fuzzer's subcomponents are derived from this one.
9093
randomProvider *rand.Rand
@@ -174,6 +177,10 @@ func NewFuzzer(config config.ProjectConfig) (*Fuzzer, error) {
174177
return nil, err
175178
}
176179

180+
// Create the corpus pruner.
181+
pruneEnabled := config.Fuzzing.CoverageEnabled && config.Fuzzing.PruneFrequency > 0
182+
corpusPruner := corpus.NewCorpusPruner(pruneEnabled, config.Fuzzing.PruneFrequency, logger)
183+
177184
// Create and return our fuzzing instance.
178185
fuzzer := &Fuzzer{
179186
config: config,
@@ -184,6 +191,7 @@ func NewFuzzer(config config.ProjectConfig) (*Fuzzer, error) {
184191
testCases: make([]TestCase, 0),
185192
testCasesFinished: make(map[string]TestCase),
186193
revertReporter: revertReporter,
194+
corpusPruner: corpusPruner,
187195
Hooks: FuzzerHooks{
188196
NewCallSequenceGeneratorConfigFunc: defaultCallSequenceGeneratorConfigFunc,
189197
NewShrinkingValueMutatorFunc: defaultShrinkingValueMutatorFunc,
@@ -853,6 +861,13 @@ func (f *Fuzzer) Start() error {
853861
)
854862
}
855863

864+
// Start the corpus pruner.
865+
err = f.corpusPruner.Start(f.ctx, f.corpus, baseTestChain)
866+
if err != nil {
867+
f.logger.Error("Error starting corpus pruner", err)
868+
return err
869+
}
870+
856871
// Log the start of our fuzzing campaign.
857872
f.logger.Info("Fuzzing with ", colors.Bold, f.config.Fuzzing.Workers, colors.Reset, " workers")
858873

0 commit comments

Comments
 (0)