Skip to content

Commit b2a001b

Browse files
committed
pkg/manager, syz-verifier, syz-manager: refactor corpus management and improve verifier
Extract corpus minimization logic into shared pkg/manager/corpus.go for reuse by both syz-manager and syz-verifier. Improve syz-verifier fuzzing implementation: - Update request distribution strategy for better workload management - Change result comparison logic to more accurately detect mismatches - Print mismatches in more readable format for easier debugging - Enforce restarting executor between program executions to avoid state pollution - Fix coverage-guided fuzzing to properly track code coverage - Fix maxSignal phase management for accurate signal tracking - Simplify crash handling logic - Refactor corpus management to use single goroutine with one channel Enhance HTTP server monitoring: - Add log, coverage statistics, and VM information to HTTP server views - Enable HTTP server logging for better observability Fix race condition where multiple executor goroutines call MaxSignal() simultaneously from RPC server connection loops.
1 parent ff2a3e0 commit b2a001b

File tree

5 files changed

+438
-283
lines changed

5 files changed

+438
-283
lines changed

CONTRIBUTORS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,4 +143,5 @@ Jeongjun Park
143143
Nikita Zhandarovich
144144
Jiacheng Xu
145145
Kuzey Arda Bulut
146-
Daniel Bransky
146+
Daniel Bransky
147+
Matan Kalina

pkg/manager/corpus.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Copyright 2015 syzkaller project authors. All rights reserved.
2+
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3+
4+
package manager
5+
6+
import (
7+
"github.com/google/syzkaller/pkg/corpus"
8+
"github.com/google/syzkaller/pkg/db"
9+
"github.com/google/syzkaller/pkg/log"
10+
)
11+
12+
// CorpusMinimizer provides the interface needed to minimize a corpus.
13+
type CorpusMinimizer struct {
14+
Corpus *corpus.Corpus
15+
CorpusDB *db.DB
16+
Cover bool
17+
LastMinCorpus int
18+
SaturatedCalls map[string]bool
19+
DisabledHashes map[string]struct{}
20+
PhaseCheck func() bool // returns true if we should proceed with minimization
21+
}
22+
23+
// Minimize performs corpus minimization if conditions are met.
24+
// It should be called with appropriate locking in place.
25+
func (cm *CorpusMinimizer) Minimize() int {
26+
// Don't minimize corpus until we have triaged all inputs from it.
27+
// During corpus triage it would happen very often since we are actively adding inputs,
28+
// and presumably the persistent corpus was reasonably minimal, and we don't use it for fuzzing yet.
29+
if cm.PhaseCheck != nil && !cm.PhaseCheck() {
30+
return cm.LastMinCorpus
31+
}
32+
currSize := cm.Corpus.StatProgs.Val()
33+
if currSize <= cm.LastMinCorpus*103/100 {
34+
return cm.LastMinCorpus
35+
}
36+
cm.Corpus.Minimize(cm.Cover)
37+
newSize := cm.Corpus.StatProgs.Val()
38+
39+
log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize)
40+
cm.LastMinCorpus = newSize
41+
42+
// From time to time we get corpus explosion due to different reason:
43+
// generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc.
44+
// This has bad effect on the instance and especially on instances
45+
// connected via hub. Do some per-syscall sanity checking to prevent this.
46+
for call, info := range cm.Corpus.CallCover() {
47+
if cm.Cover {
48+
// If we have less than 1K inputs per this call,
49+
// accept all new inputs unconditionally.
50+
if info.Count < 1000 {
51+
continue
52+
}
53+
// If we have more than 3K already, don't accept any more.
54+
// Between 1K and 3K look at amount of coverage we are getting from these programs.
55+
// Empirically, real coverage for the most saturated syscalls is ~30-60
56+
// per program (even when we have a thousand of them). For explosion
57+
// case coverage tend to be much lower (~0.3-5 per program).
58+
if info.Count < 3000 && len(info.Cover)/info.Count >= 10 {
59+
continue
60+
}
61+
} else {
62+
// If we don't have real coverage, signal is weak.
63+
// If we have more than several hundreds, there is something wrong.
64+
if info.Count < 300 {
65+
continue
66+
}
67+
}
68+
if cm.SaturatedCalls[call] {
69+
continue
70+
}
71+
cm.SaturatedCalls[call] = true
72+
log.Logf(0, "coverage for %v has saturated, not accepting more inputs", call)
73+
}
74+
75+
// Clean up the corpus database
76+
for key := range cm.CorpusDB.Records {
77+
ok1 := cm.Corpus.Item(key) != nil
78+
_, ok2 := cm.DisabledHashes[key]
79+
if !ok1 && !ok2 {
80+
cm.CorpusDB.Delete(key)
81+
}
82+
}
83+
if err := cm.CorpusDB.Flush(); err != nil {
84+
log.Fatalf("failed to save corpus database: %v", err)
85+
}
86+
cm.CorpusDB.BumpVersion(CurrentDBVersion)
87+
88+
return newSize
89+
}

syz-manager/manager.go

Lines changed: 11 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,68 +1032,20 @@ func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) {
10321032
}
10331033

10341034
func (mgr *Manager) minimizeCorpusLocked() {
1035-
// Don't minimize corpus until we have triaged all inputs from it.
1036-
// During corpus triage it would happen very often since we are actively adding inputs,
1037-
// and presumably the persistent corpus was reasonably minimial, and we don't use it for fuzzing yet.
1038-
if mgr.phase < phaseTriagedCorpus {
1039-
return
1040-
}
1041-
currSize := mgr.corpus.StatProgs.Val()
1042-
if currSize <= mgr.lastMinCorpus*103/100 {
1043-
return
1044-
}
1045-
mgr.corpus.Minimize(mgr.cfg.Cover)
1046-
newSize := mgr.corpus.StatProgs.Val()
1047-
1048-
log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize)
1049-
mgr.lastMinCorpus = newSize
1050-
1051-
// From time to time we get corpus explosion due to different reason:
1052-
// generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc.
1053-
// This has bad effect on the instance and especially on instances
1054-
// connected via hub. Do some per-syscall sanity checking to prevent this.
1055-
for call, info := range mgr.corpus.CallCover() {
1056-
if mgr.cfg.Cover {
1057-
// If we have less than 1K inputs per this call,
1058-
// accept all new inputs unconditionally.
1059-
if info.Count < 1000 {
1060-
continue
1061-
}
1062-
// If we have more than 3K already, don't accept any more.
1063-
// Between 1K and 3K look at amount of coverage we are getting from these programs.
1064-
// Empirically, real coverage for the most saturated syscalls is ~30-60
1065-
// per program (even when we have a thousand of them). For explosion
1066-
// case coverage tend to be much lower (~0.3-5 per program).
1067-
if info.Count < 3000 && len(info.Cover)/info.Count >= 10 {
1068-
continue
1069-
}
1070-
} else {
1071-
// If we don't have real coverage, signal is weak.
1072-
// If we have more than several hundreds, there is something wrong.
1073-
if info.Count < 300 {
1074-
continue
1075-
}
1076-
}
1077-
if mgr.saturatedCalls[call] {
1078-
continue
1079-
}
1080-
mgr.saturatedCalls[call] = true
1081-
log.Logf(0, "coverage for %v has saturated, not accepting more inputs", call)
1035+
cm := &manager.CorpusMinimizer{
1036+
Corpus: mgr.corpus,
1037+
CorpusDB: mgr.corpusDB,
1038+
Cover: mgr.cfg.Cover,
1039+
LastMinCorpus: mgr.lastMinCorpus,
1040+
SaturatedCalls: mgr.saturatedCalls,
1041+
DisabledHashes: mgr.disabledHashes,
1042+
PhaseCheck: func() bool {
1043+
return mgr.phase >= phaseTriagedCorpus
1044+
},
10821045
}
1083-
10841046
mgr.corpusDBMu.Lock()
10851047
defer mgr.corpusDBMu.Unlock()
1086-
for key := range mgr.corpusDB.Records {
1087-
ok1 := mgr.corpus.Item(key) != nil
1088-
_, ok2 := mgr.disabledHashes[key]
1089-
if !ok1 && !ok2 {
1090-
mgr.corpusDB.Delete(key)
1091-
}
1092-
}
1093-
if err := mgr.corpusDB.Flush(); err != nil {
1094-
log.Fatalf("failed to save corpus database: %v", err)
1095-
}
1096-
mgr.corpusDB.BumpVersion(manager.CurrentDBVersion)
1048+
mgr.lastMinCorpus = cm.Minimize()
10971049
}
10981050

10991051
func setGuiltyFiles(crash *dashapi.Crash, report *report.Report) {

syz-verifier/main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ func Setup(name string, cfg *mgrconfig.Config, debug bool) (*Kernel, error) {
5050
return nil, fmt.Errorf("failed to create reporter for %q: %w", name, err)
5151
}
5252

53+
// Enforce deterministic execution for verifier
54+
cfg.Experimental.ResetAccState = true // Restart the executor process between program executions to clear accumulated kernel/VM state.
55+
//cfg.Procs = 1 // No parallel process execution. When enabled, it slows down execution significantly.
56+
5357
kernel.serv, err = rpcserver.New(&rpcserver.RemoteConfig{
5458
Config: cfg,
5559
Manager: kernel,
@@ -109,6 +113,7 @@ func main() {
109113
}
110114
osutil.MkdirAll(workdir)
111115

116+
log.EnableLogCaching(1000, 1<<20)
112117
log.Logf(0, "initialized %d sources", len(sources))
113118

114119
vrf := &Verifier{

0 commit comments

Comments
 (0)