Skip to content

Commit 540b990

Browse files
committed
pkg/report: get only the thread local reports
Syzkaller is currently able to parse sequential reports from the kernel log and accumulate the statistics per report. You can see these reports in syzkaller only (not syzbot dashboard). The problems: 1. These reports may be interleaving if are coming from the parallel contexts (threads) 2. Getting reports from one thread instead of all threads will hopefully give a higher signal/noise ratio. This PR improves sequential reports parsing and is not expected to affect "first report" extraction.
1 parent a16aed1 commit 540b990

File tree

6 files changed

+157
-219
lines changed

6 files changed

+157
-219
lines changed

pkg/report/linux.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,9 @@ func (ctx *linux) Parse(output []byte) *Report {
165165
}
166166
for questionable := false; ; questionable = true {
167167
rep := &Report{
168-
Output: output,
169-
StartPos: startPos,
168+
Output: output,
169+
StartPos: startPos,
170+
ContextID: context,
170171
}
171172
endPos, reportEnd, report, prefix := ctx.findReport(output, oops, startPos, context, questionable)
172173
rep.EndPos = endPos

pkg/report/report.go

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ type Report struct {
7272
MachineInfo []byte
7373
// If the crash happened in the context of the syz-executor process, Executor will hold more info.
7474
Executor *ExecutorInfo
75+
// On Linux systems ContextID may be the ThreadID(enabled by CONFIG_PRINTK_CALLER)
76+
// or alternatively CpuID.
77+
ContextID string
78+
7579
// reportPrefixLen is length of additional prefix lines that we added before actual crash report.
7680
reportPrefixLen int
7781
// symbolized is set if the report is symbolized. It prevents double symbolization.
@@ -278,16 +282,51 @@ func IsSuppressed(reporter *Reporter, output []byte) bool {
278282
}
279283

280284
// ParseAll returns all successive reports in output.
281-
func ParseAll(reporter *Reporter, output []byte) (reports []*Report) {
282-
skipPos := 0
285+
func ParseAll(reporter *Reporter, output []byte, startFrom int) []*Report {
286+
skipPos := startFrom
287+
var res []*Report
288+
var scanFrom []int
283289
for {
284290
rep := reporter.ParseFrom(output, skipPos)
285291
if rep == nil {
286-
return
292+
break
293+
}
294+
isTailReport := len(res) > 0
295+
if isTailReport && rep.Type == crash.SyzFailure {
296+
skipPos = rep.SkipPos
297+
continue
287298
}
288-
reports = append(reports, rep)
299+
res = append(res, rep)
300+
scanFrom = append(scanFrom, skipPos)
289301
skipPos = rep.SkipPos
290302
}
303+
return fixReports(reporter, res, scanFrom)
304+
}
305+
306+
// fixReports truncates the report where possible.
307+
// Some reports last till the end of the output. If we have a few sequential reports, they intersect.
308+
// The idea is to cut the log into the chunks and generate the shorter but still valid(!corrupted) reports.
309+
func fixReports(reporter *Reporter, reports []*Report, skipPos []int) []*Report {
310+
nextContextReportPos := map[string]int{}
311+
for i := len(reports) - 1; i >= 0; i-- {
312+
rep := reports[i]
313+
if rep.Corrupted {
314+
continue
315+
}
316+
nextReportPos := nextContextReportPos[rep.ContextID]
317+
nextContextReportPos[rep.ContextID] = rep.StartPos
318+
if nextReportPos == 0 {
319+
continue
320+
}
321+
if nextReportPos < rep.EndPos {
322+
shorterReport := reporter.ParseFrom(rep.Output[:nextReportPos], skipPos[i])
323+
if shorterReport != nil && !shorterReport.Corrupted {
324+
reports[i] = shorterReport
325+
reports[i].Output = rep.Output
326+
}
327+
}
328+
}
329+
return reports
291330
}
292331

293332
// GCE console connection sometimes fails with this message.
@@ -933,13 +972,15 @@ var groupGoRuntimeErrors = oops{
933972
},
934973
}
935974

936-
const reportSeparator = "\n<<<<<<<<<<<<<<< tail report >>>>>>>>>>>>>>>\n\n"
975+
const reportSeparator = "<<<<<<<<<<<<<<< tail report >>>>>>>>>>>>>>>"
937976

938977
func MergeReportBytes(reps []*Report) []byte {
939978
var res []byte
940-
for _, rep := range reps {
979+
for i, rep := range reps {
980+
if i > 0 {
981+
res = append(res, []byte(reportSeparator)...)
982+
}
941983
res = append(res, rep.Report...)
942-
res = append(res, []byte(reportSeparator)...)
943984
}
944985
return res
945986
}

pkg/report/report_test.go

Lines changed: 77 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package report
66
import (
77
"bufio"
88
"bytes"
9+
"encoding/json"
910
"flag"
1011
"fmt"
1112
"os"
@@ -41,9 +42,14 @@ type ParseTest struct {
4142
EndLine string
4243
Corrupted bool
4344
Suppressed bool
44-
HasReport bool
45-
Report []byte
45+
46+
// HasReport is in charge of both Report and TailReports.
47+
HasReport bool
48+
Report []byte
49+
TailReports [][]byte
50+
4651
Executor string
52+
ContextIDs []string
4753
// Only used in report parsing:
4854
corruptedReason string
4955
}
@@ -55,6 +61,9 @@ func (test *ParseTest) Equal(other *ParseTest) bool {
5561
test.Type != other.Type {
5662
return false
5763
}
64+
if test.ContextIDs != nil && !reflect.DeepEqual(test.ContextIDs, other.ContextIDs) {
65+
return false
66+
}
5867
if !reflect.DeepEqual(test.AltTitles, other.AltTitles) {
5968
return false
6069
}
@@ -64,6 +73,9 @@ func (test *ParseTest) Equal(other *ParseTest) bool {
6473
if test.HasReport && !bytes.Equal(test.Report, other.Report) {
6574
return false
6675
}
76+
if test.HasReport && !reflect.DeepEqual(test.TailReports, other.TailReports) {
77+
return false
78+
}
6779
return test.Executor == other.Executor
6880
}
6981

@@ -90,6 +102,10 @@ func (test *ParseTest) Headers() []byte {
90102
if test.Executor != "" {
91103
fmt.Fprintf(buf, "EXECUTOR: %s\n", test.Executor)
92104
}
105+
if strings.Join(test.ContextIDs, "") != "" {
106+
jsonData, _ := json.Marshal(test.ContextIDs)
107+
fmt.Fprintf(buf, "CONTEXTS: %s\n", jsonData)
108+
}
93109
return buf.Bytes()
94110
}
95111

@@ -98,8 +114,8 @@ func testParseFile(t *testing.T, reporter *Reporter, fn string) {
98114
testParseImpl(t, reporter, test)
99115
}
100116

101-
func parseReport(t *testing.T, reporter *Reporter, fn string) *ParseTest {
102-
data, err := os.ReadFile(fn)
117+
func parseReport(t *testing.T, reporter *Reporter, testFileName string) *ParseTest {
118+
data, err := os.ReadFile(testFileName)
103119
if err != nil {
104120
t.Fatal(err)
105121
}
@@ -109,10 +125,11 @@ func parseReport(t *testing.T, reporter *Reporter, fn string) *ParseTest {
109125
phaseHeaders = iota
110126
phaseLog
111127
phaseReport
128+
phaseTailReports
112129
)
113130
phase := phaseHeaders
114131
test := &ParseTest{
115-
FileName: fn,
132+
FileName: testFileName,
116133
}
117134
prevEmptyLine := false
118135
s := bufio.NewScanner(bytes.NewReader(data))
@@ -134,8 +151,20 @@ func parseReport(t *testing.T, reporter *Reporter, fn string) *ParseTest {
134151
test.Log = append(test.Log, '\n')
135152
}
136153
case phaseReport:
137-
test.Report = append(test.Report, s.Bytes()...)
138-
test.Report = append(test.Report, '\n')
154+
if string(s.Bytes()) == "TAIL REPORTS:" {
155+
test.TailReports = [][]byte{{}}
156+
phase = phaseTailReports
157+
} else {
158+
test.Report = append(test.Report, s.Bytes()...)
159+
test.Report = append(test.Report, '\n')
160+
}
161+
case phaseTailReports:
162+
if string(s.Bytes()) == reportSeparator {
163+
test.TailReports = append(test.TailReports, []byte{})
164+
continue
165+
}
166+
test.TailReports[len(test.TailReports)-1] = append(test.TailReports[len(test.TailReports)-1], s.Bytes()...)
167+
test.TailReports[len(test.TailReports)-1] = append(test.TailReports[len(test.TailReports)-1], []byte{'\n'}...)
139168
}
140169
prevEmptyLine = len(s.Bytes()) == 0
141170
}
@@ -160,6 +189,7 @@ func parseHeaderLine(t *testing.T, test *ParseTest, ln string) {
160189
corruptedPrefix = "CORRUPTED: "
161190
suppressedPrefix = "SUPPRESSED: "
162191
executorPrefix = "EXECUTOR: "
192+
contextidPrefix = "CONTEXTS: "
163193
)
164194
switch {
165195
case strings.HasPrefix(ln, "#"):
@@ -195,60 +225,75 @@ func parseHeaderLine(t *testing.T, test *ParseTest, ln string) {
195225
}
196226
case strings.HasPrefix(ln, executorPrefix):
197227
test.Executor = ln[len(executorPrefix):]
228+
case strings.HasPrefix(ln, contextidPrefix):
229+
err := json.Unmarshal([]byte(ln[len(contextidPrefix):]), &test.ContextIDs)
230+
if err != nil {
231+
t.Fatalf("contextIDs unmarshaling error: %q", err)
232+
}
198233
default:
199234
t.Fatalf("unknown header field %q", ln)
200235
}
201236
}
202237

203-
func testFromReport(rep *Report) *ParseTest {
204-
if rep == nil {
238+
func testFromReports(reps ...*Report) *ParseTest {
239+
if reps == nil || len(reps) > 0 && reps[0] == nil {
205240
return &ParseTest{}
206241
}
207242
ret := &ParseTest{
208-
Title: rep.Title,
209-
AltTitles: rep.AltTitles,
210-
Corrupted: rep.Corrupted,
211-
corruptedReason: rep.CorruptedReason,
212-
Suppressed: rep.Suppressed,
213-
Type: crash.TitleToType(rep.Title),
214-
Frame: rep.Frame,
215-
Report: rep.Report,
216-
}
217-
if rep.Executor != nil {
218-
ret.Executor = fmt.Sprintf("proc=%d, id=%d", rep.Executor.ProcID, rep.Executor.ExecID)
243+
Title: reps[0].Title,
244+
AltTitles: reps[0].AltTitles,
245+
Corrupted: reps[0].Corrupted,
246+
corruptedReason: reps[0].CorruptedReason,
247+
Suppressed: reps[0].Suppressed,
248+
Type: crash.TitleToType(reps[0].Title),
249+
Frame: reps[0].Frame,
250+
Report: reps[0].Report,
251+
}
252+
if reps[0].Executor != nil {
253+
ret.Executor = fmt.Sprintf("proc=%d, id=%d", reps[0].Executor.ProcID, reps[0].Executor.ExecID)
219254
}
220255
sort.Strings(ret.AltTitles)
256+
ret.ContextIDs = append(ret.ContextIDs, reps[0].ContextID)
257+
for i := 1; i < len(reps); i++ {
258+
ret.TailReports = append(ret.TailReports, reps[i].Report)
259+
ret.ContextIDs = append(ret.ContextIDs, reps[i].ContextID)
260+
}
221261
return ret
222262
}
223263

224264
func testParseImpl(t *testing.T, reporter *Reporter, test *ParseTest) {
225-
rep := reporter.Parse(test.Log)
265+
gotReports := ParseAll(reporter, test.Log, 0)
266+
267+
var firstReport *Report
268+
if len(gotReports) > 0 {
269+
firstReport = gotReports[0]
270+
}
226271
containsCrash := reporter.ContainsCrash(test.Log)
227272
expectCrash := (test.Title != "")
228273
if expectCrash && !containsCrash {
229274
t.Fatalf("did not find crash")
230275
}
231276
if !expectCrash && containsCrash {
232-
t.Fatalf("found unexpected crash")
277+
t.Fatalf("found unexpected crash: %s", firstReport.Title)
233278
}
234-
if rep != nil && rep.Title == "" {
279+
if firstReport != nil && firstReport.Title == "" {
235280
t.Fatalf("found crash, but title is empty")
236281
}
237-
parsed := testFromReport(rep)
282+
parsed := testFromReports(gotReports...)
238283
if !test.Equal(parsed) {
239284
if *flagUpdate && test.StartLine+test.EndLine == "" {
240285
updateReportTest(t, test, parsed)
241286
}
242287
t.Fatalf("want:\n%s\ngot:\n%sCorrupted reason: %q",
243288
test.Headers(), parsed.Headers(), parsed.corruptedReason)
244289
}
245-
if parsed.Title != "" && len(rep.Report) == 0 {
290+
if parsed.Title != "" && len(firstReport.Report) == 0 {
246291
t.Fatalf("found crash message but report is empty")
247292
}
248-
if rep == nil {
293+
if firstReport == nil {
249294
return
250295
}
251-
checkReport(t, reporter, rep, test)
296+
checkReport(t, reporter, firstReport, test)
252297
}
253298

254299
func checkReport(t *testing.T, reporter *Reporter, rep *Report, test *ParseTest) {
@@ -285,11 +330,6 @@ func checkReport(t *testing.T, reporter *Reporter, rep *Report, test *ParseTest)
285330
if rep1 == nil || rep1.Title != rep.Title || rep1.StartPos != rep.StartPos {
286331
t.Fatalf("did not find the same report from rep.StartPos=%v", rep.StartPos)
287332
}
288-
// If we parse from EndPos, we must not find the same report.
289-
rep2 := reporter.ParseFrom(test.Log, rep.EndPos)
290-
if rep2 != nil && rep2.Title == rep.Title {
291-
t.Fatalf("found the same report after rep.EndPos=%v", rep.EndPos)
292-
}
293333
}
294334
}
295335

@@ -303,6 +343,10 @@ func updateReportTest(t *testing.T, test, parsed *ParseTest) {
303343
fmt.Fprintf(buf, "\n%s", test.Log)
304344
if test.HasReport {
305345
fmt.Fprintf(buf, "REPORT:\n%s", parsed.Report)
346+
if len(parsed.TailReports) > 0 {
347+
fmt.Fprintf(buf, "TAIL REPORTS:\n")
348+
buf.Write(bytes.Join(parsed.TailReports, []byte(reportSeparator+"\n")))
349+
}
306350
}
307351
if err := os.WriteFile(test.FileName, buf.Bytes(), 0640); err != nil {
308352
t.Logf("failed to update test file: %v", err)
@@ -395,7 +439,7 @@ func testSymbolizeFile(t *testing.T, reporter *Reporter, fn string) {
395439
if err != nil {
396440
t.Fatalf("failed to symbolize: %v", err)
397441
}
398-
parsed := testFromReport(rep)
442+
parsed := testFromReports(rep)
399443
if !test.Equal(parsed) {
400444
if *flagUpdate {
401445
updateReportTest(t, test, parsed)

tools/syz-symbolize/symbolize.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func main() {
5757
if err != nil {
5858
tool.Failf("failed to open input file: %v", err)
5959
}
60-
reps := report.ParseAll(reporter, text)
60+
reps := report.ParseAll(reporter, text, 0)
6161
if len(reps) == 0 {
6262
rep := &report.Report{Report: text}
6363
if err := reporter.Symbolize(rep); err != nil {

0 commit comments

Comments
 (0)