pkg/report: get only the thread local reports

tarasmadan · tarasmadan · commit 540b9904b59e · 2026-01-22T03:07:20.000+01:00
Syzkaller is currently able to parse sequential reports from the kernel log and accumulate the statistics per report.
These reports are visible only in syzkaller itself (not on the syzbot dashboard).
The problems:
1. These reports may be interleaved if they come from parallel contexts (threads).
2. Getting reports from one thread instead of all threads will hopefully give a higher signal/noise ratio.

This PR improves the parsing of sequential reports and is not expected to affect "first report" extraction.
diff --git a/pkg/report/linux.go b/pkg/report/linux.go
@@ -165,8 +165,9 @@ func (ctx *linux) Parse(output []byte) *Report {
 	}
 	for questionable := false; ; questionable = true {
 		rep := &Report{
-			Output:   output,
-			StartPos: startPos,
+			Output:    output,
+			StartPos:  startPos,
+			ContextID: context,
 		}
 		endPos, reportEnd, report, prefix := ctx.findReport(output, oops, startPos, context, questionable)
 		rep.EndPos = endPos
diff --git a/pkg/report/report.go b/pkg/report/report.go
@@ -72,6 +72,10 @@ type Report struct {
 	MachineInfo []byte
 	// If the crash happened in the context of the syz-executor process, Executor will hold more info.
 	Executor *ExecutorInfo
+	// On Linux systems ContextID may be the ThreadID (enabled by CONFIG_PRINTK_CALLER)
+	// or, alternatively, the CpuID.
+	ContextID string
+
 	// reportPrefixLen is length of additional prefix lines that we added before actual crash report.
 	reportPrefixLen int
 	// symbolized is set if the report is symbolized. It prevents double symbolization.
@@ -278,16 +282,51 @@ func IsSuppressed(reporter *Reporter, output []byte) bool {
 }
 
 // ParseAll returns all successive reports in output.
-func ParseAll(reporter *Reporter, output []byte) (reports []*Report) {
-	skipPos := 0
+func ParseAll(reporter *Reporter, output []byte, startFrom int) []*Report {
+	skipPos := startFrom
+	var res []*Report
+	var scanFrom []int
 	for {
 		rep := reporter.ParseFrom(output, skipPos)
 		if rep == nil {
-			return
+			break
+		}
+		isTailReport := len(res) > 0
+		if isTailReport && rep.Type == crash.SyzFailure {
+			skipPos = rep.SkipPos
+			continue
 		}
-		reports = append(reports, rep)
+		res = append(res, rep)
+		scanFrom = append(scanFrom, skipPos)
 		skipPos = rep.SkipPos
 	}
+	return fixReports(reporter, res, scanFrom)
+}
+
+// fixReports truncates the reports where possible.
+// Some reports last till the end of the output. If we have a few sequential reports, they intersect.
+// The idea is to cut the log into chunks and generate shorter but still valid (non-corrupted) reports.
+func fixReports(reporter *Reporter, reports []*Report, skipPos []int) []*Report {
+	nextContextReportPos := map[string]int{}
+	for i := len(reports) - 1; i >= 0; i-- {
+		rep := reports[i]
+		if rep.Corrupted {
+			continue
+		}
+		nextReportPos := nextContextReportPos[rep.ContextID]
+		nextContextReportPos[rep.ContextID] = rep.StartPos
+		if nextReportPos == 0 {
+			continue
+		}
+		if nextReportPos < rep.EndPos {
+			shorterReport := reporter.ParseFrom(rep.Output[:nextReportPos], skipPos[i])
+			if shorterReport != nil && !shorterReport.Corrupted {
+				reports[i] = shorterReport
+				reports[i].Output = rep.Output
+			}
+		}
+	}
+	return reports
 }
 
 // GCE console connection sometimes fails with this message.
@@ -933,13 +972,15 @@ var groupGoRuntimeErrors = oops{
 	},
 }
 
-const reportSeparator = "\n<<<<<<<<<<<<<<< tail report >>>>>>>>>>>>>>>\n\n"
+const reportSeparator = "<<<<<<<<<<<<<<< tail report >>>>>>>>>>>>>>>"
 
 func MergeReportBytes(reps []*Report) []byte {
 	var res []byte
-	for _, rep := range reps {
+	for i, rep := range reps {
+		if i > 0 {
+			res = append(res, []byte(reportSeparator)...)
+		}
 		res = append(res, rep.Report...)
-		res = append(res, []byte(reportSeparator)...)
 	}
 	return res
 }
diff --git a/pkg/report/report_test.go b/pkg/report/report_test.go
@@ -6,6 +6,7 @@ package report
 import (
 	"bufio"
 	"bytes"
+	"encoding/json"
 	"flag"
 	"fmt"
 	"os"
@@ -41,9 +42,14 @@ type ParseTest struct {
 	EndLine    string
 	Corrupted  bool
 	Suppressed bool
-	HasReport  bool
-	Report     []byte
+
+	// HasReport is in charge of both Report and TailReports.
+	HasReport   bool
+	Report      []byte
+	TailReports [][]byte
+
 	Executor   string
+	ContextIDs []string
 	// Only used in report parsing:
 	corruptedReason string
 }
@@ -55,6 +61,9 @@ func (test *ParseTest) Equal(other *ParseTest) bool {
 		test.Type != other.Type {
 		return false
 	}
+	if test.ContextIDs != nil && !reflect.DeepEqual(test.ContextIDs, other.ContextIDs) {
+		return false
+	}
 	if !reflect.DeepEqual(test.AltTitles, other.AltTitles) {
 		return false
 	}
@@ -64,6 +73,9 @@ func (test *ParseTest) Equal(other *ParseTest) bool {
 	if test.HasReport && !bytes.Equal(test.Report, other.Report) {
 		return false
 	}
+	if test.HasReport && !reflect.DeepEqual(test.TailReports, other.TailReports) {
+		return false
+	}
 	return test.Executor == other.Executor
 }
 
@@ -90,6 +102,10 @@ func (test *ParseTest) Headers() []byte {
 	if test.Executor != "" {
 		fmt.Fprintf(buf, "EXECUTOR: %s\n", test.Executor)
 	}
+	if strings.Join(test.ContextIDs, "") != "" {
+		jsonData, _ := json.Marshal(test.ContextIDs)
+		fmt.Fprintf(buf, "CONTEXTS: %s\n", jsonData)
+	}
 	return buf.Bytes()
 }
 
@@ -98,8 +114,8 @@ func testParseFile(t *testing.T, reporter *Reporter, fn string) {
 	testParseImpl(t, reporter, test)
 }
 
-func parseReport(t *testing.T, reporter *Reporter, fn string) *ParseTest {
-	data, err := os.ReadFile(fn)
+func parseReport(t *testing.T, reporter *Reporter, testFileName string) *ParseTest {
+	data, err := os.ReadFile(testFileName)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -109,10 +125,11 @@ func parseReport(t *testing.T, reporter *Reporter, fn string) *ParseTest {
 		phaseHeaders = iota
 		phaseLog
 		phaseReport
+		phaseTailReports
 	)
 	phase := phaseHeaders
 	test := &ParseTest{
-		FileName: fn,
+		FileName: testFileName,
 	}
 	prevEmptyLine := false
 	s := bufio.NewScanner(bytes.NewReader(data))
@@ -134,8 +151,20 @@ func parseReport(t *testing.T, reporter *Reporter, fn string) *ParseTest {
 				test.Log = append(test.Log, '\n')
 			}
 		case phaseReport:
-			test.Report = append(test.Report, s.Bytes()...)
-			test.Report = append(test.Report, '\n')
+			if string(s.Bytes()) == "TAIL REPORTS:" {
+				test.TailReports = [][]byte{{}}
+				phase = phaseTailReports
+			} else {
+				test.Report = append(test.Report, s.Bytes()...)
+				test.Report = append(test.Report, '\n')
+			}
+		case phaseTailReports:
+			if string(s.Bytes()) == reportSeparator {
+				test.TailReports = append(test.TailReports, []byte{})
+				continue
+			}
+			test.TailReports[len(test.TailReports)-1] = append(test.TailReports[len(test.TailReports)-1], s.Bytes()...)
+			test.TailReports[len(test.TailReports)-1] = append(test.TailReports[len(test.TailReports)-1], []byte{'\n'}...)
 		}
 		prevEmptyLine = len(s.Bytes()) == 0
 	}
@@ -160,6 +189,7 @@ func parseHeaderLine(t *testing.T, test *ParseTest, ln string) {
 		corruptedPrefix  = "CORRUPTED: "
 		suppressedPrefix = "SUPPRESSED: "
 		executorPrefix   = "EXECUTOR: "
+		contextidPrefix  = "CONTEXTS: "
 	)
 	switch {
 	case strings.HasPrefix(ln, "#"):
@@ -195,60 +225,75 @@ func parseHeaderLine(t *testing.T, test *ParseTest, ln string) {
 		}
 	case strings.HasPrefix(ln, executorPrefix):
 		test.Executor = ln[len(executorPrefix):]
+	case strings.HasPrefix(ln, contextidPrefix):
+		err := json.Unmarshal([]byte(ln[len(contextidPrefix):]), &test.ContextIDs)
+		if err != nil {
+			t.Fatalf("contextIDs unmarshaling error: %q", err)
+		}
 	default:
 		t.Fatalf("unknown header field %q", ln)
 	}
 }
 
-func testFromReport(rep *Report) *ParseTest {
-	if rep == nil {
+func testFromReports(reps ...*Report) *ParseTest {
+	if reps == nil || len(reps) > 0 && reps[0] == nil {
 		return &ParseTest{}
 	}
 	ret := &ParseTest{
-		Title:           rep.Title,
-		AltTitles:       rep.AltTitles,
-		Corrupted:       rep.Corrupted,
-		corruptedReason: rep.CorruptedReason,
-		Suppressed:      rep.Suppressed,
-		Type:            crash.TitleToType(rep.Title),
-		Frame:           rep.Frame,
-		Report:          rep.Report,
-	}
-	if rep.Executor != nil {
-		ret.Executor = fmt.Sprintf("proc=%d, id=%d", rep.Executor.ProcID, rep.Executor.ExecID)
+		Title:           reps[0].Title,
+		AltTitles:       reps[0].AltTitles,
+		Corrupted:       reps[0].Corrupted,
+		corruptedReason: reps[0].CorruptedReason,
+		Suppressed:      reps[0].Suppressed,
+		Type:            crash.TitleToType(reps[0].Title),
+		Frame:           reps[0].Frame,
+		Report:          reps[0].Report,
+	}
+	if reps[0].Executor != nil {
+		ret.Executor = fmt.Sprintf("proc=%d, id=%d", reps[0].Executor.ProcID, reps[0].Executor.ExecID)
 	}
 	sort.Strings(ret.AltTitles)
+	ret.ContextIDs = append(ret.ContextIDs, reps[0].ContextID)
+	for i := 1; i < len(reps); i++ {
+		ret.TailReports = append(ret.TailReports, reps[i].Report)
+		ret.ContextIDs = append(ret.ContextIDs, reps[i].ContextID)
+	}
 	return ret
 }
 
 func testParseImpl(t *testing.T, reporter *Reporter, test *ParseTest) {
-	rep := reporter.Parse(test.Log)
+	gotReports := ParseAll(reporter, test.Log, 0)
+
+	var firstReport *Report
+	if len(gotReports) > 0 {
+		firstReport = gotReports[0]
+	}
 	containsCrash := reporter.ContainsCrash(test.Log)
 	expectCrash := (test.Title != "")
 	if expectCrash && !containsCrash {
 		t.Fatalf("did not find crash")
 	}
 	if !expectCrash && containsCrash {
-		t.Fatalf("found unexpected crash")
+		t.Fatalf("found unexpected crash: %s", firstReport.Title)
 	}
-	if rep != nil && rep.Title == "" {
+	if firstReport != nil && firstReport.Title == "" {
 		t.Fatalf("found crash, but title is empty")
 	}
-	parsed := testFromReport(rep)
+	parsed := testFromReports(gotReports...)
 	if !test.Equal(parsed) {
 		if *flagUpdate && test.StartLine+test.EndLine == "" {
 			updateReportTest(t, test, parsed)
 		}
 		t.Fatalf("want:\n%s\ngot:\n%sCorrupted reason: %q",
 			test.Headers(), parsed.Headers(), parsed.corruptedReason)
 	}
-	if parsed.Title != "" && len(rep.Report) == 0 {
+	if parsed.Title != "" && len(firstReport.Report) == 0 {
 		t.Fatalf("found crash message but report is empty")
 	}
-	if rep == nil {
+	if firstReport == nil {
 		return
 	}
-	checkReport(t, reporter, rep, test)
+	checkReport(t, reporter, firstReport, test)
 }
 
 func checkReport(t *testing.T, reporter *Reporter, rep *Report, test *ParseTest) {
@@ -285,11 +330,6 @@ func checkReport(t *testing.T, reporter *Reporter, rep *Report, test *ParseTest)
 		if rep1 == nil || rep1.Title != rep.Title || rep1.StartPos != rep.StartPos {
 			t.Fatalf("did not find the same report from rep.StartPos=%v", rep.StartPos)
 		}
-		// If we parse from EndPos, we must not find the same report.
-		rep2 := reporter.ParseFrom(test.Log, rep.EndPos)
-		if rep2 != nil && rep2.Title == rep.Title {
-			t.Fatalf("found the same report after rep.EndPos=%v", rep.EndPos)
-		}
 	}
 }
 
@@ -303,6 +343,10 @@ func updateReportTest(t *testing.T, test, parsed *ParseTest) {
 	fmt.Fprintf(buf, "\n%s", test.Log)
 	if test.HasReport {
 		fmt.Fprintf(buf, "REPORT:\n%s", parsed.Report)
+		if len(parsed.TailReports) > 0 {
+			fmt.Fprintf(buf, "TAIL REPORTS:\n")
+			buf.Write(bytes.Join(parsed.TailReports, []byte(reportSeparator+"\n")))
+		}
 	}
 	if err := os.WriteFile(test.FileName, buf.Bytes(), 0640); err != nil {
 		t.Logf("failed to update test file: %v", err)
@@ -395,7 +439,7 @@ func testSymbolizeFile(t *testing.T, reporter *Reporter, fn string) {
 	if err != nil {
 		t.Fatalf("failed to symbolize: %v", err)
 	}
-	parsed := testFromReport(rep)
+	parsed := testFromReports(rep)
 	if !test.Equal(parsed) {
 		if *flagUpdate {
 			updateReportTest(t, test, parsed)
diff --git a/tools/syz-symbolize/symbolize.go b/tools/syz-symbolize/symbolize.go
@@ -57,7 +57,7 @@ func main() {
 	if err != nil {
 		tool.Failf("failed to open input file: %v", err)
 	}
-	reps := report.ParseAll(reporter, text)
+	reps := report.ParseAll(reporter, text, 0)
 	if len(reps) == 0 {
 		rep := &report.Report{Report: text}
 		if err := reporter.Symbolize(rep); err != nil {
diff --git a/vm/vm.go b/vm/vm.go
diff --git a/vm/vm_test.go b/vm/vm_test.go

Original file line number	Diff line number	Diff line change
`@@ -165,8 +165,9 @@ func (ctx linux) Parse(output []byte) Report {`
`165`	`165`	`}`
`166`	`166`	`for questionable := false; ; questionable = true {`
`167`	`167`	`rep := &Report{`
`168`		`- Output: output,`
`169`		`- StartPos: startPos,`
	`168`	`+ Output: output,`
	`169`	`+ StartPos: startPos,`
	`170`	`+ ContextID: context,`
`170`	`171`	`}`
`171`	`172`	`endPos, reportEnd, report, prefix := ctx.findReport(output, oops, startPos, context, questionable)`
`172`	`173`	`rep.EndPos = endPos`
Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ func main() {`
`57`	`57`	`if err != nil {`
`58`	`58`	`tool.Failf("failed to open input file: %v", err)`
`59`	`59`	`}`
`60`		`- reps := report.ParseAll(reporter, text)`
	`60`	`+ reps := report.ParseAll(reporter, text, 0)`
`61`	`61`	`if len(reps) == 0 {`
`62`	`62`	`rep := &report.Report{Report: text}`
`63`	`63`	`if err := reporter.Symbolize(rep); err != nil {`