Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 163 additions & 6 deletions pkg/email/lore/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
package lore

import (
"fmt"
"regexp"
"sort"
"strconv"
"strings"

"github.com/google/syzkaller/dashboard/dashapi"
"github.com/google/syzkaller/pkg/email"
)

// Thread is a generic representation of a single discussion in the mailing list.
type Thread struct {
Subject string
MessageID string
Expand All @@ -20,9 +23,28 @@ type Thread struct {
Messages []*email.Email
}

// Series represents a single patch series sent over email.
// PatchSeries creates one Series per thread of type dashapi.DiscussionPatch.
type Series struct {
	// Title parsed from the thread subject, i.e. without the bracketed "[PATCH ...]" prefix.
	Subject string
	// MessageID of the thread the series was extracted from.
	MessageID string
	// Series version parsed from the thread subject; 1 if the subject does not specify one.
	Version int
	// If non-empty, contains a reason why the series better be ignored.
	Corrupted string
	// The patches found in the thread, cover letter excluded.
	// PatchSeries sorts them by Seq unless the series is marked as corrupted.
	Patches []Patch
}

// Patch is a single patch email that belongs to a Series.
type Patch struct {
	// Seq is the sequence number taken from the "Seq/Total" part of the email subject.
	// PatchSeries uses 1 when the subject has no such part.
	Seq int
	// The email that carries the patch.
	*email.Email
}

// Threads groups the given emails into discussion threads.
// Replies are followed without any limit on the nesting depth.
func Threads(emails []*email.Email) []*Thread {
	// A zero maxDepth disables the depth cut-off.
	const noDepthLimit = 0
	return listThreads(emails, noDepthLimit)
}

func listThreads(emails []*email.Email, maxDepth int) []*Thread {
ctx := &parseCtx{
maxDepth: maxDepth,
messages: map[string]*email.Email{},
next: map[*email.Email][]*email.Email{},
}
Expand All @@ -33,24 +55,128 @@ func Threads(emails []*email.Email) []*Thread {
return ctx.threads
}

// PatchSeries works like Threads, but reports only the patch series found among the emails.
// Series that look malformed are still returned; their Corrupted field explains the problem.
func PatchSeries(emails []*email.Email) []*Series {
	var result []*Series
	// The patches of a series are normally sent as direct replies to its first email,
	// so replies nested deeper than one level are not examined.
	for _, thread := range listThreads(emails, 1) {
		if thread.Type != dashapi.DiscussionPatch {
			continue
		}
		head, parsed := parsePatchSubject(thread.Subject)
		if !parsed {
			// The thread type is derived from the very same parser, so this is an invariant violation.
			panic("DiscussionPatch is set, but we fail to parse the thread subject")
		}
		series := &Series{
			Subject:   head.Title,
			MessageID: thread.MessageID,
			Version:   head.Version.ValueOr(1),
		}
		// The series is recorded before validation so that corrupted ones are reported too.
		result = append(result, series)
		if head.Seq.IsSet() && head.Seq.Value() > 1 {
			series.Corrupted = "the first patch has seq>1"
			continue
		}
		seen := map[int]bool{}
		for _, msg := range thread.Messages {
			info, isPatch := parsePatchSubject(msg.Subject)
			if !isPatch {
				continue
			}
			seq := info.Seq.ValueOr(1)
			// Seq 0 is the cover email, which is not of interest.
			// A repeated seq is unexpected; only the first occurrence is kept.
			if seq == 0 || seen[seq] {
				continue
			}
			seen[seq] = true
			series.Patches = append(series.Patches, Patch{
				Seq:   seq,
				Email: msg,
			})
		}
		expected := head.Total.ValueOr(1)
		switch {
		case len(seen) != expected:
			series.Corrupted = fmt.Sprintf("the subject mentions %d patches, %d are found",
				expected, len(seen))
		case len(series.Patches) == 0:
			series.Corrupted = "0 patches"
		default:
			sort.Slice(series.Patches, func(a, b int) bool {
				return series.Patches[a].Seq < series.Patches[b].Seq
			})
		}
	}
	return result
}

// DiscussionType extracts the specific discussion type from an email.
func DiscussionType(msg *email.Email) dashapi.DiscussionType {
discType := dashapi.DiscussionMention
if msg.OwnEmail {
discType = dashapi.DiscussionReport
}
// This is very crude, but should work for now.
if patchSubjectRe.MatchString(strings.ToLower(msg.Subject)) {
if _, ok := parsePatchSubject(msg.Subject); ok {
discType = dashapi.DiscussionPatch
} else if strings.Contains(msg.Subject, "Monthly") {
discType = dashapi.DiscussionReminder
}
return discType
}

// PatchSubject is the structured form of a patch email subject
// such as "[PATCH net v2 3/5] title", as returned by parsePatchSubject.
type PatchSubject struct {
	// The text that follows the bracketed prefix.
	Title string
	// Sorted extra words from the prefix. Sometimes there's e.g. "net" or "net-next" in the subject.
	Tags []string
	// Set if the prefix contains a "vN" word.
	Version Optional[int]
	// The "Seq/Total" part; both are unset if the prefix has none.
	Seq   Optional[int]
	Total Optional[int]
}

// patchSubjectRe matches subjects of the form "[<words> PATCH <words> <seq>/<total>] <title>".
// Matching is case-insensitive; the words and the seq/total part are optional.
// Submatches 1 and 2 are the words before and after PATCH, 3 and 4 are seq and total
// (leading zeros of seq are skipped), and 5 is the title.
// nolint: lll
var patchSubjectRe = regexp.MustCompile(`(?mi)^\[(?:([\w\s-]+)\s)?PATCH(?:\s([\w\s-]+))??(?:\s0*(\d+)\/(\d+))?\]\s*(.+)`)

// parsePatchSubject splits a patch email subject into its components.
// The second result is false if subject does not match patchSubjectRe,
// in which case the returned PatchSubject is empty.
func parsePatchSubject(subject string) (PatchSubject, bool) {
	var ret PatchSubject
	groups := patchSubjectRe.FindStringSubmatch(subject)
	if len(groups) == 0 {
		return ret, false
	}
	// Extra words may appear on both sides of "PATCH"; they are treated uniformly.
	tags := append(strings.Fields(groups[1]), strings.Fields(groups[2])...)
	for _, tag := range tags {
		// The subject is matched case-insensitively, so accept both "v2" and "V2".
		// strings.Fields never returns empty strings, so tag[0] is safe.
		if tag[0] == 'v' || tag[0] == 'V' {
			if val, err := strconv.Atoi(tag[1:]); err == nil {
				ret.Version.Set(val)
				continue
			}
		}
		// Anything that is not a version (including e.g. "vfs") stays a plain tag.
		ret.Tags = append(ret.Tags, tag)
	}
	sort.Strings(ret.Tags)
	// Groups 3 and 4 consist of digits only, so Atoi can fail only on overflow;
	// the corresponding field is then left unset.
	if groups[3] != "" {
		if val, err := strconv.Atoi(groups[3]); err == nil {
			ret.Seq.Set(val)
		}
	}
	if groups[4] != "" {
		if val, err := strconv.Atoi(groups[4]); err == nil {
			ret.Total.Set(val)
		}
	}
	ret.Title = groups[5]
	return ret, true
}

type parseCtx struct {
maxDepth int
threads []*Thread
messages map[string]*email.Email
next map[*email.Email][]*email.Email
Expand All @@ -73,7 +199,7 @@ func (c *parseCtx) process() {
}
// Iterate starting from these tree nodes.
for _, node := range nodes {
c.visit(node, nil)
c.visit(node, nil, 0)
}
// Collect BugIDs.
for _, thread := range c.threads {
Expand All @@ -92,7 +218,7 @@ func (c *parseCtx) process() {
}
}

func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
func (c *parseCtx) visit(msg *email.Email, thread *Thread, depth int) {
var oldInfo *email.OldThreadInfo
if thread != nil {
oldInfo = &email.OldThreadInfo{
Expand All @@ -114,7 +240,38 @@ func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
}
c.threads = append(c.threads, thread)
}
for _, nextMsg := range c.next[msg] {
c.visit(nextMsg, thread)
if c.maxDepth == 0 || depth < c.maxDepth {
for _, nextMsg := range c.next[msg] {
c.visit(nextMsg, thread, depth+1)
}
}
}

// Optional holds a value of type T together with a flag that tells whether it was assigned.
// The zero Optional is unset.
type Optional[T any] struct {
	val T
	set bool
}

// value returns an Optional that already holds val.
func value[T any](val T) Optional[T] {
	var o Optional[T]
	o.Set(val)
	return o
}

// IsSet reports whether a value has been assigned.
func (o Optional[T]) IsSet() bool { return o.set }

// Value returns the stored value without checking whether it was assigned.
func (o Optional[T]) Value() T { return o.val }

// ValueOr returns the stored value if one was assigned and def otherwise.
func (o Optional[T]) ValueOr(def T) T {
	if !o.set {
		return def
	}
	return o.val
}

// Set stores val and marks the Optional as set.
func (o *Optional[T]) Set(val T) {
	*o = Optional[T]{val: val, set: true}
}
Loading
Loading