Skip to content

Commit cbbdc61

Browse files
committed
pkg/email/lore: extract patch series
Refactor the code to make it more reusable. Add a method to extract specifically the list of new patch series.
1 parent 7f21017 commit cbbdc61

File tree

4 files changed

+390
-27
lines changed

4 files changed

+390
-27
lines changed

pkg/email/lore/parse.go

Lines changed: 162 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
package lore
55

66
import (
7+
"fmt"
78
"regexp"
89
"sort"
10+
"strconv"
911
"strings"
1012

1113
"github.com/google/syzkaller/dashboard/dashapi"
@@ -20,9 +22,28 @@ type Thread struct {
2022
Messages []*email.Email
2123
}
2224

25+
type Series struct {
26+
Subject string
27+
MessageID string
28+
Version int
29+
Corrupted string // If non-empty, contains a reason why the series better be ignored.
30+
Patches []Patch
31+
// TODO: add Cover *email.Email?
32+
}
33+
34+
type Patch struct {
35+
Seq int
36+
*email.Email
37+
}
38+
2339
// Threads extracts individual threads from a list of emails.
2440
func Threads(emails []*email.Email) []*Thread {
41+
return listThreads(emails, 0)
42+
}
43+
44+
func listThreads(emails []*email.Email, maxDepth int) []*Thread {
2545
ctx := &parseCtx{
46+
maxDepth: maxDepth,
2647
messages: map[string]*email.Email{},
2748
next: map[*email.Email][]*email.Email{},
2849
}
@@ -33,24 +54,128 @@ func Threads(emails []*email.Email) []*Thread {
3354
return ctx.threads
3455
}
3556

57+
// PatchSeries is similar to Threads, but returns only the patch series submitted to the mailing lists.
58+
func PatchSeries(emails []*email.Email) []*Series {
59+
var ret []*Series
60+
// Normally, all following series patches are sent in response to the first email sent.
61+
// So there's no sense to look at deeper replies.
62+
for _, thread := range listThreads(emails, 1) {
63+
if thread.Type != dashapi.DiscussionPatch {
64+
continue
65+
}
66+
patch, ok := parsePatchSubject(thread.Subject)
67+
if !ok {
68+
// It must never be happening.
69+
panic("DiscussionPatch is set, but we fail to parse the thread subject")
70+
}
71+
total := patch.Total.ValueOr(1)
72+
series := &Series{
73+
Subject: patch.Title,
74+
MessageID: thread.MessageID,
75+
Version: patch.Version.ValueOr(1),
76+
}
77+
ret = append(ret, series)
78+
if patch.Seq.IsSet() && patch.Seq.Value() > 1 {
79+
series.Corrupted = "the first patch has seq>1"
80+
continue
81+
}
82+
hasSeq := map[int]bool{}
83+
for _, email := range thread.Messages {
84+
patch, ok := parsePatchSubject(email.Subject)
85+
if !ok {
86+
continue
87+
}
88+
seq := patch.Seq.ValueOr(1)
89+
if seq == 0 {
90+
// The cover email is not of interest.
91+
continue
92+
}
93+
if hasSeq[seq] {
94+
// It's weird if that really happens, but let's skip for now.
95+
continue
96+
}
97+
hasSeq[seq] = true
98+
series.Patches = append(series.Patches, Patch{
99+
Seq: seq,
100+
Email: email,
101+
})
102+
}
103+
if len(hasSeq) != total {
104+
series.Corrupted = fmt.Sprintf("the subject mentions %d patches, %d are found",
105+
total, len(hasSeq))
106+
continue
107+
}
108+
if len(series.Patches) == 0 {
109+
series.Corrupted = "0 patches"
110+
continue
111+
}
112+
sort.Slice(series.Patches, func(i, j int) bool {
113+
return series.Patches[i].Seq < series.Patches[j].Seq
114+
})
115+
}
116+
return ret
117+
}
118+
36119
// DiscussionType extracts the specific discussion type from an email.
37120
func DiscussionType(msg *email.Email) dashapi.DiscussionType {
38121
discType := dashapi.DiscussionMention
39122
if msg.OwnEmail {
40123
discType = dashapi.DiscussionReport
41124
}
42125
// This is very crude, but should work for now.
43-
if patchSubjectRe.MatchString(strings.ToLower(msg.Subject)) {
126+
if _, ok := parsePatchSubject(msg.Subject); ok {
44127
discType = dashapi.DiscussionPatch
45128
} else if strings.Contains(msg.Subject, "Monthly") {
46129
discType = dashapi.DiscussionReminder
47130
}
48131
return discType
49132
}
50133

51-
var patchSubjectRe = regexp.MustCompile(`\[(?:(?:rfc|resend)\s+)*patch`)
134+
type PatchSubject struct {
135+
Title string
136+
Tags []string // Sometimes there's e.g. "net" or "next-next" in the subject.
137+
Version Optional[int]
138+
Seq Optional[int] // The "Seq/Total" part.
139+
Total Optional[int]
140+
}
141+
142+
// patchSubjectRe matches a "[<tags> PATCH <tags> <seq>/<total>] <title>" subject,
// case-insensitively and anchored at the start of a line.
//
// Capture groups:
//
//	1: words before "PATCH"
//	2: words after "PATCH"
//	3: sequence number with leading zeroes stripped
//	4: total number of patches
//	5: title
//
// nolint: lll
var patchSubjectRe = regexp.MustCompile(`(?mi)^\[(?:([\w\s-]+)\s)?PATCH(?:\s([\w\s-]+))??(?:\s0*(\d+)\/(\d+))?\]\s*(.+)`)
144+
145+
func parsePatchSubject(subject string) (PatchSubject, bool) {
146+
var ret PatchSubject
147+
groups := patchSubjectRe.FindStringSubmatch(subject)
148+
if len(groups) == 0 {
149+
return ret, false
150+
}
151+
tags := strings.Fields(groups[1])
152+
for _, tag := range append(tags, strings.Fields(groups[2])...) {
153+
if strings.HasPrefix(tag, "v") {
154+
val, err := strconv.Atoi(strings.TrimPrefix(tag, "v"))
155+
if err == nil {
156+
ret.Version.Set(val)
157+
continue
158+
}
159+
}
160+
ret.Tags = append(ret.Tags, tag)
161+
}
162+
sort.Strings(ret.Tags)
163+
if groups[3] != "" {
164+
if val, err := strconv.Atoi(groups[3]); err == nil {
165+
ret.Seq.Set(val)
166+
}
167+
}
168+
if groups[4] != "" {
169+
if val, err := strconv.Atoi(groups[4]); err == nil {
170+
ret.Total.Set(val)
171+
}
172+
}
173+
ret.Title = groups[5]
174+
return ret, true
175+
}
52176

53177
type parseCtx struct {
178+
maxDepth int
54179
threads []*Thread
55180
messages map[string]*email.Email
56181
next map[*email.Email][]*email.Email
@@ -73,7 +198,7 @@ func (c *parseCtx) process() {
73198
}
74199
// Iterate starting from these tree nodes.
75200
for _, node := range nodes {
76-
c.visit(node, nil)
201+
c.visit(node, nil, 0)
77202
}
78203
// Collect BugIDs.
79204
for _, thread := range c.threads {
@@ -92,7 +217,7 @@ func (c *parseCtx) process() {
92217
}
93218
}
94219

95-
func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
220+
func (c *parseCtx) visit(msg *email.Email, thread *Thread, depth int) {
96221
var oldInfo *email.OldThreadInfo
97222
if thread != nil {
98223
oldInfo = &email.OldThreadInfo{
@@ -114,7 +239,38 @@ func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
114239
}
115240
c.threads = append(c.threads, thread)
116241
}
117-
for _, nextMsg := range c.next[msg] {
118-
c.visit(nextMsg, thread)
242+
if c.maxDepth == 0 || depth < c.maxDepth {
243+
for _, nextMsg := range c.next[msg] {
244+
c.visit(nextMsg, thread, depth+1)
245+
}
119246
}
120247
}
248+
249+
// Optional is a value of type T that may or may not have been set.
// The zero Optional is unset.
type Optional[T any] struct {
	val T
	set bool
}

// value returns an Optional that is already set to val.
func value[T any](val T) Optional[T] {
	var o Optional[T]
	o.Set(val)
	return o
}

// IsSet reports whether a value has been stored.
func (o Optional[T]) IsSet() bool {
	return o.set
}

// Value returns the stored value, or the zero value of T if nothing has been stored.
func (o Optional[T]) Value() T {
	return o.val
}

// ValueOr returns the stored value, or def if nothing has been stored.
func (o Optional[T]) ValueOr(def T) T {
	if !o.set {
		return def
	}
	return o.val
}

// Set stores val and marks the Optional as set.
func (o *Optional[T]) Set(val T) {
	*o = Optional[T]{val: val, set: true}
}

0 commit comments

Comments
 (0)