Skip to content
This repository was archived by the owner on Jul 18, 2022. It is now read-only.

Commit a47fdeb

Browse files
author
Florent Biville
committed
Make header detection regex more lenient on punctuation and ws
1 parent 00cb349 commit a47fdeb

File tree

2 files changed

+55
-2
lines changed

2 files changed

+55
-2
lines changed

internal/pkg/core/header_detector.go

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,47 @@ func OpeningLine(styles []CommentStyle) string {
6060

6161
// visible for testing
6262
// MatchingLine returns the regular-expression source that detects a single
// header line inside a source file.
//
// The generated pattern is, in order: optional leading whitespace, an optional
// comment symbol built from the given styles, optional whitespace, the header
// line itself wrapped in \Q...\E, then any trailing punctuation/whitespace and
// an optional newline.
//
// NOTE(review): combineRegexes is defined elsewhere; it presumably yields a
// single group of every style's opening symbol so that the trailing `?` makes
// the whole symbol optional — confirm against its definition.
func MatchingLine(line string, styles []CommentStyle) string {
63-
middleLine := fmt.Sprintf(`[\t\v\f\r ]*%s?[\t\v\f\r ]*\Q%s\E[,.;:?!\t\v\f\r ]*\n?`, combineRegexes(styles, func(style CommentStyle) string { return style.GetString() }), line)
63+
openingStyleSymbolRegex := combineRegexes(styles, func(style CommentStyle) string { return style.GetString() })
64+
// The line is normalized before being interpolated between \Q and \E; the
// removed statement above interpolated the raw line directly.
normalizedLine := normalizePunctuation(line)
65+
middleLine := fmt.Sprintf(`[\t\v\f\r ]*%s?[\t\v\f\r ]*\Q%s\E[,.;:?!\t\v\f\r ]*\n?`, openingStyleSymbolRegex, normalizedLine)
6466
// The builder only ever receives this one string, so the returned value is
// exactly middleLine.
builder := strings.Builder{}
6567
builder.WriteString(middleLine)
6668
return builder.String()
6769
}
6870

71+
// normalizePunctuation rewrites a header template line so that it can be
// spliced between a `\Q` and a `\E` by the caller while tolerating punctuation
// and whitespace differences in the scanned file.
//
// The returned fragment is NOT a standalone regex: it assumes literal-quoting
// mode is already active where it starts and leaves it active where it ends.
// Every lenient piece therefore closes the quote with `\E`, emits a real regex
// fragment, and reopens the quote with `\Q`.
//
// Rules applied, in a single pass over the runes of line:
//   - ',', ';', ':', '?' and '!' become `.?` (an optional single character);
//   - '.' becomes `.?` only when it is the last byte of the line or is
//     followed by a space; any other dot is kept literally;
//   - a run of the same whitespace character (tab, vertical tab, form feed,
//     carriage return or space) becomes a single "one or more" match of that
//     character, so "a  b" in the template also matches "a b" in the file;
//   - everything else is copied unchanged.
func normalizePunctuation(line string) string {
	const ignore = `\E.?\Q`

	var out strings.Builder
	out.Grow(len(line))

	// run is the whitespace rune whose run is currently being collapsed, or 0
	// when the previous rune was not whitespace.
	var run rune
	for i, r := range line {
		if ws := whitespaceRunRegex(r); ws != "" {
			// Only the first rune of a run emits a fragment; the `+`
			// quantifier already covers the remaining ones.
			if r != run {
				out.WriteString(ws)
				run = r
			}
			continue
		}
		run = 0

		switch {
		case strings.ContainsRune(",;:?!", r):
			out.WriteString(ignore)
		case r == '.' && (i == len(line)-1 || line[i+1] == ' '):
			// Dots in template expressions must, and dots in numerical
			// expressions should, be preserved. Go's regex engine has no
			// negative lookbehind, hence this poor heuristic: a dot is only
			// treated as punctuation when followed by a space or when it is
			// the last character of the line.
			out.WriteString(ignore)
		default:
			out.WriteRune(r)
		}
	}
	return out.String()
}

// whitespaceRunRegex returns the quote-escaping fragment matching one or more
// occurrences of the whitespace rune r, or "" when r is not one of the
// whitespace characters handled by normalizePunctuation.
func whitespaceRunRegex(r rune) string {
	switch r {
	case '\t':
		return `\E\t+\Q`
	case '\v':
		return `\E\v+\Q`
	case '\f':
		return `\E\f+\Q`
	case '\r':
		return `\E\r+\Q`
	case ' ':
		return `\E +\Q`
	default:
		return ""
	}
}
103+
69104
// visible for testing
70105
func ClosingLine(styles []CommentStyle) string {
71106
closingLine := fmt.Sprintf(`(?:[\t\v\f\r ]*%s[\t\v\f\r ]*)?`, combineRegexes(styles, func(style CommentStyle) string { return style.GetClosingString() }))

internal/pkg/core/header_detector_test.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ world`
122122
})
123123
})
124124

125-
Context("with punctuation and whitespace variations", func() {
125+
Context("with punctuation and whitespace variations in the source file", func() {
126126

127127

128128
const file = `// some multi-line header.!:
@@ -140,6 +140,24 @@ world`
140140
})
141141
})
142142

143+
Context("with punctuation and whitespace variations in the license header template", func() {
144+
145+
146+
const file = `// some multi-line header
147+
// with some text
148+
hello
149+
world`
150+
151+
It("should detect it", func() {
152+
regex, err := core.ComputeHeaderDetectionRegex(
153+
[]string{"some. multi-line. header!", "with some text?"},
154+
map[string]string{})
155+
156+
Expect(err).NotTo(HaveOccurred())
157+
Expect(MatchLeftMostPositions(regex, file)).To(Equal([]int{0, 44}))
158+
})
159+
})
160+
143161
Context("with a realistic header", func() {
144162

145163
const template = `Copyright {{.YearRange}} {{.Owner}}

0 commit comments

Comments
 (0)