Skip to content
This repository has been archived by the owner on Jul 18, 2022. It is now read-only.

Commit

Permalink
Make header detection regex more lenient on punctuation and ws
Browse files Browse the repository at this point in the history
  • Loading branch information
Florent Biville committed Jan 3, 2020
1 parent 00cb349 commit a47fdeb
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 2 deletions.
37 changes: 36 additions & 1 deletion internal/pkg/core/header_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,47 @@ func OpeningLine(styles []CommentStyle) string {

// visible for testing
// MatchingLine builds the regular expression fragment that matches one line of
// the license header.
//
// The fragment accepts, in order: optional leading whitespace, an optional
// comment symbol taken from styles (via each style's GetString), optional
// whitespace, the text of line (normalized by normalizePunctuation and quoted
// with \Q...\E), any run of trailing punctuation or whitespace, and an
// optional newline.
func MatchingLine(line string, styles []CommentStyle) string {
	openingStyleSymbolRegex := combineRegexes(styles, func(style CommentStyle) string { return style.GetString() })
	normalizedLine := normalizePunctuation(line)
	// The fragment is a single formatted string, so it is returned directly
	// instead of being copied through an intermediate builder.
	return fmt.Sprintf(`[\t\v\f\r ]*%s?[\t\v\f\r ]*\Q%s\E[,.;:?!\t\v\f\r ]*\n?`, openingStyleSymbolRegex, normalizedLine)
}

// normalizePunctuation rewrites a license template line so that the regex
// built from it tolerates punctuation and whitespace differences.
//
// The result is meant to be embedded between \Q and \E (as MatchingLine does):
// ordinary text stays quoted, and each tolerant piece temporarily leaves the
// quoted section with \E and re-enters it with \Q.
//
//   - ',', ';', ':', '?' and '!' become an optional "any character" (.?).
//   - '.' gets the same treatment only when it is the last character of the
//     line or is followed by a space.
//   - A run of the same whitespace character (tab, vertical tab, form feed,
//     carriage return or space) becomes "one or more of that character".
//   - A backslash is emitted as an escaped literal outside the quoted section,
//     so that a literal \E in the template cannot end the quoting early.
func normalizePunctuation(line string) string {
	// Leaves the quoted section, optionally matches one character, re-enters it.
	const ignore = `\E.?\Q`

	var normalized strings.Builder
	normalized.Grow(len(line))
	previous := rune(-1)
	for k, v := range line {
		switch v {
		case ',', ';', ':', '?', '!':
			normalized.WriteString(ignore)
		case '.':
			// Dots in template expressions must, and in numerical expressions
			// should, be preserved. A regex matching dots not preceded by a
			// digit or by "{{" would be a slightly better heuristic, but Go's
			// regex engine has no negative lookbehind, hence this simple rule:
			// only a dot ending the line or followed by a space is optional.
			if k == len(line)-1 || line[k+1] == ' ' {
				normalized.WriteString(ignore)
			} else {
				normalized.WriteRune(v)
			}
		case '\\':
			// Inside \Q...\E the first \E always terminates the quoting, so a
			// backslash must be matched outside of the quoted section.
			normalized.WriteString(`\E\\\Q`)
		case '\t', '\v', '\f', '\r', ' ':
			// Only the first character of a run emits a pattern; the "+"
			// quantifier already covers the repetitions.
			if v != previous {
				normalized.WriteString(whitespaceRunPattern(v))
			}
		default:
			normalized.WriteRune(v)
		}
		previous = v
	}
	return normalized.String()
}

// whitespaceRunPattern returns the regex piece matching one or more
// occurrences of the whitespace character ws, wrapped so that it can be placed
// inside a \Q...\E section. Any rune other than tab, vertical tab, form feed
// or carriage return is treated as a space.
func whitespaceRunPattern(ws rune) string {
	switch ws {
	case '\t':
		return `\E\t+\Q`
	case '\v':
		return `\E\v+\Q`
	case '\f':
		return `\E\f+\Q`
	case '\r':
		return `\E\r+\Q`
	default:
		return `\E +\Q`
	}
}

// visible for testing
func ClosingLine(styles []CommentStyle) string {
closingLine := fmt.Sprintf(`(?:[\t\v\f\r ]*%s[\t\v\f\r ]*)?`, combineRegexes(styles, func(style CommentStyle) string { return style.GetClosingString() }))
Expand Down
20 changes: 19 additions & 1 deletion internal/pkg/core/header_detector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ world`
})
})

Context("with punctuation and whitespace variations", func() {
Context("with punctuation and whitespace variations in the source file", func() {


const file = `// some multi-line header.!:
Expand All @@ -140,6 +140,24 @@ world`
})
})

// Covers the case where the license header template carries punctuation that
// the source file's header lacks: the template lines below contain '.', '!'
// and '?', while the header in the file has none of them.
Context("with punctuation and whitespace variations in the license header template", func() {


const file = `// some multi-line header
// with some text
hello
world`

It("should detect it", func() {
// The template lines carry punctuation that is absent from the file above.
regex, err := core.ComputeHeaderDetectionRegex(
[]string{"some. multi-line. header!", "with some text?"},
map[string]string{})

Expect(err).NotTo(HaveOccurred())
// 44 is the byte offset just past the two header lines (where "hello"
// starts); presumably the helper returns the start and end offsets of the
// leftmost match - confirm against MatchLeftMostPositions.
Expect(MatchLeftMostPositions(regex, file)).To(Equal([]int{0, 44}))
})
})

Context("with a realistic header", func() {

const template = `Copyright {{.YearRange}} {{.Owner}}
Expand Down

0 comments on commit a47fdeb

Please sign in to comment.