forked from BobuSumisu/aho-corasick
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrie.go
107 lines (89 loc) · 2.69 KB
/
trie.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package ahocorasick
import (
"sync"
)
const (
rootState uint32 = 1
nilState uint32 = 0
)
// Trie represents a trie of patterns with extra links as per the Aho-Corasick algorithm.
type Trie struct {
failTrans [][256]uint32
dict []uint32
pattern []uint32
dictLink []uint32
matchPool sync.Pool // Pool for match slice pointers
matchStructPool sync.Pool // Pool for Match structs
}
// Walk calls this function on any match, giving the end position, length of the matched bytes,
// and the pattern number.
type WalkFn func(end, n, pattern uint32) bool
// Walk runs the algorithm on a given output, calling the supplied callback function on every
// match. The algorithm will terminate if the callback function returns false.
func (tr *Trie) Walk(input []byte, fn WalkFn) {
// Local references to frequently accessed slices.
failTrans := tr.failTrans
dict := tr.dict
pattern := tr.pattern
dictLink := tr.dictLink
s := rootState
inputLen := len(input)
for i := range inputLen {
s = failTrans[s][input[i]]
ds := dict[s]
dl := dictLink[s]
if ds != 0 || dl != nilState {
if ds != 0 && !fn(uint32(i), ds, pattern[s]) {
return
}
for u := dl; u != nilState; u = dictLink[u] {
if !fn(uint32(i), dict[u], pattern[u]) {
return
}
}
}
}
}
// Match runs the Aho-Corasick string-search algorithm on a byte input.
func (tr *Trie) Match(input []byte) []*Match {
matches := tr.matchPool.Get().(*[]*Match)
*matches = (*matches)[:0] // Reset slice while keeping capacity
tr.Walk(input, func(end, n, pattern uint32) bool {
pos := end - n + 1
match := tr.matchStructPool.Get().(*Match)
match.pos = pos
match.pattern = pattern
match.match = input[pos : pos+n]
*matches = append(*matches, match)
return true
})
result := make([]*Match, len(*matches))
copy(result, *matches)
return result
}
// MatchFirst is the same as Match, but returns after first successful match.
func (tr *Trie) MatchFirst(input []byte) *Match {
var match *Match
tr.Walk(input, func(end, n, pattern uint32) bool {
pos := end - n + 1
match = &Match{pos: pos, match: input[pos : pos+n]}
return false
})
return match
}
// MatchString runs the Aho-Corasick string-search algorithm on a string input.
func (tr *Trie) MatchString(input string) []*Match {
return tr.Match([]byte(input))
}
// MatchFirstString is the same as MatchString, but returns after first successful match.
func (tr *Trie) MatchFirstString(input string) *Match {
return tr.MatchFirst([]byte(input))
}
// ReleaseMatches returns the matches to the pool.
func (tr *Trie) ReleaseMatches(matches []*Match) {
matchesPtr := &matches
for _, m := range *matchesPtr {
tr.matchStructPool.Put(m)
}
tr.matchPool.Put(matchesPtr)
}