-
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbuilder.go
More file actions
130 lines (113 loc) · 3.25 KB
/
Copy pathbuilder.go
File metadata and controls
130 lines (113 loc) · 3.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package ahocorasick
import "errors"
// Builder configures and builds an Aho-Corasick automaton.
type Builder struct {
patterns [][]byte
matchKind MatchKind
ascii bool // optimize for ASCII-only patterns
prefilter bool // enable prefilter optimization
byteClasses bool // enable byte class compression
}
// NewBuilder creates a new Builder with default configuration.
// Default: LeftmostFirst semantics, byte classes enabled.
func NewBuilder() *Builder {
return &Builder{
matchKind: LeftmostFirst,
byteClasses: true,
prefilter: true,
}
}
// SetMatchKind sets the match semantics.
// Default is LeftmostFirst (Perl-compatible).
func (b *Builder) SetMatchKind(kind MatchKind) *Builder {
b.matchKind = kind
return b
}
// SetASCII enables ASCII-only optimization.
// When true, patterns are assumed to contain only ASCII bytes.
func (b *Builder) SetASCII(ascii bool) *Builder {
b.ascii = ascii
return b
}
// SetPrefilter enables or disables prefilter optimization.
// When enabled, a prefilter may be used to accelerate search.
func (b *Builder) SetPrefilter(prefilter bool) *Builder {
b.prefilter = prefilter
return b
}
// SetByteClasses enables or disables byte class compression.
// When enabled, reduces memory usage by grouping equivalent bytes.
func (b *Builder) SetByteClasses(enabled bool) *Builder {
b.byteClasses = enabled
return b
}
// AddPattern adds a single pattern to the automaton.
func (b *Builder) AddPattern(pattern []byte) *Builder {
b.patterns = append(b.patterns, pattern)
return b
}
// AddPatterns adds multiple patterns to the automaton.
func (b *Builder) AddPatterns(patterns [][]byte) *Builder {
b.patterns = append(b.patterns, patterns...)
return b
}
// AddStrings adds multiple string patterns to the automaton.
func (b *Builder) AddStrings(patterns []string) *Builder {
for _, p := range patterns {
b.patterns = append(b.patterns, []byte(p))
}
return b
}
// Build constructs the Aho-Corasick automaton from the configured patterns.
// Returns an error if no patterns were added or if construction fails.
// Uses optimized dense array transitions for high performance (1+ GB/s).
func (b *Builder) Build() (*Automaton, error) {
if len(b.patterns) == 0 {
return nil, errors.New("ahocorasick: no patterns provided")
}
// Validate patterns
for i, p := range b.patterns {
if len(p) == 0 {
return nil, errors.New("ahocorasick: empty pattern at index " + itoa(i))
}
}
// Build byte classes if enabled
var bc *ByteClasses
if b.byteClasses {
bc = NewByteClasses(b.patterns)
} else {
bc = NewSingletonByteClasses()
}
// Phase 1: Build the NFA (trie + failure links + match propagation)
nfa := buildOptimizedNFA(b.patterns, bc, b.matchKind)
// Phase 2: Compile NFA into a fully resolved DFA
// All failure transitions are pre-computed into the flat transition table.
dfa := buildDFA(nfa, b.patterns, b.matchKind)
return &Automaton{
dfa: dfa,
patterns: b.patterns,
matchKind: b.matchKind,
}, nil
}
// itoa converts int to string without importing strconv
func itoa(i int) string {
if i == 0 {
return "0"
}
var buf [20]byte
pos := len(buf)
neg := i < 0
if neg {
i = -i
}
for i > 0 {
pos--
buf[pos] = byte('0' + i%10)
i /= 10
}
if neg {
pos--
buf[pos] = '-'
}
return string(buf[pos:])
}