// stream.go - forked from BobuSumisu/aho-corasick.
//
// Gzip-compressed binary serialization (Encode/Decode) of a Trie.

package ahocorasick
import (
"compress/gzip"
"encoding/binary"
"io"
"sync"
)
// Encode serializes trie and writes it to w as gzip-compressed binary data.
// The error reported by the underlying encoder, if any, is returned as is.
func Encode(w io.Writer, trie *Trie) error {
	return newEncoder(w).encode(trie)
}
// Decode reads gzip-compressed binary data from r and rebuilds the Trie it
// describes. The error reported by the underlying decoder, if any, is
// returned as is.
func Decode(r io.Reader) (*Trie, error) {
	return newDecoder(r).decode()
}
// encoder serializes a Trie to the io.Writer it wraps.
type encoder struct {
	w io.Writer // destination of the encoded stream
}

// newEncoder returns an encoder whose output goes to w.
func newEncoder(w io.Writer) *encoder {
	enc := new(encoder)
	enc.w = w
	return enc
}
// encode serializes trie to enc.w as a single gzip stream.
//
// The uncompressed payload is little-endian: first the element counts of
// dict, failTrans, dictLink and pattern as four uint64 values, then the
// contents of those tables in the same order, with failTrans written one row
// at a time.
//
// The gzip writer is closed before returning. Close flushes the remaining
// compressed data and writes the gzip footer, so its error is returned when no
// earlier write failed; ignoring it would let a truncated stream be reported
// as success. enc.w itself is not closed.
func (enc *encoder) encode(trie *Trie) (err error) {
	zw := gzip.NewWriter(enc.w)
	defer func() {
		// Keep the first error: a failed write is more informative than the
		// Close failure that may follow it.
		if cerr := zw.Close(); err == nil {
			err = cerr
		}
	}()

	// Header: element count of every table, in payload order, so the decoder
	// knows how much to read.
	lengths := [...]int{
		len(trie.dict),
		len(trie.failTrans),
		len(trie.dictLink),
		len(trie.pattern),
	}
	for _, n := range lengths {
		if err = binary.Write(zw, binary.LittleEndian, uint64(n)); err != nil {
			return err
		}
	}

	if err = binary.Write(zw, binary.LittleEndian, trie.dict); err != nil {
		return err
	}
	// Flatten failTrans by writing its rows back to back. Indexing (rather
	// than ranging by value) avoids copying every row before it is written.
	for i := range trie.failTrans {
		if err = binary.Write(zw, binary.LittleEndian, trie.failTrans[i][:]); err != nil {
			return err
		}
	}
	if err = binary.Write(zw, binary.LittleEndian, trie.dictLink); err != nil {
		return err
	}
	return binary.Write(zw, binary.LittleEndian, trie.pattern)
}
// decoder rebuilds a Trie from the io.Reader it wraps.
type decoder struct {
	r io.Reader // source of the encoded stream
}

// newDecoder returns a decoder whose input comes from r.
func newDecoder(r io.Reader) *decoder {
	dec := new(decoder)
	dec.r = r
	return dec
}
// decodePreallocBytes caps how much memory decode reserves on the strength of
// a length header alone. Tables larger than this are grown only as their data
// actually arrives from the stream.
const decodePreallocBytes = 64 << 20

// decode reads the gzip stream in dec.r and rebuilds the Trie stored in it,
// using the layout produced by encoder.encode: four little-endian uint64
// element counts (dict, failTrans, dictLink, pattern) followed by the contents
// of those tables in the same order, failTrans as rows of 256 uint32 values.
//
// The counts come from the stream and are therefore not trusted: no table is
// allocated at its announced size up front (see decodePreallocBytes), so a
// corrupt header ends in a read error instead of a makeslice panic or an
// oversized allocation.
//
// NOTE(review): reading stops after the pattern table and the stream is not
// drained to EOF, so trailing data is ignored and the gzip trailer checksum
// is not guaranteed to have been verified.
func (dec *decoder) decode() (*Trie, error) {
	zr, err := gzip.NewReader(dec.r)
	if err != nil {
		return nil, err
	}
	defer zr.Close()

	// Header: element counts of all tables, in payload order.
	var dictLen, failTransLen, dictLinkLen, patternLen uint64
	for _, n := range []*uint64{&dictLen, &failTransLen, &dictLinkLen, &patternLen} {
		if err := binary.Read(zr, binary.LittleEndian, n); err != nil {
			return nil, err
		}
	}

	dict, err := decodeUint32s(zr, dictLen)
	if err != nil {
		return nil, err
	}
	failTrans, err := decodeFailTrans(zr, failTransLen)
	if err != nil {
		return nil, err
	}
	dictLink, err := decodeUint32s(zr, dictLinkLen)
	if err != nil {
		return nil, err
	}
	pattern, err := decodeUint32s(zr, patternLen)
	if err != nil {
		return nil, err
	}

	return &Trie{
		failTrans: failTrans,
		dictLink:  dictLink,
		dict:      dict,
		pattern:   pattern,
		matchPool: sync.Pool{
			New: func() any { return &[]*Match{} },
		},
		matchStructPool: sync.Pool{
			New: func() any { return new(Match) },
		},
	}, nil
}

// decodeUint32s reads n little-endian uint32 values from r. The result is
// extended one bounded chunk at a time, so memory use follows the data that
// was really read rather than the announced count.
func decodeUint32s(r io.Reader, n uint64) ([]uint32, error) {
	const chunk = decodePreallocBytes / 4 // uint32 values per chunk

	capHint := n
	if capHint > chunk {
		capHint = chunk
	}
	vals := make([]uint32, 0, capHint)
	for uint64(len(vals)) < n {
		step := n - uint64(len(vals))
		if step > chunk {
			step = chunk
		}
		start := len(vals)
		vals = append(vals, make([]uint32, step)...)
		if err := binary.Read(r, binary.LittleEndian, vals[start:]); err != nil {
			// The table is already partly read, so running dry here is a
			// truncation, exactly as a single large read would report it.
			if err == io.EOF && start > 0 {
				err = io.ErrUnexpectedEOF
			}
			return nil, err
		}
	}
	return vals, nil
}

// decodeFailTrans reads n rows of 256 little-endian uint32 values from r. Each
// row is decoded directly into its final place, so no flat scratch copy of the
// whole table is needed and the row count never has to be multiplied out.
func decodeFailTrans(r io.Reader, n uint64) ([][256]uint32, error) {
	const maxPreallocRows = decodePreallocBytes / (256 * 4)

	capHint := n
	if capHint > maxPreallocRows {
		capHint = maxPreallocRows
	}
	rows := make([][256]uint32, 0, capHint)
	for uint64(len(rows)) < n {
		rows = append(rows, [256]uint32{})
		if err := binary.Read(r, binary.LittleEndian, rows[len(rows)-1][:]); err != nil {
			// At least one full row was read before: report a truncation.
			if err == io.EOF && len(rows) > 1 {
				err = io.ErrUnexpectedEOF
			}
			return nil, err
		}
	}
	return rows, nil
}