Skip to content

Commit 16a23d7

Browse files
authored
Refactoring (#13)
1 parent 5c0e018 commit 16a23d7

File tree

2 files changed

+75
-73
lines changed

2 files changed

+75
-73
lines changed

src/builder.rs

+55 -55
Original file line number | Diff line number | Diff line change
@@ -18,33 +18,33 @@ const PATTERN_ID_INVALID: u32 = std::u32::MAX;
1818
const FAIL_MAX: usize = 0x00ffffff;
1919

2020
struct SparseTrie {
21-
nodes: Vec<Vec<(u8, usize)>>,
22-
pattern_id: Vec<usize>,
21+
states: Vec<Vec<(u8, usize)>>,
22+
pattern_ids: Vec<usize>,
2323
len: usize,
2424
}
2525

2626
impl SparseTrie {
2727
fn new() -> Self {
2828
Self {
29-
nodes: vec![vec![]],
30-
pattern_id: vec![std::usize::MAX],
29+
states: vec![vec![]],
30+
pattern_ids: vec![std::usize::MAX],
3131
len: 0,
3232
}
3333
}
3434

3535
#[inline(always)]
3636
fn add(&mut self, pattern: &[u8]) -> Result<(), DaachorseError> {
37-
let mut node_id = 0;
37+
let mut state_id = 0;
3838
for &c in pattern {
39-
node_id = self.get(node_id, c).unwrap_or_else(|| {
40-
let next_node_id = self.nodes.len();
41-
self.nodes.push(vec![]);
42-
self.nodes[node_id].push((c, next_node_id));
43-
self.pattern_id.push(std::usize::MAX);
44-
next_node_id
39+
state_id = self.get(state_id, c).unwrap_or_else(|| {
40+
let next_state_id = self.states.len();
41+
self.states.push(vec![]);
42+
self.states[state_id].push((c, next_state_id));
43+
self.pattern_ids.push(std::usize::MAX);
44+
next_state_id
4545
});
4646
}
47-
let pattern_id = self.pattern_id.get_mut(node_id).unwrap();
47+
let pattern_id = self.pattern_ids.get_mut(state_id).unwrap();
4848
if *pattern_id != std::usize::MAX {
4949
let e = DuplicatePatternError {
5050
pattern: pattern.to_vec(),
@@ -63,10 +63,10 @@ impl SparseTrie {
6363
}
6464

6565
#[inline(always)]
66-
fn get(&self, node_id: usize, c: u8) -> Option<usize> {
67-
for trans in &self.nodes[node_id] {
68-
if c == trans.0 {
69-
return Some(trans.1);
66+
fn get(&self, state_id: usize, c: u8) -> Option<usize> {
67+
for &(cc, child_id) in &self.states[state_id] {
68+
if c == cc {
69+
return Some(child_id);
7070
}
7171
}
7272
None
@@ -243,34 +243,34 @@ impl DoubleArrayAhoCorasickBuilder {
243243
}
244244

245245
fn build_double_array(&mut self, sparse_trie: &SparseTrie) -> Result<(), DaachorseError> {
246-
let mut node_id_map = vec![std::usize::MAX; sparse_trie.nodes.len()];
247-
node_id_map[0] = 0;
246+
let mut state_id_map = vec![std::usize::MAX; sparse_trie.states.len()];
247+
state_id_map[0] = 0;
248248

249249
self.init_array();
250250

251-
for (i, node) in sparse_trie.nodes.iter().enumerate() {
252-
let idx = node_id_map[i];
251+
for (i, edges) in sparse_trie.states.iter().enumerate() {
252+
let idx = state_id_map[i];
253253
{
254-
let pattern_id = sparse_trie.pattern_id[i];
254+
let pattern_id = sparse_trie.pattern_ids[i];
255255
if pattern_id != std::usize::MAX {
256256
self.extras[idx].pattern_id = pattern_id as u32;
257257
}
258258
}
259259

260-
if node.is_empty() {
260+
if edges.is_empty() {
261261
continue;
262262
}
263263

264-
let base = self.find_base(node);
264+
let base = self.find_base(edges);
265265
if base >= self.states.len() {
266266
self.extend_array()?;
267267
}
268268

269-
for &(c, child_id) in node {
269+
for &(c, child_id) in edges {
270270
let child_idx = base ^ c as usize;
271271
self.fix_state(child_idx);
272272
self.states[child_idx].set_check(c);
273-
node_id_map[child_id] = child_idx;
273+
state_id_map[child_id] = child_idx;
274274
}
275275
self.states[idx].set_base(base as u32);
276276
self.extras[base].used_base = true;
@@ -329,15 +329,15 @@ impl DoubleArrayAhoCorasickBuilder {
329329
}
330330

331331
#[inline(always)]
332-
fn find_base(&self, node: &[(u8, usize)]) -> usize {
332+
fn find_base(&self, edges: &[(u8, usize)]) -> usize {
333333
if self.head_idx == std::usize::MAX {
334334
return self.states.len();
335335
}
336336
let mut idx = self.head_idx;
337337
loop {
338338
debug_assert!(!self.extras[idx].used_index);
339-
let base = idx ^ node[0].0 as usize;
340-
if self.check_valid_base(base, node) {
339+
let base = idx ^ edges[0].0 as usize;
340+
if self.check_valid_base(base, edges) {
341341
return base;
342342
}
343343
idx = self.extras[idx].next;
@@ -348,11 +348,11 @@ impl DoubleArrayAhoCorasickBuilder {
348348
self.states.len()
349349
}
350350

351-
fn check_valid_base(&self, base: usize, node: &[(u8, usize)]) -> bool {
351+
fn check_valid_base(&self, base: usize, edges: &[(u8, usize)]) -> bool {
352352
if self.extras[base].used_base {
353353
return false;
354354
}
355-
for &(c, _) in node {
355+
for &(c, _) in edges {
356356
let idx = base ^ c as usize;
357357
if self.extras[idx].used_index {
358358
return false;
@@ -437,9 +437,9 @@ impl DoubleArrayAhoCorasickBuilder {
437437

438438
fn add_fails(&mut self, sparse_trie: &SparseTrie) -> Result<(), DaachorseError> {
439439
self.states[0].set_fail(0);
440-
self.visits.reserve(sparse_trie.nodes.len());
440+
self.visits.reserve(sparse_trie.states.len());
441441

442-
for &(c, st_child_idx) in &sparse_trie.nodes[0] {
442+
for &(c, st_child_idx) in &sparse_trie.states[0] {
443443
let da_child_idx = self.get_child_index(0, c).unwrap();
444444
self.states[da_child_idx].set_fail(0);
445445
self.visits.push(StatePair {
@@ -451,14 +451,14 @@ impl DoubleArrayAhoCorasickBuilder {
451451
let mut vi = 0;
452452
while vi < self.visits.len() {
453453
let StatePair {
454-
da_idx: da_node_idx,
455-
st_idx: st_node_idx,
454+
da_idx: da_state_idx,
455+
st_idx: st_state_idx,
456456
} = self.visits[vi];
457457
vi += 1;
458458

459-
for &(c, st_child_idx) in &sparse_trie.nodes[st_node_idx] {
460-
let da_child_idx = self.get_child_index(da_node_idx, c).unwrap();
461-
let mut fail_idx = self.states[da_node_idx].fail() as usize;
459+
for &(c, st_child_idx) in &sparse_trie.states[st_state_idx] {
460+
let da_child_idx = self.get_child_index(da_state_idx, c).unwrap();
461+
let mut fail_idx = self.states[da_state_idx].fail() as usize;
462462
let new_fail_idx = loop {
463463
if let Some(child_fail_idx) = self.get_child_index(fail_idx, c) {
464464
break child_fail_idx;
@@ -500,25 +500,25 @@ impl DoubleArrayAhoCorasickBuilder {
500500
};
501501

502502
for sp in self.visits.iter().rev() {
503-
let mut da_node_idx = sp.da_idx;
503+
let mut da_state_idx = sp.da_idx;
504504

505505
let Extra {
506506
pattern_id,
507507
processed,
508508
..
509-
} = self.extras[da_node_idx];
509+
} = self.extras[da_state_idx];
510510

511511
if pattern_id == PATTERN_ID_INVALID {
512512
continue;
513513
}
514514
if processed {
515-
debug_assert!(self.states[da_node_idx].output_pos().is_some());
515+
debug_assert!(self.states[da_state_idx].output_pos().is_some());
516516
continue;
517517
}
518-
debug_assert!(self.states[da_node_idx].output_pos().is_none());
518+
debug_assert!(self.states[da_state_idx].output_pos().is_none());
519519

520-
self.extras[da_node_idx].processed = true;
521-
self.states[da_node_idx].set_output_pos(self.outputs.len() as u32);
520+
self.extras[da_state_idx].processed = true;
521+
self.states[da_state_idx].set_output_pos(self.outputs.len() as u32);
522522
self.outputs.push(Output::new(
523523
pattern_id,
524524
self.pattern_lens[pattern_id as usize] as u32,
@@ -528,23 +528,23 @@ impl DoubleArrayAhoCorasickBuilder {
528528
error_checker(&self.outputs)?;
529529

530530
loop {
531-
da_node_idx = self.states[da_node_idx].fail() as usize;
532-
if da_node_idx == 0 {
531+
da_state_idx = self.states[da_state_idx].fail() as usize;
532+
if da_state_idx == 0 {
533533
break;
534534
}
535535

536536
let Extra {
537537
pattern_id,
538538
processed,
539539
..
540-
} = self.extras[da_node_idx];
540+
} = self.extras[da_state_idx];
541541

542542
if pattern_id == PATTERN_ID_INVALID {
543543
continue;
544544
}
545545

546546
if processed {
547-
let mut clone_pos = self.states[da_node_idx].output_pos().unwrap() as usize;
547+
let mut clone_pos = self.states[da_state_idx].output_pos().unwrap() as usize;
548548
debug_assert!(!self.outputs[clone_pos].is_begin());
549549
while !self.outputs[clone_pos].is_begin() {
550550
self.outputs.push(self.outputs[clone_pos]);
@@ -554,8 +554,8 @@ impl DoubleArrayAhoCorasickBuilder {
554554
break;
555555
}
556556

557-
self.extras[da_node_idx].processed = true;
558-
self.states[da_node_idx].set_output_pos(self.outputs.len() as u32);
557+
self.extras[da_state_idx].processed = true;
558+
self.states[da_state_idx].set_output_pos(self.outputs.len() as u32);
559559
self.outputs.push(Output::new(
560560
pattern_id,
561561
self.pattern_lens[pattern_id as usize] as u32,
@@ -574,24 +574,24 @@ impl DoubleArrayAhoCorasickBuilder {
574574

575575
fn set_dummy_outputs(&mut self) {
576576
for sp in self.visits.iter() {
577-
let da_node_idx = sp.da_idx;
577+
let da_state_idx = sp.da_idx;
578578

579579
let Extra {
580580
pattern_id,
581581
processed,
582582
..
583-
} = self.extras[da_node_idx];
583+
} = self.extras[da_state_idx];
584584

585585
if processed {
586-
debug_assert!(self.states[da_node_idx].output_pos().is_some());
586+
debug_assert!(self.states[da_state_idx].output_pos().is_some());
587587
continue;
588588
}
589-
debug_assert!(self.states[da_node_idx].output_pos().is_none());
589+
debug_assert!(self.states[da_state_idx].output_pos().is_none());
590590
debug_assert_eq!(pattern_id, PATTERN_ID_INVALID);
591591

592-
let fail_idx = self.states[da_node_idx].fail() as usize;
592+
let fail_idx = self.states[da_state_idx].fail() as usize;
593593
if let Some(output_pos) = self.states[fail_idx].output_pos() {
594-
self.states[da_node_idx].set_output_pos(output_pos);
594+
self.states[da_state_idx].set_output_pos(output_pos);
595595
}
596596
}
597597
}

src/lib.rs

+20 -18
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,7 @@ impl Output {
166166
pub struct Match {
167167
length: usize,
168168
end: usize,
169-
pattern: usize,
169+
pattern_id: usize,
170170
}
171171

172172
impl Match {
@@ -185,7 +185,7 @@ impl Match {
185185
/// Pattern ID.
186186
#[inline(always)]
187187
pub const fn pattern(&self) -> usize {
188-
self.pattern
188+
self.pattern_id
189189
}
190190
}
191191

@@ -211,13 +211,15 @@ where
211211
let haystack = self.haystack.as_ref();
212212
for (pos, &c) in haystack.iter().enumerate().skip(self.pos) {
213213
state_id = unsafe { self.pma.get_next_state_id(state_id, c) };
214-
if let Some(out_pos) = unsafe { self.pma.states.get_unchecked(state_id).output_pos() } {
215-
let out = unsafe { self.pma.outputs.get_unchecked(out_pos as usize) };
214+
if let Some(output_pos) =
215+
unsafe { self.pma.states.get_unchecked(state_id).output_pos() }
216+
{
217+
let out = unsafe { self.pma.outputs.get_unchecked(output_pos as usize) };
216218
self.pos = pos + 1;
217219
return Some(Match {
218220
length: out.pattern_len() as usize,
219221
end: self.pos,
220-
pattern: out.pattern_id() as usize,
222+
pattern_id: out.pattern_id() as usize,
221223
});
222224
}
223225
}
@@ -235,7 +237,7 @@ where
235237
haystack: P,
236238
state_id: usize,
237239
pos: usize,
238-
out_pos: usize,
240+
output_pos: usize,
239241
}
240242

241243
impl<'a, P> Iterator for FindOverlappingIterator<'a, P>
@@ -246,28 +248,28 @@ where
246248

247249
#[inline(always)]
248250
fn next(&mut self) -> Option<Self::Item> {
249-
let out = unsafe { self.pma.outputs.get_unchecked(self.out_pos) };
251+
let out = unsafe { self.pma.outputs.get_unchecked(self.output_pos) };
250252
if !out.is_begin() {
251-
self.out_pos += 1;
253+
self.output_pos += 1;
252254
return Some(Match {
253255
length: out.pattern_len() as usize,
254256
end: self.pos,
255-
pattern: out.pattern_id() as usize,
257+
pattern_id: out.pattern_id() as usize,
256258
});
257259
}
258260
let haystack = self.haystack.as_ref();
259261
for (pos, &c) in haystack.iter().enumerate().skip(self.pos) {
260262
self.state_id = unsafe { self.pma.get_next_state_id(self.state_id, c) };
261-
if let Some(out_pos) =
263+
if let Some(output_pos) =
262264
unsafe { self.pma.states.get_unchecked(self.state_id).output_pos() }
263265
{
264266
self.pos = pos + 1;
265-
self.out_pos = out_pos as usize + 1;
266-
let out = unsafe { self.pma.outputs.get_unchecked(out_pos as usize) };
267+
self.output_pos = output_pos as usize + 1;
268+
let out = unsafe { self.pma.outputs.get_unchecked(output_pos as usize) };
267269
return Some(Match {
268270
length: out.pattern_len() as usize,
269271
end: self.pos,
270-
pattern: out.pattern_id() as usize,
272+
pattern_id: out.pattern_id() as usize,
271273
});
272274
}
273275
}
@@ -298,15 +300,15 @@ where
298300
let haystack = self.haystack.as_ref();
299301
for (pos, &c) in haystack.iter().enumerate().skip(self.pos) {
300302
self.state_id = unsafe { self.pma.get_next_state_id(self.state_id, c) };
301-
if let Some(out_pos) =
303+
if let Some(output_pos) =
302304
unsafe { self.pma.states.get_unchecked(self.state_id).output_pos() }
303305
{
304306
self.pos = pos + 1;
305-
let out = unsafe { self.pma.outputs.get_unchecked(out_pos as usize) };
307+
let out = unsafe { self.pma.outputs.get_unchecked(output_pos as usize) };
306308
return Some(Match {
307309
length: out.pattern_len() as usize,
308310
end: self.pos,
309-
pattern: out.pattern_id() as usize,
311+
pattern_id: out.pattern_id() as usize,
310312
});
311313
}
312314
}
@@ -429,7 +431,7 @@ impl DoubleArrayAhoCorasick {
429431
haystack,
430432
state_id: 0,
431433
pos: 0,
432-
out_pos: 0,
434+
output_pos: 0,
433435
}
434436
}
435437

@@ -644,7 +646,7 @@ mod tests {
644646
];
645647
let check_expected = vec![0, 2, 1, 0, 0, 2, 2];
646648
// ^ ^ ^ ^ ^ ^ ^
647-
// node_id= 0 3 2 1 4 6 5
649+
// state_id= 0 3 2 1 4 6 5
648650
let fail_expected = vec![0, 0, 0, 0, 3, 1, 1];
649651

650652
let pma_base: Vec<_> = pma.states[0..7]

0 commit comments

Comments
 (0)