Skip to content

Commit 685c875

Browse files
keith-hallBurntSushi
authored andcommitted
automata: fix onepass::DFA::try_search_slots panic when too many slots are given
The API specifically documents that the caller may provide *any* number of slots. But the onepass implementation was panicking when too many were provided. (I had only considered the case when too few were provided when I wrote one-pass implementation I think.) This already works for the PikeVM and the bounded backtracker. This commit adds tests covering those as well. Note that the bug report specifically cites a zero-repetition capture group as the instigator here. While that may have provided the circumstances for the bug to appear, the actual bug is more general than that. You don't need any capture groups at all to provoke it. You just need to provide more slots than there are in the compiled regex. Fixes #1327, Closes #1329
1 parent 5ea3eb1 commit 685c875

File tree

7 files changed

+138
-5
lines changed

7 files changed

+138
-5
lines changed

regex-automata/src/dfa/onepass.rs

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2093,9 +2093,20 @@ impl DFA {
20932093
// be bad. In theory, we could avoid all this slot clearing if we knew
20942094
// that every slot was always activated for every match. Then we would
20952095
// know they would always be overwritten when a match is found.
2096+
//
2097+
// NOTE: We have to be careful here to avoid setting a length that
2098+
// exceeds the number of slots in our cache. Otherwise copying into
2099+
// the cache later will fail. This can happen when the number of
2100+
// caller provided slots is bigger than the number of slots in the
2101+
// compiled regex. (It's a bit of a weird case, but for simplicity and
2102+
// flexibility reasons, it is an API guarantee that the caller can
2103+
// provided any number of slots that they want.)
20962104
let explicit_slots_len = core::cmp::min(
20972105
Slots::LIMIT,
2098-
slots.len().saturating_sub(self.explicit_slot_start),
2106+
core::cmp::min(
2107+
slots.len().saturating_sub(self.explicit_slot_start),
2108+
cache.explicit_slots.len(),
2109+
),
20992110
);
21002111
cache.setup_search(explicit_slots_len);
21012112
for slot in cache.explicit_slots() {
@@ -2216,10 +2227,28 @@ impl DFA {
22162227
// the path to the match state.
22172228
if self.explicit_slot_start < slots.len() {
22182229
// NOTE: The 'cache.explicit_slots()' slice is setup at the
2219-
// beginning of every search such that it is guaranteed to return a
2220-
// slice of length equivalent to 'slots[explicit_slot_start..]'.
2221-
slots[self.explicit_slot_start..]
2222-
.copy_from_slice(cache.explicit_slots());
2230+
// beginning of every search such that it is guaranteed
2231+
// to return a slice that is at most equal in length to
2232+
// 'slots[explicit_slot_start..]'. It may be smaller in
2233+
// cases where the caller provided more slots than there
2234+
// are in the compiled regex. In which case, we limit the
2235+
// length of `slots` to what we actually have.
2236+
let cache_slots = cache.explicit_slots();
2237+
slots[self.explicit_slot_start..][..cache_slots.len()]
2238+
.copy_from_slice(cache_slots);
2239+
2240+
// There is an edge case when a regex has capture groups with zero
2241+
// repetition (e.g., (abc){0}), the cache may have fewer explicit
2242+
// slots than what the caller provided.
2243+
// So we copy only the slots that exist in the cache.
2244+
// let cache_slots = cache.explicit_slots();
2245+
// let available = core::cmp::min(
2246+
// cache_slots.len(),
2247+
// slots.len().saturating_sub(self.explicit_slot_start),
2248+
// );
2249+
// slots[self.explicit_slot_start
2250+
// ..self.explicit_slot_start + available]
2251+
// .copy_from_slice(&cache_slots[..available]);
22232252
epsilons.slots().apply(at, &mut slots[self.explicit_slot_start..]);
22242253
}
22252254
*matched_pid = Some(pid);
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
mod regression;
12
#[cfg(not(miri))]
23
mod suite;
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Regression test for zero-repetition capture groups,
2+
// which caused a panic when the Vec passed into search_slots
3+
// contained space for the capture group which would never
4+
// have any results.
5+
//
6+
// See: https://github.com/rust-lang/regex/issues/1327
7+
#[test]
8+
fn zero_repetition_capture_group() {
9+
use regex_automata::{
10+
dfa::onepass::DFA, util::primitives::NonMaxUsize, Anchored, Input,
11+
};
12+
13+
let expr = DFA::new(r"(abc)(ABC){0}").unwrap();
14+
let s = "abcABC";
15+
let input = Input::new(s).span(0..s.len()).anchored(Anchored::Yes);
16+
17+
// Test with 4 slots, so the whole match plus the first capture group.
18+
let mut cache = expr.create_cache();
19+
let mut slots: Vec<Option<NonMaxUsize>> = vec![None; 4];
20+
let pid = expr.try_search_slots(&mut cache, &input, &mut slots).unwrap();
21+
assert_eq!(pid, Some(regex_automata::PatternID::must(0)));
22+
assert_eq!(slots[0], Some(NonMaxUsize::new(0).unwrap()));
23+
assert_eq!(slots[1], Some(NonMaxUsize::new(3).unwrap()));
24+
assert_eq!(slots[2], Some(NonMaxUsize::new(0).unwrap()));
25+
assert_eq!(slots[3], Some(NonMaxUsize::new(3).unwrap()));
26+
27+
// Test with larger slot array, which would fit the
28+
// zero-repetition capture group.
29+
slots.resize(6, None);
30+
let pid = expr.try_search_slots(&mut cache, &input, &mut slots).unwrap();
31+
assert_eq!(pid, Some(regex_automata::PatternID::must(0)));
32+
// First capture group should match
33+
assert_eq!(slots[2], Some(NonMaxUsize::new(0).unwrap()));
34+
assert_eq!(slots[3], Some(NonMaxUsize::new(3).unwrap()));
35+
// Second capture group with {0} should be None.
36+
assert_eq!(slots[4], None);
37+
assert_eq!(slots[5], None);
38+
}
39+
40+
// Another regression test for the same case as
41+
// `zero_repetition_capture_group`, but uses a simpler pattern. That
42+
// is, a zero-repetition capture group is a red herring. The actual bug
43+
// is simpler: it happens whenever too many slots are provided by the
44+
// caller.
45+
#[test]
46+
fn too_many_slots_normal_pattern() {
47+
use regex_automata::{
48+
dfa::onepass::DFA, util::primitives::NonMaxUsize, Anchored, Input,
49+
};
50+
51+
let expr = DFA::new(r"abc").unwrap();
52+
let s = "abc";
53+
let input = Input::new(s).span(0..s.len()).anchored(Anchored::Yes);
54+
55+
let mut cache = expr.create_cache();
56+
let mut slots: Vec<Option<NonMaxUsize>> = vec![None; 4];
57+
let pid = expr.try_search_slots(&mut cache, &input, &mut slots).unwrap();
58+
assert_eq!(pid, Some(regex_automata::PatternID::must(0)));
59+
assert_eq!(slots[0], Some(NonMaxUsize::new(0).unwrap()));
60+
assert_eq!(slots[1], Some(NonMaxUsize::new(3).unwrap()));
61+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
mod regression;
12
#[cfg(not(miri))]
23
mod suite;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Tests that we can call the backtracker with more slots
2+
// than is actually in the compiled regex.
3+
#[test]
4+
fn too_many_slots_normal_pattern() {
5+
use regex_automata::{
6+
nfa::thompson::backtrack::BoundedBacktracker,
7+
util::primitives::NonMaxUsize, Anchored, Input,
8+
};
9+
10+
let expr = BoundedBacktracker::new(r"abc").unwrap();
11+
let s = "abc";
12+
let input = Input::new(s).span(0..s.len()).anchored(Anchored::Yes);
13+
14+
let mut cache = expr.create_cache();
15+
let mut slots: Vec<Option<NonMaxUsize>> = vec![None; 4];
16+
let pid = expr.try_search_slots(&mut cache, &input, &mut slots).unwrap();
17+
assert_eq!(pid, Some(regex_automata::PatternID::must(0)));
18+
assert_eq!(slots[0], Some(NonMaxUsize::new(0).unwrap()));
19+
assert_eq!(slots[1], Some(NonMaxUsize::new(3).unwrap()));
20+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
mod regression;
12
#[cfg(not(miri))]
23
mod suite;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Tests that we can call the PikeVM with more slots
2+
// than is actually in the compiled regex.
3+
#[test]
4+
fn too_many_slots_normal_pattern() {
5+
use regex_automata::{
6+
nfa::thompson::pikevm::PikeVM, util::primitives::NonMaxUsize,
7+
Anchored, Input,
8+
};
9+
10+
let expr = PikeVM::new(r"abc").unwrap();
11+
let s = "abc";
12+
let input = Input::new(s).span(0..s.len()).anchored(Anchored::Yes);
13+
14+
let mut cache = expr.create_cache();
15+
let mut slots: Vec<Option<NonMaxUsize>> = vec![None; 4];
16+
let pid = expr.search_slots(&mut cache, &input, &mut slots);
17+
assert_eq!(pid, Some(regex_automata::PatternID::must(0)));
18+
assert_eq!(slots[0], Some(NonMaxUsize::new(0).unwrap()));
19+
assert_eq!(slots[1], Some(NonMaxUsize::new(3).unwrap()));
20+
}

0 commit comments

Comments
 (0)