Skip to content

Commit 0cf5a36

Browse files
committed
Add the "teddy" algorithm from aho-corasick
Per suggestion from @BurntSushi [here](tafia/quick-xml#664 (comment)). On my M1, it appears to be slower than, but competitive with, memchr up to memchr3, and then starts being the fastest from 5 to 16 needles.
1 parent df92a1c commit 0cf5a36

File tree

2 files changed

+21
-0
lines changed

2 files changed

+21
-0
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ benchmarks = []
2020
pattern = []
2121

2222
[dev-dependencies]
23+
aho-corasick = "1.1.0"
2324
proptest = "1.0.0"
2425
lazy_static = "1.0.0"
2526
region = "3.0.0"

benches/benchmarks.rs

+20
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ fn spaces(c: &mut Criterion) {
4040
group.bench_function("stdlib_iter_position", |b| {
4141
b.iter(|| haystack.bytes().position(|c| c == b' '));
4242
});
43+
group.bench_function("teddy", |b| {
44+
let searcher = aho_corasick::packed::Searcher::new([" "]).unwrap();
45+
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
46+
});
4347
group.bench_function("memchr", |b| {
4448
b.iter(|| memchr::memchr(b' ', haystack.as_bytes()));
4549
});
@@ -69,6 +73,10 @@ fn xml3(c: &mut Criterion) {
6973
.position(|c| c == b'<' || c == b'>' || c == b'&')
7074
});
7175
});
76+
group.bench_function("teddy", |b| {
77+
let searcher = aho_corasick::packed::Searcher::new(["<", ">", "&"]).unwrap();
78+
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
79+
});
7280
group.bench_function("memchr", |b| {
7381
b.iter(|| memchr::memchr3(b'<', b'>', b'&', haystack.as_bytes()));
7482
});
@@ -98,6 +106,10 @@ fn xml5(c: &mut Criterion) {
98106
.position(|c| c == b'<' || c == b'>' || c == b'&' || c == b'\'' || c == b'"')
99107
});
100108
});
109+
group.bench_function("teddy", |b| {
110+
let searcher = aho_corasick::packed::Searcher::new(["<", ">", "&", "'", "\""]).unwrap();
111+
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
112+
});
101113
group.bench_function("memchr", |b| {
102114
b.iter(|| {
103115
let bytes = haystack.as_bytes();
@@ -174,6 +186,10 @@ fn big_16(c: &mut Criterion) {
174186
})
175187
});
176188
});
189+
group.bench_function("teddy", |b| {
190+
let searcher = aho_corasick::packed::Searcher::new(b"ABCDEFGHIJKLMNOP".iter().map(|b| std::array::from_ref(b))).unwrap();
191+
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
192+
});
177193
group.bench_function("memchr", |b| {
178194
b.iter(|| {
179195
let bytes = haystack.as_bytes();
@@ -253,6 +269,10 @@ fn big_16(c: &mut Criterion) {
253269
})
254270
});
255271
});
272+
group.bench_function("teddy", |b| {
273+
let searcher = aho_corasick::packed::Searcher::new(b"ABCDEFGHIJKLMNOP".iter().map(|b| std::array::from_ref(b))).unwrap();
274+
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
275+
});
256276
group.bench_function("memchr", |b| {
257277
b.iter(|| {
258278
let bytes = haystack.as_bytes();

0 commit comments

Comments
 (0)