Skip to content

Commit 6a1bf47

Browse files
authored
Add support for PCRE2 (#27)
Enable the PCRE2 crate which seems to be significantly faster than onig (3x-8x faster)
1 parent d60f017 commit 6a1bf47

File tree

6 files changed

+167
-10
lines changed

6 files changed

+167
-10
lines changed

.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,7 @@ jobs:
3535
with:
3636
command: test
3737
args: --no-default-features --features fancy-regex
38+
- uses: actions-rs/cargo@v1
39+
with:
40+
command: test
41+
args: --no-default-features --features pcre2

Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "grok"
3-
version = "2.0.2"
3+
version = "2.1.0"
44
authors = ["Matt Mastracci <[email protected]>", "Michael Nitschinger <[email protected]>"]
55
license = "Apache-2.0"
66
readme = "README.md"
@@ -23,14 +23,20 @@ default = ["onig"]
2323
onig = []
2424
regex = ["dep:regex"]
2525
fancy-regex = ["dep:fancy-regex"]
26+
pcre2 = ["dep:pcre2"]
2627

2728
[dependencies]
2829
# The default regex engine. Use default-features = false to disable it.
2930
onig = { version = "6.5", default-features = false }
31+
3032
# The Rust regex library. Does not support backtracking, so many patterns are unusable.
3133
regex = { version = "1", optional = true, default-features = false, features = ["std", "unicode", "perf", "perf-dfa-full"] }
34+
3235
# A more complete Rust regex library supporting backtracking.
3336
fancy-regex = { version = "0.14", optional = true, default-features = false, features = ["std", "unicode", "perf"] }
3437

38+
# A PCRE2 binding.
39+
pcre2 = { version = "0.2.9", optional = true }
40+
3541
[build-dependencies]
3642
glob = "0.3"

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,21 @@ powerful [Oniguruma](https://github.com/kkos/oniguruma) regex library. You can
5555
also use the standard Rust regex engine or fancy-regex by enabling the
5656
respective features:
5757

58-
The default engine, and at this time the most performant one, is `onig`:
58+
The default engine is `onig` for compatibility with previous 2.x releases:
5959

6060
```toml
6161
[dependencies]
6262
grok = { version = "2.0", features = ["onig"] }
6363
```
6464

65+
The `pcre2` engine binds to the PCRE2 C library, which supports backtracking
66+
and JIT compilation, and is the fastest engine for most use cases:
67+
68+
```toml
69+
[dependencies]
70+
grok = { version = "2.1", default-features = false, features = ["pcre2"] }
71+
```
72+
6573
The `fancy-regex` engine is a more complete Rust regex library supporting
6674
backtracking:
6775

patterns/bacula.pattern

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ BACULA_LOG_VSS (Generate )?VSS (Writer)?
3939
BACULA_LOG_MAXSTART Fatal error: Job canceled because max start delay time exceeded.
4040
BACULA_LOG_DUPLICATE Fatal error: JobId %{INT:duplicate} already running. Duplicate job not allowed.
4141
BACULA_LOG_NOJOBSTAT Fatal error: No Job status returned from FD.
42-
BACULA_LOG_FATAL_CONN Fatal error: bsock.c:133 Unable to connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=(?<berror>%{GREEDYDATA})
43-
BACULA_LOG_NO_CONNECT Warning: bsock.c:127 Could not connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=(?<berror>%{GREEDYDATA})
42+
BACULA_LOG_FATAL_CONN Fatal error: bsock.c:133 Unable to connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=(%{GREEDYDATA})
43+
BACULA_LOG_NO_CONNECT Warning: bsock.c:127 Could not connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=(%{GREEDYDATA})
4444
BACULA_LOG_NO_AUTH Fatal error: Unable to authenticate with File daemon at %{HOSTNAME}. Possible causes:
4545
BACULA_LOG_NOSUIT No prior or suitable Full backup found in catalog. Doing FULL backup.
4646
BACULA_LOG_NOPRIOR No prior Full backup Job record found.

src/lib.rs

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,30 +16,48 @@ use std::fmt;
1616
mod fancy_regex;
1717
#[cfg(feature = "onig")]
1818
mod onig;
19+
#[cfg(feature = "pcre2")]
20+
mod pcre2;
1921
#[cfg(feature = "regex")]
2022
mod regex;
2123

22-
// If fancy-regex is enabled, we use it. Otherwise if onig is enabled, use that.
23-
// Only use regex if it is enabled and no other feature is enabled.
24+
// Enable features in the following preferred order. If multiple features are
25+
// enabled, the first one in the list is used.
2426

25-
#[cfg(feature = "fancy-regex")]
27+
// 0. pcre2
28+
// 1. fancy-regex
29+
// 2. onig
30+
// 3. regex
31+
32+
#[cfg(feature = "pcre2")]
33+
pub use pcre2::{
34+
Pcre2Matches as Matches, Pcre2MatchesIter as MatchesIter, Pcre2Pattern as Pattern,
35+
};
36+
37+
#[cfg(all(not(feature = "pcre2"), feature = "fancy-regex"))]
2638
pub use fancy_regex::{
2739
FancyRegexMatches as Matches, FancyRegexMatchesIter as MatchesIter,
2840
FancyRegexPattern as Pattern,
2941
};
3042

31-
#[cfg(all(feature = "onig", not(feature = "fancy-regex")))]
43+
#[cfg(all(not(feature = "pcre2"), not(feature = "fancy-regex"), feature = "onig"))]
3244
pub use onig::{OnigMatches as Matches, OnigMatchesIter as MatchesIter, OnigPattern as Pattern};
3345

34-
#[cfg(all(not(feature = "onig"), not(feature = "fancy-regex"), feature = "regex"))]
46+
#[cfg(all(
47+
not(feature = "pcre2"),
48+
not(feature = "fancy-regex"),
49+
not(feature = "onig"),
50+
feature = "regex"
51+
))]
3552
pub use regex::{
3653
RegexMatches as Matches, RegexMatchesIter as MatchesIter, RegexPattern as Pattern,
3754
};
3855

3956
#[cfg(all(
4057
not(feature = "onig"),
4158
not(feature = "fancy-regex"),
42-
not(feature = "regex")
59+
not(feature = "regex"),
60+
not(feature = "pcre2")
4361
))]
4462
compile_error!("No regex engine selected. Please enable one of the following features: fancy-regex, onig, regex");
4563

src/pcre2.rs

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
use crate::Error;
2+
use pcre2::bytes::{Captures, Regex, RegexBuilder};
3+
use std::collections::{btree_map, BTreeMap, HashMap};
4+
5+
/// The `Pattern` represents a compiled regex, ready to be matched against arbitrary text.
6+
#[derive(Debug)]
7+
pub struct Pcre2Pattern {
8+
regex: Regex,
9+
names: BTreeMap<String, usize>,
10+
}
11+
12+
impl Pcre2Pattern {
13+
/// Creates a new pattern from a raw regex string and an alias map to identify the
14+
/// fields properly.
15+
pub(crate) fn new(regex: &str, alias: &HashMap<String, String>) -> Result<Self, Error> {
16+
let mut builder = RegexBuilder::new();
17+
builder.jit_if_available(true);
18+
builder.utf(true);
19+
match builder.build(regex) {
20+
Ok(r) => Ok({
21+
let mut names = BTreeMap::new();
22+
for (i, name) in r.capture_names().iter().enumerate() {
23+
if let Some(name) = name {
24+
let name = match alias.iter().find(|&(_k, v)| v == name) {
25+
Some(item) => item.0.clone(),
26+
None => String::from(name),
27+
};
28+
names.insert(name, i);
29+
}
30+
}
31+
Self { regex: r, names }
32+
}),
33+
Err(e) => Err(Error::RegexCompilationFailed(format!(
34+
"Regex compilation failed: {e:?}:\n{regex}"
35+
))),
36+
}
37+
}
38+
39+
/// Matches this compiled `Pattern` against the text and returns the matches.
40+
pub fn match_against<'a>(&'a self, text: &'a str) -> Option<Pcre2Matches<'a>> {
41+
self.regex
42+
.captures(text.as_bytes())
43+
.ok()
44+
.flatten()
45+
.map(|caps| Pcre2Matches {
46+
captures: caps,
47+
pattern: self,
48+
})
49+
}
50+
51+
/// Returns all names this `Pattern` captures.
52+
pub fn capture_names(&self) -> impl Iterator<Item = &str> {
53+
self.names.keys().map(|s| s.as_str())
54+
}
55+
}
56+
57+
/// The `Matches` represent matched results from a `Pattern` against a provided text.
58+
#[derive(Debug)]
59+
pub struct Pcre2Matches<'a> {
60+
captures: Captures<'a>,
61+
pattern: &'a Pcre2Pattern,
62+
}
63+
64+
impl<'a> Pcre2Matches<'a> {
65+
/// Gets the value for the name (or) alias if found, `None` otherwise.
66+
pub fn get(&self, name_or_alias: &str) -> Option<&str> {
67+
self.pattern
68+
.names
69+
.get(name_or_alias)
70+
.and_then(|&idx| self.captures.get(idx))
71+
.map(|m| std::str::from_utf8(m.as_bytes()).unwrap())
72+
}
73+
74+
/// Returns the number of matches.
75+
pub fn len(&self) -> usize {
76+
self.pattern.names.len()
77+
}
78+
79+
/// Returns true if there are no matches, false otherwise.
80+
pub fn is_empty(&self) -> bool {
81+
self.len() == 0
82+
}
83+
84+
/// Returns a tuple of key/value with all the matches found.
85+
///
86+
/// Note that if no match is found, the value is empty.
87+
pub fn iter(&'a self) -> Pcre2MatchesIter<'a> {
88+
Pcre2MatchesIter {
89+
captures: &self.captures,
90+
names: self.pattern.names.iter(),
91+
}
92+
}
93+
}
94+
95+
impl<'a> IntoIterator for &'a Pcre2Matches<'a> {
96+
type Item = (&'a str, &'a str);
97+
type IntoIter = Pcre2MatchesIter<'a>;
98+
99+
fn into_iter(self) -> Self::IntoIter {
100+
self.iter()
101+
}
102+
}
103+
104+
/// An `Iterator` over all matches, accessible via `Matches`.
105+
pub struct Pcre2MatchesIter<'a> {
106+
captures: &'a Captures<'a>,
107+
names: btree_map::Iter<'a, String, usize>,
108+
}
109+
110+
impl<'a> Iterator for Pcre2MatchesIter<'a> {
111+
type Item = (&'a str, &'a str);
112+
113+
fn next(&mut self) -> Option<Self::Item> {
114+
for (k, &v) in self.names.by_ref() {
115+
if let Some(m) = self.captures.get(v) {
116+
return Some((k.as_str(), std::str::from_utf8(m.as_bytes()).unwrap()));
117+
}
118+
}
119+
None
120+
}
121+
}

0 commit comments

Comments
 (0)