Skip to content

Commit d60f017

Browse files
authored
Add support for other regex engines (#26)
This adds support for other regex engines (regex and fancy-regex), which helps users where the compilation requirements of onig may be onerous. Note that fancy-regex is quite complete, but performs 4x to 8x slower than onig in nearly all cases. This feature is only recommended for specific cases.
1 parent 4f4a8a8 commit d60f017

File tree

8 files changed

+475
-169
lines changed

8 files changed

+475
-169
lines changed

.github/workflows/ci.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,8 @@ jobs:
3030
toolchain: stable
3131
- uses: actions-rs/cargo@v1
3232
with:
33-
command: test
33+
command: test
34+
- uses: actions-rs/cargo@v1
35+
with:
36+
command: test
37+
args: --no-default-features --features fancy-regex

Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,20 @@ build = "build.rs"
1717
edition = "2021"
1818
rust-version = "1.56"
1919

20+
[features]
21+
default = ["onig"]
22+
23+
onig = []
24+
regex = ["dep:regex"]
25+
fancy-regex = ["dep:fancy-regex"]
26+
2027
[dependencies]
28+
# The default regex engine. Use default-features = false to disable it.
2129
onig = { version = "6.5", default-features = false }
30+
# The Rust regex library. Does not support backtracking, so many patterns are unusable.
31+
regex = { version = "1", optional = true, default-features = false, features = ["std", "unicode", "perf", "perf-dfa-full"] }
32+
# A more complete Rust regex library supporting backtracking.
33+
fancy-regex = { version = "0.14", optional = true, default-features = false, features = ["std", "unicode", "perf"] }
2234

2335
[build-dependencies]
2436
glob = "0.3"

README.md

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,35 @@ be passed freely around. For performance reasons the `Match` returned is bound t
4848
them close together or clone/copy out the containing results as needed.
4949

5050
## Further Information
51-
This library depends on [onig](https://crates.io/crates/onig) for its regex execution, which itself is a Rust binding for the powerful [Oniguruma](https://github.com/kkos/oniguruma) regex library. If in doubt why a specific regex doesn't work, this is the best place to look for more information what patterns are supported and how to use advanced features.
51+
52+
This library supports multiple regex engines through feature flags. By default,
53+
it uses [onig](https://crates.io/crates/onig), which is a Rust binding for the
54+
powerful [Oniguruma](https://github.com/kkos/oniguruma) regex library. You can
55+
also use the standard Rust regex engine or fancy-regex by enabling the
56+
respective features:
57+
58+
The default engine, and at this time the most performant one, is `onig`:
59+
60+
```toml
61+
[dependencies]
62+
grok = { version = "2.0", features = ["onig"] }
63+
```
64+
65+
The `fancy-regex` engine is a more complete Rust regex library supporting
66+
backtracking:
67+
68+
```toml
69+
[dependencies]
70+
grok = { version = "2.0", default-features = false, features = ["fancy-regex"] }
71+
```
72+
73+
The `regex` engine is supported, but it does not support backtracking, so many
74+
patterns are unusable. This is not recommended for most use cases:
75+
76+
```toml
77+
[dependencies]
78+
grok = { version = "2.0", default-features = false, features = ["regex"] }
79+
```
5280

5381
## License
5482
`grok` is distributed under the terms of the Apache License (Version 2.0).

patterns/firewalls.pattern

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,19 @@ CISCOFW713172 Group = %{GREEDYDATA:group}, IP = %{IP:src_ip}, Automatic NAT Dete
8383
CISCOFW733100 \[\s*%{DATA:drop_type}\s*\] drop %{DATA:drop_rate_id} exceeded. Current burst rate is %{INT:drop_rate_current_burst} per second, max configured rate is %{INT:drop_rate_max_burst}; Current average rate is %{INT:drop_rate_current_avg} per second, max configured rate is %{INT:drop_rate_max_avg}; Cumulative total count is %{INT:drop_total_count}
8484
#== End Cisco ASA ==
8585

86+
IPTABLES_TCP_FLAGS (CWR |ECE |URG |ACK |PSH |RST |SYN |FIN )*
87+
IPTABLES_TCP_PART (?:SEQ=%{INT:[iptables][tcp][seq]:int}\s+)?(?:ACK=%{INT:[iptables][tcp][ack]:int}\s+)?WINDOW=%{INT:[iptables][tcp][window]:int}\s+RES=0x%{BASE16NUM:[iptables][tcp_reserved_bits]}\s+%{IPTABLES_TCP_FLAGS:[iptables][tcp][flags]}
88+
89+
IPTABLES4_FRAG (?:(?<= )(?:CE|DF|MF))*
90+
IPTABLES4_PART SRC=%{IPV4:[source][ip]}\s+DST=%{IPV4:[destination][ip]}\s+LEN=(?:%{INT:[iptables][length]:int})?\s+TOS=(?:0|0x%{BASE16NUM:[iptables][tos]})?\s+PREC=(?:0x%{BASE16NUM:[iptables][precedence_bits]})?\s+TTL=(?:%{INT:[iptables][ttl]:int})?\s+ID=(?:%{INT:[iptables][id]})?\s+(?:%{IPTABLES4_FRAG:[iptables][fragment_flags]})?(?:\s+FRAG: %{INT:[iptables][fragment_offset]:int})?
91+
IPTABLES6_PART SRC=%{IPV6:[source][ip]}\s+DST=%{IPV6:[destination][ip]}\s+LEN=(?:%{INT:[iptables][length]:int})?\s+TC=(?:0|0x%{BASE16NUM:[iptables][tos]})?\s+HOPLIMIT=(?:%{INT:[iptables][ttl]:int})?\s+FLOWLBL=(?:%{INT:[iptables][flow_label]})?
92+
93+
# iptables log line: IN/OUT interfaces, optional MAC block, the IPv4 or IPv6 part, then protocol, ports and optional TCP details.
IPTABLES IN=(?:%{NOTSPACE:[observer][ingress][interface][name]})?\s+OUT=(?:%{NOTSPACE:[observer][egress][interface][name]})?\s+(?:MAC=(?:%{COMMONMAC:[destination][mac]})?(?::%{COMMONMAC:[source][mac]})?(?::[A-Fa-f0-9]{2}:[A-Fa-f0-9]{2})?\s+)?(?:%{IPTABLES4_PART}|%{IPTABLES6_PART}).*?PROTO=(?:%{WORD:[network][transport]})?\s+SPT=(?:%{INT:[source][port]:int})?\s+DPT=(?:%{INT:[destination][port]:int})?\s+(?:%{IPTABLES_TCP_PART})?
94+
8695
# Shorewall firewall logs
87-
SHOREWALL (%{SYSLOGTIMESTAMP:timestamp}) (%{WORD:nf_host}) kernel:.*Shorewall:(%{WORD:nf_action1})?:(%{WORD:nf_action2})?.*IN=(%{USERNAME:nf_in_interface})?.*(OUT= *MAC=(%{COMMONMAC:nf_dst_mac}):(%{COMMONMAC:nf_src_mac})?|OUT=%{USERNAME:nf_out_interface}).*SRC=(%{IPV4:nf_src_ip}).*DST=(%{IPV4:nf_dst_ip}).*LEN=(%{WORD:nf_len}).?*TOS=(%{WORD:nf_tos}).?*PREC=(%{WORD:nf_prec}).?*TTL=(%{INT:nf_ttl}).?*ID=(%{INT:nf_id}).?*PROTO=(%{WORD:nf_protocol}).?*SPT=(%{INT:nf_src_port}?.*DPT=%{INT:nf_dst_port}?.*)
96+
SHOREWALL (?:%{SYSLOGTIMESTAMP:timestamp}) (?:%{WORD:[observer][hostname]}) .*Shorewall:(?:%{WORD:[shorewall][firewall][type]})?:(?:%{WORD:[shorewall][firewall][action]})?.*%{IPTABLES}
8897
#== End Shorewall
8998
#== SuSE Firewall 2 ==
90-
SFW2 ((%{SYSLOGTIMESTAMP})|(%{TIMESTAMP_ISO8601}))\s*%{HOSTNAME}\s*kernel\S+\s*%{NAGIOSTIME}\s*SFW2\-INext\-%{NOTSPACE:nf_action}\s*IN=%{USERNAME:nf_in_interface}.*OUT=((\s*%{USERNAME:nf_out_interface})|(\s*))MAC=((%{COMMONMAC:nf_dst_mac}:%{COMMONMAC:nf_src_mac})|(\s*)).*SRC=%{IP:nf_src_ip}\s*DST=%{IP:nf_dst_ip}.*PROTO=%{WORD:nf_protocol}((.*SPT=%{INT:nf_src_port}.*DPT=%{INT:nf_dst_port}.*)|())
99+
SFW2_LOG_PREFIX SFW2\-INext\-%{NOTSPACE:[suse][firewall][action]}
100+
SFW2 ((?:%{SYSLOGTIMESTAMP:timestamp})|(?:%{TIMESTAMP_ISO8601:timestamp}))\s*%{HOSTNAME:[observer][hostname]}.*?%{SFW2_LOG_PREFIX:[suse][firewall][log_prefix]}\s*%{IPTABLES}
91101
#== End SuSE ==

src/fancy_regex.rs

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
use crate::Error;
2+
use fancy_regex::{Captures, Regex};
3+
use std::collections::{btree_map, BTreeMap, HashMap};
4+
5+
/// The `Pattern` represents a compiled regex, ready to be matched against arbitrary text.
6+
#[derive(Debug)]
7+
pub struct FancyRegexPattern {
8+
regex: Regex,
9+
names: BTreeMap<String, usize>,
10+
}
11+
12+
impl FancyRegexPattern {
13+
/// Creates a new pattern from a raw regex string and an alias map to identify the
14+
/// fields properly.
15+
pub(crate) fn new(regex: &str, alias: &HashMap<String, String>) -> Result<Self, Error> {
16+
match Regex::new(regex) {
17+
Ok(r) => Ok({
18+
let mut names = BTreeMap::new();
19+
for (i, name) in r.capture_names().enumerate() {
20+
if let Some(name) = name {
21+
let name = match alias.iter().find(|&(_k, v)| *v == name) {
22+
Some(item) => item.0.clone(),
23+
None => String::from(name),
24+
};
25+
names.insert(name, i);
26+
}
27+
}
28+
Self { regex: r, names }
29+
}),
30+
Err(e) => Err(Error::RegexCompilationFailed(format!(
31+
"Regex compilation failed: {e:?}:\n{regex}"
32+
))),
33+
}
34+
}
35+
36+
/// Matches this compiled `Pattern` against the text and returns the matches.
37+
pub fn match_against<'a>(&'a self, text: &'a str) -> Option<FancyRegexMatches<'a>> {
38+
self.regex.captures(text).ok().flatten().and_then(|caps| {
39+
Some(FancyRegexMatches {
40+
captures: caps,
41+
pattern: self,
42+
})
43+
})
44+
}
45+
46+
/// Returns all names this `Pattern` captures.
47+
pub fn capture_names(&self) -> impl Iterator<Item = &str> {
48+
self.names.keys().map(|s| s.as_str())
49+
}
50+
}
51+
52+
/// The `Matches` represent matched results from a `Pattern` against a provided text.
53+
#[derive(Debug)]
54+
pub struct FancyRegexMatches<'a> {
55+
captures: Captures<'a>,
56+
pattern: &'a FancyRegexPattern,
57+
}
58+
59+
impl<'a> FancyRegexMatches<'a> {
60+
/// Gets the value for the name (or) alias if found, `None` otherwise.
61+
pub fn get(&self, name_or_alias: &str) -> Option<&str> {
62+
self.pattern
63+
.names
64+
.get(name_or_alias)
65+
.and_then(|&idx| self.captures.get(idx))
66+
.map(|m| m.as_str())
67+
}
68+
69+
/// Returns the number of matches.
70+
pub fn len(&self) -> usize {
71+
self.pattern.names.len()
72+
}
73+
74+
/// Returns true if there are no matches, false otherwise.
75+
pub fn is_empty(&self) -> bool {
76+
self.len() == 0
77+
}
78+
79+
/// Returns a tuple of key/value with all the matches found.
80+
///
81+
/// Note that if no match is found, the value is empty.
82+
pub fn iter(&'a self) -> FancyRegexMatchesIter<'a> {
83+
FancyRegexMatchesIter {
84+
captures: &self.captures,
85+
names: self.pattern.names.iter(),
86+
}
87+
}
88+
}
89+
90+
impl<'a> IntoIterator for &'a FancyRegexMatches<'a> {
91+
type Item = (&'a str, &'a str);
92+
type IntoIter = FancyRegexMatchesIter<'a>;
93+
94+
fn into_iter(self) -> Self::IntoIter {
95+
self.iter()
96+
}
97+
}
98+
99+
/// An `Iterator` over all matches, accessible via `Matches`.
100+
pub struct FancyRegexMatchesIter<'a> {
101+
captures: &'a Captures<'a>,
102+
names: btree_map::Iter<'a, String, usize>,
103+
}
104+
105+
impl<'a> Iterator for FancyRegexMatchesIter<'a> {
106+
type Item = (&'a str, &'a str);
107+
108+
fn next(&mut self) -> Option<Self::Item> {
109+
for (k, &v) in self.names.by_ref() {
110+
if let Some(m) = self.captures.get(v) {
111+
return Some((k.as_str(), m.as_str()));
112+
}
113+
}
114+
None
115+
}
116+
}

0 commit comments

Comments
 (0)