Skip to content

Commit 9d724e3

Browse files
committed
Rewrite pattern parser
1 parent c50587b commit 9d724e3

File tree

12 files changed

+679
-177
lines changed

12 files changed

+679
-177
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ name: Continuous Integration
22

33
on:
44
pull_request:
5+
branches:
6+
- main
57
push:
68
branches:
79
- main
@@ -39,3 +41,13 @@ jobs:
3941
with:
4042
command: test
4143
args: --no-default-features --features pcre2
44+
- uses: actions-rs/cargo@v1
45+
with:
46+
command: test
47+
args: --no-default-features --features onig
48+
- uses: actions-rs/cargo@v1
49+
with:
50+
command: test
51+
args: --no-default-features --features regex
52+
53+

CHANGELOG.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,33 @@ All user visible changes to this project will be documented in this file.
44
This project adheres to [Semantic Versioning](http://semver.org/), as described
55
for Rust libraries in [RFC #1105](https://github.com/rust-lang/rfcs/blob/master/text/1105-api-evolution.md)
66

7+
## 2.2.0 - 2025-07-04
8+
9+
* Rewrote the pattern parsing to avoid using regular expressions, making all
10+
regular expression engine features optional (though at least one is still
11+
required).
12+
* Ensure correctness in capture names, even when using duplicate names
13+
generated from pattern names. These were previously unspecified:
14+
* (breaking) Duplicate pattern names result in matches with incrementing names in the format:
15+
`NAME`, `NAME[1]`, `NAME[2]`, etc.
16+
* (breaking) Duplicate pattern names from aliases are now guaranteed to be
17+
"last one wins".
18+
* (breaking) `Matches::len()` was removed as it was previously reporting the
19+
pattern name count. Use `Matches::iter().count()` instead.
20+
* (breaking) `Matches::is_empty()` was removed. Use `Matches::iter().count() == 0` instead.
21+
22+
## 2.1.0 - 2025-05-29
23+
24+
* Add support for `pcre2` feature which is significantly faster than `onig`.
25+
* Add support for `regex` and `fancy_regex` features which allow for pure Rust
26+
operation.
27+
* Some default patterns were updated from upstream sources for better
28+
compatibility across engines.
29+
30+
## 2.0.2 - 2025-05-29
31+
32+
* Workaround for an issue with `onig` where certain patterns could panic.
33+
734
## 2.0.1 - Unreleased
835

936
* Updated `onig` to `6.4`.

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "grok"
3-
version = "2.1.0"
3+
version = "2.2.0"
44
authors = ["Matt Mastracci <[email protected]>", "Michael Nitschinger <[email protected]>"]
55
license = "Apache-2.0"
66
readme = "README.md"
@@ -20,14 +20,14 @@ rust-version = "1.56"
2020
[features]
2121
default = ["onig"]
2222

23-
onig = []
23+
onig = ["dep:onig"]
2424
regex = ["dep:regex"]
2525
fancy-regex = ["dep:fancy-regex"]
2626
pcre2 = ["dep:pcre2"]
2727

2828
[dependencies]
2929
# The default regex engine. Use default-features = false to disable it.
30-
onig = { version = "6.5", default-features = false }
30+
onig = { version = "6.5", optional = true, default-features = false }
3131

3232
# The Rust regex library. Does not support backtracking, so many patterns are unusable.
3333
regex = { version = "1", optional = true, default-features = false, features = ["std", "unicode", "perf", "perf-dfa-full"] }

src/fancy_regex.rs

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ use std::collections::{btree_map, BTreeMap, HashMap};
44

55
/// The `Pattern` represents a compiled regex, ready to be matched against arbitrary text.
66
#[derive(Debug)]
7-
pub struct FancyRegexPattern {
7+
pub(crate) struct FancyRegexPattern {
88
regex: Regex,
9-
names: BTreeMap<String, usize>,
9+
pub names: BTreeMap<String, usize>,
1010
}
1111

1212
impl FancyRegexPattern {
@@ -18,10 +18,7 @@ impl FancyRegexPattern {
1818
let mut names = BTreeMap::new();
1919
for (i, name) in r.capture_names().enumerate() {
2020
if let Some(name) = name {
21-
let name = match alias.iter().find(|&(_k, v)| *v == name) {
22-
Some(item) => item.0.clone(),
23-
None => String::from(name),
24-
};
21+
let name = alias.get(name).map_or(name, |s| s).to_string();
2522
names.insert(name, i);
2623
}
2724
}
@@ -51,9 +48,9 @@ impl FancyRegexPattern {
5148

5249
/// The `Matches` represents matched results from a `Pattern` against a provided text.
5350
#[derive(Debug)]
54-
pub struct FancyRegexMatches<'a> {
51+
pub(crate) struct FancyRegexMatches<'a> {
5552
captures: Captures<'a>,
56-
pattern: &'a FancyRegexPattern,
53+
pub pattern: &'a FancyRegexPattern,
5754
}
5855

5956
impl<'a> FancyRegexMatches<'a> {
@@ -66,19 +63,7 @@ impl<'a> FancyRegexMatches<'a> {
6663
.map(|m| m.as_str())
6764
}
6865

69-
/// Returns the number of matches.
70-
pub fn len(&self) -> usize {
71-
self.pattern.names.len()
72-
}
73-
74-
/// Returns true if there are no matches, false otherwise.
75-
pub fn is_empty(&self) -> bool {
76-
self.len() == 0
77-
}
78-
7966
/// Returns a tuple of key/value with all the matches found.
80-
///
81-
/// Note that if no match is found, the value is empty.
8267
pub fn iter(&'a self) -> FancyRegexMatchesIter<'a> {
8368
FancyRegexMatchesIter {
8469
captures: &self.captures,
@@ -97,7 +82,7 @@ impl<'a> IntoIterator for &'a FancyRegexMatches<'a> {
9782
}
9883

9984
/// An `Iterator` over all matches, accessible via `Matches`.
100-
pub struct FancyRegexMatchesIter<'a> {
85+
pub(crate) struct FancyRegexMatchesIter<'a> {
10186
captures: &'a Captures<'a>,
10287
names: btree_map::Iter<'a, String, usize>,
10388
}

0 commit comments

Comments
 (0)