From b041493fe7267d62b4d440c942dcdd15208aff0f Mon Sep 17 00:00:00 2001
From: boocmp <boocmp@yandex.ru>
Date: Wed, 8 Jan 2025 21:01:19 +0700
Subject: [PATCH 1/4] AbstractNetworkFilter moved to abstract_network.rs.

---
 src/filters/abstract_network.rs | 256 ++++++++++++++++++++++++++++++++
 src/filters/mod.rs              |   1 +
 src/filters/network.rs          | 252 +------------------------------
 3 files changed, 261 insertions(+), 248 deletions(-)
 create mode 100644 src/filters/abstract_network.rs
diff --git a/src/filters/abstract_network.rs b/src/filters/abstract_network.rs
new file mode 100644
index 00000000..bbc22edb
--- /dev/null
+++ b/src/filters/abstract_network.rs
@@ -0,0 +1,256 @@
+use memchr::memchr as find_char;
+
+use super::network::NetworkFilterError;
+
+use once_cell::sync::Lazy;
+use regex::Regex;
+
+/// For now, only support `$removeparam` with simple alphanumeric/dash/underscore patterns.
+static VALID_PARAM: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_\-]+$").unwrap());
+
+#[derive(Clone, Copy)]
+pub(crate) enum NetworkFilterLeftAnchor {
+    /// A `||` token, which represents a match to the start of a domain or subdomain segment.
+    DoublePipe,
+    /// A `|` token, which represents a match to the exact start of the URL.
+    SinglePipe,
+}
+
+#[derive(Clone, Copy)]
+pub(crate) enum NetworkFilterRightAnchor {
+    /// A `|` token, which represents a match to the exact end of the URL.
+    SinglePipe,
+}
+
+/// Pattern for a network filter, describing what URLs to match against.
+#[derive(Clone)]
+pub(crate) struct NetworkFilterPattern {
+    pub(crate) left_anchor: Option<NetworkFilterLeftAnchor>,
+    pub(crate) pattern: String,
+    pub(crate) right_anchor: Option<NetworkFilterRightAnchor>,
+}
+
+/// Any option that appears on the right side of a network filter as initiated by a `$` character.
+/// All `bool` arguments below are `true` if the option stands alone, or `false` if the option is
+/// negated using a prepended `~`.
+#[derive(Clone)]
+pub(crate) enum NetworkFilterOption {
+    Domain(Vec<(bool, String)>),
+    Badfilter,
+    Important,
+    MatchCase,
+    ThirdParty(bool),
+    FirstParty(bool),
+    Tag(String),
+    Redirect(String),
+    RedirectRule(String),
+    Csp(Option<String>),
+    Removeparam(String),
+    Generichide,
+    Document,
+    Image(bool),
+    Media(bool),
+    Object(bool),
+    Other(bool),
+    Ping(bool),
+    Script(bool),
+    Stylesheet(bool),
+    Subdocument(bool),
+    XmlHttpRequest(bool),
+    Websocket(bool),
+    Font(bool),
+}
+
+impl NetworkFilterOption {
+    pub fn is_content_type(&self) -> bool {
+        matches!(
+            self,
+            Self::Document
+                | Self::Image(..)
+                | Self::Media(..)
+                | Self::Object(..)
+                | Self::Other(..)
+                | Self::Ping(..)
+                | Self::Script(..)
+                | Self::Stylesheet(..)
+                | Self::Subdocument(..)
+                | Self::XmlHttpRequest(..)
+                | Self::Websocket(..)
+                | Self::Font(..)
+        )
+    }
+
+    pub fn is_redirection(&self) -> bool {
+        matches!(self, Self::Redirect(..) | Self::RedirectRule(..))
+    }
+}
+
+/// Abstract syntax representation of a network filter. This representation can fully specify the
+/// string representation of a filter as written, with the exception of aliased options like `1p`
+/// or `ghide`. This allows separation of concerns between parsing and interpretation.
+pub(crate) struct AbstractNetworkFilter {
+    pub(crate) exception: bool,
+    pub(crate) pattern: NetworkFilterPattern,
+    pub(crate) options: Option<Vec<NetworkFilterOption>>,
+}
+
+impl AbstractNetworkFilter {
+    pub(crate) fn parse(line: &str) -> Result<Self, NetworkFilterError> {
+        let mut filter_index_start: usize = 0;
+        let mut filter_index_end: usize = line.len();
+
+        let mut exception = false;
+        if line.starts_with("@@") {
+            filter_index_start += 2;
+            exception = true;
+        }
+
+        let maybe_options_index: Option<usize> = find_char(b'$', line.as_bytes());
+
+        let mut options = None;
+        if let Some(options_index) = maybe_options_index {
+            filter_index_end = options_index;
+
+            // slicing here is safe; the first byte after '$' will be a character boundary
+            let raw_options = &line[filter_index_end + 1..];
+
+            options = Some(parse_filter_options(raw_options)?);
+        }
+
+        let left_anchor = if line[filter_index_start..].starts_with("||") {
+            filter_index_start += 2;
+            Some(NetworkFilterLeftAnchor::DoublePipe)
+        } else if line[filter_index_start..].starts_with('|') {
+            filter_index_start += 1;
+            Some(NetworkFilterLeftAnchor::SinglePipe)
+        } else {
+            None
+        };
+
+        let right_anchor = if filter_index_end > 0
+            && filter_index_end > filter_index_start
+            && line[..filter_index_end].ends_with('|')
+        {
+            filter_index_end -= 1;
+            Some(NetworkFilterRightAnchor::SinglePipe)
+        } else {
+            None
+        };
+
+        let pattern = &line[filter_index_start..filter_index_end];
+
+        Ok(AbstractNetworkFilter {
+            exception,
+            pattern: NetworkFilterPattern {
+                left_anchor,
+                pattern: pattern.to_string(),
+                right_anchor,
+            },
+            options,
+        })
+    }
+}
+
+fn parse_filter_options(raw_options: &str) -> Result<Vec<NetworkFilterOption>, NetworkFilterError> {
+    let mut result = vec![];
+
+    for raw_option in raw_options.split(',') {
+        // Check for negation: ~option
+        let negation = raw_option.starts_with('~');
+        let maybe_negated_option = raw_option.trim_start_matches('~');
+
+        // Check for options: option=value1|value2
+        let mut option_and_values = maybe_negated_option.splitn(2, '=');
+        let (option, value) = (
+            option_and_values.next().unwrap(),
+            option_and_values.next().unwrap_or_default(),
+        );
+
+        result.push(match (option, negation) {
+            ("domain", _) | ("from", _) => {
+                let domains: Vec<(bool, String)> = value
+                    .split('|')
+                    .map(|domain| {
+                        if let Some(negated_domain) = domain.strip_prefix('~') {
+                            (false, negated_domain.to_string())
+                        } else {
+                            (true, domain.to_string())
+                        }
+                    })
+                    .filter(|(_, d)| !(d.starts_with('/') && d.ends_with('/')))
+                    .collect();
+                if domains.is_empty() {
+                    return Err(NetworkFilterError::NoSupportedDomains);
+                }
+                NetworkFilterOption::Domain(domains)
+            }
+            ("badfilter", true) => return Err(NetworkFilterError::NegatedBadFilter),
+            ("badfilter", false) => NetworkFilterOption::Badfilter,
+            ("important", true) => return Err(NetworkFilterError::NegatedImportant),
+            ("important", false) => NetworkFilterOption::Important,
+            ("match-case", true) => return Err(NetworkFilterError::NegatedOptionMatchCase),
+            ("match-case", false) => NetworkFilterOption::MatchCase,
+            ("third-party", negated) | ("3p", negated) => NetworkFilterOption::ThirdParty(!negated),
+            ("first-party", negated) | ("1p", negated) => NetworkFilterOption::FirstParty(!negated),
+            ("tag", true) => return Err(NetworkFilterError::NegatedTag),
+            ("tag", false) => NetworkFilterOption::Tag(String::from(value)),
+            ("redirect", true) => return Err(NetworkFilterError::NegatedRedirection),
+            ("redirect", false) => {
+                // Ignore this filter if no redirection resource is specified
+                if value.is_empty() {
+                    return Err(NetworkFilterError::EmptyRedirection);
+                }
+
+                NetworkFilterOption::Redirect(String::from(value))
+            }
+            ("redirect-rule", true) => return Err(NetworkFilterError::NegatedRedirection),
+            ("redirect-rule", false) => {
+                if value.is_empty() {
+                    return Err(NetworkFilterError::EmptyRedirection);
+                }
+
+                NetworkFilterOption::RedirectRule(String::from(value))
+            }
+            ("csp", _) => NetworkFilterOption::Csp(if !value.is_empty() {
+                Some(String::from(value))
+            } else {
+                None
+            }),
+            ("removeparam", true) => return Err(NetworkFilterError::NegatedRemoveparam),
+            ("removeparam", false) => {
+                if value.is_empty() {
+                    return Err(NetworkFilterError::EmptyRemoveparam);
+                }
+                if !VALID_PARAM.is_match(value) {
+                    return Err(NetworkFilterError::RemoveparamRegexUnsupported);
+                }
+                NetworkFilterOption::Removeparam(String::from(value))
+            }
+            ("generichide", true) | ("ghide", true) => {
+                return Err(NetworkFilterError::NegatedGenericHide)
+            }
+            ("generichide", false) | ("ghide", false) => NetworkFilterOption::Generichide,
+            ("document", true) | ("doc", true) => return Err(NetworkFilterError::NegatedDocument),
+            ("document", false) | ("doc", false) => NetworkFilterOption::Document,
+            ("image", negated) => NetworkFilterOption::Image(!negated),
+            ("media", negated) => NetworkFilterOption::Media(!negated),
+            ("object", negated) | ("object-subrequest", negated) => {
+                NetworkFilterOption::Object(!negated)
+            }
+            ("other", negated) => NetworkFilterOption::Other(!negated),
+            ("ping", negated) | ("beacon", negated) => NetworkFilterOption::Ping(!negated),
+            ("script", negated) => NetworkFilterOption::Script(!negated),
+            ("stylesheet", negated) | ("css", negated) => NetworkFilterOption::Stylesheet(!negated),
+            ("subdocument", negated) | ("frame", negated) => {
+                NetworkFilterOption::Subdocument(!negated)
+            }
+            ("xmlhttprequest", negated) | ("xhr", negated) => {
+                NetworkFilterOption::XmlHttpRequest(!negated)
+            }
+            ("websocket", negated) => NetworkFilterOption::Websocket(!negated),
+            ("font", negated) => NetworkFilterOption::Font(!negated),
+            (_, _) => return Err(NetworkFilterError::UnrecognisedOption),
+        });
+    }
+    Ok(result)
+}
diff --git a/src/filters/mod.rs b/src/filters/mod.rs
index 7b0f52ce..7015e7c2 100644
--- a/src/filters/mod.rs
+++ b/src/filters/mod.rs
@@ -1,4 +1,5 @@
 //! Contains representations and standalone behaviors of individual filter rules.
 
+mod abstract_network;
 pub mod cosmetic;
 pub mod network;
diff --git a/src/filters/network.rs b/src/filters/network.rs
index 02e2472c..188b91b5 100644
--- a/src/filters/network.rs
+++ b/src/filters/network.rs
@@ -1,7 +1,7 @@
 //! Filters that take effect at the network request level, including blocking and response
 //! modification.
 
-use memchr::{memchr as find_char, memmem, memrchr as find_char_reverse};
+use memchr::{memchr as find_char, memmem};
 use once_cell::sync::Lazy;
 use regex::{
     bytes::Regex as BytesRegex, bytes::RegexBuilder as BytesRegexBuilder,
@@ -12,6 +12,9 @@ use thiserror::Error;
 
 use std::fmt;
 
+use crate::filters::abstract_network::{
+    AbstractNetworkFilter, NetworkFilterLeftAnchor, NetworkFilterOption, NetworkFilterRightAnchor,
+};
 use crate::lists::ParseOptions;
 use crate::regex_manager::RegexManager;
 use crate::request;
@@ -232,253 +235,6 @@ impl FilterPart {
     }
 }
 
-#[derive(Clone, Copy)]
-enum NetworkFilterLeftAnchor {
-    /// A `||` token, which represents a match to the start of a domain or subdomain segment.
-    DoublePipe,
-    /// A `|` token, which represents a match to the exact start of the URL.
-    SinglePipe,
-}
-
-#[derive(Clone, Copy)]
-enum NetworkFilterRightAnchor {
-    /// A `|` token, which represents a match to the exact end of the URL.
-    SinglePipe,
-}
-
-/// Pattern for a network filter, describing what URLs to match against.
-#[derive(Clone)]
-struct NetworkFilterPattern {
-    left_anchor: Option<NetworkFilterLeftAnchor>,
-    pattern: String,
-    right_anchor: Option<NetworkFilterRightAnchor>,
-}
-
-/// Any option that appears on the right side of a network filter as initiated by a `$` character.
-/// All `bool` arguments below are `true` if the option stands alone, or `false` if the option is
-/// negated using a prepended `~`.
-#[derive(Clone)]
-enum NetworkFilterOption {
-    Domain(Vec<(bool, String)>),
-    Badfilter,
-    Important,
-    MatchCase,
-    ThirdParty(bool),
-    FirstParty(bool),
-    Tag(String),
-    Redirect(String),
-    RedirectRule(String),
-    Csp(Option<String>),
-    Removeparam(String),
-    Generichide,
-    Document,
-    Image(bool),
-    Media(bool),
-    Object(bool),
-    Other(bool),
-    Ping(bool),
-    Script(bool),
-    Stylesheet(bool),
-    Subdocument(bool),
-    XmlHttpRequest(bool),
-    Websocket(bool),
-    Font(bool),
-}
-
-impl NetworkFilterOption {
-    pub fn is_content_type(&self) -> bool {
-        matches!(
-            self,
-            Self::Document
-                | Self::Image(..)
-                | Self::Media(..)
-                | Self::Object(..)
-                | Self::Other(..)
-                | Self::Ping(..)
-                | Self::Script(..)
-                | Self::Stylesheet(..)
-                | Self::Subdocument(..)
-                | Self::XmlHttpRequest(..)
-                | Self::Websocket(..)
-                | Self::Font(..)
-        )
-    }
-
-    pub fn is_redirection(&self) -> bool {
-        matches!(self, Self::Redirect(..) | Self::RedirectRule(..))
-    }
-}
-
-/// Abstract syntax representation of a network filter. This representation can fully specify the
-/// string representation of a filter as written, with the exception of aliased options like `1p`
-/// or `ghide`. This allows separation of concerns between parsing and interpretation.
-struct AbstractNetworkFilter {
-    exception: bool,
-    pattern: NetworkFilterPattern,
-    options: Option<Vec<NetworkFilterOption>>,
-}
-
-impl AbstractNetworkFilter {
-    fn parse(line: &str) -> Result<Self, NetworkFilterError> {
-        let mut filter_index_start: usize = 0;
-        let mut filter_index_end: usize = line.len();
-
-        let mut exception = false;
-        if line.starts_with("@@") {
-            filter_index_start += 2;
-            exception = true;
-        }
-
-        let maybe_options_index: Option<usize> = find_char_reverse(b'$', line.as_bytes());
-
-        let mut options = None;
-        if let Some(options_index) = maybe_options_index {
-            filter_index_end = options_index;
-
-            // slicing here is safe; the first byte after '$' will be a character boundary
-            let raw_options = &line[filter_index_end + 1..];
-
-            options = Some(parse_filter_options(raw_options)?);
-        }
-
-        let left_anchor = if line[filter_index_start..].starts_with("||") {
-            filter_index_start += 2;
-            Some(NetworkFilterLeftAnchor::DoublePipe)
-        } else if line[filter_index_start..].starts_with('|') {
-            filter_index_start += 1;
-            Some(NetworkFilterLeftAnchor::SinglePipe)
-        } else {
-            None
-        };
-
-        let right_anchor = if filter_index_end > 0
-            && filter_index_end > filter_index_start
-            && line[..filter_index_end].ends_with('|')
-        {
-            filter_index_end -= 1;
-            Some(NetworkFilterRightAnchor::SinglePipe)
-        } else {
-            None
-        };
-
-        let pattern = &line[filter_index_start..filter_index_end];
-
-        Ok(AbstractNetworkFilter {
-            exception,
-            pattern: NetworkFilterPattern {
-                left_anchor,
-                pattern: pattern.to_string(),
-                right_anchor,
-            },
-            options,
-        })
-    }
-}
-
-fn parse_filter_options(raw_options: &str) -> Result<Vec<NetworkFilterOption>, NetworkFilterError> {
-    let mut result = vec![];
-
-    for raw_option in raw_options.split(',') {
-        // Check for negation: ~option
-        let negation = raw_option.starts_with('~');
-        let maybe_negated_option = raw_option.trim_start_matches('~');
-
-        // Check for options: option=value1|value2
-        let mut option_and_values = maybe_negated_option.splitn(2, '=');
-        let (option, value) = (
-            option_and_values.next().unwrap(),
-            option_and_values.next().unwrap_or_default(),
-        );
-
-        result.push(match (option, negation) {
-            ("domain", _) | ("from", _) => {
-                let domains: Vec<(bool, String)> = value
-                    .split('|')
-                    .map(|domain| {
-                        if let Some(negated_domain) = domain.strip_prefix('~') {
-                            (false, negated_domain.to_string())
-                        } else {
-                            (true, domain.to_string())
-                        }
-                    })
-                    .filter(|(_, d)| !(d.starts_with('/') && d.ends_with('/')))
-                    .collect();
-                if domains.is_empty() {
-                    return Err(NetworkFilterError::NoSupportedDomains);
-                }
-                NetworkFilterOption::Domain(domains)
-            }
-            ("badfilter", true) => return Err(NetworkFilterError::NegatedBadFilter),
-            ("badfilter", false) => NetworkFilterOption::Badfilter,
-            ("important", true) => return Err(NetworkFilterError::NegatedImportant),
-            ("important", false) => NetworkFilterOption::Important,
-            ("match-case", true) => return Err(NetworkFilterError::NegatedOptionMatchCase),
-            ("match-case", false) => NetworkFilterOption::MatchCase,
-            ("third-party", negated) | ("3p", negated) => NetworkFilterOption::ThirdParty(!negated),
-            ("first-party", negated) | ("1p", negated) => NetworkFilterOption::FirstParty(!negated),
-            ("tag", true) => return Err(NetworkFilterError::NegatedTag),
-            ("tag", false) => NetworkFilterOption::Tag(String::from(value)),
-            ("redirect", true) => return Err(NetworkFilterError::NegatedRedirection),
-            ("redirect", false) => {
-                // Ignore this filter if no redirection resource is specified
-                if value.is_empty() {
-                    return Err(NetworkFilterError::EmptyRedirection);
-                }
-
-                NetworkFilterOption::Redirect(String::from(value))
-            }
-            ("redirect-rule", true) => return Err(NetworkFilterError::NegatedRedirection),
-            ("redirect-rule", false) => {
-                if value.is_empty() {
-                    return Err(NetworkFilterError::EmptyRedirection);
-                }
-
-                NetworkFilterOption::RedirectRule(String::from(value))
-            }
-            ("csp", _) => NetworkFilterOption::Csp(if !value.is_empty() {
-                Some(String::from(value))
-            } else {
-                None
-            }),
-            ("removeparam", true) => return Err(NetworkFilterError::NegatedRemoveparam),
-            ("removeparam", false) => {
-                if value.is_empty() {
-                    return Err(NetworkFilterError::EmptyRemoveparam);
-                }
-                if !VALID_PARAM.is_match(value) {
-                    return Err(NetworkFilterError::RemoveparamRegexUnsupported);
-                }
-                NetworkFilterOption::Removeparam(String::from(value))
-            }
-            ("generichide", true) | ("ghide", true) => {
-                return Err(NetworkFilterError::NegatedGenericHide)
-            }
-            ("generichide", false) | ("ghide", false) => NetworkFilterOption::Generichide,
-            ("document", true) | ("doc", true) => return Err(NetworkFilterError::NegatedDocument),
-            ("document", false) | ("doc", false) => NetworkFilterOption::Document,
-            ("image", negated) => NetworkFilterOption::Image(!negated),
-            ("media", negated) => NetworkFilterOption::Media(!negated),
-            ("object", negated) | ("object-subrequest", negated) => {
-                NetworkFilterOption::Object(!negated)
-            }
-            ("other", negated) => NetworkFilterOption::Other(!negated),
-            ("ping", negated) | ("beacon", negated) => NetworkFilterOption::Ping(!negated),
-            ("script", negated) => NetworkFilterOption::Script(!negated),
-            ("stylesheet", negated) | ("css", negated) => NetworkFilterOption::Stylesheet(!negated),
-            ("subdocument", negated) | ("frame", negated) => {
-                NetworkFilterOption::Subdocument(!negated)
-            }
-            ("xmlhttprequest", negated) | ("xhr", negated) => {
-                NetworkFilterOption::XmlHttpRequest(!negated)
-            }
-            ("websocket", negated) => NetworkFilterOption::Websocket(!negated),
-            ("font", negated) => NetworkFilterOption::Font(!negated),
-            (_, _) => return Err(NetworkFilterError::UnrecognisedOption),
-        });
-    }
-    Ok(result)
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct NetworkFilter {
     pub mask: NetworkFilterMask,

From 77a55459dcc5e78f94800b98ed19616fcef5baca Mon Sep 17 00:00:00 2001
From: boocmp <boocmp@yandex.ru>
Date: Thu, 9 Jan 2025 14:47:19 +0700
Subject: [PATCH 2/4] The regex stuff moved from network.rs to regex_manager.rs

---
 src/filters/network.rs | 159 ++++++++++++-----------------------------
 src/regex_manager.rs   | 120 ++++++++++++++++++++++++++++++-
 2 files changed, 165 insertions(+), 114 deletions(-)

diff --git a/src/filters/network.rs b/src/filters/network.rs
index 188b91b5..afb5622e 100644
--- a/src/filters/network.rs
+++ b/src/filters/network.rs
@@ -3,10 +3,7 @@
 
 use memchr::{memchr as find_char, memmem};
 use once_cell::sync::Lazy;
-use regex::{
-    bytes::Regex as BytesRegex, bytes::RegexBuilder as BytesRegexBuilder,
-    bytes::RegexSet as BytesRegexSet, bytes::RegexSetBuilder as BytesRegexSetBuilder, Regex,
-};
+use regex::Regex;
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
 
@@ -184,47 +181,56 @@ impl From<&request::RequestType> for NetworkFilterMask {
     }
 }
 
-#[derive(Debug, Clone)]
-pub enum CompiledRegex {
-    Compiled(BytesRegex),
-    CompiledSet(BytesRegexSet),
-    MatchAll,
-    RegexParsingError(regex::Error),
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum FilterPart {
+    Empty,
+    Simple(String),
+    AnyOf(Vec<String>),
 }
 
-impl CompiledRegex {
-    pub fn is_match(&self, pattern: &str) -> bool {
-        match &self {
-            CompiledRegex::MatchAll => true, // simple case for matching everything, e.g. for empty filter
-            CompiledRegex::RegexParsingError(_e) => false, // no match if regex didn't even compile
-            CompiledRegex::Compiled(r) => r.is_match(pattern.as_bytes()),
-            CompiledRegex::CompiledSet(r) => {
-                // let matches: Vec<_> = r.matches(pattern).into_iter().collect();
-                // println!("Matching {} against RegexSet: {:?}", pattern, matches);
-                r.is_match(pattern.as_bytes())
+pub struct FilterPartIterator<'a> {
+    filter_part: &'a FilterPart,
+    index: usize,
+}
+
+impl<'a> Iterator for FilterPartIterator<'a> {
+    type Item = &'a str;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.filter_part {
+            FilterPart::Empty => None,
+            FilterPart::Simple(s) => {
+                if self.index == 0 {
+                    self.index += 1;
+                    Some(s.as_str())
+                } else {
+                    None
+                }
+            }
+            FilterPart::AnyOf(vec) => {
+                if self.index < vec.len() {
+                    let result = Some(vec[self.index].as_str());
+                    self.index += 1;
+                    result
+                } else {
+                    None
+                }
             }
         }
     }
 }
 
-impl fmt::Display for CompiledRegex {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match &self {
-            CompiledRegex::MatchAll => write!(f, ".*"), // simple case for matching everything, e.g. for empty filter
-            CompiledRegex::RegexParsingError(_e) => write!(f, "ERROR"), // no match if regex didn't even compile
-            CompiledRegex::Compiled(r) => write!(f, "{}", r.as_str()),
-            CompiledRegex::CompiledSet(r) => write!(f, "{}", r.patterns().join(" | ")),
+// Implement ExactSizeIterator for FilterPartIterator
+impl<'a> ExactSizeIterator for FilterPartIterator<'a> {
+    fn len(&self) -> usize {
+        match self.filter_part {
+            FilterPart::Empty => 0,
+            FilterPart::Simple(_) => 1,
+            FilterPart::AnyOf(vec) => vec.len(),
         }
     }
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum FilterPart {
-    Empty,
-    Simple(String),
-    AnyOf(Vec<String>),
-}
-
 impl FilterPart {
     pub fn string_view(&self) -> Option<String> {
         match &self {
@@ -233,6 +239,13 @@ impl FilterPart {
             FilterPart::AnyOf(s) => Some(s.join("|")),
         }
     }
+
+    pub fn iter(&self) -> FilterPartIterator {
+        FilterPartIterator {
+            filter_part: self,
+            index: 0,
+        }
+    }
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -991,84 +1004,6 @@ fn compute_filter_id(
     hash
 }
 
-/// Compiles a filter pattern to a regex. This is only performed *lazily* for
-/// filters containing at least a * or ^ symbol. Because Regexes are expansive,
-/// we try to convert some patterns to plain filters.
-#[allow(clippy::trivial_regex)]
-pub(crate) fn compile_regex(
-    filter: &FilterPart,
-    is_right_anchor: bool,
-    is_left_anchor: bool,
-    is_complete_regex: bool,
-) -> CompiledRegex {
-    // Escape special regex characters: |.$+?{}()[]\
-    static SPECIAL_RE: Lazy<Regex> =
-        Lazy::new(|| Regex::new(r"([\|\.\$\+\?\{\}\(\)\[\]])").unwrap());
-    // * can match anything
-    static WILDCARD_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\*").unwrap());
-    // ^ can match any separator or the end of the pattern
-    static ANCHOR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\^(.)").unwrap());
-    // ^ can match any separator or the end of the pattern
-    static ANCHOR_RE_EOL: Lazy<Regex> = Lazy::new(|| Regex::new(r"\^$").unwrap());
-
-    let filters: Vec<String> = match filter {
-        FilterPart::Empty => vec![],
-        FilterPart::Simple(s) => vec![s.clone()],
-        FilterPart::AnyOf(f) => f.clone(),
-    };
-
-    let mut escaped_patterns = Vec::with_capacity(filters.len());
-    for filter_str in filters {
-        // If any filter is empty, the entire set matches anything
-        if filter_str.is_empty() {
-            return CompiledRegex::MatchAll;
-        }
-        if is_complete_regex {
-            // unescape unrecognised escaping sequences, otherwise a normal regex
-            let unescaped = filter_str[1..filter_str.len() - 1]
-                .replace("\\/", "/")
-                .replace("\\:", ":");
-
-            escaped_patterns.push(unescaped);
-        } else {
-            let repl = SPECIAL_RE.replace_all(&filter_str, "\\$1");
-            let repl = WILDCARD_RE.replace_all(&repl, ".*");
-            // in adblock rules, '^' is a separator.
-            // The separator character is anything but a letter, a digit, or one of the following: _ - . %
-            let repl = ANCHOR_RE.replace_all(&repl, "(?:[^\\w\\d\\._%-])$1");
-            let repl = ANCHOR_RE_EOL.replace_all(&repl, "(?:[^\\w\\d\\._%-]|$)");
-
-            // Should match start or end of url
-            let left_anchor = if is_left_anchor { "^" } else { "" };
-            let right_anchor = if is_right_anchor { "$" } else { "" };
-            let filter = format!("{}{}{}", left_anchor, repl, right_anchor);
-
-            escaped_patterns.push(filter);
-        }
-    }
-
-    if escaped_patterns.is_empty() {
-        CompiledRegex::MatchAll
-    } else if escaped_patterns.len() == 1 {
-        let pattern = &escaped_patterns[0];
-        match BytesRegexBuilder::new(pattern).unicode(false).build() {
-            Ok(compiled) => CompiledRegex::Compiled(compiled),
-            Err(e) => {
-                // println!("Regex parsing failed ({:?})", e);
-                CompiledRegex::RegexParsingError(e)
-            }
-        }
-    } else {
-        match BytesRegexSetBuilder::new(escaped_patterns)
-            .unicode(false)
-            .build()
-        {
-            Ok(compiled) => CompiledRegex::CompiledSet(compiled),
-            Err(e) => CompiledRegex::RegexParsingError(e),
-        }
-    }
-}
-
 /// Check if the sub-string contained between the indices start and end is a
 /// regex filter (it contains a '*' or '^' char). Here we are limited by the
 /// capability of javascript to check the presence of a pattern between two
diff --git a/src/regex_manager.rs b/src/regex_manager.rs
index 1623f60f..573cf647 100644
--- a/src/regex_manager.rs
+++ b/src/regex_manager.rs
@@ -2,9 +2,15 @@
 //! the [`crate::Engine`], infrequently used regexes can be discarded. The [`RegexManager`] is
 //! responsible for managing the storage of regexes used by filters.
 
-use crate::filters::network::{compile_regex, CompiledRegex, NetworkFilter};
+use crate::filters::network::NetworkFilter;
+
+use regex::{
+    bytes::Regex as BytesRegex, bytes::RegexBuilder as BytesRegexBuilder,
+    bytes::RegexSet as BytesRegexSet, bytes::RegexSetBuilder as BytesRegexSetBuilder, Regex,
+};
 
 use std::collections::HashMap;
+use std::fmt;
 use std::time::Duration;
 
 #[cfg(test)]
@@ -58,6 +64,40 @@ pub struct RegexDebugEntry {
     pub usage_count: usize,
 }
 
+#[derive(Debug, Clone)]
+pub enum CompiledRegex {
+    Compiled(BytesRegex),
+    CompiledSet(BytesRegexSet),
+    MatchAll,
+    RegexParsingError(regex::Error),
+}
+
+impl CompiledRegex {
+    pub fn is_match(&self, pattern: &str) -> bool {
+        match &self {
+            CompiledRegex::MatchAll => true, // simple case for matching everything, e.g. for empty filter
+            CompiledRegex::RegexParsingError(_e) => false, // no match if regex didn't even compile
+            CompiledRegex::Compiled(r) => r.is_match(pattern.as_bytes()),
+            CompiledRegex::CompiledSet(r) => {
+                // let matches: Vec<_> = r.matches(pattern).into_iter().collect();
+                // println!("Matching {} against RegexSet: {:?}", pattern, matches);
+                r.is_match(pattern.as_bytes())
+            }
+        }
+    }
+}
+
+impl fmt::Display for CompiledRegex {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match &self {
+            CompiledRegex::MatchAll => write!(f, ".*"), // simple case for matching everything, e.g. for empty filter
+            CompiledRegex::RegexParsingError(_e) => write!(f, "ERROR"), // no match if regex didn't even compile
+            CompiledRegex::Compiled(r) => write!(f, "{}", r.as_str()),
+            CompiledRegex::CompiledSet(r) => write!(f, "{}", r.patterns().join(" | ")),
+        }
+    }
+}
+
 struct RegexEntry {
     regex: Option<CompiledRegex>,
     last_used: Instant,
@@ -110,13 +150,89 @@ impl Default for RegexManager {
 
 fn make_regexp(filter: &NetworkFilter) -> CompiledRegex {
     compile_regex(
-        &filter.filter,
+        filter.filter.iter(),
         filter.is_right_anchor(),
         filter.is_left_anchor(),
         filter.is_complete_regex(),
     )
 }
 
+/// Compiles a filter pattern to a regex. This is only performed *lazily* for
+/// filters containing at least a * or ^ symbol. Because Regexes are expansive,
+/// we try to convert some patterns to plain filters.
+#[allow(clippy::trivial_regex)]
+pub(crate) fn compile_regex<'a, I>(
+    filters: I,
+    is_right_anchor: bool,
+    is_left_anchor: bool,
+    is_complete_regex: bool,
+) -> CompiledRegex
+where
+    I: Iterator<Item = &'a str> + ExactSizeIterator,
+{
+    use once_cell::sync::Lazy;
+    // Escape special regex characters: |.$+?{}()[]\
+    static SPECIAL_RE: Lazy<Regex> =
+        Lazy::new(|| Regex::new(r"([\|\.\$\+\?\{\}\(\)\[\]])").unwrap());
+    // * can match anything
+    static WILDCARD_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\*").unwrap());
+    // ^ can match any separator or the end of the pattern
+    static ANCHOR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\^(.)").unwrap());
+    // ^ can match any separator or the end of the pattern
+    static ANCHOR_RE_EOL: Lazy<Regex> = Lazy::new(|| Regex::new(r"\^$").unwrap());
+
+    let mut escaped_patterns = Vec::with_capacity(filters.len());
+    for filter_str in filters {
+        // If any filter is empty, the entire set matches anything
+        if filter_str.is_empty() {
+            return CompiledRegex::MatchAll;
+        }
+        if is_complete_regex {
+            // unescape unrecognised escaping sequences, otherwise a normal regex
+            let unescaped = filter_str[1..filter_str.len() - 1]
+                .replace("\\/", "/")
+                .replace("\\:", ":");
+
+            escaped_patterns.push(unescaped);
+        } else {
+            let repl = SPECIAL_RE.replace_all(&filter_str, "\\$1");
+            let repl = WILDCARD_RE.replace_all(&repl, ".*");
+            // in adblock rules, '^' is a separator.
+            // The separator character is anything but a letter, a digit, or one of the following: _ - . %
+            let repl = ANCHOR_RE.replace_all(&repl, "(?:[^\\w\\d\\._%-])$1");
+            let repl = ANCHOR_RE_EOL.replace_all(&repl, "(?:[^\\w\\d\\._%-]|$)");
+
+            // Should match start or end of url
+            let left_anchor = if is_left_anchor { "^" } else { "" };
+            let right_anchor = if is_right_anchor { "$" } else { "" };
+            let filter = format!("{}{}{}", left_anchor, repl, right_anchor);
+
+            escaped_patterns.push(filter);
+        }
+    }
+
+    if escaped_patterns.is_empty() {
+        CompiledRegex::MatchAll
+    } else if escaped_patterns.len() == 1 {
+        let pattern = &escaped_patterns[0];
+        match BytesRegexBuilder::new(pattern).unicode(false).build() {
+            Ok(compiled) => CompiledRegex::Compiled(compiled),
+            Err(e) => {
+                // println!("Regex parsing failed ({:?})", e);
+                CompiledRegex::RegexParsingError(e)
+            }
+        }
+    } else {
+        match BytesRegexSetBuilder::new(escaped_patterns)
+            .unicode(false)
+            .build()
+        {
+            Ok(compiled) => CompiledRegex::CompiledSet(compiled),
+            Err(e) => CompiledRegex::RegexParsingError(e),
+        }
+    }
+}
+
 impl RegexManager {
     /// Check whether or not a regex network filter matches a certain URL pattern, using the
     /// [`RegexManager`]'s managed regex storage.

From 5c09e1d3f84b1224d7d6fbcdb1c63ffb1012f8bb Mon Sep 17 00:00:00 2001
From: boocmp <boocmp@yandex.ru>
Date: Thu, 9 Jan 2025 15:35:14 +0700
Subject: [PATCH 3/4] Key type of compiled regexes map changed to u64.

---
 src/regex_manager.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/regex_manager.rs b/src/regex_manager.rs
index 573cf647..aeea8b0a 100644
--- a/src/regex_manager.rs
+++ b/src/regex_manager.rs
@@ -128,7 +128,7 @@ type RandomState = std::hash::BuildHasherDefault<seahash::SeaHasher>;
 ///
 /// The [`RegexManager`] is not thread safe, so any access to it must be synchronized externally.
 pub struct RegexManager {
-    map: HashMap<*const NetworkFilter, RegexEntry, RandomState>,
+    map: HashMap<u64, RegexEntry, RandomState>,
     compiled_regex_count: usize,
     now: Instant,
     #[cfg_attr(target_arch = "wasm32", allow(unused))]
@@ -240,7 +240,7 @@ impl RegexManager {
         if !filter.is_regex() && !filter.is_complete_regex() {
             return true;
         }
-        let key = filter as *const NetworkFilter;
+        let key = (filter as *const NetworkFilter) as u64;
         use std::collections::hash_map::Entry;
         match self.map.entry(key) {
             Entry::Occupied(mut e) => {

From 22dcb7e160bd82ab3ddccd83bc921f29f4dd8ed6 Mon Sep 17 00:00:00 2001
From: boocmp <boocmp@yandex.ru>
Date: Tue, 11 Feb 2025 10:25:04 +0700
Subject: [PATCH 4/4] Compilation & cherry-pick fixes.

---
 src/filters/abstract_network.rs | 4 ++--
 tests/unit/optimizer.rs         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/filters/abstract_network.rs b/src/filters/abstract_network.rs
index bbc22edb..6a1d55ef 100644
--- a/src/filters/abstract_network.rs
+++ b/src/filters/abstract_network.rs
@@ -1,4 +1,4 @@
-use memchr::memchr as find_char;
+use memchr::memrchr as find_char_reverse;
 
 use super::network::NetworkFilterError;
 
@@ -105,7 +105,7 @@ impl AbstractNetworkFilter {
             exception = true;
         }
 
-        let maybe_options_index: Option<usize> = find_char(b'$', line.as_bytes());
+        let maybe_options_index: Option<usize> = find_char_reverse(b'$', line.as_bytes());
 
         let mut options = None;
         if let Some(options_index) = maybe_options_index {
diff --git a/tests/unit/optimizer.rs b/tests/unit/optimizer.rs
index 0e90bc8c..a5b07025 100644
--- a/tests/unit/optimizer.rs
+++ b/tests/unit/optimizer.rs
@@ -3,9 +3,9 @@ mod optimization_tests_pattern_group {
     #[cfg(test)]
     mod optimization_tests_pattern_group {
         use super::*;
-        use crate::filters::network::CompiledRegex;
         use crate::filters::network::NetworkMatchable;
         use crate::lists;
+        use crate::regex_manager::CompiledRegex;
         use crate::regex_manager::RegexManager;
         use crate::request::Request;
         use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder;
@@ -331,9 +331,9 @@ mod optimization_tests_pattern_group {
     }
     */
     use super::super::*;
-    use crate::filters::network::CompiledRegex;
     use crate::filters::network::NetworkMatchable;
     use crate::lists;
+    use crate::regex_manager::CompiledRegex;
     use crate::regex_manager::RegexManager;
     use crate::request::Request;
     use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder;