Skip to content

Commit c69021f

Browse files
authored
Merge pull request #574 from brave/pre-0.12.x
pre-0.12.x => master
2 parents 3f09734 + 29cf12d commit c69021f

20 files changed

+287
-294
lines changed

Cargo.lock

Lines changed: 9 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "adblock"
3-
version = "0.11.1"
3+
version = "0.12.0"
44
authors = ["Anton Lazarev <[email protected]>", "Andrius Aucinas"]
55
edition = "2021"
66

@@ -39,6 +39,7 @@ rustc-hash = { version = "1.1.0", default-features = false }
3939
memchr = "2.4"
4040
base64 = "0.22"
4141
rmp-serde = "0.15"
42+
arrayvec = "0.7"
4243
cssparser = { version = "0.34", optional = true }
4344
selectors = { version = "0.26", optional = true }
4445
precomputed-hash = "0.1"

js/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "adblock-rs"
3-
version = "0.11.1"
3+
version = "0.12.0"
44
authors = ["Anton Lazarev <[email protected]>", "Andrius Aucinas"]
55
edition = "2021"
66
license = "MPL-2.0"

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "adblock-rs",
3-
"version": "0.11.1",
3+
"version": "0.12.0",
44
"description": "Very fast, Rust-based, native implementation of ad-blocker engine for Node",
55
"keywords": [
66
"adblock",

src/blocker.rs

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ pub struct Blocker {
7979
pub(crate) filter_data_context: FilterDataContextRef,
8080
}
8181

82+
#[cfg(feature = "single-thread")]
83+
pub(crate) type RegexManagerRef<'a> = std::cell::RefMut<'a, RegexManager>;
84+
#[cfg(not(feature = "single-thread"))]
85+
pub(crate) type RegexManagerRef<'a> = std::sync::MutexGuard<'a, RegexManager>;
86+
8287
impl Blocker {
8388
/// Decide if a network request (usually from WebRequest API) should be
8489
/// blocked, redirected or allowed.
@@ -130,31 +135,36 @@ impl Blocker {
130135
self.get_list(NetworkFilterListId::TaggedFiltersAll)
131136
}
132137

133-
#[cfg(feature = "single-thread")]
134-
fn borrow_regex_manager(&self) -> std::cell::RefMut<'_, RegexManager> {
138+
/// Borrow a mutable reference to the regex manager for the [`Blocker`].
139+
/// Only one caller can borrow the regex manager at a time.
140+
pub(crate) fn borrow_regex_manager(&self) -> RegexManagerRef<'_> {
141+
#[cfg(feature = "single-thread")]
135142
#[allow(unused_mut)]
136143
let mut manager = self.regex_manager.borrow_mut();
144+
#[cfg(not(feature = "single-thread"))]
145+
let mut manager = self.regex_manager.lock().unwrap();
137146

138147
#[cfg(not(target_arch = "wasm32"))]
139148
manager.update_time();
140149

141150
manager
142151
}
143152

144-
#[cfg(not(feature = "single-thread"))]
145-
fn borrow_regex_manager(&self) -> std::sync::MutexGuard<'_, RegexManager> {
146-
let mut manager = self.regex_manager.lock().unwrap();
147-
manager.update_time();
148-
manager
149-
}
150-
151153
pub fn check_generic_hide(&self, hostname_request: &Request) -> bool {
152154
let mut regex_manager = self.borrow_regex_manager();
153155
self.generic_hide()
154156
.check(hostname_request, &HashSet::new(), &mut regex_manager)
155157
.is_some()
156158
}
157159

160+
#[cfg(test)]
161+
pub(crate) fn check_exceptions(&self, request: &Request) -> bool {
162+
let mut regex_manager = self.borrow_regex_manager();
163+
self.exceptions()
164+
.check(request, &HashSet::new(), &mut regex_manager)
165+
.is_some()
166+
}
167+
158168
pub fn check_parameterised(
159169
&self,
160170
request: &Request,

src/engine.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,11 @@ impl Engine {
139139
self.blocker.check(request, &self.resources)
140140
}
141141

142+
#[cfg(test)]
143+
pub(crate) fn check_network_request_exceptions(&self, request: &Request) -> bool {
144+
self.blocker.check_exceptions(request)
145+
}
146+
142147
pub fn check_network_request_subset(
143148
&self,
144149
request: &Request,
@@ -266,6 +271,11 @@ impl Engine {
266271
self.blocker.set_regex_discard_policy(new_discard_policy);
267272
}
268273

274+
#[cfg(test)]
275+
pub fn borrow_regex_manager(&self) -> crate::blocker::RegexManagerRef<'_> {
276+
self.blocker.borrow_regex_manager()
277+
}
278+
269279
#[cfg(feature = "debug-info")]
270280
pub fn discard_regex(&mut self, regex_id: u64) {
271281
self.blocker.discard_regex(regex_id);

src/filters/fb_network.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,4 @@ impl NetworkMatchable for FlatNetworkFilter<'_> {
171171
regex_manager,
172172
)
173173
}
174-
175-
#[cfg(test)]
176-
fn matches_test(&self, request: &Request) -> bool {
177-
self.matches(request, &mut RegexManager::default())
178-
}
179174
}

src/filters/fb_network_builder.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use flatbuffers::WIPOffset;
77
use crate::filters::fb_builder::EngineFlatBuilder;
88
use crate::filters::network::{FilterTokens, NetworkFilter};
99
use crate::filters::token_selector::TokenSelector;
10+
use crate::utils::TokensBuffer;
1011

1112
use crate::filters::network::NetworkFilterMaskHelper;
1213
use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder;
@@ -134,6 +135,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
134135
let mut optimizable = HashMap::<ShortHash, Vec<NetworkFilter>>::new();
135136

136137
let mut token_frequencies = TokenSelector::new(rule_list.filters.len());
138+
let mut tokens_buffer = TokensBuffer::default();
137139

138140
{
139141
for network_filter in rule_list.filters {
@@ -157,7 +159,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
157159
}
158160
};
159161

160-
let multi_tokens = network_filter.get_tokens_optimized();
162+
let multi_tokens = network_filter.get_tokens(&mut tokens_buffer);
161163
match multi_tokens {
162164
FilterTokens::Empty => {
163165
// No tokens, add to fallback bucket (token 0)
@@ -171,7 +173,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
171173
}
172174
}
173175
FilterTokens::Other(tokens) => {
174-
let best_token = token_frequencies.select_least_used_token(&tokens);
176+
let best_token = token_frequencies.select_least_used_token(tokens);
175177
token_frequencies.record_usage(best_token);
176178
store_filter(best_token);
177179
}

src/filters/network.rs

Lines changed: 34 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ use crate::filters::abstract_network::{
1515
use crate::lists::ParseOptions;
1616
use crate::regex_manager::RegexManager;
1717
use crate::request;
18-
use crate::utils::{self, Hash};
19-
20-
pub(crate) const TOKENS_BUFFER_SIZE: usize = 200;
18+
use crate::utils::{self, Hash, TokensBuffer};
2119

2220
/// For now, only support `$removeparam` with simple alphanumeric/dash/underscore patterns.
2321
static VALID_PARAM: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_\-]+$").unwrap());
@@ -312,10 +310,10 @@ pub enum FilterPart {
312310
}
313311

314312
#[derive(Debug, PartialEq)]
315-
pub enum FilterTokens {
313+
pub(crate) enum FilterTokens<'a> {
316314
Empty,
317-
OptDomains(Vec<Hash>),
318-
Other(Vec<Hash>),
315+
OptDomains(&'a [Hash]),
316+
Other(&'a [Hash]),
319317
}
320318

321319
pub struct FilterPartIterator<'a> {
@@ -883,19 +881,11 @@ impl NetworkFilter {
883881
)
884882
}
885883

886-
#[deprecated(since = "0.11.1", note = "use get_tokens_optimized instead")]
887-
pub fn get_tokens(&self) -> Vec<Vec<Hash>> {
888-
match self.get_tokens_optimized() {
889-
FilterTokens::OptDomains(domains) => {
890-
domains.into_iter().map(|domain| vec![domain]).collect()
891-
}
892-
FilterTokens::Other(tokens) => vec![tokens],
893-
FilterTokens::Empty => vec![],
894-
}
895-
}
896-
897-
pub fn get_tokens_optimized(&self) -> FilterTokens {
898-
let mut tokens: Vec<Hash> = Vec::with_capacity(TOKENS_BUFFER_SIZE);
884+
pub(crate) fn get_tokens<'a>(
885+
&'a self,
886+
tokens_buffer: &'a mut TokensBuffer,
887+
) -> FilterTokens<'a> {
888+
tokens_buffer.clear();
899889

900890
// If there is only one domain and no domain negation, we also use this
901891
// domain as a token.
@@ -905,7 +895,7 @@ impl NetworkFilter {
905895
{
906896
if let Some(domains) = self.opt_domains.as_ref() {
907897
if let Some(domain) = domains.first() {
908-
tokens.push(*domain)
898+
tokens_buffer.push(*domain);
909899
}
910900
}
911901
}
@@ -918,7 +908,7 @@ impl NetworkFilter {
918908
(self.is_plain() || self.is_regex()) && !self.is_right_anchor();
919909
let skip_first_token = self.is_right_anchor();
920910

921-
utils::tokenize_filter_to(f, skip_first_token, skip_last_token, &mut tokens);
911+
utils::tokenize_filter_to(f, skip_first_token, skip_last_token, tokens_buffer);
922912
}
923913
}
924914
FilterPart::AnyOf(_) => (), // across AnyOf set of filters no single token is guaranteed to match to a request
@@ -928,45 +918,55 @@ impl NetworkFilter {
928918
// Append tokens from hostname, if any
929919
if !self.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX) {
930920
if let Some(hostname) = self.hostname.as_ref() {
931-
utils::tokenize_to(hostname, &mut tokens);
921+
utils::tokenize_to(hostname, tokens_buffer);
932922
}
933923
} else if let Some(hostname) = self.hostname.as_ref() {
934924
// Find last dot to tokenize the prefix
935925
let last_dot_pos = hostname.rfind('.');
936926
if let Some(last_dot_pos) = last_dot_pos {
937-
utils::tokenize_to(&hostname[..last_dot_pos], &mut tokens);
927+
utils::tokenize_to(&hostname[..last_dot_pos], tokens_buffer);
938928
}
939929
}
940930

941-
if tokens.is_empty() && self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) {
931+
if tokens_buffer.is_empty() && self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) {
942932
if let Some(removeparam) = &self.modifier_option {
943933
if VALID_PARAM.is_match(removeparam) {
944-
utils::tokenize_to(&removeparam.to_ascii_lowercase(), &mut tokens);
934+
utils::tokenize_to(&removeparam.to_ascii_lowercase(), tokens_buffer);
945935
}
946936
}
947937
}
948938

949939
// If we got no tokens for the filter/hostname part, then we will dispatch
950940
// this filter in multiple buckets based on the domains option.
951-
if tokens.is_empty() && self.opt_domains.is_some() && self.opt_not_domains.is_none() {
941+
if tokens_buffer.is_empty() && self.opt_domains.is_some() && self.opt_not_domains.is_none()
942+
{
952943
if let Some(opt_domains) = self.opt_domains.as_ref() {
953944
if !opt_domains.is_empty() {
954-
return FilterTokens::OptDomains(opt_domains.clone());
945+
return FilterTokens::OptDomains(opt_domains);
955946
}
956947
}
957948
FilterTokens::Empty
958949
} else {
959950
// Add optional token for protocol
960951
if self.for_http() && !self.for_https() {
961-
tokens.push(utils::fast_hash("http"));
952+
tokens_buffer.push(utils::fast_hash("http"));
962953
} else if self.for_https() && !self.for_http() {
963-
tokens.push(utils::fast_hash("https"));
954+
tokens_buffer.push(utils::fast_hash("https"));
964955
}
965956

966-
// Remake a vector to drop extra capacity.
967-
let mut t = Vec::with_capacity(tokens.len());
968-
t.extend(tokens);
969-
FilterTokens::Other(t)
957+
FilterTokens::Other(tokens_buffer.as_slice())
958+
}
959+
}
960+
961+
#[cfg(test)]
962+
pub(crate) fn matches_test(&self, request: &request::Request) -> bool {
963+
let filter_set = crate::FilterSet::new_with_rules(vec![self.clone()], vec![], true);
964+
let engine = crate::Engine::from_filter_set(filter_set, true);
965+
966+
if self.is_exception() {
967+
engine.check_network_request_exceptions(request)
968+
} else {
969+
engine.check_network_request(request).matched
970970
}
971971
}
972972
}
@@ -986,35 +986,8 @@ impl fmt::Display for NetworkFilter {
986986
}
987987
}
988988

989-
pub trait NetworkMatchable {
989+
pub(crate) trait NetworkMatchable {
990990
fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool;
991-
992-
#[cfg(test)]
993-
fn matches_test(&self, request: &request::Request) -> bool;
994-
}
995-
996-
impl NetworkMatchable for NetworkFilter {
997-
fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool {
998-
use crate::filters::network_matchers::{
999-
check_excluded_domains, check_included_domains, check_options, check_pattern,
1000-
};
1001-
check_options(self.mask, request)
1002-
&& check_included_domains(self.opt_domains.as_deref(), request)
1003-
&& check_excluded_domains(self.opt_not_domains.as_deref(), request)
1004-
&& check_pattern(
1005-
self.mask,
1006-
self.filter.iter(),
1007-
self.hostname.as_deref(),
1008-
(self as *const NetworkFilter) as u64,
1009-
request,
1010-
regex_manager,
1011-
)
1012-
}
1013-
1014-
#[cfg(test)]
1015-
fn matches_test(&self, request: &request::Request) -> bool {
1016-
self.matches(request, &mut RegexManager::default())
1017-
}
1018991
}
1019992

1020993
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)