
Commit 20fbe0c

Merge branch 'rolling' into engines

2 parents: 1d0951b + 3a74bf8


44 files changed: +856, -722 lines

Cargo.lock (+72, -80)

Generated file; diff not rendered by default.

Cargo.toml (+18, -6)

@@ -1,12 +1,14 @@
 [package]
 name = "websurfx"
-version = "1.24.9"
-edition = "2021"
+version = "1.24.11"
+edition = "2024"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
 license = "AGPL-3.0"
 
 [dependencies]
+rayon = "1.10.0"
+arc-swap = "1.7.1"
 reqwest = { version = "0.12.5", default-features = false, features = [
     "rustls-tls",
     "brotli",
@@ -39,6 +41,7 @@ fake-useragent = { version = "0.1.3", default-features = false }
 env_logger = { version = "0.11.6", default-features = false }
 log = { version = "0.4.21", default-features = false }
 mlua = { version = "0.10.3", features = [
+    "async",
     "luajit",
     "vendored",
 ], default-features = false }
@@ -81,12 +84,10 @@ stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
 thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
     "moby",
 ]}
-
 actix-multipart = { version = "0.7.2", default-features = false, features = [
     "derive",
     "tempfile",
 ]}
-itertools = {version = "0.14.0", default-features = false}
 
 [dev-dependencies]
 rusty-hook = { version = "^0.11.2", default-features = false }
@@ -102,6 +103,17 @@ lightningcss = { version = "1.0.0-alpha.57", default-features = false, features
 # Temporary fork with fix
 minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}
 
+[lints.rust]
+unsafe_code = "forbid"
+rust_2018_idioms = { level = "warn", priority = 1 }
+missing_docs = "deny"
+
+[lints.clippy]
+cognitive_complexity = "warn"
+panic = "forbid"
+missing_docs_in_private_items = "deny"
+perf = { level = "deny", priority = 1 }
+
 [profile.dev]
 opt-level = 0
 debug = true
@@ -125,7 +137,7 @@ panic = 'abort'
 incremental = false
 codegen-units = 1
 rpath = false
-strip = "symbols"
+strip = "debuginfo"
 
 [profile.profiling]
 inherits = "release"
@@ -176,8 +188,8 @@ inherits = "lpcb1"
 opt-level = "z"
 
 [features]
-use-synonyms-search = ["thesaurus/static"]
 default = ["memory-cache"]
+use-synonyms-search = ["thesaurus/static"]
 dhat-heap = ["dep:dhat"]
 memory-cache = ["dep:moka"]
 redis-cache = ["dep:redis", "dep:base64"]
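
The new `[lints.rust]` and `[lints.clippy]` tables apply crate-wide, replacing per-file attributes; the `priority` field controls override order when entries overlap (higher-priority entries are applied later and win). As a rough sketch of what these levels reject — hypothetical snippets, not code from this repository:

```rust
// Hypothetical examples of what the new crate-wide lint levels catch.

// `unsafe_code = "forbid"` ([lints.rust]): any `unsafe` block is a hard error
// that cannot be re-allowed further down the module tree.
fn first_byte(bytes: &[u8]) -> u8 {
    unsafe { *bytes.get_unchecked(0) } // error: usage of an `unsafe` block
}

// `missing_docs = "deny"` plus `clippy::missing_docs_in_private_items = "deny"`:
// items need doc comments whether public or private.
struct EngineLimit(u8); // error: missing documentation for a struct

// `clippy::panic = "forbid"` ([lints.clippy]): explicit panics are rejected,
// steering fallible paths toward `Result` instead.
fn parse_port(input: &str) -> u16 {
    input.parse().unwrap_or_else(|_| panic!("bad port: {input}")) // error: `panic!` used
}
```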

build.rs (+2, -2)

@@ -7,9 +7,9 @@
 
 // ------- Imports -------
 use lightningcss::stylesheet::{MinifyOptions, ParserOptions, PrinterOptions, StyleSheet};
-use minify_js::{minify, Session, TopLevelMode};
+use minify_js::{Session, TopLevelMode, minify};
 use std::{
-    fs::{read_dir, read_to_string, File, OpenOptions},
+    fs::{File, OpenOptions, read_dir, read_to_string},
     io::{Read, Write},
 };
 
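
Both hunks here are mechanical fallout of the edition bump above: rustfmt's 2024 style edition version-sorts `use` lists, so uppercase names (`Session`, `File`, `OpenOptions`) now sort ahead of lowercase ones (`minify`, `read_dir`). The same reshuffling appears in the src/aggregator.rs and src/cache/encryption.rs imports below.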

src/aggregator.rs (+69, -74)

@@ -1,32 +1,31 @@
 //! This module provides the functionality to scrape and gathers all the results from the upstream
 //! search engines and then removes duplicate results.
 
-use super::user_agent::random_user_agent;
-use crate::handler::{file_path, FileType};
+use crate::handler::{FileType, file_path};
 use crate::models::{
     aggregation::{EngineErrorInfo, SearchResult, SearchResults},
     engine::{EngineError, EngineHandler},
 };
 use crate::parser::Config;
 
 use error_stack::Report;
-use futures::stream::FuturesUnordered;
+use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
+use rayon::slice::ParallelSliceMut;
 use regex::Regex;
 use reqwest::{Client, ClientBuilder};
 use std::sync::Arc;
+use tokio::task::JoinSet;
 use tokio::{
     fs::File,
     io::{AsyncBufReadExt, BufReader},
-    task::JoinHandle,
     time::Duration,
 };
 
 /// A constant for holding the prebuilt Client globally in the app.
 static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
 
 /// Aliases for long type annotations
-type FutureVec =
-    FuturesUnordered<JoinHandle<Result<Vec<(String, SearchResult)>, Report<EngineError>>>>;
+type FutureVec = JoinSet<Result<Vec<(String, SearchResult)>, Report<EngineError>>>;
 
 /// The function aggregates the scraped results from the user-selected upstream search engines.
 /// These engines can be chosen either from the user interface (UI) or from the configuration file.
@@ -72,6 +71,7 @@ pub async fn aggregate(
     config: &Config,
     upstream_search_engines: &[EngineHandler],
     safe_search: u8,
+    user_agent: &'static str,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let client = CLIENT.get_or_init(|| {
         let mut cb = ClientBuilder::new()
@@ -96,19 +96,17 @@ pub async fn aggregate(
         cb.build().unwrap()
     });
 
-    let user_agent: &str = random_user_agent();
-
     let mut names: Vec<&str> = Vec::with_capacity(0);
 
     // create tasks for upstream result fetching
-    let tasks: FutureVec = FutureVec::new();
+    let mut tasks: FutureVec = JoinSet::new();
 
     let query: Arc<String> = Arc::new(query.to_string());
     for engine_handler in upstream_search_engines {
         let (name, search_engine) = engine_handler.clone().into_name_engine();
         names.push(name);
         let query_partially_cloned = query.clone();
-        tasks.push(tokio::spawn(async move {
+        tasks.spawn(async move {
             search_engine
                 .results(
                     &query_partially_cloned,
@@ -118,16 +116,7 @@ pub async fn aggregate(
                     safe_search,
                 )
                 .await
-        }));
-    }
-
-    // get upstream responses
-    let mut responses = Vec::with_capacity(tasks.len());
-
-    for task in tasks {
-        if let Ok(result) = task.await {
-            responses.push(result)
-        }
+        });
     }
 
     // aggregate search results, removing duplicates and handling errors the upstream engines returned
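
The last two hunks swap the `FuturesUnordered<JoinHandle<...>>` plus intermediate `responses` vector for a `JoinSet`, which owns its spawned tasks and yields their results as they complete. A minimal, self-contained sketch of the pattern (illustrative types, not the project's engine code):

```rust
use tokio::task::JoinSet;

#[tokio::main]
async fn main() {
    // Spawn one task per "engine"; the JoinSet owns every handle.
    let mut tasks: JoinSet<Result<u32, String>> = JoinSet::new();
    for id in 0..4u32 {
        tasks.spawn(async move {
            if id == 2 {
                Err(format!("engine {id} failed"))
            } else {
                Ok(id * 10)
            }
        });
    }

    // join_next() yields tasks in completion order. The outer Result is
    // tokio's JoinError (panic/abort); the inner one is the task's own output.
    while let Some(Ok(response)) = tasks.join_next().await {
        match response {
            Ok(value) => println!("result: {value}"),
            Err(error) => eprintln!("handled error: {error}"),
        }
    }
}
```

Compared with buffering `JoinHandle`s into a `Vec`, nothing is collected up front, and a slow engine no longer delays handling of results that already finished.
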
@@ -142,60 +131,58 @@ pub async fn aggregate(
         ));
     };
 
-    for _ in 0..responses.len() {
-        let response = responses.pop().unwrap();
+    while let Some(Ok(response)) = tasks.join_next().await {
         let engine = names.pop().unwrap();
 
-        if result_map.is_empty() {
-            match response {
-                Ok(results) => result_map = results,
-                Err(error) => handle_error(&error, engine),
-            };
-            continue;
-        }
-
-        match response {
-            Ok(result) => {
-                result.into_iter().for_each(|(key, value)| {
-                    match result_map.iter().find(|(key_s, _)| key_s == &key) {
-                        Some(value) => value.1.to_owned().add_engines(engine),
-                        None => result_map.push((key, value)),
-                    };
-                });
+        if let Ok(result) = response {
+            for (key, value) in result {
+                if let Some(value) = result_map.iter().find(|(key_s, _)| key_s == &key) {
+                    value.1.to_owned().add_engines(engine)
+                } else {
+                    result_map.push((key, value))
+                }
             }
-            Err(error) => handle_error(&error, engine),
-        };
+        } else if let Err(error) = response {
+            handle_error(&error, engine)
+        }
     }
 
     if safe_search >= 3 {
         let mut blacklist_map: Vec<(String, SearchResult)> = Vec::new();
         filter_with_lists(
             &mut result_map,
             &mut blacklist_map,
-            file_path(FileType::BlockList)?,
+            &file_path(FileType::BlockList).await?,
         )
         .await?;
 
         filter_with_lists(
             &mut blacklist_map,
             &mut result_map,
-            file_path(FileType::AllowList)?,
+            &file_path(FileType::AllowList).await?,
         )
         .await?;
 
         drop(blacklist_map);
     }
 
-    let mut results: Box<[SearchResult]> = result_map
-        .into_iter()
-        .map(|(_, mut value)| {
-            if !value.url.contains("temu.com") {
-                value.calculate_relevance(query.as_str())
-            }
-            value
-        })
-        .collect();
-    sort_search_results(&mut results);
+    let results: Box<[SearchResult]> = tokio::task::spawn_blocking(move || {
+        let mut unsorted_results: Box<[SearchResult]> = result_map
+            .par_iter()
+            .cloned()
+            .map(|(_, mut value)| {
+                if !value.url.contains("temu.com") {
+                    value.calculate_relevance(query.as_str())
+                }
+                value
+            })
+            .collect();
+
+        sort_search_results(&mut unsorted_results);
+
+        unsorted_results
+    })
+    .await?;
 
     Ok(SearchResults::new(
         results,
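
Relevance scoring and sorting are CPU-bound, so this hunk moves them off the async workers: `tokio::task::spawn_blocking` hands the whole section to the blocking pool, and rayon's `par_iter`/`par_sort_unstable_by` (the latter via `sort_search_results`) fan it out from there. A self-contained sketch of that shape, assuming `tokio` (with `rt` and `macros`) plus `rayon` as dependencies and a stand-in scoring function:

```rust
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use rayon::slice::ParallelSliceMut;

#[derive(Clone, Debug)]
struct Hit {
    url: String,
    relevance_score: f32,
}

#[tokio::main]
async fn main() -> Result<(), tokio::task::JoinError> {
    let result_map: Vec<(String, Hit)> = vec![
        ("a".into(), Hit { url: "https://example.com/a".into(), relevance_score: 0.0 }),
        ("b".into(), Hit { url: "https://example.com/long/b".into(), relevance_score: 0.0 }),
    ];

    // spawn_blocking moves the CPU-bound section onto tokio's blocking pool;
    // inside it, rayon fans the map and the sort out across its worker threads.
    let results: Box<[Hit]> = tokio::task::spawn_blocking(move || {
        let mut unsorted: Box<[Hit]> = result_map
            .par_iter()
            .cloned()
            .map(|(_, mut hit)| {
                hit.relevance_score = hit.url.len() as f32; // stand-in for calculate_relevance
                hit
            })
            .collect();

        // Parallel unstable sort, descending by score; ties keep no particular order.
        unsorted.par_sort_unstable_by(|a, b| {
            b.relevance_score
                .partial_cmp(&a.relevance_score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        unsorted
    })
    .await?;

    println!("{results:?}");
    Ok(())
}
```
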
@@ -231,17 +218,16 @@ pub async fn filter_with_lists(
     while idx < length {
         let ele = &map_to_be_filtered[idx];
         let ele_inner = &ele.1;
-        match re.is_match(&ele.0.to_lowercase())
+        // If the search result matches the regex pattern, move it from the original map to the resultant map
+        if re.is_match(&ele.0.to_lowercase())
             || re.is_match(&ele_inner.title.to_lowercase())
             || re.is_match(&ele_inner.description.to_lowercase())
         {
-            true => {
-                // If the search result matches the regex pattern, move it from the original map to the resultant map
-                resultant_map.push(map_to_be_filtered.swap_remove(idx));
-                length -= 1;
-            }
-            false => idx += 1,
-        };
+            resultant_map.push(map_to_be_filtered.swap_remove(idx));
+            length -= 1;
+        } else {
+            idx += 1;
+        }
     }
 }
 
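
The boolean `match` collapses into a plain `if`/`else` with behavior unchanged; the subtle part the loop preserves is that `Vec::swap_remove` backfills index `idx` with the last element, so after a removal the index must stay put while `length` shrinks. A reduced sketch of the idiom, simplified to plain strings:

```rust
/// Moves every element containing `needle` from `source` into `sink`.
/// swap_remove is O(1) per removal because it does not shift the tail.
fn drain_matches(source: &mut Vec<String>, sink: &mut Vec<String>, needle: &str) {
    let mut idx = 0;
    let mut length = source.len();
    while idx < length {
        if source[idx].contains(needle) {
            // swap_remove backfills `idx` with the last element, which has
            // not been examined yet: keep `idx` in place, shrink `length`.
            sink.push(source.swap_remove(idx));
            length -= 1;
        } else {
            idx += 1;
        }
    }
}

fn main() {
    let mut results = vec!["ads.example".to_string(), "docs.rs".into(), "ads.tracker".into()];
    let mut blocked = Vec::new();
    drain_matches(&mut results, &mut blocked, "ads");
    assert_eq!(blocked.len(), 2);
    assert_eq!(results, vec!["docs.rs".to_string()]);
}
```
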
@@ -251,11 +237,12 @@ pub async fn filter_with_lists(
 /// Sorts SearchResults by relevance score.
 /// <br> sort_unstable is used as its faster,stability is not an issue on our side.
 /// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
+///
 /// # Arguments
+///
 /// * `results` - A mutable slice or Vec of SearchResults
-///
 fn sort_search_results(results: &mut [SearchResult]) {
-    results.sort_unstable_by(|a, b| {
+    results.par_sort_unstable_by(|a, b| {
         use std::cmp::Ordering;
 
         b.relevance_score
@@ -311,12 +298,16 @@ mod tests {
         .await?;
 
         assert_eq!(resultant_map.len(), 2);
-        assert!(resultant_map
-            .iter()
-            .any(|(key, _)| key == "https://www.example.com"));
-        assert!(resultant_map
-            .iter()
-            .any(|(key, _)| key == "https://www.rust-lang.org/"));
+        assert!(
+            resultant_map
+                .iter()
+                .any(|(key, _)| key == "https://www.example.com")
+        );
+        assert!(
+            resultant_map
+                .iter()
+                .any(|(key, _)| key == "https://www.rust-lang.org/")
+        );
         assert_eq!(map_to_be_filtered.len(), 0);
 
         Ok(())
@@ -362,13 +353,17 @@ mod tests {
         .await?;
 
         assert_eq!(resultant_map.len(), 1);
-        assert!(resultant_map
-            .iter()
-            .any(|(key, _)| key == "https://www.example.com"));
+        assert!(
+            resultant_map
+                .iter()
+                .any(|(key, _)| key == "https://www.example.com")
+        );
         assert_eq!(map_to_be_filtered.len(), 1);
-        assert!(map_to_be_filtered
-            .iter()
-            .any(|(key, _)| key == "https://www.rust-lang.org/"));
+        assert!(
+            map_to_be_filtered
+                .iter()
+                .any(|(key, _)| key == "https://www.rust-lang.org/")
+        );
 
         Ok(())
     }

src/cache/encryption.rs (+3, -3)

@@ -1,16 +1,16 @@
 use chacha20poly1305::{
-    consts::{B0, B1},
     ChaChaPoly1305,
+    consts::{B0, B1},
 };
 use std::sync::OnceLock;
 
 use chacha20::{
+    ChaChaCore,
     cipher::{
+        StreamCipherCoreWrapper,
         generic_array::GenericArray,
         typenum::{UInt, UTerm},
-        StreamCipherCoreWrapper,
     },
-    ChaChaCore,
 };
 
 /// The ChaCha20 core wrapped in a stream cipher for use in ChaCha20-Poly1305 authenticated encryption.
