Skip to content

Commit 6badbc0

Browse files
committed
Implement Qwant search engine
1 parent e91ba08 commit 6badbc0

File tree

4 files changed

+169
-0
lines changed

4 files changed

+169
-0
lines changed

src/engines/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pub mod brave;
88
pub mod duckduckgo;
99
pub mod librex;
1010
pub mod mojeek;
11+
pub mod qwant;
1112
pub mod search_result_parser;
1213
pub mod searx;
1314
pub mod startpage;

src/engines/qwant.rs

+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//! The `qwant` module handles the fetching of results from the Qwant search engine
2+
//! by querying the upstream Qwant search engine with the user-provided query and with a page
3+
//! number if provided.
4+
5+
use std::collections::HashMap;
6+
7+
use reqwest::header::HeaderMap;
8+
use reqwest::Client;
9+
use serde::Deserialize;
10+
11+
use crate::models::aggregation_models::SearchResult;
12+
13+
use crate::models::engine_models::{EngineError, SearchEngine};
14+
15+
use error_stack::{Report, Result, ResultExt};
16+
17+
/// The Qwant engine type.
///
/// It exists so that the `SearchEngine` trait can be implemented for Qwant, which reduces
/// code duplication and makes it easy to build a vector of different search engines.
#[derive(Debug, Clone, Copy, Default)]
pub struct Qwant;
20+
21+
#[derive(Deserialize, Debug)]
22+
#[serde(rename_all = "camelCase")]
23+
/// Web page search result
24+
struct QwantSearchResult {
25+
// NOTE: This object also contains `favicon`, `url_ping_suffix`, `thumbnail_url`,
26+
// `source`, and `is_family_friendly` attributes,
27+
// which we currently don't care about.
28+
/// Title of the result
29+
title: String,
30+
/// Url of the result
31+
url: String,
32+
/// Description of the result
33+
desc: String,
34+
}
35+
36+
impl From<&QwantSearchResult> for SearchResult {
37+
fn from(value: &QwantSearchResult) -> Self {
38+
SearchResult::new(&value.title, &value.url, &value.desc, &["qwant"])
39+
}
40+
}
41+
42+
#[derive(Deserialize, Debug)]
43+
#[serde(rename_all = "snake_case")]
44+
#[serde(tag = "type")]
45+
/// A result which should be shown to the user
46+
enum QwantItem {
47+
/// Results containing web pages relevant to the query
48+
Web {
49+
// NOTE: This object also contains `count` and `serpContextId` attributes,
50+
// which we currently don't care about.
51+
/// List of web page search results
52+
items: Vec<QwantSearchResult>,
53+
},
54+
#[serde(other)]
55+
/// Other item type like "related_searches", which aren't relevant.
56+
Other,
57+
}
58+
59+
#[derive(Deserialize, Debug)]
60+
struct QwantItems {
61+
// NOTE: This object also contains `headline`, `sidebar`, and `bottomline` attributes,
62+
// which we currently don't care about.
63+
/// Results which should be shown in the main section of the page
64+
mainline: Vec<QwantItem>,
65+
}
66+
67+
#[derive(Deserialize, Debug)]
68+
struct QwantResult {
69+
// NOTE: This object also contains `denied`, `total`, `items`, `filters`, `lastPage`,
70+
// `instrumentation`, `onlyProductAds`, and `topClassification` attributes,
71+
// which we currently don't care about.
72+
/// Entries that should be shown to the user
73+
items: QwantItems,
74+
}
75+
76+
#[derive(Deserialize, Debug)]
77+
#[serde(rename_all = "snake_case")]
78+
#[serde(tag = "status", content = "data")]
79+
enum QwantApiResponse {
80+
/// Success response
81+
Success {
82+
// NOTE: This object also contains `query` and `cache` attributes,
83+
// which we currently don't care about.
84+
/// Actual results the search produced
85+
result: QwantResult,
86+
},
87+
// TODO: Use the reported error messages
88+
#[allow(unused)]
89+
/// Error response
90+
Error {
91+
/// Machine-readable error code
92+
error_code: i32,
93+
#[serde(default)]
94+
/// List of human-readable error messages
95+
message: Vec<String>,
96+
},
97+
}
98+
99+
impl From<QwantApiResponse> for Result<QwantResult, EngineError> {
100+
fn from(value: QwantApiResponse) -> Self {
101+
match value {
102+
QwantApiResponse::Success { result } => Ok(result),
103+
QwantApiResponse::Error { .. } => Err(Report::new(EngineError::RequestError)),
104+
}
105+
}
106+
}
107+
108+
#[async_trait::async_trait]
109+
impl SearchEngine for Qwant {
110+
async fn results(
111+
&self,
112+
query: &str,
113+
page: u32,
114+
user_agent: &str,
115+
client: &Client,
116+
safe_search: u8,
117+
) -> Result<Vec<(String, SearchResult)>, EngineError> {
118+
let results_per_page = 10;
119+
let start_result = results_per_page * page;
120+
121+
let url: String = format!("https://api.qwant.com/v3/search/web?q={query}&count={results_per_page}&locale=en_US&offset={start_result}&safesearch={safe_search}&device=desktop&tgp=2&displayed=true");
122+
123+
let header_map = HeaderMap::try_from(&HashMap::from([
124+
("User-Agent".to_string(), user_agent.to_string()),
125+
("Referer".to_string(), "https://qwant.com/".to_string()),
126+
("Origin".to_string(), "https://qwant.com".to_string()),
127+
]))
128+
.change_context(EngineError::UnexpectedError)?;
129+
130+
let result: QwantApiResponse = client
131+
.get(url)
132+
.headers(header_map)
133+
.send()
134+
.await
135+
.change_context(EngineError::RequestError)?
136+
.json()
137+
.await
138+
.change_context(EngineError::RequestError)?;
139+
140+
let result = Result::from(result)?;
141+
142+
let results: Vec<_> = result
143+
.items
144+
.mainline
145+
.into_iter()
146+
.filter_map(|item| match item {
147+
QwantItem::Web { items } => Some(items),
148+
_ => None,
149+
})
150+
.flatten()
151+
.map(|result| {
152+
let search_result = SearchResult::from(&result);
153+
(result.url, search_result)
154+
})
155+
.collect();
156+
157+
if results.is_empty() {
158+
Err(Report::new(EngineError::EmptyResultSet))
159+
} else {
160+
Ok(results)
161+
}
162+
}
163+
}

src/models/engine_models.rs

+4
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ impl EngineHandler {
206206
let engine = crate::engines::bing::Bing::new()?;
207207
("bing", Box::new(engine))
208208
}
209+
"qwant" => {
210+
let engine = crate::engines::qwant::Qwant;
211+
("qwant", Box::new(engine))
212+
}
209213
_ => {
210214
return Err(Report::from(EngineError::NoSuchEngineFound(
211215
engine_name.to_string(),

websurfx/config.lua

+1
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,5 @@ upstream_search_engines = {
7272
LibreX = false,
7373
Mojeek = false,
7474
Bing = false,
75+
Qwant = false,
7576
} -- select the upstream search engines from which the results should be fetched.

0 commit comments

Comments
 (0)