Skip to content

✨ Implement qwant search engine support #605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: rolling
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ reqwest = { version = "0.12.5", default-features = false, features = [
"brotli",
"gzip",
"http2",
"json",
"socks",
] }
tokio = { version = "1.43.0", features = [
Expand Down
1 change: 1 addition & 0 deletions src/engines/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub mod common;
pub mod duckduckgo;
pub mod librex;
pub mod mojeek;
pub mod qwant;
pub mod search_result_parser;
pub mod searx;
pub mod startpage;
Expand Down
177 changes: 177 additions & 0 deletions src/engines/qwant.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
//! The `qwant` module handles the scraping of results from the qwant search engine
//! by querying the upstream qwant search engine with user provided query and with a page
//! number if provided.

use std::borrow::Cow;
use std::collections::HashMap;

use reqwest::header::HeaderMap;
use reqwest::{Client, Url};
use serde::Deserialize;

use crate::models::aggregation_models::SearchResult;

use crate::models::engine_models::{EngineError, SearchEngine};

use error_stack::{Report, Result, ResultExt};

/// A new Qwant engine type defined in-order to implement the `SearchEngine` trait which allows to
/// reduce code duplication as well as allows to create vector of different search engines easily.
///
/// This is a stateless unit struct: all request parameters are supplied per-call
/// through the `SearchEngine::results` method.
pub struct Qwant;

#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
/// A single web-page hit as returned by the Qwant JSON API.
///
/// `rename_all = "camelCase"` maps multi-word JSON keys (e.g. `urlPingSuffix`)
/// onto snake_case fields; the three fields kept here are single words, so
/// their JSON keys are unchanged.
struct QwantSearchResult {
    // NOTE: This object also contains `favicon`, `url_ping_suffix`, `thumbnail_url`,
    // `source`, and `is_family_friendly` attributes,
    // which we currently don't care about.
    /// Title of the result
    title: String,
    /// Url of the result
    url: String,
    /// Description of the result
    desc: String,
}

impl From<&QwantSearchResult> for SearchResult {
fn from(value: &QwantSearchResult) -> Self {
SearchResult::new(&value.title, &value.url, &value.desc, &["qwant"])
}
}

#[derive(Deserialize, Debug)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type")]
/// A result which should be shown to the user
///
/// Internally tagged on the JSON `"type"` field: an object with
/// `"type": "web"` deserializes into `Web`; any other tag value falls
/// through to the `Other` catch-all thanks to `#[serde(other)]`.
enum QwantItem {
    /// Results containing web pages relevant to the query
    Web {
        // NOTE: This object also contains `count` and `serpContextId` attributes,
        // which we currently don't care about.
        /// List of web page search results
        items: Vec<QwantSearchResult>,
    },
    #[serde(other)]
    /// Other item type like "related_searches", which aren't relevant.
    Other,
}

#[derive(Deserialize, Debug)]
/// Groups of result items keyed by page region; only the main column is kept.
struct QwantItems {
    // NOTE: This object also contains `headline`, `sidebar`, and `bottomline` attributes,
    // which we currently don't care about.
    /// Results which should be shown in the main section of the page
    mainline: Vec<QwantItem>,
}

#[derive(Deserialize, Debug)]
/// Payload of a successful Qwant API response (the `data.result` object).
struct QwantResult {
    // NOTE: This object also contains `denied`, `total`, `items`, `filters`, `lastPage`,
    // `instrumentation`, `onlyProductAds`, and `topClassification` attributes,
    // which we currently don't care about.
    /// Entries that should be shown to the user
    items: QwantItems,
}

#[derive(Deserialize, Debug)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "status", content = "data")]
/// Top-level envelope of every Qwant API reply.
///
/// Adjacently tagged: the JSON `"status"` field ("success"/"error") selects
/// the variant, and the variant's fields are read from the sibling `"data"`
/// object.
enum QwantApiResponse {
    /// Success response
    Success {
        // NOTE: This object also contains `query` and `cache` attributes,
        // which we currently don't care about.
        /// Actual results the search produced
        result: QwantResult,
    },
    // TODO: Use the reported error messages
    #[allow(unused)]
    /// Error response
    Error {
        /// Machine-readable error code
        error_code: i32,
        #[serde(default)]
        /// List of human-readable error messages
        message: Vec<String>,
    },
}

impl From<QwantApiResponse> for Result<QwantResult, EngineError> {
fn from(value: QwantApiResponse) -> Self {
match value {
QwantApiResponse::Success { result } => Ok(result),
QwantApiResponse::Error { .. } => Err(Report::new(EngineError::RequestError)),
}
}
}

#[async_trait::async_trait]
impl SearchEngine for Qwant {
async fn results(
&self,
query: &str,
page: u32,
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
let results_per_page = 10;
let start_result = results_per_page * page;

let url = Url::parse_with_params(
"https://api.qwant.com/v3/search/web",
&[
("q", Cow::from(query)),
("count", results_per_page.to_string().into()),
("locale", "en_US".into()),
("offset", start_result.to_string().into()),
("safesearch", safe_search.to_string().into()),
("device", "desktop".into()),
("tgb", "2".into()),
("displayed", "true".into()),
],
)
.change_context(EngineError::UnexpectedError)?;

let header_map = HeaderMap::try_from(&HashMap::from([
("User-Agent".to_string(), user_agent.to_string()),
("Referer".to_string(), "https://qwant.com/".to_string()),
("Origin".to_string(), "https://qwant.com".to_string()),
]))
.change_context(EngineError::UnexpectedError)?;

let result: QwantApiResponse = client
.get(url)
.headers(header_map)
.send()
.await
.change_context(EngineError::RequestError)?
.json()
.await
.change_context(EngineError::RequestError)?;
Comment on lines +144 to +152
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you could have used the already available json_as_bytes_from_upstream function to fetch data and deserialize it using something like bincode. What do you think? 🙂 .


let result = Result::from(result)?;

let results: Vec<_> = result
.items
.mainline
.into_iter()
.filter_map(|item| match item {
QwantItem::Web { items } => Some(items),
_ => None,
})
.flatten()
.map(|result| {
let search_result = SearchResult::from(&result);
(result.url, search_result)
})
.collect();

if results.is_empty() {
Err(Report::new(EngineError::EmptyResultSet))
} else {
Ok(results)
}
}
}
3 changes: 3 additions & 0 deletions src/models/engine_models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ impl EngineHandler {
let engine = crate::engines::bing::Bing::new()?;
("bing", Box::new(engine))
}
"qwant" => {
let engine = crate::engines::qwant::Qwant;
("qwant", Box::new(engine))
}
"wikipedia" => {
let engine = crate::engines::wikipedia::Wikipedia::new("en")?;
("wikipedia", Box::new(engine))
Expand Down
1 change: 1 addition & 0 deletions websurfx/config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ upstream_search_engines = {
LibreX = false,
Mojeek = false,
Bing = false,
Qwant = false,
Wikipedia = true,
Yahoo = false,
} -- select the upstream search engines from which the results should be fetched.
Expand Down
Loading