|
//! The `qwant` module handles the scraping of results from the Qwant search engine
//! by querying the upstream Qwant search engine with the user-provided query and,
//! if provided, a page number.
| 4 | +
|
| 5 | +use std::collections::HashMap; |
| 6 | + |
| 7 | +use reqwest::header::HeaderMap; |
| 8 | +use reqwest::Client; |
| 9 | +use serde::Deserialize; |
| 10 | + |
| 11 | +use crate::models::aggregation_models::SearchResult; |
| 12 | + |
| 13 | +use crate::models::engine_models::{EngineError, SearchEngine}; |
| 14 | + |
| 15 | +use error_stack::{Report, Result, ResultExt}; |
| 16 | + |
/// Stateless handle for the Qwant search engine.
///
/// The type exists so that the `SearchEngine` trait can be implemented for it, which reduces
/// code duplication and makes it easy to build a vector of different search engines.
pub struct Qwant;
| 20 | + |
| 21 | +#[derive(Deserialize, Debug)] |
| 22 | +#[serde(rename_all = "camelCase")] |
| 23 | +/// Web page search result |
| 24 | +struct QwantSearchResult { |
| 25 | + // NOTE: This object also contains `favicon`, `url_ping_suffix`, `thumbnail_url`, |
| 26 | + // `source`, and `is_family_friendly` attributes, |
| 27 | + // which we currently don't care about. |
| 28 | + /// Title of the result |
| 29 | + title: String, |
| 30 | + /// Url of the result |
| 31 | + url: String, |
| 32 | + /// Description of the result |
| 33 | + desc: String, |
| 34 | +} |
| 35 | + |
| 36 | +impl From<&QwantSearchResult> for SearchResult { |
| 37 | + fn from(value: &QwantSearchResult) -> Self { |
| 38 | + SearchResult::new(&value.title, &value.url, &value.desc, &["qwant"]) |
| 39 | + } |
| 40 | +} |
| 41 | + |
| 42 | +#[derive(Deserialize, Debug)] |
| 43 | +#[serde(rename_all = "snake_case")] |
| 44 | +#[serde(tag = "type")] |
| 45 | +/// A result which should be shown to the user |
| 46 | +enum QwantItem { |
| 47 | + /// Results containing web pages relevant to the query |
| 48 | + Web { |
| 49 | + // NOTE: This object also contains `count` and `serpContextId` attributes, |
| 50 | + // which we currently don't care about. |
| 51 | + /// List of web page search results |
| 52 | + items: Vec<QwantSearchResult>, |
| 53 | + }, |
| 54 | + #[serde(other)] |
| 55 | + /// Other item type like "related_searches", which aren't relevant. |
| 56 | + Other, |
| 57 | +} |
| 58 | + |
| 59 | +#[derive(Deserialize, Debug)] |
| 60 | +struct QwantItems { |
| 61 | + // NOTE: This object also contains `headline`, `sidebar`, and `bottomline` attributes, |
| 62 | + // which we currently don't care about. |
| 63 | + /// Results which should be shown in the main section of the page |
| 64 | + mainline: Vec<QwantItem>, |
| 65 | +} |
| 66 | + |
| 67 | +#[derive(Deserialize, Debug)] |
| 68 | +struct QwantResult { |
| 69 | + // NOTE: This object also contains `denied`, `total`, `items`, `filters`, `lastPage`, |
| 70 | + // `instrumentation`, `onlyProductAds`, and `topClassification` attributes, |
| 71 | + // which we currently don't care about. |
| 72 | + /// Entries that should be shown to the user |
| 73 | + items: QwantItems, |
| 74 | +} |
| 75 | + |
| 76 | +#[derive(Deserialize, Debug)] |
| 77 | +#[serde(rename_all = "snake_case")] |
| 78 | +#[serde(tag = "status", content = "data")] |
| 79 | +enum QwantApiResponse { |
| 80 | + /// Success response |
| 81 | + Success { |
| 82 | + // NOTE: This object also contains `query` and `cache` attributes, |
| 83 | + // which we currently don't care about. |
| 84 | + /// Actual results the search produced |
| 85 | + result: QwantResult, |
| 86 | + }, |
| 87 | + // TODO: Use the reported error messages |
| 88 | + #[allow(unused)] |
| 89 | + /// Error response |
| 90 | + Error { |
| 91 | + /// Machine-readable error code |
| 92 | + error_code: i32, |
| 93 | + #[serde(default)] |
| 94 | + /// List of human-readable error messages |
| 95 | + message: Vec<String>, |
| 96 | + }, |
| 97 | +} |
| 98 | + |
| 99 | +impl From<QwantApiResponse> for Result<QwantResult, EngineError> { |
| 100 | + fn from(value: QwantApiResponse) -> Self { |
| 101 | + match value { |
| 102 | + QwantApiResponse::Success { result } => Ok(result), |
| 103 | + QwantApiResponse::Error { .. } => Err(Report::new(EngineError::RequestError)), |
| 104 | + } |
| 105 | + } |
| 106 | +} |
| 107 | + |
| 108 | +#[async_trait::async_trait] |
| 109 | +impl SearchEngine for Qwant { |
| 110 | + async fn results( |
| 111 | + &self, |
| 112 | + query: &str, |
| 113 | + page: u32, |
| 114 | + user_agent: &str, |
| 115 | + client: &Client, |
| 116 | + safe_search: u8, |
| 117 | + ) -> Result<Vec<(String, SearchResult)>, EngineError> { |
| 118 | + let results_per_page = 10; |
| 119 | + let start_result = results_per_page * page; |
| 120 | + |
| 121 | + let url: String = format!("https://api.qwant.com/v3/search/web?q={query}&count={results_per_page}&locale=en_US&offset={start_result}&safesearch={safe_search}&device=desktop&tgp=2&displayed=true"); |
| 122 | + |
| 123 | + let header_map = HeaderMap::try_from(&HashMap::from([ |
| 124 | + ("User-Agent".to_string(), user_agent.to_string()), |
| 125 | + ("Referer".to_string(), "https://qwant.com/".to_string()), |
| 126 | + ("Origin".to_string(), "https://qwant.com".to_string()), |
| 127 | + ])) |
| 128 | + .change_context(EngineError::UnexpectedError)?; |
| 129 | + |
| 130 | + let result: QwantApiResponse = client |
| 131 | + .get(url) |
| 132 | + .headers(header_map) |
| 133 | + .send() |
| 134 | + .await |
| 135 | + .change_context(EngineError::RequestError)? |
| 136 | + .json() |
| 137 | + .await |
| 138 | + .change_context(EngineError::RequestError)?; |
| 139 | + |
| 140 | + let result = Result::from(result)?; |
| 141 | + |
| 142 | + let results: Vec<_> = result |
| 143 | + .items |
| 144 | + .mainline |
| 145 | + .into_iter() |
| 146 | + .filter_map(|item| match item { |
| 147 | + QwantItem::Web { items } => Some(items), |
| 148 | + _ => None, |
| 149 | + }) |
| 150 | + .flatten() |
| 151 | + .map(|result| { |
| 152 | + let search_result = SearchResult::from(&result); |
| 153 | + (result.url, search_result) |
| 154 | + }) |
| 155 | + .collect(); |
| 156 | + |
| 157 | + if results.is_empty() { |
| 158 | + Err(Report::new(EngineError::EmptyResultSet)) |
| 159 | + } else { |
| 160 | + Ok(results) |
| 161 | + } |
| 162 | + } |
| 163 | +} |
0 commit comments