-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathbwb_search.rs
More file actions
129 lines (111 loc) · 3.87 KB
/
Copy pathbwb_search.rs
File metadata and controls
129 lines (111 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
use axum::extract::{Query, State};
use axum::http::StatusCode;
use axum::Json;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use crate::ApiState;
/// Base URL of the SRU search endpoint that every search request is sent to.
const SRU_BASE: &str = "https://zoekservice.overheid.nl/sru/Search";
/// Value sent as the SRU `maximumRecords` parameter on every search request.
const MAX_RESULTS: u32 = 20;
/// Query-string parameters accepted by the [`search_bwb`] handler.
#[derive(Deserialize)]
pub struct SearchParams {
/// Free-text search term taken from the `q` query parameter.
pub q: String,
}
/// One law extracted from an SRU search response.
#[derive(Serialize, Clone)]
pub struct BwbSearchResult {
/// Text of the record's `identifier` element; the parser only keeps values
/// that start with `BWBR`.
pub bwb_id: String,
/// Text of the record's `title` element.
pub title: String,
/// Text of the record's `type` element, or an empty string when the record
/// has none. Serialized under the key `type`.
#[serde(rename = "type")]
pub law_type: String,
}
/// GET /harvest/search?q=...
///
/// Search wetten.overheid.nl via the SRU API for laws matching the query.
pub async fn search_bwb(
State(state): State<ApiState>,
Query(params): Query<SearchParams>,
) -> Result<Json<Vec<BwbSearchResult>>, (StatusCode, String)> {
match search_bwb_by_name(&state.http_client, params.q.trim()).await {
Ok(results) => Ok(Json(results)),
Err(e) => Err((StatusCode::BAD_GATEWAY, e)),
}
}
/// Search wetten.overheid.nl via the SRU API for laws whose title matches `q`.
///
/// The client-taking core shared by the axum handler and the enrich worker's
/// related-legislation resolution.
///
/// The query is sanitized before use: only alphanumeric characters, spaces,
/// `-` and `.` are kept, and surrounding whitespace is removed. If fewer than
/// three characters remain, an empty list is returned without contacting the
/// remote service.
///
/// # Errors
///
/// Returns a human-readable message when the request URL cannot be built, the
/// HTTP request fails, the response body cannot be read, or the body is not
/// parseable XML.
pub async fn search_bwb_by_name(
    client: &reqwest::Client,
    q: &str,
) -> Result<Vec<BwbSearchResult>, String> {
    /// Minimum number of characters a sanitized query must contain.
    const MIN_QUERY_CHARS: usize = 3;

    // Sanitize *before* measuring the length: dropping everything except a
    // small whitelist keeps quotes and CQL operators out of the quoted term,
    // and a query made up mostly of punctuation may shrink to nothing here.
    let sanitized: String = q
        .chars()
        .filter(|&c| c.is_alphanumeric() || matches!(c, ' ' | '-' | '.'))
        .collect();
    // Removing characters can expose leading/trailing spaces (e.g. "(wet)").
    let sanitized = sanitized.trim();

    // Count characters rather than bytes so multi-byte letters are not
    // over-counted against the minimum.
    if sanitized.chars().count() < MIN_QUERY_CHARS {
        return Ok(Vec::new());
    }

    let cql = format!("overheidbwb.titel any \"{sanitized}\"");
    let max_records = MAX_RESULTS.to_string();
    let url = url::Url::parse_with_params(
        SRU_BASE,
        &[
            ("operation", "searchRetrieve"),
            ("version", "1.2"),
            ("x-connection", "BWB"),
            ("query", cql.as_str()),
            ("maximumRecords", max_records.as_str()),
        ],
    )
    .map_err(|e| format!("URL build error: {e}"))?;

    let response = client
        .get(url)
        .send()
        .await
        .map_err(|e| format!("BWB search failed: {e}"))?;
    let xml_text = response
        .text()
        .await
        .map_err(|e| format!("BWB response read failed: {e}"))?;

    parse_sru_response(&xml_text).map_err(|e| format!("XML parse error: {e}"))
}
/// Extract the distinct laws contained in an SRU XML response body.
///
/// Every `owmskern` element is treated as one record. Its `identifier`,
/// `title` and `type` child elements supply the fields of a
/// [`BwbSearchResult`]. Records lacking an identifier or a title, and records
/// whose identifier does not start with `BWBR`, are skipped. When an
/// identifier occurs more than once, only its first record is kept.
///
/// # Errors
///
/// Returns the parser's error message when `xml` is not well-formed XML.
fn parse_sru_response(xml: &str) -> Result<Vec<BwbSearchResult>, String> {
    let document = roxmltree::Document::parse(xml).map_err(|e| e.to_string())?;

    // Trimmed text of an element, or `None` when the element has no text.
    let trimmed_text = |n: roxmltree::Node<'_, '_>| n.text().map(|s| s.trim().to_string());

    // The Vec keeps records in document order (SRU returns results in
    // relevance order); the set is only consulted to drop repeated IDs.
    let mut known_ids: HashSet<String> = HashSet::new();
    let mut laws: Vec<BwbSearchResult> = Vec::new();

    let records = document
        .descendants()
        .filter(|n| n.is_element() && n.tag_name().name() == "owmskern");

    for record in records {
        let (mut id, mut name, mut kind) = (None, None, None);
        for field in record.children().filter(|n| n.is_element()) {
            // A repeated element overwrites the earlier value (last one wins).
            match field.tag_name().name() {
                "identifier" => id = trimmed_text(field),
                "title" => name = trimmed_text(field),
                "type" => kind = trimmed_text(field),
                _ => {}
            }
        }

        let (bwb_id, title) = match (id, name) {
            (Some(found_id), Some(found_title)) => (found_id, found_title),
            _ => continue,
        };

        // Short-circuit keeps non-BWBR identifiers out of the seen-set.
        if !bwb_id.starts_with("BWBR") || !known_ids.insert(bwb_id.clone()) {
            continue;
        }

        laws.push(BwbSearchResult {
            bwb_id,
            title,
            law_type: kind.unwrap_or_default(),
        });
    }

    Ok(laws)
}