Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .claude/skills/law-generate/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,44 @@ The JSON payload format (written to the temp file):
laws (>20 articles), this limit applies per batch — each batch of ~15 articles
gets its own 3-iteration budget

## Phase 4.5: Write the related-legislation result envelope

After the `machine_readable` sections are final, write a **sibling result
envelope** so the pipeline can auto-harvest the legislation this law depends on
(delegated regelingen and cross-law references the extref-only harvester misses).

Write it next to the law YAML as `.enrichment-result.yaml` (same directory,
e.g. `corpus/regulation/nl/wet/wet_op_de_zorgtoeslag/.enrichment-result.yaml`).
Use the `Write` tool — no new agent tools are needed.

```yaml
# .enrichment-result.yaml — result envelope, NOT part of the law schema
law_id: wet_op_de_zorgtoeslag
related_legislation:
  - name: Regeling vaststelling standaardpremie en bestuursrechtelijke premie
    relation: delegated_regeling # source_regulation | legal_basis | delegated_regeling
    bwb_id: BWBR0037841 # optional, best-effort
    slug: regeling_standaardpremie # optional, best-effort
    open_term: standaardpremie # optional, only for delegations
  - name: Algemene wet inkomensafhankelijke regelingen
    relation: source_regulation
```

Coverage: add one entry for **every** `source.regulation` you bound, every
`legal_basis` you anchored on, and every `open_term` delegation you declared.
Fields:

- `name` — **required**; the human-readable law/regeling title (used for search
fallback when no id/slug is given).
- `relation` — one of `source_regulation`, `legal_basis`, `delegated_regeling`.
- `bwb_id`, `slug`, `open_term` — **optional**, best-effort. Supply what you know
(a known `bwb_id` resolves fastest); leave the rest out.

**CRITICAL — this MUST NOT go in the law YAML.** The law file stays strictly
schema-conformant (`just validate` must still pass). The related-legislation list
lives only in the `.enrichment-result.yaml` sidecar, which the pipeline reads
separately. Do not add a `related_legislation:` key anywhere inside the law YAML.

## Phase 5: Report

Report to the user:
Expand Down
3 changes: 3 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ repos:
language: system
pass_filenames: true
files: ^corpus/regulation/.*\.yaml$
# Skip dot-prefixed sidecars (.enrichment.yaml, .enrichment-result.yaml):
# enrichment metadata/result envelopes, not law files.
exclude: (^|/)\.[^/]*\.yaml$
types: [yaml]

- id: skills-no-casus
Expand Down
2 changes: 2 additions & 0 deletions packages/admin/src/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,8 @@ pub async fn create_enrich_jobs(
law_id: law_id.clone(),
yaml_path: yaml_path.clone(),
provider: Some((*provider_name).to_string()),
// Admin-requested enrichments are roots of the related-harvest chain.
depth: None,
};

let payload_json = serde_json::to_value(&enrich_payload).map_err(|e| {
Expand Down
58 changes: 29 additions & 29 deletions packages/pipeline/src/api/bwb_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,24 @@ pub async fn search_bwb(
State(state): State<ApiState>,
Query(params): Query<SearchParams>,
) -> Result<Json<Vec<BwbSearchResult>>, (StatusCode, String)> {
let q = params.q.trim();
if q.is_empty() || q.len() < 3 {
return Ok(Json(vec![]));
match search_bwb_by_name(&state.http_client, params.q.trim()).await {
Ok(results) => Ok(Json(results)),
Err(e) => Err((StatusCode::BAD_GATEWAY, e)),
}
}

/// Search wetten.overheid.nl via the SRU API for laws matching `q`.
///
/// The client-taking core shared by the axum handler and the enrich worker's
/// related-legislation resolution. Queries shorter than 3 bytes (measured after
/// trimming, before sanitizing) return an empty list rather than an error.
pub async fn search_bwb_by_name(
client: &reqwest::Client,
q: &str,
) -> Result<Vec<BwbSearchResult>, String> {
let q = q.trim();
if q.len() < 3 {
return Ok(vec![]);
}

let sanitized: String = q
Expand All @@ -50,35 +65,20 @@ pub async fn search_bwb(
("maximumRecords", &MAX_RESULTS.to_string()),
],
)
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("URL build error: {e}"),
)
})?;

let response = state
.http_client
.map_err(|e| format!("URL build error: {e}"))?;

let response = client
.get(url)
.send()
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("BWB search failed: {e}")))?;

let xml_text = response.text().await.map_err(|e| {
(
StatusCode::BAD_GATEWAY,
format!("BWB response read failed: {e}"),
)
})?;

let results = parse_sru_response(&xml_text).map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("XML parse error: {e}"),
)
})?;

Ok(Json(results))
.map_err(|e| format!("BWB search failed: {e}"))?;

let xml_text = response
.text()
.await
.map_err(|e| format!("BWB response read failed: {e}"))?;

parse_sru_response(&xml_text).map_err(|e| format!("XML parse error: {e}"))
}

/// Parse SRU XML response and extract unique laws (deduplicated by BWBR ID).
Expand Down
2 changes: 1 addition & 1 deletion packages/pipeline/src/api/harvest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ async fn resolve_identifiers(
}

/// Find a law's BWB ID by its slug in the law_entries table.
async fn find_bwb_id_by_slug(
pub async fn find_bwb_id_by_slug(
pool: &sqlx::PgPool,
slug: &str,
) -> Result<Option<String>, sqlx::Error> {
Expand Down
12 changes: 12 additions & 0 deletions packages/pipeline/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ pub struct WorkerConfig {
/// Off by default; enrichment is otherwise requested explicitly via the admin
/// API. Configurable via `ENRICH_AUTO_ENQUEUE`.
pub auto_enrich_enqueue: bool,
/// Maximum recursion depth for related-legislation follow-up harvests.
/// A depth-0 enrichment may enqueue harvests at depth 1, whose enrichments
/// may enqueue at depth 2, etc., up to this cap. Default: 2. Configurable
/// via `RELATED_HARVEST_MAX_DEPTH`.
pub related_harvest_max_depth: u32,
}

impl std::fmt::Debug for WorkerConfig {
Expand All @@ -118,6 +123,7 @@ impl std::fmt::Debug for WorkerConfig {
)
.field("enrich_daily_limit", &self.enrich_daily_limit)
.field("auto_enrich_enqueue", &self.auto_enrich_enqueue)
.field("related_harvest_max_depth", &self.related_harvest_max_depth)
.finish()
}
}
Expand Down Expand Up @@ -193,6 +199,11 @@ impl WorkerConfig {
})
.unwrap_or(false);

let related_harvest_max_depth: u32 = std::env::var("RELATED_HARVEST_MAX_DEPTH")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(2);

Ok(Self {
database_url,
max_connections,
Expand All @@ -207,6 +218,7 @@ impl WorkerConfig {
max_consecutive_resource_failures,
enrich_daily_limit,
auto_enrich_enqueue,
related_harvest_max_depth,
})
}

Expand Down
Loading
Loading