Skip to content

Commit d0ce3b1

Browse files
committed
fix(pipeline): address review nits on related-legislation resolution
- Validate the single-hit SRU result as a BWB id before resolving, so a malformed SRU id can't slip into a harvest payload (paths a/b already validate).
- Read the .enrichment-result.yaml sidecar via tokio::fs (was blocking std::fs) for consistency with the rest of execute_enrich_with_runner.
- Clarify the depth-inherit comment: the field is the shared extref-recursion counter, so deep-via-extref laws skip related discovery (roots/shallow laws, the intended case, are unaffected).
1 parent 4a24029 commit d0ce3b1

2 files changed

Lines changed: 24 additions & 16 deletions

File tree

packages/pipeline/src/enrich.rs

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -433,9 +433,9 @@ pub struct EnrichmentResultEnvelope {
433433
/// Never errors, so it can never fail an otherwise-successful enrichment:
434434
/// - absent file → empty list;
435435
/// - unparseable file → logged at `warn` and empty list.
436-
fn read_enrichment_result_envelope(yaml_abs: &Path) -> Vec<RelatedLegislation> {
436+
async fn read_enrichment_result_envelope(yaml_abs: &Path) -> Vec<RelatedLegislation> {
437437
let envelope_path = enrichment_result_path(yaml_abs);
438-
let content = match std::fs::read_to_string(&envelope_path) {
438+
let content = match tokio::fs::read_to_string(&envelope_path).await {
439439
Ok(c) => c,
440440
Err(_) => return Vec::new(),
441441
};
@@ -1162,7 +1162,7 @@ pub async fn execute_enrich_with_runner(
11621162

11631163
// Read the related-legislation result envelope the agent may have written.
11641164
// Never fails: absent/malformed → empty (see read_enrichment_result_envelope).
1165-
let related_legislation = read_enrichment_result_envelope(&yaml_abs);
1165+
let related_legislation = read_enrichment_result_envelope(&yaml_abs).await;
11661166

11671167
// Collect written files for corpus staging
11681168
let mut written_files = vec![yaml_abs.clone(), metadata_path];
@@ -1392,16 +1392,16 @@ related_legislation:
13921392
assert!(entry.bwb_id.is_none());
13931393
}
13941394

1395-
#[test]
1396-
fn test_read_envelope_absent_file_is_empty() {
1395+
#[tokio::test]
1396+
async fn test_read_envelope_absent_file_is_empty() {
13971397
let dir = tempfile::tempdir().unwrap();
13981398
let yaml_abs = dir.path().join("2025-01-01.yaml");
13991399
// No sidecar exists next to it.
1400-
assert!(read_enrichment_result_envelope(&yaml_abs).is_empty());
1400+
assert!(read_enrichment_result_envelope(&yaml_abs).await.is_empty());
14011401
}
14021402

1403-
#[test]
1404-
fn test_read_envelope_malformed_is_empty() {
1403+
#[tokio::test]
1404+
async fn test_read_envelope_malformed_is_empty() {
14051405
let dir = tempfile::tempdir().unwrap();
14061406
let yaml_abs = dir.path().join("2025-01-01.yaml");
14071407
std::fs::write(
@@ -1410,19 +1410,19 @@ related_legislation:
14101410
)
14111411
.unwrap();
14121412
// Malformed sidecar must never error — it degrades to empty.
1413-
assert!(read_enrichment_result_envelope(&yaml_abs).is_empty());
1413+
assert!(read_enrichment_result_envelope(&yaml_abs).await.is_empty());
14141414
}
14151415

1416-
#[test]
1417-
fn test_read_envelope_present_parses() {
1416+
#[tokio::test]
1417+
async fn test_read_envelope_present_parses() {
14181418
let dir = tempfile::tempdir().unwrap();
14191419
let yaml_abs = dir.path().join("2025-01-01.yaml");
14201420
std::fs::write(
14211421
enrichment_result_path(&yaml_abs),
14221422
"related_legislation:\n - name: Delegated Regeling\n bwb_id: BWBR0037841\n",
14231423
)
14241424
.unwrap();
1425-
let related = read_enrichment_result_envelope(&yaml_abs);
1425+
let related = read_enrichment_result_envelope(&yaml_abs).await;
14261426
assert_eq!(related.len(), 1);
14271427
assert_eq!(related[0].bwb_id.as_deref(), Some("BWBR0037841"));
14281428
}

packages/pipeline/src/worker.rs

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -412,8 +412,12 @@ async fn process_next_job(
412412
law_id: job.law_id.clone(),
413413
yaml_path: result.file_path.clone(),
414414
provider: Some((*provider_name).to_string()),
415-
// Inherit the just-completed harvest's depth so a
416-
// related-legislation harvest chain keeps counting.
415+
// Inherit the harvest's depth. NB: this is the shared
416+
// extref-recursion counter, so a law reached via
417+
// >= RELATED_HARVEST_MAX_DEPTH extref hops enriches at a
418+
// depth that skips related-legislation discovery. Roots and
419+
// shallow laws (the intended case) are unaffected; a
420+
// dedicated related-depth counter is the follow-up.
417421
depth: payload.depth,
418422
};
419423
let payload_json = match serde_json::to_value(&enrich_payload) {
@@ -945,9 +949,13 @@ async fn resolve_related_bwb_id(
945949
}
946950
}
947951

948-
// (c) SRU search by name — accept only an unambiguous single hit
952+
// (c) SRU search by name — accept only an unambiguous single hit, and only
953+
// if it is a well-formed BWB id (paths a/b validate too; don't let a
954+
// malformed SRU id slip into a harvest payload).
949955
match crate::api::bwb_search::search_bwb_by_name(http_client, &entry.name).await {
950-
Ok(results) if results.len() == 1 => RelatedResolution::Resolved(results[0].bwb_id.clone()),
956+
Ok(results) if results.len() == 1 && is_valid_bwb_id(&results[0].bwb_id) => {
957+
RelatedResolution::Resolved(results[0].bwb_id.clone())
958+
}
951959
Ok(results) if results.len() > 1 => RelatedResolution::NeedsConfirmation,
952960
Ok(_) => RelatedResolution::Unresolved,
953961
Err(e) => {

0 commit comments

Comments
 (0)