Skip to content

Commit 4232d37

Browse files
committed
test(pipeline): pin machine_readable null vs empty count behavior
Document and assert the one parity nuance between the typed count and the old `contains_key` walk: an empty `machine_readable: {}` counts as enriched (an LLM may insert an empty section before filling it), while an explicit `machine_readable: null` deserializes to `None` and is treated as un-enriched, whereas the old key-presence check would have counted it. No corpus file uses the null form (an explicit `null` or a bare `machine_readable:` key with no value), so this matches the previous behavior in practice. Addresses a ralph-review completeness finding.
1 parent edb8bc4 commit 4232d37

1 file changed

Lines changed: 19 additions & 6 deletions

File tree

packages/pipeline/src/enrich.rs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,11 @@ async fn compute_prompt_hash(repo_path: &Path) -> String {
996996
/// format. A structurally-invalid law now surfaces as a parse error here instead
997997
/// of being silently undercounted — acceptable because this only ever runs on
998998
/// real harvested/enriched corpus files, where a corruption is worth failing on.
999+
///
1000+
/// An article counts as enriched when it carries a `machine_readable` mapping,
1001+
/// including the empty `{}` an LLM may insert before filling it; an explicit
1002+
/// `machine_readable: null` is treated as un-enriched. No corpus file uses the
1003+
/// null form (explicit `null` or a bare key), so this matches the previous key-presence behavior in practice.
9991004
async fn count_article_stats(path: &Path) -> Result<(usize, usize)> {
10001005
let content = tokio::fs::read_to_string(path).await?;
10011006
let law: ArticleBasedLaw = serde_yaml_ng::from_str(&content)?;
@@ -1295,25 +1300,33 @@ articles:
12951300
}
12961301

12971302
#[tokio::test]
1298-
async fn test_count_article_stats_empty_machine_readable_counts() {
1299-
// An empty `machine_readable: {}` mapping must still count as enriched —
1300-
// this matches the old key-presence semantics that `FakeLlmRunner` (and
1301-
// the enrichment delta) rely on when the LLM inserts a bare section.
1303+
async fn test_count_article_stats_empty_vs_null_machine_readable() {
1304+
// An empty `machine_readable: {}` mapping counts as enriched — this
1305+
// matches the old key-presence semantics that `FakeLlmRunner` (and the
1306+
// enrichment delta) rely on when the LLM inserts a bare section. An
1307+
// explicit `machine_readable: null` deserializes to None and is treated
1308+
// as un-enriched; no corpus file uses the null form (explicit or bare key), so the typed
1309+
// count matches the previous `contains_key` behavior in practice.
13021310
let yaml = r#"---
13031311
$id: test_law
13041312
regulatory_layer: WET
13051313
publication_date: '2025-01-01'
13061314
articles:
13071315
- number: '1'
1308-
text: Article one.
1316+
text: Empty section, enriched.
13091317
machine_readable: {}
1318+
- number: '2'
1319+
text: Null section, not enriched.
1320+
machine_readable: null
1321+
- number: '3'
1322+
text: No section at all.
13101323
"#;
13111324
let dir = tempfile::tempdir().unwrap();
13121325
let path = dir.path().join("law.yaml");
13131326
tokio::fs::write(&path, yaml).await.unwrap();
13141327

13151328
let (total, with_mr) = count_article_stats(&path).await.unwrap();
1316-
assert_eq!(total, 1);
1329+
assert_eq!(total, 3);
13171330
assert_eq!(with_mr, 1);
13181331
}
13191332

0 commit comments

Comments
 (0)