Skip to content

Commit 8af38c6

Browse files
authored
Merge pull request #4 from Kapernikov/feat/null-collections
Improve null handling and diff operations
2 parents a078115 + 7381afd commit 8af38c6

File tree

8 files changed

+314
-28
lines changed

8 files changed

+314
-28
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
- Add integration tests under `src/runtime/tests/` when changing CLI/runtime behavior.
2929
- Prefer `assert_cmd` for CLI and `predicates` for output checks. Keep fixtures in `src/runtime/tests/data/`.
3030
- Run `cargo test --workspace` locally; ensure tests don’t rely on network input.
31+
- Prefer modifying existing tests over adding new ones for new code paths. Extend current scenarios with extra assertions/fixtures to avoid redundant tests proliferating. For example, if adding null-handling in diff/patch, enhance the existing diff tests rather than introducing separate "basic diff works" tests that become redundant.
3132

3233
## Commit & Pull Request Guidelines
3334
- Commits: short, imperative summary (e.g., “Add __repr__ for LinkMLValue”); group related changes.

src/python/src/lib.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ impl PyLinkMLValue {
483483
match &self.value {
484484
LinkMLValue::Scalar { slot, .. } => Some(slot.name.clone()),
485485
LinkMLValue::List { slot, .. } => Some(slot.name.clone()),
486+
LinkMLValue::Null { slot, .. } => Some(slot.name.clone()),
486487
_ => None,
487488
}
488489
}
@@ -491,6 +492,7 @@ impl PyLinkMLValue {
491492
fn kind(&self) -> String {
492493
match &self.value {
493494
LinkMLValue::Scalar { .. } => "scalar".to_string(),
495+
LinkMLValue::Null { .. } => "null".to_string(),
494496
LinkMLValue::List { .. } => "list".to_string(),
495497
LinkMLValue::Mapping { .. } => "mapping".to_string(),
496498
LinkMLValue::Object { .. } => "object".to_string(),
@@ -502,6 +504,7 @@ impl PyLinkMLValue {
502504
match &self.value {
503505
LinkMLValue::Scalar { slot, .. } => Some(slot.definition().clone()),
504506
LinkMLValue::List { slot, .. } => Some(slot.definition().clone()),
507+
LinkMLValue::Null { slot, .. } => Some(slot.definition().clone()),
505508
_ => None,
506509
}
507510
}
@@ -512,6 +515,7 @@ impl PyLinkMLValue {
512515
LinkMLValue::Object { class, .. } => Some(class.def().clone()),
513516
LinkMLValue::Scalar { class: Some(c), .. } => Some(c.def().clone()),
514517
LinkMLValue::List { class: Some(c), .. } => Some(c.def().clone()),
518+
LinkMLValue::Null { class: Some(c), .. } => Some(c.def().clone()),
515519
_ => None,
516520
}
517521
}
@@ -522,13 +526,15 @@ impl PyLinkMLValue {
522526
LinkMLValue::Object { class, .. } => Some(class.def().name.clone()),
523527
LinkMLValue::Scalar { class: Some(c), .. } => Some(c.def().name.clone()),
524528
LinkMLValue::List { class: Some(c), .. } => Some(c.def().name.clone()),
529+
LinkMLValue::Null { class: Some(c), .. } => Some(c.def().name.clone()),
525530
_ => None,
526531
}
527532
}
528533

529534
fn __len__(&self) -> PyResult<usize> {
530535
Ok(match &self.value {
531536
LinkMLValue::Scalar { .. } => 0,
537+
LinkMLValue::Null { .. } => 0,
532538
LinkMLValue::List { values, .. } => values.len(),
533539
LinkMLValue::Mapping { values, .. } => values.len(),
534540
LinkMLValue::Object { values, .. } => values.len(),
@@ -645,6 +651,9 @@ impl PyLinkMLValue {
645651
LinkMLValue::Scalar { value, slot, .. } => {
646652
format!("LinkMLValue.Scalar(slot='{}', value={})", slot.name, value)
647653
}
654+
LinkMLValue::Null { slot, .. } => {
655+
format!("LinkMLValue.Null(slot='{}')", slot.name)
656+
}
648657
LinkMLValue::List { values, slot, .. } => {
649658
format!(
650659
"LinkMLValue.List(slot='{}', len={})",
@@ -725,17 +734,17 @@ fn load_json(
725734
Ok(PyLinkMLValue::new(v, sv))
726735
}
727736

728-
#[pyfunction(name = "diff", signature = (source, target, ignore_missing_target=None))]
737+
#[pyfunction(name = "diff", signature = (source, target, treat_missing_as_null=None))]
729738
fn py_diff(
730739
py: Python<'_>,
731740
source: &PyLinkMLValue,
732741
target: &PyLinkMLValue,
733-
ignore_missing_target: Option<bool>,
742+
treat_missing_as_null: Option<bool>,
734743
) -> PyResult<PyObject> {
735744
let deltas = diff_internal(
736745
&source.value,
737746
&target.value,
738-
ignore_missing_target.unwrap_or(false),
747+
treat_missing_as_null.unwrap_or(false),
739748
);
740749
let vals: Vec<JsonValue> = deltas
741750
.iter()

src/runtime/src/diff.rs

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ impl LinkMLValue {
2727
pub fn to_json(&self) -> JsonValue {
2828
match self {
2929
LinkMLValue::Scalar { value, .. } => value.clone(),
30+
LinkMLValue::Null { .. } => JsonValue::Null,
3031
LinkMLValue::List { values, .. } => {
3132
JsonValue::Array(values.iter().map(|v| v.to_json()).collect())
3233
}
@@ -46,13 +47,20 @@ impl LinkMLValue {
4647
}
4748
}
4849

49-
pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: bool) -> Vec<Delta> {
50+
/// Compute a semantic diff between two LinkMLValue trees.
51+
///
52+
/// Semantics of nulls and missing values:
53+
/// - X -> null: update to null (old = X, new = null).
54+
/// - null -> X: update from null (old = null, new = X).
55+
/// - missing -> X: add (old = None, new = X).
56+
/// - X -> missing: ignored by default; if `treat_missing_as_null` is true, update to null (old = X, new = null).
57+
pub fn diff(source: &LinkMLValue, target: &LinkMLValue, treat_missing_as_null: bool) -> Vec<Delta> {
5058
fn inner(
5159
path: &mut Vec<String>,
5260
slot: Option<&SlotView>,
5361
s: &LinkMLValue,
5462
t: &LinkMLValue,
55-
ignore_missing: bool,
63+
treat_missing_as_null: bool,
5664
out: &mut Vec<Delta>,
5765
) {
5866
if let Some(sl) = slot {
@@ -81,14 +89,17 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b
8189
.or_else(|| tc.slots().iter().find(|s| s.name == *k));
8290
path.push(k.clone());
8391
match tm.get(k) {
84-
Some(tv) => inner(path, slot_view, sv, tv, ignore_missing, out),
92+
Some(tv) => inner(path, slot_view, sv, tv, treat_missing_as_null, out),
8593
None => {
86-
if !ignore_missing && !slot_view.is_some_and(slot_is_ignored) {
87-
out.push(Delta {
88-
path: path.clone(),
89-
old: Some(sv.to_json()),
90-
new: None,
91-
});
94+
if !slot_view.is_some_and(slot_is_ignored) {
95+
// Missing target slot: either ignore (default) or treat as update to null
96+
if treat_missing_as_null {
97+
out.push(Delta {
98+
path: path.clone(),
99+
old: Some(sv.to_json()),
100+
new: Some(JsonValue::Null),
101+
});
102+
}
92103
}
93104
}
94105
}
@@ -118,7 +129,9 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b
118129
for i in 0..max_len {
119130
path.push(i.to_string());
120131
match (sl.get(i), tl.get(i)) {
121-
(Some(sv), Some(tv)) => inner(path, None, sv, tv, ignore_missing, out),
132+
(Some(sv), Some(tv)) => {
133+
inner(path, None, sv, tv, treat_missing_as_null, out)
134+
}
122135
(Some(sv), None) => out.push(Delta {
123136
path: path.clone(),
124137
old: Some(sv.to_json()),
@@ -140,7 +153,9 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b
140153
for k in keys {
141154
path.push(k.clone());
142155
match (sm.get(&k), tm.get(&k)) {
143-
(Some(sv), Some(tv)) => inner(path, None, sv, tv, ignore_missing, out),
156+
(Some(sv), Some(tv)) => {
157+
inner(path, None, sv, tv, treat_missing_as_null, out)
158+
}
144159
(Some(sv), None) => out.push(Delta {
145160
path: path.clone(),
146161
old: Some(sv.to_json()),
@@ -156,14 +171,29 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b
156171
path.pop();
157172
}
158173
}
159-
_ => {
160-
let sv = s.to_json();
161-
let tv = t.to_json();
162-
if sv != tv {
174+
(LinkMLValue::Null { .. }, LinkMLValue::Null { .. }) => {}
175+
(LinkMLValue::Null { .. }, tv) => {
176+
out.push(Delta {
177+
path: path.clone(),
178+
old: Some(JsonValue::Null),
179+
new: Some(tv.to_json()),
180+
});
181+
}
182+
(sv, LinkMLValue::Null { .. }) => {
183+
out.push(Delta {
184+
path: path.clone(),
185+
old: Some(sv.to_json()),
186+
new: Some(JsonValue::Null),
187+
});
188+
}
189+
(sv, tv) => {
190+
let sj = sv.to_json();
191+
let tj = tv.to_json();
192+
if sj != tj {
163193
out.push(Delta {
164194
path: path.clone(),
165-
old: Some(sv),
166-
new: Some(tv),
195+
old: Some(sj),
196+
new: Some(tj),
167197
});
168198
}
169199
}
@@ -175,7 +205,7 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b
175205
None,
176206
source,
177207
target,
178-
ignore_missing_target,
208+
treat_missing_as_null,
179209
&mut out,
180210
);
181211
out

src/runtime/src/lib.rs

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ pub enum LinkMLValue {
6161
class: Option<ClassView>,
6262
sv: SchemaView,
6363
},
64+
Null {
65+
slot: SlotView,
66+
class: Option<ClassView>,
67+
sv: SchemaView,
68+
},
6469
List {
6570
values: Vec<LinkMLValue>,
6671
slot: SlotView,
@@ -104,6 +109,7 @@ impl LinkMLValue {
104109
current = values.get(key)?;
105110
}
106111
LinkMLValue::Scalar { .. } => return None,
112+
LinkMLValue::Null { .. } => return None,
107113
}
108114
}
109115
Some(current)
@@ -253,6 +259,12 @@ impl LinkMLValue {
253259
sv: sv.clone(),
254260
})
255261
}
262+
// Preserve explicit null as a Null value for list-valued slot
263+
(false, JsonValue::Null) => Ok(LinkMLValue::Null {
264+
slot: sl.clone(),
265+
class: Some(class.clone()),
266+
sv: sv.clone(),
267+
}),
256268
(false, other) => Err(LinkMLError(format!(
257269
"expected list for slot `{}`, found {:?} at {}",
258270
sl.name,
@@ -373,6 +385,12 @@ impl LinkMLValue {
373385
sv: sv.clone(),
374386
})
375387
}
388+
// Preserve explicit null as a Null value for mapping-valued slot
389+
JsonValue::Null => Ok(LinkMLValue::Null {
390+
slot: sl.clone(),
391+
class: class.clone(),
392+
sv: sv.clone(),
393+
}),
376394
other => Err(LinkMLError(format!(
377395
"expected mapping for slot `{}`, found {:?} at {}",
378396
sl.name,
@@ -485,12 +503,20 @@ impl LinkMLValue {
485503
classview_name
486504
))
487505
})?;
488-
Ok(LinkMLValue::Scalar {
489-
value,
490-
slot: sl,
491-
class: Some(class.clone()),
492-
sv: sv.clone(),
493-
})
506+
if value.is_null() {
507+
Ok(LinkMLValue::Null {
508+
slot: sl,
509+
class: Some(class.clone()),
510+
sv: sv.clone(),
511+
})
512+
} else {
513+
Ok(LinkMLValue::Scalar {
514+
value,
515+
slot: sl,
516+
class: Some(class.clone()),
517+
sv: sv.clone(),
518+
})
519+
}
494520
}
495521

496522
fn from_json_internal(
@@ -618,6 +644,7 @@ fn validate_inner(value: &LinkMLValue) -> std::result::Result<(), String> {
618644
}
619645
Ok(())
620646
}
647+
LinkMLValue::Null { .. } => Ok(()),
621648
LinkMLValue::List { values, .. } => {
622649
for v in values {
623650
validate_inner(v)?;
@@ -649,6 +676,7 @@ pub fn validate(value: &LinkMLValue) -> std::result::Result<(), String> {
649676
fn validate_collect(value: &LinkMLValue, errors: &mut Vec<String>) {
650677
match value {
651678
LinkMLValue::Scalar { .. } => {}
679+
LinkMLValue::Null { .. } => {}
652680
LinkMLValue::List { values, .. } => {
653681
for v in values {
654682
validate_collect(v, errors);

src/runtime/src/turtle.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,9 @@ fn serialize_map<W: Write>(
225225
}
226226
}
227227
}
228+
LinkMLValue::Null { .. } => {
229+
// Null is treated as absent; emit nothing
230+
}
228231
LinkMLValue::Object { values, class, .. } => {
229232
let class_ref = &class;
230233
let (obj, child_id) =
@@ -288,6 +291,9 @@ fn serialize_map<W: Write>(
288291
}
289292
}
290293
}
294+
LinkMLValue::Null { .. } => {
295+
// Skip null items
296+
}
291297
LinkMLValue::Object {
292298
values: mv, class, ..
293299
} => {
@@ -364,6 +370,9 @@ fn serialize_map<W: Write>(
364370
}
365371
}
366372
}
373+
LinkMLValue::Null { .. } => {
374+
// nothing
375+
}
367376
LinkMLValue::Object {
368377
values: mv, class, ..
369378
} => {
@@ -523,6 +532,7 @@ pub fn write_turtle<W: Write>(
523532
formatter.serialize_triple(triple.as_ref())?;
524533
}
525534
}
535+
LinkMLValue::Null { .. } => {}
526536
LinkMLValue::List { .. } => {}
527537
LinkMLValue::Mapping { .. } => {}
528538
}
@@ -578,12 +588,16 @@ pub fn write_turtle<W: Write>(
578588
formatter.serialize_triple(triple.as_ref())?;
579589
}
580590
}
591+
LinkMLValue::Null { .. } => {
592+
// nothing
593+
}
581594
LinkMLValue::List { .. } => {}
582595
LinkMLValue::Mapping { .. } => {}
583596
}
584597
}
585598
}
586599
LinkMLValue::Scalar { .. } => {}
600+
LinkMLValue::Null { .. } => {}
587601
}
588602
let out_buf = formatter.finish()?;
589603
let mut out = String::from_utf8(out_buf).unwrap_or_default();
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
objects:
2+
- id: P:100
3+
objecttype: https://w3id.org/linkml/examples/personinfo/Person
4+
name: Null Collections Person
5+
# multivalued scalar list
6+
aliases: null
7+
# inlined-as-list of class instances
8+
has_employment_history: null
9+
# inlined-as-dict of class instances
10+
has_familial_relationships: null
11+

0 commit comments

Comments
 (0)