diff --git a/AGENTS.md b/AGENTS.md index 06f1266..526760e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,6 +25,7 @@ - Python (bindings helpers): follow PEP 8; prefer type hints where feasible. ## Testing Guidelines +- When testing locally, always provide network access. never try to run the tests offline - Add integration tests under `src/runtime/tests/` when changing CLI/runtime behavior. - Prefer `assert_cmd` for CLI and `predicates` for output checks. Keep fixtures in `src/runtime/tests/data/`. - Run `cargo test --workspace` locally; ensure tests don’t rely on network input. diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index 8b6dfbc..ade3444 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -478,6 +478,12 @@ impl Clone for PyLinkMLValue { #[pymethods] impl PyLinkMLValue { + /// Semantic equality per LinkML Instances spec. + /// Compares this value with another `LinkMLValue`. + #[pyo3(signature = (other, treat_missing_as_null = false))] + fn equals(&self, other: &PyLinkMLValue, treat_missing_as_null: bool) -> bool { + self.value.equals(&other.value, treat_missing_as_null) + } #[getter] fn slot_name(&self) -> Option { match &self.value { @@ -499,6 +505,11 @@ impl PyLinkMLValue { } } + #[getter] + fn node_id(&self) -> u64 { + self.value.node_id() + } + #[getter] fn slot_definition(&self) -> Option { match &self.value { @@ -753,20 +764,40 @@ fn py_diff( Ok(json_value_to_py(py, &JsonValue::Array(vals))) } -#[pyfunction(name = "patch")] +#[pyfunction(name = "patch", signature = (source, deltas, treat_missing_as_null = true, ignore_no_ops = true))] fn py_patch( py: Python<'_>, source: &PyLinkMLValue, deltas: &Bound<'_, PyAny>, -) -> PyResult { + treat_missing_as_null: bool, + ignore_no_ops: bool, +) -> PyResult { let json_mod = PyModule::import(py, "json")?; let deltas_str: String = json_mod.call_method1("dumps", (deltas,))?.extract()?; let deltas_vec: Vec = serde_json::from_str(&deltas_str).map_err(|e| PyException::new_err(e.to_string()))?; let sv_ref = source.sv.bind(py).borrow(); let rust_sv = sv_ref.as_rust(); - let new_value = patch_internal(&source.value, &deltas_vec, rust_sv); - Ok(PyLinkMLValue::new(new_value, source.sv.clone_ref(py))) + let (new_value, trace) = patch_internal( + &source.value, + &deltas_vec, + rust_sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops, + treat_missing_as_null, + }, + ) + .map_err(|e| PyException::new_err(e.to_string()))?; + let trace_json = serde_json::json!({ + "added": trace.added, + "deleted": trace.deleted, + "updated": trace.updated, + }); + let py_val = PyLinkMLValue::new(new_value, source.sv.clone_ref(py)); + let dict = pyo3::types::PyDict::new(py); + dict.set_item("value", Py::new(py, py_val)?)?; + dict.set_item("trace", json_value_to_py(py, &trace_json))?; + Ok(dict.into_any().unbind()) } #[pyfunction(name = "to_turtle", signature = (value, skolem=None))] diff --git a/src/python/tests/python_equals.rs b/src/python/tests/python_equals.rs new file mode 100644 index 0000000..eb51043 --- /dev/null +++ b/src/python/tests/python_equals.rs @@ -0,0 +1,75 @@ +use linkml_runtime_python::runtime_module; +use pyo3::prelude::*; +use pyo3::types::PyDict; +use std::path::PathBuf; + +fn data_path(name: &str) -> PathBuf { + let base = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let candidates = [ + base.join("../runtime/tests/data").join(name), + base.join("../schemaview/tests/data").join(name), + base.join("tests/data").join(name), + ]; + for c in candidates { + if c.exists() { + return c; + } + } + panic!("test data not found: {}", name); +} + +#[test] +fn python_equals_api() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let module = PyModule::new(py, "linkml_runtime").unwrap(); + runtime_module(&module).unwrap(); + let sys = py.import("sys").unwrap(); + let modules = sys.getattr("modules").unwrap(); + let sys_modules = modules.downcast::().unwrap(); + sys_modules.set_item("linkml_runtime", module).unwrap(); + + let locals = PyDict::new(py); + locals + .set_item( + "schema_path", + data_path("personinfo.yaml").to_str().unwrap(), + ) + .unwrap(); + + pyo3::py_run!( + py, + *locals, + r#" +import linkml_runtime as lr +import json +sv = lr.make_schema_view(schema_path) +cls = sv.get_class_view('Container') + +doc1 = { + 'objects': [ + { + 'objecttype': 'personinfo:Person', + 'id': 'P:1', + 'name': 'Alice', + 'current_address': None + } + ] +} +doc2 = { + 'objects': [ + { + 'objecttype': 'personinfo:Person', + 'id': 'P:1', + 'name': 'Alice' + } + ] +} + +v1 = lr.load_json(json.dumps(doc1), sv, cls) +v2 = lr.load_json(json.dumps(doc2), sv, cls) +assert v1['objects'][0].equals(v2['objects'][0], True) +"# + ); + }); +} diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index cf05210..5edbd44 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -1,7 +1,7 @@ -use crate::{load_json_str, LinkMLValue}; +use crate::{LResult, LinkMLError, LinkMLValue, NodeId}; use linkml_schemaview::schemaview::{SchemaView, SlotView}; use serde::{Deserialize, Serialize}; -use serde_json::{Map, Value as JsonValue}; +use serde_json::Value as JsonValue; const IGNORE_ANNOTATION: &str = "diff.linkml.io/ignore"; @@ -81,6 +81,30 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, treat_missing_as_null: b .. }, ) => { + // If objects have an identifier or key slot and it changed, treat as whole-object replacement + // This applies for single-valued and list-valued inlined objects. + let key_slot_name = sc + .key_or_identifier_slot() + .or_else(|| tc.key_or_identifier_slot()) + .map(|s| s.name.clone()); + if let Some(ks) = key_slot_name { + let sid = sm.get(&ks); + let tid = tm.get(&ks); + if let ( + Some(LinkMLValue::Scalar { value: s_id, .. }), + Some(LinkMLValue::Scalar { value: t_id, .. }), + ) = (sid, tid) + { + if s_id != t_id { + out.push(Delta { + path: path.clone(), + old: Some(s.to_json()), + new: Some(t.to_json()), + }); + return; + } + } + } for (k, sv) in sm { let slot_view = sc .slots() @@ -211,84 +235,296 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, treat_missing_as_null: b out } -pub fn patch(source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView) -> LinkMLValue { - let mut json = source.to_json(); +#[derive(Debug, Clone, Default)] +pub struct PatchTrace { + /// Node IDs of subtrees that were newly created by the patch. + /// + /// See [`crate::NodeId`] for semantics: these are internal, ephemeral IDs + /// that are useful for tooling and provenance, not object identifiers. + pub added: Vec, + /// Node IDs of subtrees that were removed by the patch. + pub deleted: Vec, + /// Node IDs of nodes that were directly updated (e.g., parent containers, scalars). + pub updated: Vec, +} + +#[derive(Debug, Clone, Copy)] +pub struct PatchOptions { + pub ignore_no_ops: bool, + pub treat_missing_as_null: bool, +} + +impl Default for PatchOptions { + fn default() -> Self { + Self { + ignore_no_ops: true, + treat_missing_as_null: true, + } + } +} + +pub fn patch( + source: &LinkMLValue, + deltas: &[Delta], + sv: &SchemaView, + opts: PatchOptions, +) -> LResult<(LinkMLValue, PatchTrace)> { + let mut out = source.clone(); + let mut trace = PatchTrace::default(); for d in deltas { - apply_delta(&mut json, d); + apply_delta_linkml(&mut out, &d.path, &d.new, sv, &mut trace, opts)?; } - let json_str = serde_json::to_string(&json).unwrap(); - let conv = sv.converter(); - match source { - LinkMLValue::Object { class: ref c, .. } => load_json_str(&json_str, sv, c, &conv).unwrap(), - _ => panic!("patching non-map values is not supported here"), + Ok((out, trace)) +} + +fn collect_all_ids(value: &LinkMLValue, ids: &mut Vec) { + ids.push(value.node_id()); + match value { + LinkMLValue::Scalar { .. } => {} + LinkMLValue::Null { .. } => {} + LinkMLValue::List { values, .. } => { + for v in values { + collect_all_ids(v, ids); + } + } + LinkMLValue::Mapping { values, .. } | LinkMLValue::Object { values, .. } => { + for v in values.values() { + collect_all_ids(v, ids); + } + } } } -fn apply_delta(value: &mut JsonValue, delta: &Delta) { - apply_delta_inner(value, &delta.path, &delta.new); +fn mark_added_subtree(v: &LinkMLValue, trace: &mut PatchTrace) { + collect_all_ids(v, &mut trace.added); } -fn apply_delta_inner(value: &mut JsonValue, path: &[String], newv: &Option) { +fn mark_deleted_subtree(v: &LinkMLValue, trace: &mut PatchTrace) { + collect_all_ids(v, &mut trace.deleted); +} + +// Removed thin wrappers; call LinkMLValue builders directly at call sites. + +fn apply_delta_linkml( + current: &mut LinkMLValue, + path: &[String], + newv: &Option, + sv: &SchemaView, + trace: &mut PatchTrace, + opts: PatchOptions, +) -> LResult<()> { if path.is_empty() { if let Some(v) = newv { - *value = v.clone(); + let (class_opt, slot_opt) = match current { + LinkMLValue::Object { class, .. } => (Some(class.clone()), None), + LinkMLValue::List { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLValue::Mapping { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLValue::Scalar { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLValue::Null { class, slot, .. } => (class.clone(), Some(slot.clone())), + }; + let conv = sv.converter(); + if let Some(cls) = class_opt { + let new_node = LinkMLValue::from_json(v.clone(), cls, slot_opt, sv, &conv, false)?; + if opts.ignore_no_ops && current.equals(&new_node, opts.treat_missing_as_null) { + // No-op delta; skip to preserve node IDs + return Ok(()); + } + mark_deleted_subtree(current, trace); + mark_added_subtree(&new_node, trace); + *current = new_node; + } } - return; + return Ok(()); } - match value { - JsonValue::Object(map) => { + + match current { + LinkMLValue::Object { values, class, .. } => { let key = &path[0]; if path.len() == 1 { match newv { Some(v) => { - map.insert(key.clone(), v.clone()); + let conv = sv.converter(); + let slot = class.slots().iter().find(|s| s.name == *key).cloned(); + let new_child = LinkMLValue::from_json( + v.clone(), + class.clone(), + slot.clone(), + sv, + &conv, + false, + )?; + if let Some(old_child) = values.get_mut(key) { + if opts.ignore_no_ops + && old_child.equals(&new_child, opts.treat_missing_as_null) + { + // no-op; skip + return Ok(()); + } + match (&mut *old_child, &new_child) { + ( + LinkMLValue::Scalar { value: ov, .. }, + LinkMLValue::Scalar { value: nv, .. }, + ) if !v.is_object() && !v.is_array() => { + // In-place scalar update: keep node_id stable and mark child node + *ov = nv.clone(); + trace.updated.push(old_child.node_id()); + } + _ => { + let old_snapshot = std::mem::replace(old_child, new_child); + mark_deleted_subtree(&old_snapshot, trace); + mark_added_subtree(old_child, trace); + trace.updated.push(current.node_id()); + } + } + } else { + // adding a Null assignment may be a no-op when treating missing as null + if opts.ignore_no_ops + && opts.treat_missing_as_null + && matches!(new_child, LinkMLValue::Null { .. }) + { + return Ok(()); + } + // mark before insert + mark_added_subtree(&new_child, trace); + values.insert(key.clone(), new_child); + trace.updated.push(current.node_id()); + } } None => { - map.remove(key); + if let Some(old_child) = values.get(key) { + if opts.ignore_no_ops + && opts.treat_missing_as_null + && matches!(old_child, LinkMLValue::Null { .. }) + { + // deleting a Null assignment: no-op + return Ok(()); + } + } + if let Some(old_child) = values.remove(key) { + mark_deleted_subtree(&old_child, trace); + trace.updated.push(current.node_id()); + } } } - } else { - let entry = map - .entry(key.clone()) - .or_insert(JsonValue::Object(Map::new())); - apply_delta_inner(entry, &path[1..], newv); + } else if let Some(child) = values.get_mut(key) { + apply_delta_linkml(child, &path[1..], newv, sv, trace, opts)?; } } - JsonValue::Array(arr) => { - let idx: usize = path[0].parse().unwrap(); + LinkMLValue::Mapping { values, slot, .. } => { + let key = &path[0]; if path.len() == 1 { match newv { Some(v) => { - if idx < arr.len() { - arr[idx] = v.clone(); - } else if idx == arr.len() { - arr.push(v.clone()); - } else { - while arr.len() < idx { - arr.push(JsonValue::Null); + let conv = sv.converter(); + let new_child = LinkMLValue::build_mapping_entry_for_slot( + slot, + v.clone(), + sv, + &conv, + Vec::new(), + )?; + if let Some(old_child) = values.get(key) { + if opts.ignore_no_ops + && old_child.equals(&new_child, opts.treat_missing_as_null) + { + return Ok(()); } - arr.push(v.clone()); + mark_deleted_subtree(old_child, trace); } + // mark before insert + mark_added_subtree(&new_child, trace); + values.insert(key.clone(), new_child); + trace.updated.push(current.node_id()); } None => { - if idx < arr.len() { - arr.remove(idx); + if let Some(old_child) = values.remove(key) { + mark_deleted_subtree(&old_child, trace); + trace.updated.push(current.node_id()); } } } - } else { - if idx >= arr.len() { - arr.resize(idx + 1, JsonValue::Null); - } - apply_delta_inner(&mut arr[idx], &path[1..], newv); + } else if let Some(child) = values.get_mut(key) { + apply_delta_linkml(child, &path[1..], newv, sv, trace, opts)?; } } - _ => { - if path.is_empty() { - if let Some(v) = newv { - *value = v.clone(); + LinkMLValue::List { + values, + slot, + class, + .. + } => { + let idx: usize = path[0].parse().map_err(|_| { + LinkMLError(format!("invalid list index '{}' in delta path", path[0])) + })?; + if path.len() == 1 { + match newv { + Some(v) => { + if idx < values.len() { + let conv = sv.converter(); + let new_child = LinkMLValue::build_list_item_for_slot( + slot, + class.as_ref(), + v.clone(), + sv, + &conv, + Vec::new(), + )?; + if opts.ignore_no_ops + && values[idx].equals(&new_child, opts.treat_missing_as_null) + { + return Ok(()); + } + match (&mut values[idx], &new_child) { + ( + LinkMLValue::Scalar { value: ov, .. }, + LinkMLValue::Scalar { value: nv, .. }, + ) if !v.is_object() && !v.is_array() => { + *ov = nv.clone(); + trace.updated.push(values[idx].node_id()); + } + _ => { + let old = std::mem::replace(&mut values[idx], new_child); + mark_deleted_subtree(&old, trace); + mark_added_subtree(&values[idx], trace); + trace.updated.push(current.node_id()); + } + } + } else if idx == values.len() { + let conv = sv.converter(); + let new_child = LinkMLValue::build_list_item_for_slot( + slot, + class.as_ref(), + v.clone(), + sv, + &conv, + Vec::new(), + )?; + // mark before push + mark_added_subtree(&new_child, trace); + values.push(new_child); + trace.updated.push(current.node_id()); + } else { + return Err(LinkMLError(format!( + "list index out of bounds in add: {} > {}", + idx, + values.len() + ))); + } + } + None => { + if idx < values.len() { + let old = values.remove(idx); + mark_deleted_subtree(&old, trace); + trace.updated.push(current.node_id()); + } + } } + } else if idx < values.len() { + apply_delta_linkml(&mut values[idx], &path[1..], newv, sv, trace, opts)?; } } + LinkMLValue::Scalar { .. } => {} + LinkMLValue::Null { .. } => {} } + Ok(()) } diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index 71fed76..8d3d242 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -5,10 +5,11 @@ use serde_json::Value as JsonValue; use std::collections::HashMap; use std::fs; use std::path::Path; +use std::sync::atomic::{AtomicU64, Ordering}; pub mod diff; pub mod turtle; -pub use diff::{diff, patch, Delta}; +pub use diff::{diff, patch, Delta, PatchTrace}; #[derive(Debug)] pub struct LinkMLError(pub String); @@ -56,36 +57,74 @@ fn slot_matches_key(slot: &SlotView, key: &str) -> bool { #[derive(Clone)] pub enum LinkMLValue { Scalar { + node_id: NodeId, value: JsonValue, slot: SlotView, class: Option, sv: SchemaView, }, Null { + node_id: NodeId, slot: SlotView, class: Option, sv: SchemaView, }, List { + node_id: NodeId, values: Vec, slot: SlotView, class: Option, sv: SchemaView, }, Mapping { + node_id: NodeId, values: HashMap, slot: SlotView, class: Option, sv: SchemaView, }, Object { + node_id: NodeId, values: HashMap, class: ClassView, sv: SchemaView, }, } +/// Internal node identifier used for provenance and update tracking. +/// +/// Node IDs are assigned to every `LinkMLValue` node when values are constructed or +/// transformed. They exist solely as technical identifiers to help with patching and +/// provenance (for example, `PatchTrace.added`/`deleted` collect `NodeId`s of affected +/// subtrees). They are not intended to identify domain objects — for that, use LinkML +/// identifier or key slots as defined in the schema. +/// +/// Important properties: +/// - Local and ephemeral: loading the same data twice will yield different `NodeId`s. +/// - Non-persistent: never serialize or expose as a model identifier. +/// - Useful for tracking modifications within a single in-memory value. +pub type NodeId = u64; + +static NEXT_NODE_ID: AtomicU64 = AtomicU64::new(1); + +fn new_node_id() -> NodeId { + NEXT_NODE_ID.fetch_add(1, Ordering::Relaxed) +} + impl LinkMLValue { + /// Returns the internal [`NodeId`] of this node. + /// + /// This ID is only for internal provenance/update tracking and is not a + /// semantic identifier of the represented object. + pub fn node_id(&self) -> NodeId { + match self { + LinkMLValue::Scalar { node_id, .. } + | LinkMLValue::List { node_id, .. } + | LinkMLValue::Mapping { node_id, .. } + | LinkMLValue::Object { node_id, .. } + | LinkMLValue::Null { node_id, .. } => *node_id, + } + } /// Navigate the value by a path of strings, where each element is either /// a dictionary key (for maps) or a list index (for lists). /// Returns `Some(&LinkMLValue)` if the full path can be resolved, otherwise `None`. @@ -114,6 +153,167 @@ impl LinkMLValue { } Some(current) } + + /// Compare two LinkMLValue instances for semantic equality per the + /// LinkML Instances specification (Identity conditions). + /// + /// Key points implemented: + /// - Null equals Null. + /// - Scalars: equal iff same underlying atomic value and compatible typed context + /// (same Enum range when present; otherwise same TypeDefinition range name when present). + /// - Lists: equal iff same length and pairwise equal in order. + /// - Mappings: equal iff same keys and values equal for each key (order-insensitive). + /// - Objects: equal iff same instantiated class (by identity) and slot assignments match; when + /// `treat_missing_as_null` is true, Null is treated as omitted (normalized), otherwise Null is + /// distinct from missing. + pub fn equals(&self, other: &LinkMLValue, treat_missing_as_null: bool) -> bool { + use LinkMLValue::*; + match (self, other) { + (Null { .. }, Null { .. }) => true, + ( + Scalar { + value: v1, + slot: s1, + .. + }, + Scalar { + value: v2, + slot: s2, + .. + }, + ) => { + // If either slot has an enum range, both must and enum names must match + let e1 = s1.get_range_enum(); + let e2 = s2.get_range_enum(); + if e1.is_some() || e2.is_some() { + match (e1, e2) { + (Some(ev1), Some(ev2)) => { + if ev1.schema_id() != ev2.schema_id() || ev1.name() != ev2.name() { + return false; + } + } + _ => return false, + } + } else { + // Compare type ranges if explicitly set on both + let t1 = s1.definition().range.as_ref(); + let t2 = s2.definition().range.as_ref(); + if let (Some(r1), Some(r2)) = (t1, t2) { + if r1 != r2 { + return false; + } + } + } + v1 == v2 + } + (List { values: a, .. }, List { values: b, .. }) => { + if a.len() != b.len() { + return false; + } + for (x, y) in a.iter().zip(b.iter()) { + if !x.equals(y, treat_missing_as_null) { + return false; + } + } + true + } + (Mapping { values: a, .. }, Mapping { values: b, .. }) => { + if a.len() != b.len() { + return false; + } + for (k, va) in a.iter() { + match b.get(k) { + Some(vb) => { + if !va.equals(vb, treat_missing_as_null) { + return false; + } + } + None => return false, + } + } + true + } + ( + Object { + values: a, + class: ca, + sv: sva, + .. + }, + Object { + values: b, + class: cb, + sv: svb, + .. + }, + ) => { + // Compare class identity via canonical URIs if possible + let ida = ca.canonical_uri(); + let idb = cb.canonical_uri(); + let class_equal = if let Some(conv) = sva.converter_for_schema(ca.schema_id()) { + // Use 'sva' for comparison; identifiers are global across schemas + sva.identifier_equals(&ida, &idb, conv).unwrap_or(false) + } else if let Some(conv) = svb.converter_for_schema(cb.schema_id()) { + svb.identifier_equals(&ida, &idb, conv).unwrap_or(false) + } else { + ca.name() == cb.name() + }; + if !class_equal { + return false; + } + + if treat_missing_as_null { + // Normalize conceptually by ignoring entries whose value is Null + let count_a = a.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); + let count_b = b.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); + if count_a != count_b { + return false; + } + for (k, va) in a.iter().filter(|(_, v)| !matches!(v, Null { .. })) { + match b.get(k) { + Some(vb) => { + if matches!(vb, Null { .. }) { + return false; + } + if !va.equals(vb, treat_missing_as_null) { + return false; + } + } + None => return false, + } + } + // Ensure b has no extra non-null keys not in a + for (k, _vb) in b.iter().filter(|(_, v)| !matches!(v, Null { .. })) { + match a.get(k) { + Some(va) => { + if matches!(va, Null { .. }) { + return false; + } + } + None => return false, + } + } + true + } else { + if a.len() != b.len() { + return false; + } + for (k, va) in a.iter() { + match b.get(k) { + Some(vb) => { + if !va.equals(vb, treat_missing_as_null) { + return false; + } + } + None => return false, + } + } + true + } + } + _ => false, + } + } fn find_scalar_slot_for_inlined_map( class: &ClassView, key_slot_name: &str, @@ -203,6 +403,7 @@ impl LinkMLValue { ); } Ok(LinkMLValue::Object { + node_id: new_node_id(), values, class: class.clone(), sv: sv.clone(), @@ -221,38 +422,20 @@ impl LinkMLValue { match (inside_list, value) { (false, JsonValue::Array(arr)) => { let mut values = Vec::new(); - let class_range: Option = sl.get_range_class(); - let slot_for_item = if class_range.is_some() { - None - } else { - Some(sl.clone()) - }; for (i, v) in arr.into_iter().enumerate() { let mut p = path.clone(); p.push(format!("{}[{}]", sl.name, i)); - let v_transformed = - if let (Some(cr), JsonValue::String(s)) = (class_range.as_ref(), &v) { - if let Some(id_slot) = cr.identifier_slot() { - let mut m = serde_json::Map::new(); - m.insert(id_slot.name.clone(), JsonValue::String(s.clone())); - JsonValue::Object(m) - } else { - v - } - } else { - v - }; - values.push(Self::from_json_internal( - v_transformed, - class_range.as_ref().unwrap_or(&class).clone(), - slot_for_item.clone(), + values.push(Self::build_list_item_for_slot( + &sl, + Some(&class), + v, sv, conv, - true, p, )?); } Ok(LinkMLValue::List { + node_id: new_node_id(), values, slot: sl.clone(), class: Some(class.clone()), @@ -261,6 +444,7 @@ impl LinkMLValue { } // Preserve explicit null as a Null value for list-valued slot (false, JsonValue::Null) => Ok(LinkMLValue::Null { + node_id: new_node_id(), slot: sl.clone(), class: Some(class.clone()), sv: sv.clone(), @@ -272,6 +456,7 @@ impl LinkMLValue { path_to_string(&path) ))), (true, other) => Ok(LinkMLValue::Scalar { + node_id: new_node_id(), value: other, slot: sl.clone(), class: Some(class.clone()), @@ -290,95 +475,17 @@ impl LinkMLValue { ) -> LResult { match value { JsonValue::Object(map) => { - let range_cv = sl - .definition() - .range - .as_ref() - .and_then(|r| sv.get_class(&Identifier::new(r), conv).ok().flatten()) - .ok_or_else(|| { - LinkMLError(format!( - "mapping slot must have class range at {}", - path_to_string(&path) - )) - })?; let mut values = HashMap::new(); for (k, v) in map.into_iter() { - let base = sv - .get_class(&Identifier::new(range_cv.name()), conv) - .ok() - .flatten() - .unwrap_or_else(|| range_cv.clone()); - let child = match v { - JsonValue::Object(m) => { - // Select the most specific subclass using any type designator in the map - let selected = Self::select_class(&m, &base, sv, conv); - let mut child_values = HashMap::new(); - for (ck, cv) in m.into_iter() { - let slot_tmp = selected - .slots() - .iter() - .find(|s| slot_matches_key(s, &ck)) - .cloned(); - let mut p = path.clone(); - p.push(format!("{}:{}", k, ck)); - let key_name = slot_tmp - .as_ref() - .map(|s| s.name.clone()) - .unwrap_or_else(|| ck.clone()); - child_values.insert( - key_name, - Self::from_json_internal( - cv, - selected.clone(), - slot_tmp, - sv, - conv, - false, - p, - )?, - ); - } - LinkMLValue::Object { - values: child_values, - class: selected, - sv: sv.clone(), - } - } - other => { - // Scalar mapping value: attach it to a chosen scalar slot if any - let scalar_slot = Self::find_scalar_slot_for_inlined_map( - &base, - range_cv - .key_or_identifier_slot() - .map(|s| s.name.as_str()) - .unwrap_or(""), - ) - .ok_or_else(|| { - LinkMLError(format!( - "no scalar slot available for inlined mapping at {}", - path_to_string(&path) - )) - })?; - let mut child_values = HashMap::new(); - child_values.insert( - scalar_slot.name.clone(), - LinkMLValue::Scalar { - value: other, - slot: scalar_slot.clone(), - class: Some(base.clone()), - sv: sv.clone(), - }, - ); - LinkMLValue::Object { - values: child_values, - class: base.clone(), - sv: sv.clone(), - } - } - }; + let child = Self::build_mapping_entry_for_slot(sl, v, sv, conv, { + let mut p = path.clone(); + p.push(k.clone()); + p + })?; values.insert(k, child); } Ok(LinkMLValue::Mapping { + node_id: new_node_id(), values, slot: sl.clone(), class: class.clone(), @@ -387,6 +494,7 @@ impl LinkMLValue { } // Preserve explicit null as a Null value for mapping-valued slot JsonValue::Null => Ok(LinkMLValue::Null { + node_id: new_node_id(), slot: sl.clone(), class: class.clone(), sv: sv.clone(), @@ -415,27 +523,21 @@ impl LinkMLValue { class.name() )) })?; - let class_range: Option = sl.get_range_class(); - let slot_for_item = if class_range.is_some() { - None - } else { - Some(sl.clone()) - }; let mut values = Vec::new(); for (i, v) in arr.into_iter().enumerate() { let mut p = path.clone(); p.push(format!("[{}]", i)); - values.push(Self::from_json_internal( + values.push(Self::build_list_item_for_slot( + &sl, + Some(&class), v, - class_range.as_ref().unwrap_or(&class).clone(), - slot_for_item.clone(), sv, conv, - false, p, )?); } Ok(LinkMLValue::List { + node_id: new_node_id(), values, slot: sl, class: Some(class), @@ -482,6 +584,7 @@ impl LinkMLValue { ); } Ok(LinkMLValue::Object { + node_id: new_node_id(), values, class: chosen, sv: sv.clone(), @@ -505,12 +608,14 @@ impl LinkMLValue { })?; if value.is_null() { Ok(LinkMLValue::Null { + node_id: new_node_id(), slot: sl, class: Some(class.clone()), sv: sv.clone(), }) } else { Ok(LinkMLValue::Scalar { + node_id: new_node_id(), value, slot: sl, class: Some(class.clone()), @@ -568,6 +673,135 @@ impl LinkMLValue { ) -> LResult { Self::from_json_internal(value, class, slot, sv, conv, inside_list, Vec::new()) } + + // Shared builders (used by loaders and patch logic) + pub(crate) fn build_list_item_for_slot( + list_slot: &SlotView, + list_class: Option<&ClassView>, + value: JsonValue, + sv: &SchemaView, + conv: &Converter, + path: Vec, + ) -> LResult { + let class_range: Option = list_slot.get_range_class(); + let slot_for_item = if class_range.is_some() { + None + } else { + Some(list_slot.clone()) + }; + let v_transformed = if let (Some(cr), JsonValue::String(s)) = (class_range.as_ref(), &value) + { + if let Some(id_slot) = cr.identifier_slot() { + let mut m = serde_json::Map::new(); + m.insert(id_slot.name.clone(), JsonValue::String(s.clone())); + JsonValue::Object(m) + } else { + value + } + } else { + value + }; + Self::from_json_internal( + v_transformed, + class_range + .as_ref() + .or(list_class) + .cloned() + .ok_or_else(|| LinkMLError("list item class context".to_string()))?, + slot_for_item, + sv, + conv, + true, + path, + ) + } + + pub(crate) fn build_mapping_entry_for_slot( + map_slot: &SlotView, + value: JsonValue, + sv: &SchemaView, + conv: &Converter, + path: Vec, + ) -> LResult { + let range_cv = map_slot + .definition() + .range + .as_ref() + .and_then(|r| sv.get_class(&Identifier::new(r), conv).ok().flatten()) + .ok_or_else(|| { + LinkMLError(format!( + "mapping slot must have class range at {}", + path_to_string(&path) + )) + })?; + match value { + JsonValue::Object(m) => { + let selected = Self::select_class(&m, &range_cv, sv, conv); + let mut child_values = HashMap::new(); + for (ck, cv) in m.into_iter() { + let slot_tmp = selected + .slots() + .iter() + .find(|s| slot_matches_key(s, &ck)) + .cloned(); + let mut p = path.clone(); + p.push(ck.clone()); + let key_name = slot_tmp + .as_ref() + .map(|s| s.name.clone()) + .unwrap_or_else(|| ck.clone()); + child_values.insert( + key_name, + Self::from_json_internal( + cv, + selected.clone(), + slot_tmp, + sv, + conv, + false, + p, + )?, + ); + } + Ok(LinkMLValue::Object { + node_id: new_node_id(), + values: child_values, + class: selected, + sv: sv.clone(), + }) + } + other => { + let key_slot_name = range_cv + .key_or_identifier_slot() + .map(|s| s.name.as_str()) + .unwrap_or(""); + let scalar_slot = Self::find_scalar_slot_for_inlined_map(&range_cv, key_slot_name) + .ok_or_else(|| { + LinkMLError(format!( + "no scalar slot available for inlined mapping at {}", + path_to_string(&path) + )) + })?; + let mut child_values = HashMap::new(); + child_values.insert( + scalar_slot.name.clone(), + LinkMLValue::Scalar { + node_id: new_node_id(), + value: other, + slot: scalar_slot.clone(), + class: Some(range_cv.clone()), + sv: sv.clone(), + }, + ); + Ok(LinkMLValue::Object { + node_id: new_node_id(), + values: child_values, + class: range_cv, + sv: sv.clone(), + }) + } + } + } } pub fn load_yaml_file( diff --git a/src/runtime/tests/diff.rs b/src/runtime/tests/diff.rs index 187cf73..e60a71c 100644 --- a/src/runtime/tests/diff.rs +++ b/src/runtime/tests/diff.rs @@ -1,4 +1,4 @@ -use linkml_runtime::{diff, load_yaml_file, patch}; +use linkml_runtime::{diff, load_json_str, load_yaml_file, patch}; use linkml_schemaview::identifier::{converter_from_schema, Identifier}; use linkml_schemaview::io::from_yaml; use linkml_schemaview::schemaview::SchemaView; @@ -57,7 +57,16 @@ fn diff_and_patch_person() { } } - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); let patched_json = patched.to_json(); let target_json = tgt.to_json(); let src_json = src.to_json(); @@ -93,7 +102,16 @@ fn diff_ignore_missing_target() { let deltas = diff(&src, &tgt, false); assert!(deltas.is_empty()); - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); let patched_json = patched.to_json(); let src_json = src.to_json(); assert_eq!(patched_json, src_json); @@ -135,7 +153,16 @@ fn diff_and_patch_personinfo() { assert!(tgt.navigate_path(&d.path).is_some()); } } - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } @@ -160,126 +187,78 @@ fn diff_null_and_missing_semantics() { .unwrap(); // X -> null => update to null - if let LinkMLValue::Object { values, .. } = src.clone() { - let mut tgt_values = values.clone(); - let age_slot = class - .slots() - .iter() - .find(|s| s.name == "age") - .expect("age slot"); - tgt_values.insert( - "age".to_string(), - LinkMLValue::Null { - slot: age_slot.clone(), - class: Some(class.clone()), - sv: sv.clone(), - }, - ); - let deltas = diff( - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: tgt_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - false, - ); + if let LinkMLValue::Object { .. } = src.clone() { + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.insert("age".to_string(), serde_json::Value::Null); + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src, &tgt, false); assert!(deltas .iter() .any(|d| d.path == vec!["age".to_string()] && d.new == Some(serde_json::Value::Null))); } // null -> X => update from null - if let LinkMLValue::Object { values, .. } = src.clone() { - let mut src_values = values.clone(); - let age_slot = class - .slots() - .iter() - .find(|s| s.name == "age") - .expect("age slot"); - src_values.insert( - "age".to_string(), - LinkMLValue::Null { - slot: age_slot.clone(), - class: Some(class.clone()), - sv: sv.clone(), - }, - ); - let deltas = diff( - &LinkMLValue::Object { - values: src_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - false, - ); + if let LinkMLValue::Object { .. } = src.clone() { + let mut src_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = src_json { + m.insert("age".to_string(), serde_json::Value::Null); + } + let src_with_null = load_json_str( + &serde_json::to_string(&src_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src_with_null, &src, false); assert!(deltas.iter().any(|d| d.path == vec!["age".to_string()] && d.old == Some(serde_json::Value::Null) && d.new.is_some())); } // missing -> X => add - if let LinkMLValue::Object { values, .. } = src.clone() { - let mut src_values = values.clone(); - src_values.remove("age"); - let deltas = diff( - &LinkMLValue::Object { - values: src_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - false, - ); + if let LinkMLValue::Object { .. } = src.clone() { + let mut src_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = src_json { + m.remove("age"); + } + let src_missing = load_json_str( + &serde_json::to_string(&src_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src_missing, &src, false); assert!(deltas .iter() .any(|d| d.path == vec!["age".to_string()] && d.old.is_none() && d.new.is_some())); } // X -> missing: ignored by default; produce update-to-null when treat_missing_as_null=true - if let LinkMLValue::Object { values, .. } = src.clone() { - let mut tgt_values = values.clone(); - tgt_values.remove("age"); - let deltas = diff( - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: tgt_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - false, - ); + if let LinkMLValue::Object { .. } = src.clone() { + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.remove("age"); + } + let tgt_missing = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src, &tgt_missing, false); assert!(deltas.iter().all(|d| d.path != vec!["age".to_string()])); - let deltas2 = diff( - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: tgt_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - true, - ); + let deltas2 = diff(&src, &tgt_missing, true); assert!(deltas2 .iter() .any(|d| d.path == vec!["age".to_string()] && d.new == Some(serde_json::Value::Null))) diff --git a/src/runtime/tests/diff_identifier.rs b/src/runtime/tests/diff_identifier.rs new file mode 100644 index 0000000..56aa667 --- /dev/null +++ b/src/runtime/tests/diff_identifier.rs @@ -0,0 +1,282 @@ +use linkml_runtime::{diff, load_json_str, load_yaml_file, patch}; +use linkml_schemaview::identifier::{converter_from_schema, Identifier}; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use serde_json::Value as JsonValue; +use std::path::{Path, PathBuf}; + +fn data_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +#[test] +fn single_inlined_object_identifier_change_is_replacement() { + // Use personinfo schema; diagnosis is an inlined object with identifier (via NamedThing) + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Modify diagnosis.id of the first medical history event for P:002 + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = objects.get_mut(2) { + if let Some(JsonValue::Array(mh)) = p2.get_mut("has_medical_history") { + if let Some(JsonValue::Object(ev0)) = mh.get_mut(0) { + if let Some(JsonValue::Object(diag)) = ev0.get_mut("diagnosis") { + diag.insert( + "id".to_string(), + JsonValue::String("CODE:D9999".to_string()), + ); + } + } + } + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect a single replacement at the diagnosis object path + assert_eq!(deltas.len(), 1); + let d = &deltas[0]; + assert_eq!( + d.path, + vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string() + ] + ); + assert!(d.old.is_some() && d.new.is_some()); + + // Patch should yield target + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn single_inlined_object_non_identifier_change_is_field_delta() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Modify diagnosis.name only + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = objects.get_mut(2) { + if let Some(JsonValue::Array(mh)) = p2.get_mut("has_medical_history") { + if let Some(JsonValue::Object(ev0)) = mh.get_mut(0) { + if let Some(JsonValue::Object(diag)) = ev0.get_mut("diagnosis") { + diag.insert( + "name".to_string(), + JsonValue::String("new name".to_string()), + ); + } + } + } + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + assert!(deltas.iter().any(|d| d.path + == vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string(), + "name".to_string() + ])); + // Must not collapse to whole-object replacement here + assert!(!deltas.iter().any(|d| d.path + == vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string() + ])); + + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn list_inlined_object_identifier_change_is_replacement() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Change the id of the third object (P:002) + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = objects.get_mut(2) { + p2.insert("id".to_string(), JsonValue::String("P:099".to_string())); + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect a single replacement at the list item path + assert!(deltas + .iter() + .any(|d| d.path == vec!["objects".to_string(), "2".to_string()])); + assert!(!deltas + .iter() + .any(|d| d.path == vec!["objects".to_string(), "2".to_string(), "id".to_string()])); + + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn mapping_inlined_identifier_change_is_add_delete() { + // Use mapping schema with inlined_as_dict keyed by 'key' + let schema = from_yaml(Path::new(&data_path("mapping_schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let bag = sv + .get_class(&Identifier::new("Bag"), &conv) + .unwrap() + .expect("class not found"); + + let src = linkml_runtime::load_json_file( + Path::new(&data_path("mapping_data.json")), + &sv, + &bag, + &conv, + ) + .unwrap(); + + // Rename mapping key 'alpha' to 'alpha2' + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Object(things)) = root.get_mut("things") { + if let Some(alpha) = things.remove("alpha") { + things.insert("alpha2".to_string(), alpha); + } + } + } + let tgt = load_json_str(&serde_json::to_string(&tgt_json).unwrap(), &sv, &bag, &conv).unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect one delete and one add at mapping keys; no inner key-slot deltas + assert!(deltas + .iter() + .any(|d| d.path == vec!["things".to_string(), "alpha".to_string()] && d.new.is_none())); + assert!(deltas + .iter() + .any(|d| d.path == vec!["things".to_string(), "alpha2".to_string()] && d.old.is_none())); + assert!(!deltas + .iter() + .any(|d| d.path == vec!["things".to_string(), "alpha".to_string(), "key".to_string()])); + + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} diff --git a/src/runtime/tests/equality.rs b/src/runtime/tests/equality.rs new file mode 100644 index 0000000..e9520f5 --- /dev/null +++ b/src/runtime/tests/equality.rs @@ -0,0 +1,180 @@ +use linkml_runtime::{load_json_str, load_yaml_str, LinkMLValue}; +use linkml_schemaview::identifier::converter_from_schema; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use std::path::Path; + +fn data_path(name: &str) -> std::path::PathBuf { + let mut p = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +#[test] +fn object_equality_ignores_null_assignments() { + // Load personinfo schema and Container class + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc_with_null = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + current_address: null +"#; + let doc_without_slot = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" +"#; + let v1 = load_yaml_str(doc_with_null, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc_without_slot, &sv, &container, &conv).unwrap(); + let p1 = v1.navigate_path(["objects", "0"]).unwrap(); + let p2 = v2.navigate_path(["objects", "0"]).unwrap(); + assert!( + p1.equals(p2, true), + "Person with null assignment should equal omission" + ); +} + +#[test] +fn list_identity_is_order_sensitive() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc_a = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + has_employment_history: + - started_at_time: 2019-01-01 + is_current: true + - started_at_time: 2020-01-01 + is_current: false +"#; + let doc_b = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + has_employment_history: + - started_at_time: 2020-01-01 + is_current: false + - started_at_time: 2019-01-01 + is_current: true +"#; + let v1 = load_yaml_str(doc_a, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc_b, &sv, &container, &conv).unwrap(); + let p1 = v1.navigate_path(["objects", "0"]).unwrap(); + let p2 = v2.navigate_path(["objects", "0"]).unwrap(); + assert!(matches!(p1, LinkMLValue::Object { .. })); + assert!(matches!(p2, LinkMLValue::Object { .. })); + assert!(!p1.equals(p2, true), "List order must affect equality"); +} + +#[test] +fn mapping_equality_is_key_based_not_ordered() { + // Load mapping schema and Bag class + let schema = from_yaml(Path::new(&data_path("mapping_schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let bag = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Bag"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc1 = r#"{ + "things": { + "alpha": {"typeURI": "ThingA", "a_only": "foo", "common": "shared"}, + "beta": {"typeURI": "ThingB", "b_only": true, "common": "shared"} + } +}"#; + let doc2 = r#"{ + "things": { + "beta": {"typeURI": "ThingB", "b_only": true, "common": "shared"}, + "alpha": {"typeURI": "ThingA", "a_only": "foo", "common": "shared"} + } +}"#; + let v1 = load_json_str(doc1, &sv, &bag, &conv).unwrap(); + let v2 = load_json_str(doc2, &sv, &bag, &conv).unwrap(); + let m1 = v1.navigate_path(["things"]).unwrap(); + let m2 = v2.navigate_path(["things"]).unwrap(); + assert!(matches!(m1, LinkMLValue::Mapping { .. })); + assert!(matches!(m2, LinkMLValue::Mapping { .. })); + assert!( + m1.equals(m2, true), + "Mapping equality should ignore key order" + ); +} + +#[test] +fn enum_scalar_equality_respects_value_and_range() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc1 = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + gender: "cisgender man" +"#; + let doc2 = r#" +objects: + - objecttype: personinfo:Person + id: "P:2" + name: "Bob" + gender: "cisgender man" +"#; + let doc3 = r#" +objects: + - objecttype: personinfo:Person + id: "P:3" + name: "Carol" + gender: "cisgender woman" +"#; + let v1 = load_yaml_str(doc1, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc2, &sv, &container, &conv).unwrap(); + let v3 = load_yaml_str(doc3, &sv, &container, &conv).unwrap(); + let g1 = v1.navigate_path(["objects", "0", "gender"]).unwrap(); + let g2 = v2.navigate_path(["objects", "0", "gender"]).unwrap(); + let g3 = v3.navigate_path(["objects", "0", "gender"]).unwrap(); + assert!(g1.equals(g2, true)); + assert!(!g1.equals(g3, true)); +} diff --git a/src/runtime/tests/trace.rs b/src/runtime/tests/trace.rs new file mode 100644 index 0000000..72a52ee --- /dev/null +++ b/src/runtime/tests/trace.rs @@ -0,0 +1,226 @@ +use linkml_runtime::{diff, load_json_str, load_yaml_file}; +use linkml_schemaview::identifier::{converter_from_schema, Identifier}; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; + +fn data_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +fn info_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +fn collect_ids(v: &linkml_runtime::LinkMLValue, out: &mut Vec) { + out.push(v.node_id()); + match v { + linkml_runtime::LinkMLValue::Scalar { .. } => {} + linkml_runtime::LinkMLValue::Null { .. } => {} + linkml_runtime::LinkMLValue::List { values, .. } => { + for c in values { + collect_ids(c, out); + } + } + linkml_runtime::LinkMLValue::Mapping { values, .. } + | linkml_runtime::LinkMLValue::Object { values, .. } => { + for c in values.values() { + collect_ids(c, out); + } + } + } +} + +#[test] +fn node_ids_preserved_scalar_update() { + let schema = from_yaml(Path::new(&data_path("schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let class = sv + .get_class(&Identifier::new("Person"), &conv) + .unwrap() + .expect("class not found"); + let src = load_yaml_file( + Path::new(&data_path("person_valid.yaml")), + &sv, + &class, + &conv, + ) + .unwrap(); + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.insert("age".to_string(), serde_json::json!(99)); + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + let (patched, trace) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + + assert!(trace.added.is_empty()); + assert!(trace.deleted.is_empty()); + assert!(!trace.updated.is_empty()); + + let src_age = src.navigate_path(["age"]).unwrap(); + let pat_age = patched.navigate_path(["age"]).unwrap(); + assert_eq!(src_age.node_id(), pat_age.node_id()); + assert!(trace.updated.contains(&pat_age.node_id())); +} + +#[test] +fn patch_trace_add_in_list() { + let schema = from_yaml(Path::new(&info_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + let base = load_yaml_file( + Path::new(&info_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Add a new object to the 'objects' list + let mut base_json = base.to_json(); + if let serde_json::Value::Object(ref mut root) = base_json { + if let Some(serde_json::Value::Array(ref mut arr)) = root.get_mut("objects") { + let new_obj = serde_json::json!({ + "id": "P:999", + "name": "Added Person", + "objecttype": "https://w3id.org/linkml/examples/personinfo/Person" + }); + arr.push(new_obj); + } + } + let target = load_json_str( + &serde_json::to_string(&base_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&base, &target, false); + let mut pre = Vec::new(); + collect_ids(&base, &mut pre); + let (patched, trace) = linkml_runtime::patch( + &base, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + let mut post = Vec::new(); + collect_ids(&patched, &mut post); + + let pre_set: HashSet = pre.into_iter().collect(); + let post_set: HashSet = post.into_iter().collect(); + let added: HashSet = post_set.difference(&pre_set).copied().collect(); + let trace_added: HashSet = trace.added.iter().copied().collect(); + assert_eq!(added, trace_added); + assert!(!added.is_empty()); +} + +#[test] +fn patch_missing_to_null_semantics() { + use linkml_runtime::LinkMLValue; + // Use simple schema + let schema = from_yaml(Path::new(&data_path("schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let class = sv + .get_class(&Identifier::new("Person"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("person_partial.yaml")), + &sv, + &class, + &conv, + ) + .unwrap(); + // Build delta: set age to explicit null + let deltas = vec![linkml_runtime::Delta { + path: vec!["age".to_string()], + old: None, + new: Some(serde_json::Value::Null), + }]; + + // treat_missing_as_null = true => no-op; no trace changes, no node id changes + let pre_id = src.node_id(); + let (patched_same, trace_same) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: true, + }, + ) + .unwrap(); + assert!( + trace_same.added.is_empty() + && trace_same.deleted.is_empty() + && trace_same.updated.is_empty() + ); + assert_eq!(pre_id, patched_same.node_id()); + // Equality under treat_missing_as_null=true must hold + assert!(src.equals(&patched_same, true)); + // And age remains absent (since explicit null is treated as omitted) + if let LinkMLValue::Object { values, .. } = &patched_same { + assert!(!values.contains_key("age")); + } + + // treat_missing_as_null = false => apply explicit null + let (patched_null, trace_applied) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert!(trace_applied.updated.contains(&patched_null.node_id())); + // age present as Null + if let LinkMLValue::Object { values, .. } = &patched_null { + assert!(matches!(values.get("age"), Some(LinkMLValue::Null { .. }))); + } else { + panic!("expected object root"); + } +} diff --git a/src/tools/src/bin/linkml_patch.rs b/src/tools/src/bin/linkml_patch.rs index cf1555a..99a990d 100644 --- a/src/tools/src/bin/linkml_patch.rs +++ b/src/tools/src/bin/linkml_patch.rs @@ -23,6 +23,12 @@ struct Args { /// Output patched file; defaults to stdout #[arg(short, long)] output: Option, + /// Treat missing assignments as equivalent to explicit null for equality + #[arg(long, default_value_t = true)] + treat_missing_as_null: bool, + /// Skip deltas that do not change the value (no-ops) + #[arg(long, default_value_t = true)] + ignore_noop: bool, } fn load_value( @@ -92,7 +98,15 @@ fn main() -> Result<(), Box> { } else { serde_yaml::from_str(&delta_text)? }; - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: args.ignore_noop, + treat_missing_as_null: args.treat_missing_as_null, + }, + )?; write_value(args.output.as_deref(), &patched)?; Ok(()) }