Skip to content

Commit 8791437

Browse files
committed
Fix for when property ids collide: now data is also updated
1 parent 06f9fb3 commit 8791437

3 files changed

Lines changed: 184 additions & 62 deletions

File tree

pycellin/io/geff/loader.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
from pycellin.io.utils import (
3232
_graph_has_node_prop,
3333
_split_graph_into_lineages,
34+
_update_edge_prop_key,
35+
_update_lineage_prop_key,
3436
_update_lineages_IDs_key,
3537
_update_node_prop_key,
3638
check_fusions,
@@ -395,6 +397,7 @@ def _extract_props_metadata(
395397
md: dict[str, geff_spec.PropMetadata],
396398
props_dict: dict[str, Property],
397399
prop_type: PropertyType,
400+
rename_map: dict[str, dict[str, str]],
398401
) -> None:
399402
"""
400403
Extract properties metadata from a given dictionary and update the props_dict.
@@ -407,6 +410,11 @@ def _extract_props_metadata(
407410
The dictionary to update with extracted properties metadata.
408411
prop_type : PropertyType
409412
The type of property being extracted ('node' or 'edge').
413+
rename_map : dict[str, dict[str, str]]
414+
Mutable accumulator mapping property type ('node', 'edge', 'lineage') to
415+
a dict of ``{old_key: new_key}`` renames. When a key collision forces a
416+
rename, both the new and the existing property's mappings are recorded here
417+
so that the graph data can be updated accordingly.
410418
411419
Raises
412420
------
@@ -460,6 +468,7 @@ def _extract_props_metadata(
460468
dtype=prop.dtype,
461469
unit=prop.unit or None,
462470
)
471+
rename_map[prop_type][key] = new_key
463472
# Resolve a unique name for the existing colliding property.
464473
other_prop_type = props_dict[key].prop_type
465474
other_key = _resolve_prop_key(
@@ -472,6 +481,7 @@ def _extract_props_metadata(
472481
other_prop = props_dict.pop(key)
473482
other_prop.identifier = other_key
474483
props_dict[other_key] = other_prop
484+
rename_map[other_prop_type][key] = other_key
475485
else:
476486
# GEFF ensures uniqueness of property keys for nodes and edges
477487
# separately, so this should never happen.
@@ -485,6 +495,7 @@ def _extract_props_metadata(
485495
def _extract_lin_props_metadata(
486496
md: dict[str, Any],
487497
props_dict: dict[str, Property],
498+
rename_map: dict[str, dict[str, str]],
488499
) -> None:
489500
"""
490501
Extract lineage properties metadata from a given dictionary and update the props_dict.
@@ -495,6 +506,11 @@ def _extract_lin_props_metadata(
495506
The dictionary containing lineage properties metadata.
496507
props_dict : dict[str, Property]
497508
The dictionary to update with extracted lineage properties metadata.
509+
rename_map : dict[str, dict[str, str]]
510+
Mutable accumulator mapping property type ('node', 'edge', 'lineage') to
511+
a dict of ``{old_key: new_key}`` renames. When a key collision forces a
512+
rename, both the new lineage property's mapping and the existing property's
513+
mapping are recorded here so that the graph data can be updated accordingly.
498514
499515
Raises
500516
------
@@ -533,6 +549,7 @@ def _extract_lin_props_metadata(
533549
dtype=prop.get("dtype"),
534550
unit=prop.get("unit") or None,
535551
)
552+
rename_map["lineage"][key] = new_key
536553
# Resolve a unique name for the existing colliding property.
537554
existing_prop_type = props_dict[key].prop_type
538555
other_prefix = "cell" if existing_prop_type == "node" else "link"
@@ -546,6 +563,7 @@ def _extract_lin_props_metadata(
546563
other_prop = props_dict.pop(key)
547564
other_prop.identifier = other_key
548565
props_dict[other_key] = other_prop
566+
rename_map[existing_prop_type][key] = other_key
549567
else:
550568
raise KeyError(
551569
f"Cannot register property '{key}' (lineage): an identical "
@@ -554,14 +572,21 @@ def _extract_lin_props_metadata(
554572
)
555573

556574

557-
def _build_props_metadata(geff_md: geff.GeffMetadata) -> dict[str, Property]:
575+
def _build_props_metadata(
576+
geff_md: geff.GeffMetadata,
577+
rename_map: dict[str, dict[str, str]],
578+
) -> dict[str, Property]:
558579
"""
559580
Read and extract properties metadata from geff metadata.
560581
561582
Parameters
562583
----------
563584
geff_md : geff.GeffMetadata
564585
The geff metadata object containing properties metadata.
586+
rename_map : dict[str, dict[str, str]]
587+
Mutable accumulator mapping property type ('node', 'edge', 'lineage') to
588+
a dict of ``{old_key: new_key}`` renames. Populated in-place whenever a
589+
key collision forces a rename during metadata extraction.
565590
566591
Returns
567592
-------
@@ -570,9 +595,9 @@ def _build_props_metadata(geff_md: geff.GeffMetadata) -> dict[str, Property]:
570595
"""
571596
props_dict: dict[str, Property] = {}
572597
if geff_md.node_props_metadata:
573-
_extract_props_metadata(geff_md.node_props_metadata, props_dict, "node")
598+
_extract_props_metadata(geff_md.node_props_metadata, props_dict, "node", rename_map)
574599
if geff_md.edge_props_metadata:
575-
_extract_props_metadata(geff_md.edge_props_metadata, props_dict, "edge")
600+
_extract_props_metadata(geff_md.edge_props_metadata, props_dict, "edge", rename_map)
576601

577602
# TODO: for now lineage properties are not associated to a specific tag but stored
578603
# somewhere in the "extra" field. We need to check recursively if there is a dict
@@ -585,7 +610,7 @@ def _build_props_metadata(geff_md: geff.GeffMetadata) -> dict[str, Property]:
585610
geff_md.extra, "lineage_props_metadata"
586611
)
587612
if lin_props_metadata is not None:
588-
_extract_lin_props_metadata(lin_props_metadata, props_dict)
613+
_extract_lin_props_metadata(lin_props_metadata, props_dict, rename_map)
589614

590615
return props_dict
591616

@@ -823,12 +848,15 @@ def _normalize_properties_data(
823848
cell_z_key: str | None,
824849
time_key: str,
825850
cell_id_key: str | None,
851+
rename_map: dict[str, dict[str, str]],
826852
) -> None:
827853
"""
828854
Normalize properties data in lineages to match pycellin conventions.
829855
830856
This function updates the property keys in lineage node data to use
831857
standardized pycellin naming conventions (e.g., 'cell_x', 'cell_y').
858+
It also applies any key renames recorded in *rename_map* during metadata
859+
extraction so that the graph data stays consistent with the metadata.
832860
833861
Parameters
834862
----------
@@ -844,6 +872,11 @@ def _normalize_properties_data(
844872
The current z-coordinate key name, if any.
845873
cell_id_key : str | None
846874
The current cell ID key name, if any.
875+
rename_map : dict[str, dict[str, str]]
876+
Accumulator of ``{old_key: new_key}`` renames per property type
877+
('node', 'edge', 'lineage') collected during metadata extraction.
878+
Applied to each lineage's node attributes, edge attributes, and
879+
graph-level attributes respectively.
847880
"""
848881
if lin_id_key != "lineage_ID":
849882
_update_lineages_IDs_key(lineages, lin_id_key)
@@ -859,6 +892,13 @@ def _normalize_properties_data(
859892
lin.nodes[node]["cell_ID"] = node
860893
elif cell_id_key != "cell_ID":
861894
_update_node_prop_key(lin, old_key=cell_id_key, new_key="cell_ID")
895+
# Apply collision renames recorded during metadata extraction.
896+
for old_key, new_key in rename_map["node"].items():
897+
_update_node_prop_key(lin, old_key=old_key, new_key=new_key)
898+
for old_key, new_key in rename_map["edge"].items():
899+
_update_edge_prop_key(lin, old_key=old_key, new_key=new_key)
900+
for old_key, new_key in rename_map["lineage"].items():
901+
_update_lineage_prop_key(lin, old_key=old_key, new_key=new_key)
862902

863903

864904
def _normalize_properties_metadata(
@@ -982,7 +1022,8 @@ def load_GEFF(
9821022
generic_md = _build_generic_metadata(
9831023
geff_file, geff_md, time_prop, cell_x_prop, cell_y_prop, cell_z_prop
9841024
)
985-
props_md = _build_props_metadata(geff_md)
1025+
rename_map: dict[str, dict[str, str]] = {"node": {}, "edge": {}, "lineage": {}}
1026+
props_md = _build_props_metadata(geff_md, rename_map)
9861027

9871028
# EVERYTHING BELOW IS NOT DEBUGGED YET
9881029

@@ -1003,6 +1044,7 @@ def load_GEFF(
10031044
cell_z_prop,
10041045
time_prop,
10051046
cell_id_prop,
1047+
rename_map,
10061048
)
10071049
space_unit = generic_md.get("space_unit")
10081050
_normalize_properties_metadata(
@@ -1022,7 +1064,8 @@ def load_GEFF(
10221064

10231065
if __name__ == "__main__":
10241066
# geff_file = "/media/lxenard/data/Janelia_Cell_Trackathon/test_pycellin_geff/pycellin_to_geff.geff"
1025-
geff_file = "B:/Janelia_Cell_Trackathon/anniek_example/exampl_geff.zarr/tracks"
1067+
# geff_file = "B:/Janelia_Cell_Trackathon/anniek_example/exampl_geff.zarr/tracks"
1068+
geff_file = "/media/lxenard/data/Janelia_Cell_Trackathon/anniek_example/exampl_geff.zarr/tracks"
10261069
# geff_file = "E:/Janelia_Cell_Trackathon/reader_test_graph.geff"
10271070
# geff_file = "/media/lxenard/data/Janelia_Cell_Trackathon/mouse-20250719.zarr/tracks"
10281071
# geff_file = "/media/lxenard/data/Janelia_Cell_Trackathon/test_pycellin_geff/test.zarr"

pycellin/io/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,28 @@ def _update_node_prop_key(
350350
lineage.nodes[node][new_key] = default_value
351351

352352

353+
def _update_edge_prop_key(
    lineage: CellLineage,
    old_key: str,
    new_key: str,
) -> None:
    """
    Update the key of a property in all the edges of a lineage.

    Edges that do not carry ``old_key`` are left untouched. If an edge
    already stores a value under ``new_key``, that value is overwritten by
    the one previously stored under ``old_key``.

    Parameters
    ----------
    lineage : CellLineage
        The lineage to update. Its edge attributes are modified in place.
    old_key : str
        The old key of the property.
    new_key : str
        The new key of the property.
    """
    for u, v in lineage.edges:
        # Look the edge attribute mapping up once per edge instead of on
        # every access; the rename below mutates that mapping in place.
        edge_data = lineage.edges[u, v]
        if old_key in edge_data:
            edge_data[new_key] = edge_data.pop(old_key)
373+
374+
353375
def _update_lineage_prop_key(
354376
lineage: CellLineage,
355377
old_key: str,

0 commit comments

Comments
 (0)