|
| 1 | +from typing import Dict, List |
| 2 | + |
| 3 | +import geff |
| 4 | +import geff_spec |
| 5 | +import networkx as nx |
| 6 | +import numpy as np |
| 7 | +import pandas as pd |
| 8 | +import os |
| 9 | +from scipy.cluster.hierarchy import DisjointSet |
| 10 | + |
| 11 | +import epicure.Utils as ut |
| 12 | + |
| 13 | +def create_label_to_track_mapping( |
| 14 | + divisions: Dict[int, List[int]], unique_labels: List[int] |
| 15 | +) -> Dict[int, int]: |
| 16 | + """ |
| 17 | + Create a mapping from labels to track IDs using scipy's DisjointSet for efficient track grouping. |
| 18 | +
|
| 19 | + Args: |
| 20 | + divisions: dict of {daughter_label: [mother_labels]} from epic.tracking.graph |
| 21 | + unique_labels: list of unique labels present in the tracking data |
| 22 | +
|
| 23 | + Returns: |
| 24 | + dict: {label: track_id} - mapping from each label to its track ID |
| 25 | + """ |
| 26 | + if not divisions: |
| 27 | + # No divisions - each unique label is its own track. |
| 28 | + return {label: label for label in unique_labels} |
| 29 | + |
| 30 | + ds = DisjointSet(unique_labels) |
| 31 | + |
| 32 | + # Union connected labels based on mother-daughter relationships. |
| 33 | + for daughter, mothers in divisions.items(): |
| 34 | + if daughter not in unique_labels: # weirdly, this can happen |
| 35 | + continue |
| 36 | + for mother in mothers: |
| 37 | + if mother in unique_labels: |
| 38 | + ds.merge(daughter, mother) |
| 39 | + |
| 40 | + # A connected component is a track. We use the root as track ID. |
| 41 | + # Create a mapping from label to track_id (root). |
| 42 | + label_to_track_id = {} |
| 43 | + for label in unique_labels: |
| 44 | + root = ds[label] |
| 45 | + label_to_track_id[label] = root |
| 46 | + |
| 47 | + return label_to_track_id |
| 48 | + |
| 49 | + |
| 50 | +def build_nodes_df( |
| 51 | + track_data: np.ndarray, divisions: Dict[int, List[int]] |
| 52 | +) -> pd.DataFrame: |
| 53 | + """Build a DataFrame representing the nodes for the GEFF graph.""" |
| 54 | + df = pd.DataFrame(track_data, columns=["label", "frame", "y", "x"]) |
| 55 | + df["node_id"] = df.index |
| 56 | + |
| 57 | + # Generate and assign track IDs. |
| 58 | + labels = list(df["label"].unique()) |
| 59 | + label_to_track_id = create_label_to_track_mapping(divisions, labels) |
| 60 | + df["track_id"] = df["label"].map(label_to_track_id) |
| 61 | + |
| 62 | + return df |
| 63 | + |
| 64 | + |
| 65 | +def build_edges_df(divisions: Dict[int, List[int]], df_nodes: pd.DataFrame): |
| 66 | + """""" |
| 67 | + if divisions is not None: |
| 68 | + for daughter, mothers in divisions.items(): |
| 69 | + if len(mothers) > 1: |
| 70 | + ut.show_error(f"Merge event detected. Label {daughter} " |
| 71 | + f"has the following mother labels: {mothers}.") |
| 72 | + # TODO: does GEFF support merge events? |
| 73 | + |
| 74 | + # Division edges: for each daughter-mother pair, create an edge. |
| 75 | + edges_data = [ |
| 76 | + {"daughter": daughter, "mother": mother} |
| 77 | + for daughter, mothers in divisions.items() |
| 78 | + for mother in mothers |
| 79 | + ] |
| 80 | + df_edges = pd.DataFrame(edges_data) |
| 81 | + # Labels stay the same until there is a division. But node IDs are unique. |
| 82 | + # It means that in df_nodes, labels appears multiple times. Because of this |
| 83 | + # we cannot easily map between df_nodes and df_edges. So we create intermediary |
| 84 | + # columns to ease the mapping. |
| 85 | + df_nodes["first_frame"] = df_nodes.groupby("label")["frame"].transform("min") |
| 86 | + df_nodes["last_frame"] = df_nodes.groupby("label")["frame"].transform("max") |
| 87 | + # A daughter is at the first frame of its label, a mother at the last frame of its label. |
| 88 | + df_nodes["daughter"] = df_nodes["first_frame"] == df_nodes["frame"] |
| 89 | + df_nodes["mother"] = df_nodes["last_frame"] == df_nodes["frame"] |
| 90 | + df_nodes.drop(columns=["first_frame", "last_frame"], inplace=True) |
| 91 | + # Now we can map between df_nodes and df_edges. |
| 92 | + # The in_id is the node ID of the matching label that is a mother, |
| 93 | + # and the out_id is the node ID of the matching label that is a daughter. |
| 94 | + df_edges["in_id"] = df_edges["mother"].map( |
| 95 | + df_nodes[df_nodes["mother"]].set_index("label")["node_id"] |
| 96 | + ) |
| 97 | + df_edges["out_id"] = df_edges["daughter"].map( |
| 98 | + df_nodes[df_nodes["daughter"]].set_index("label")["node_id"] |
| 99 | + ) |
| 100 | + df_nodes.drop(columns=["daughter", "mother"], inplace=True) |
| 101 | + |
| 102 | + # Non-division edges: for each label, connect consecutive nodes within that label. |
| 103 | + non_division_edges = [] |
| 104 | + for label in df_nodes["label"].unique(): |
| 105 | + label_spots = df_nodes[df_nodes["label"] == label].sort_values("frame") |
| 106 | + if len(label_spots) > 1: |
| 107 | + for i in range(len(label_spots) - 1): |
| 108 | + current_spot = label_spots.iloc[i] |
| 109 | + next_spot = label_spots.iloc[i + 1] |
| 110 | + non_division_edges.append( |
| 111 | + {"in_id": current_spot["node_id"], "out_id": next_spot["node_id"]} |
| 112 | + ) |
| 113 | + |
| 114 | + # Combine division and non-division edges. |
| 115 | + df_non_division_edges = pd.DataFrame(non_division_edges) |
| 116 | + if not df_edges.empty and not df_non_division_edges.empty: |
| 117 | + # Make sure both dataframes have the same columns. |
| 118 | + df_edges = df_edges[["in_id", "out_id"]] |
| 119 | + df_edges = pd.concat([df_edges, df_non_division_edges], ignore_index=True) |
| 120 | + elif not df_non_division_edges.empty: |
| 121 | + df_edges = df_non_division_edges |
| 122 | + |
| 123 | + # Final cleanup and type conversion. |
| 124 | + if not df_edges.empty: |
| 125 | + # We can have NaN if a label has no mother (appears at first frame) |
| 126 | + # or no daughter (disappears at last frame). |
| 127 | + df_edges.dropna(inplace=True) |
| 128 | + # Convert to int in case of NaN. |
| 129 | + df_edges["in_id"] = df_edges["in_id"].astype(int) |
| 130 | + df_edges["out_id"] = df_edges["out_id"].astype(int) |
| 131 | + |
| 132 | + return df_edges |
| 133 | + |
| 134 | + |
| 135 | +def build_nx_digraph(epic) -> nx.DiGraph: |
| 136 | + """Build a NetworkX directed graph from EpiCure data.""" |
| 137 | + |
| 138 | + df_nodes = build_nodes_df(epic.tracking.track_data, epic.tracking.graph) |
| 139 | + df_edges = build_edges_df(epic.tracking.graph, df_nodes) |
| 140 | + |
| 141 | + graph = nx.from_pandas_edgelist( |
| 142 | + df_edges, source="in_id", target="out_id", create_using=nx.DiGraph |
| 143 | + ) |
| 144 | + node_attrs = {row["node_id"]: row.to_dict() for _, row in df_nodes.iterrows()} |
| 145 | + nx.set_node_attributes(graph, node_attrs) |
| 146 | + |
| 147 | + return graph |
| 148 | + |
| 149 | + |
| 150 | +def build_props_metadata() -> Dict[str, geff_spec.PropMetadata]: |
| 151 | + """Build GEFF properties metadata.""" |
| 152 | + md_x = geff_spec.PropMetadata( |
| 153 | + identifier="x", |
| 154 | + dtype="int", |
| 155 | + varlength=False, |
| 156 | + unit="pixel", |
| 157 | + name="x", |
| 158 | + description="X coordinate of center of the cell", |
| 159 | + ) |
| 160 | + md_y = geff_spec.PropMetadata( |
| 161 | + identifier="y", |
| 162 | + dtype="int", |
| 163 | + varlength=False, |
| 164 | + unit="pixel", |
| 165 | + name="y", |
| 166 | + description="Y coordinate of the center of the cell", |
| 167 | + ) |
| 168 | + md_t = geff_spec.PropMetadata( |
| 169 | + identifier="frame", |
| 170 | + dtype="int32", |
| 171 | + varlength=False, |
| 172 | + unit="frame", |
| 173 | + name="frame", |
| 174 | + description="Time", |
| 175 | + ) |
| 176 | + md_label = geff_spec.PropMetadata( |
| 177 | + identifier="label", |
| 178 | + dtype="int64", |
| 179 | + varlength=False, |
| 180 | + name="label", |
| 181 | + description="Label of the cell", |
| 182 | + ) |
| 183 | + md_nid = geff_spec.PropMetadata( |
| 184 | + identifier="node_id", |
| 185 | + dtype="int64", |
| 186 | + varlength=False, |
| 187 | + name="node_id", |
| 188 | + description="Unique identifier of the node", |
| 189 | + ) |
| 190 | + |
| 191 | + return {"x": md_x, "y": md_y, "frame": md_t, "label": md_label, "node_id": md_nid} |
| 192 | + |
| 193 | + |
| 194 | +def build_geff_metadata(epic): |
| 195 | + """Build GEFF metadata.""" |
| 196 | + axes = [ |
| 197 | + geff_spec.Axis( |
| 198 | + name="x", |
| 199 | + type="space", |
| 200 | + unit="pixel", |
| 201 | + scale=epic.epi_metadata.get("ScaleXY", 1), |
| 202 | + scaled_unit=epic.epi_metadata.get("UnitXY"), |
| 203 | + ), |
| 204 | + geff_spec.Axis( |
| 205 | + name="y", |
| 206 | + type="space", |
| 207 | + unit="pixel", |
| 208 | + scale=epic.epi_metadata.get("ScaleXY", 1), |
| 209 | + scaled_unit=epic.epi_metadata.get("UnitXY"), |
| 210 | + ), |
| 211 | + geff_spec.Axis( |
| 212 | + name="frame", |
| 213 | + type="time", |
| 214 | + unit="frame", |
| 215 | + scale=epic.epi_metadata.get("ScaleT", 1), |
| 216 | + scaled_unit=epic.epi_metadata.get("UnitT"), |
| 217 | + ), |
| 218 | + ] |
| 219 | + display_hints = geff_spec.DisplayHint( |
| 220 | + display_horizontal="x", |
| 221 | + display_vertical="y", |
| 222 | + display_time="frame", |
| 223 | + ) |
| 224 | + |
| 225 | + return geff.GeffMetadata( |
| 226 | + directed=True, |
| 227 | + axes=axes, |
| 228 | + display_hints=display_hints, |
| 229 | + node_props_metadata=build_props_metadata(), |
| 230 | + edge_props_metadata={}, |
| 231 | + track_node_props={"lineage": "track_id", "tracklet": "label"}, |
| 232 | + related_objects=[ |
| 233 | + geff_spec.RelatedObject( |
| 234 | + type="labels", |
| 235 | + path=os.path.join("..", epic.imgname + "_labels.tif"), |
| 236 | + label_prop="label", |
| 237 | + ), |
| 238 | + ], |
| 239 | + ) |
| 240 | + |
| 241 | + |
| 242 | +def save_geff(epic, outname): |
| 243 | + """Save a GEFF file.""" |
| 244 | + |
| 245 | + geff_graph = build_nx_digraph(epic) |
| 246 | + geff_md = build_geff_metadata(epic) |
| 247 | + |
| 248 | + geff.write( |
| 249 | + geff_graph, |
| 250 | + outname, |
| 251 | + metadata=geff_md, |
| 252 | + zarr_format=2, # could be 3 but 2 by default in GEFF |
| 253 | + structure_validation=True, |
| 254 | + overwrite=True, |
| 255 | + ) |
0 commit comments