computational-cell-analytics
diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py
Lines changed: 100 additions & 0 deletions
diff --git a/flamingo_tools/postprocessing/cochlea_mapping.py b/flamingo_tools/postprocessing/cochlea_mapping.py
Lines changed: 75 additions & 2 deletions
diff --git a/flamingo_tools/postprocessing/label_components.py b/flamingo_tools/postprocessing/label_components.py
Lines changed: 147 additions & 0 deletions
diff --git a/flamingo_tools/postprocessing/synapse_per_ihc_utils.py b/flamingo_tools/postprocessing/synapse_per_ihc_utils.py
Lines changed: 1 addition & 1 deletion
@@ -6,6 +6,7 @@
 import warnings
 from concurrent import futures
 from functools import partial
+from multiprocessing import cpu_count
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
@@ -502,3 +503,102 @@ def _compute_block(block_id):
 
     mask = ResizedVolume(low_res_mask, shape=original_shape, order=0)
     return mask
+
+
+def object_measures_single(
+    table_path: str,
+    seg_path: str,
+    image_paths: List[str],
+    out_paths: List[str],
+    force_overwrite: bool = False,
+    component_list: List[int] = [1],
+    background_mask: Optional[np.typing.ArrayLike] = None,
+    resolution: List[float] = [0.38, 0.38, 0.38],
+    s3: bool = False,
+    s3_credentials: Optional[str] = None,
+    s3_bucket_name: Optional[str] = None,
+    s3_service_endpoint: Optional[str] = None,
+    **_
+):
+    """Compute object measures for one or more image channels with respect to a single segmentation channel.
+
+    Image and output paths are processed pairwise. An output that already exists is skipped unless
+    overwriting is forced or the output is the (local) input table itself.
+
+    Args:
+        table_path: File path to the segmentation table.
+        seg_path: Path to the segmentation channel in ome.zarr format.
+        image_paths: Path(s) to the image channel(s) in ome.zarr format.
+        out_paths: Path(s) for the calculated object measures, paired with `image_paths` by position.
+            Surplus entries of the longer list are ignored.
+        force_overwrite: Forcefully overwrite existing files.
+        component_list: Only calculate object measures for specific components.
+        background_mask: If not None, a background mask is computed for every image channel and the
+            background-subtracted feature set is used. The argument is only compared against None;
+            the mask that is actually used is always computed here.
+        resolution: Resolution of input in micrometer. Either a float or a list with one or three values.
+        s3: Use S3 file paths.
+        s3_credentials: Credential file forwarded to the S3 path lookup.
+        s3_bucket_name: Bucket name forwarded to the S3 path lookup.
+        s3_service_endpoint: Service endpoint forwarded to the S3 path lookup.
+    """
+    input_key = "s0"
+    out_paths = [os.path.realpath(o) for o in out_paths]
+
+    if not isinstance(resolution, float):
+        if len(resolution) == 1:
+            resolution = resolution * 3
+        assert len(resolution) == 3
+        # The order of the values is reversed here.
+        # NOTE(review): presumably converts between xyz and zyx axis order - confirm.
+        resolution = np.array(resolution)[::-1]
+    else:
+        resolution = (resolution,) * 3
+
+    # Both values are the same for every image channel, so compute them once.
+    n_threads = int(os.environ.get("SLURM_CPUS_ON_NODE", cpu_count()))
+    real_table_path = os.path.realpath(table_path)
+
+    for img_path, out_path in zip(image_paths, out_paths):
+        # Writing the measures back into the local input table always requires overwriting.
+        # This is decided per output, so that one in-place output does not force
+        # overwriting of the remaining outputs.
+        overwrite = force_overwrite or (not s3 and out_path == real_table_path)
+
+        if os.path.isfile(out_path) and not overwrite:
+            print(f"Skipping {out_path}. Table already exists.")
+            continue
+
+        # Per-iteration sources: 'seg_path' itself stays untouched, so that it is not
+        # resolved a second time when more than one image channel is processed.
+        img_source, seg_source = img_path, seg_path
+
+        if background_mask is None:
+            feature_set = "default"
+            dilation = None
+            median_only = False
+            mask = None
+        else:
+            print("Using background mask for calculating object measures.")
+            feature_set = "default_background_subtract"
+            dilation = 4
+            median_only = True
+
+            if s3:
+                img_source, _fs = s3_utils.get_s3_path(img_path, bucket_name=s3_bucket_name,
+                                                       service_endpoint=s3_service_endpoint,
+                                                       credential_file=s3_credentials)
+                seg_source, _fs = s3_utils.get_s3_path(seg_path, bucket_name=s3_bucket_name,
+                                                       service_endpoint=s3_service_endpoint,
+                                                       credential_file=s3_credentials)
+
+            # The mask is cached next to the output table.
+            mask_cache_path = os.path.join(os.path.dirname(out_path), "bg-mask.zarr")
+            mask = compute_sgn_background_mask(
+                image_path=img_source,
+                segmentation_path=seg_source,
+                image_key=input_key,
+                segmentation_key=input_key,
+                n_threads=n_threads,
+                cache_path=mask_cache_path,
+            )
+
+        compute_object_measures(
+            image_path=img_source,
+            segmentation_path=seg_source,
+            segmentation_table_path=table_path,
+            output_table_path=out_path,
+            image_key=input_key,
+            segmentation_key=input_key,
+            feature_set=feature_set,
+            s3_flag=s3,
+            component_list=component_list,
+            dilation=dilation,
+            median_only=median_only,
+            background_mask=mask,
+            n_threads=n_threads,
+            resolution=resolution,
+        )
@@ -1,4 +1,5 @@
 import math
+import os
 from typing import List, Optional, Tuple
 
 import networkx as nx
@@ -8,6 +9,7 @@
 from scipy.interpolate import interp1d
 
 from flamingo_tools.postprocessing.label_components import downscaled_centroids
+from flamingo_tools.s3_utils import get_s3_path
 
 
 def find_most_distant_nodes(G: nx.classes.graph.Graph, weight: str = 'weight') -> Tuple[float, float]:
@@ -750,8 +752,8 @@ def tonotopic_mapping(
     apex_higher: bool = True,
     otof: bool = False,
 ) -> pd.DataFrame:
-    """Tonotopic mapping of IHCs by supplying a table with component labels.
-    The mapping assigns a tonotopic label to each IHC according to the position along the length of the cochlea.
+    """Tonotopic mapping of SGNs or IHCs by supplying a table with component labels.
+    The mapping assigns a tonotopic label to each instance according to the position along the length of the cochlea.
 
     Args:
         table: Dataframe of segmentation table.
@@ -816,3 +818,74 @@ def tonotopic_mapping(
     table = map_frequency(table, animal=animal, otof=otof)
 
     return table
+
+
+def tonotopic_mapping_single(
+    table_path: str,
+    out_path: str,
+    force_overwrite: bool = False,
+    cell_type: str = "sgn",
+    animal: str = "mouse",
+    otof: bool = False,
+    apex_position: str = "apex_higher",
+    component_list: List[int] = [1],
+    component_mapping: Optional[List[int]] = None,
+    max_edge_distance: float = 30,
+    s3: bool = False,
+    s3_credentials: Optional[str] = None,
+    s3_bucket_name: Optional[str] = None,
+    s3_service_endpoint: Optional[str] = None,
+    **_
+):
+    """Tonotopic mapping of a single cochlea.
+    Each segmentation instance within a given component list is assigned a frequency[kHz], a run length and an offset.
+    The components used for the mapping itself can be a subset of the component list to adapt to broken components
+    along the Rosenthal's canal.
+    If the cochlea is broken in the direction of the Rosenthal's canal, the components have to be provided in a
+    continuous order which reflects the positioning within 3D.
+    The frequency is calculated using the Greenwood function using animal specific parameters.
+    The orientation of the mapping can be reversed using the apex position in reference to the y-coordinate.
+
+    Nothing is read or written if the output file already exists, unless overwriting is forced
+    or the output is the (local) input table itself.
+
+    Args:
+        table_path: File path to segmentation table.
+        out_path: Output path for the segmentation table extended by the tonotopic mapping.
+        force_overwrite: Forcefully overwrite existing output path.
+        cell_type: Cell type of the segmentation. Currently supports "sgn" and "ihc".
+        animal: Animal for species specific frequency mapping. Either "mouse" or "gerbil".
+        otof: Use mapping by *Mueller, Hearing Research 202 (2005) 63-73* for OTOF cochleae.
+        apex_position: Identify position of apex and base. Apex is set to node with higher y-value per default.
+            Any value other than "apex_higher" disables this default orientation.
+        component_list: List of components. Can be passed to obtain the number of instances within the component list.
+        component_mapping: Components to use for tonotopic mapping. Ignore components torn parallel to main canal.
+        max_edge_distance: Maximal edge distance between graph nodes to create an edge between nodes.
+        s3: Use S3 bucket.
+        s3_credentials: Credential file forwarded to the S3 path lookup.
+        s3_bucket_name: Bucket name forwarded to the S3 path lookup.
+        s3_service_endpoint: Service endpoint forwarded to the S3 path lookup.
+
+    Raises:
+        ValueError: If the output path is an existing directory.
+    """
+    if os.path.isdir(out_path):
+        raise ValueError(f"Output path {out_path} is a directory. Provide a path to a single output file.")
+
+    # Writing the result back into the local input table always requires overwriting.
+    if not s3 and os.path.realpath(out_path) == os.path.realpath(table_path):
+        force_overwrite = True
+
+    # Decide about skipping before the table is loaded to avoid a needless (possibly remote) read.
+    if os.path.isfile(out_path) and not force_overwrite:
+        print(f"Skipping {out_path}. Table already exists.")
+        return
+
+    if s3:
+        tsv_path, fs = get_s3_path(table_path, bucket_name=s3_bucket_name,
+                                   service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
+        with fs.open(tsv_path, "r") as f:
+            table = pd.read_csv(f, sep="\t")
+    else:
+        table = pd.read_csv(table_path, sep="\t")
+
+    apex_higher = (apex_position == "apex_higher")
+
+    table = tonotopic_mapping(table, component_label=component_list, animal=animal,
+                              cell_type=cell_type, component_mapping=component_mapping,
+                              apex_higher=apex_higher, max_edge_distance=max_edge_distance,
+                              otof=otof)
+
+    table.to_csv(out_path, sep="\t", index=False)
@@ -1,5 +1,6 @@
 import math
 import multiprocessing as mp
+import os
 from concurrent import futures
 from typing import Callable, List, Optional, Tuple
 
@@ -10,6 +11,7 @@
 import pandas as pd
 
 from elf.io import open_file
+from flamingo_tools.s3_utils import get_s3_path
 from scipy.ndimage import distance_transform_edt, binary_dilation, binary_closing
 from scipy.sparse import csr_matrix
 from scipy.spatial import distance
@@ -673,3 +675,148 @@ def filter_cochlea_volume(
         combined_dilated[combined_dilated > 0] = 1
 
     return combined_dilated
+
+
+def label_custom_components(tsv_table, custom_dict):
+    """Assemble final IHC components from several post-processing runs.
+
+    The `custom_dict` configuration drives the procedure. Every entry of `label_dicts` maps the id of
+    a final component (e.g. "1") to:
+    - `label_params`: a list of parameter sets. The IHC component labeling is run once per parameter set
+    (e.g., {"min_size": 500, "max_edge_distance": 65, "min_component_length": 5}).
+    - `components`: for each of these runs, the list of component labels to pick from its result.
+    The label IDs picked from all runs are merged and assigned to the final component.
+    Afterwards, instances with fewer than `min_size_global` pixels are reset to component 0,
+    and the optional `missing_ids` (e.g., 4800 or 4832) are assigned to component 1.
+    Example `custom_dict` structure:
+    {
+        "min_size_global": 500,
+        "missing_ids": [4800, 4832],
+        "label_dicts": {
+            "1": {
+                "label_params": [
+                    {"min_size": 500, "max_edge_distance": 65, "min_component_length": 5},
+                    {"min_size": 400, "max_edge_distance": 45, "min_component_length": 5}
+                ],
+                "components": [[18, 22], [1, 45, 83]]
+            }
+        }
+    }
+
+    Args:
+        tsv_table: Pandas dataframe of the MoBIE segmentation table. Its "component_labels"
+            column is (re-)written in place.
+        custom_dict: Custom dictionary featuring post-processing parameters.
+
+    Returns:
+        Pandas dataframe featuring labeled components.
+    """
+    size_threshold = custom_dict["min_size_global"]
+
+    # Start with every instance unassigned (component 0).
+    tsv_table.loc[:, "component_labels"] = [0] * len(tsv_table)
+
+    for target_component, config in custom_dict["label_dicts"].items():
+        run_params = config["label_params"]
+        run_components = config["components"]
+
+        picked_ids = []
+        # Each list of wanted component labels belongs to the run with the parameter set at the same position.
+        for wanted, run_kwargs in zip(run_components, run_params):
+            run_table = label_components_ihc(tsv_table.copy(), **run_kwargs)
+            is_wanted = run_table["component_labels"].isin(wanted)
+            picked_ids.extend(list(run_table.loc[is_wanted, "label_id"]))
+            print(f"{wanted}", len(picked_ids))
+
+        # Drop duplicates picked by more than one run.
+        picked_ids = list(set(picked_ids))
+
+        is_picked = tsv_table["label_id"].isin(picked_ids)
+        tsv_table.loc[is_picked, "component_labels"] = int(target_component)
+
+    # Global size filter: too small instances are unassigned again.
+    too_small = tsv_table["n_pixels"] < size_threshold
+    tsv_table.loc[too_small, "component_labels"] = 0
+
+    # Explicitly requested instances are added after the size filter.
+    if "missing_ids" in custom_dict:
+        for missing_id in custom_dict["missing_ids"]:
+            tsv_table.loc[tsv_table["label_id"] == missing_id, "component_labels"] = 1
+
+    return tsv_table
+
+
+def label_components_single(
+    table_path: str,
+    out_path: str,
+    force_overwrite: bool = False,
+    cell_type: str = "sgn",
+    component_list: List[int] = [1],
+    max_edge_distance: float = 30,
+    min_component_length: int = 50,
+    min_size: int = 1000,
+    s3: bool = False,
+    s3_credentials: Optional[str] = None,
+    s3_bucket_name: Optional[str] = None,
+    s3_service_endpoint: Optional[str] = None,
+    custom_dic: Optional[dict] = None,
+    **_
+):
+    """Process a single cochlea using one set of parameters or a custom dictionary.
+    The cochlea is analyzed using graph-connected components
+    to label segmentation instances that are closer than a given maximal edge distance.
+    This process acts on an input segmentation table to which a "component_labels" column is added.
+    Each entry in this column refers to the index of a connected component.
+    The largest connected component has an index of 1; the others follow in decreasing order.
+
+    Nothing is read or written if the output file already exists, unless overwriting is forced
+    or the output is the (local) input table itself.
+
+    Args:
+        table_path: File path to segmentation table.
+        out_path: Output path to segmentation table with new column "component_labels".
+        force_overwrite: Forcefully overwrite existing output path.
+        cell_type: Cell type of the segmentation. Currently supports "sgn" and "ihc".
+        component_list: List of components. Can be passed to obtain the number of instances within the component list.
+        max_edge_distance: Maximal edge distance between graph nodes to create an edge between nodes.
+        min_component_length: Minimal length of nodes of connected component. Filtered out if lower.
+        min_size: Minimal number of pixels for filtering small instances.
+        s3: Use S3 bucket.
+        s3_credentials: Credential file forwarded to the S3 path lookup.
+        s3_bucket_name: Bucket name forwarded to the S3 path lookup.
+        s3_service_endpoint: Service endpoint forwarded to the S3 path lookup.
+        custom_dic: Custom dictionary which allows multiple post-processing configurations and combines the
+            results into final components. If given, `max_edge_distance`, `min_component_length` and
+            `min_size` are not used, and `cell_type` only affects the printed summary.
+
+    Raises:
+        ValueError: If the output path is an existing directory, or if no custom dictionary is given
+            and the cell type is neither "sgn" nor "ihc".
+    """
+    if os.path.isdir(out_path):
+        raise ValueError(f"Output path {out_path} is a directory. Provide a path to a single output file.")
+
+    # Writing the result back into the local input table always requires overwriting.
+    if not s3 and os.path.realpath(out_path) == os.path.realpath(table_path):
+        force_overwrite = True
+
+    # Decide about skipping before the table is loaded to avoid a needless (possibly remote) read.
+    if os.path.isfile(out_path) and not force_overwrite:
+        print(f"Skipping {out_path}. Table already exists.")
+        return
+
+    if s3:
+        tsv_path, fs = get_s3_path(table_path, bucket_name=s3_bucket_name,
+                                   service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
+        with fs.open(tsv_path, "r") as f:
+            table = pd.read_csv(f, sep="\t")
+    else:
+        table = pd.read_csv(table_path, sep="\t")
+
+    if custom_dic is not None:
+        # use multiple post-processing configurations
+        tsv_table = label_custom_components(table, custom_dic)
+    elif cell_type == "sgn":
+        tsv_table = label_components_sgn(table, min_size=min_size,
+                                         min_component_length=min_component_length,
+                                         max_edge_distance=max_edge_distance)
+    elif cell_type == "ihc":
+        tsv_table = label_components_ihc(table, min_size=min_size,
+                                         min_component_length=min_component_length,
+                                         max_edge_distance=max_edge_distance)
+    else:
+        raise ValueError("Choose a supported cell type. Either 'sgn' or 'ihc'.")
+
+    # Report the number of instances within the requested component(s).
+    custom_comp = len(tsv_table[tsv_table["component_labels"].isin(component_list)])
+    print(f"Total {cell_type.upper()}s: {len(tsv_table)}")
+    if component_list == [1]:
+        print(f"Largest component has {custom_comp} {cell_type.upper()}s.")
+    else:
+        for comp in component_list:
+            num_instances = len(tsv_table[tsv_table["component_labels"] == comp])
+            print(f"Component {comp} has {num_instances} instances.")
+        print(f"Custom component(s) have {custom_comp} {cell_type.upper()}s.")
+
+    tsv_table.to_csv(out_path, sep="\t", index=False)
@@ -47,4 +47,4 @@
                     "component_list": [2, 1, 3]},
     "M_AMD_N97_R": {"synapse_table_name": "synapse_v3_ihc_v4b", "ihc_table_name": "IHC_v4b",
                     "component_list": [2, 5]},
-}
+}