|
1 | 1 | import math |
2 | 2 | import multiprocessing as mp |
| 3 | +import os |
3 | 4 | from concurrent import futures |
4 | 5 | from typing import Callable, List, Optional, Tuple |
5 | 6 |
|
|
10 | 11 | import pandas as pd |
11 | 12 |
|
12 | 13 | from elf.io import open_file |
| 14 | +from flamingo_tools.s3_utils import get_s3_path |
13 | 15 | from scipy.ndimage import distance_transform_edt, binary_dilation, binary_closing |
14 | 16 | from scipy.sparse import csr_matrix |
15 | 17 | from scipy.spatial import distance |
@@ -673,3 +675,148 @@ def filter_cochlea_volume( |
673 | 675 | combined_dilated[combined_dilated > 0] = 1 |
674 | 676 |
|
675 | 677 | return combined_dilated |
| 678 | + |
| 679 | + |
def label_custom_components(tsv_table, custom_dict):
    """Label IHC components using multiple post-processing configurations and combine the
    results into final components.

    The function applies successive post-processing steps defined in a `custom_dict`
    configuration. Each entry under `label_dicts` specifies:
        - `label_params`: a list of parameter sets. The segmentation is processed once for
          each parameter set
          (e.g., {"min_size": 500, "max_edge_distance": 65, "min_component_length": 5}).
        - `components`: lists of label IDs to extract from each corresponding post-processing run.
    Label IDs collected from all runs are merged to form the final component (e.g., key "1").
    Global filtering is applied using `min_size_global`. Afterwards, any `missing_ids`
    (e.g., 4800 or 4832) are explicitly assigned to component 1; because this happens
    after the global size filter, these IDs are kept regardless of their size.

    Example `custom_dict` structure:
    {
        "min_size_global": 500,
        "missing_ids": [4800, 4832],
        "label_dicts": {
            "1": {
                "label_params": [
                    {"min_size": 500, "max_edge_distance": 65, "min_component_length": 5},
                    {"min_size": 400, "max_edge_distance": 45, "min_component_length": 5}
                ],
                "components": [[18, 22], [1, 45, 83]]
            }
        }
    }

    Args:
        tsv_table: Pandas dataframe of the MoBIE segmentation table. It must provide the
            columns "label_id" and "n_pixels". The dataframe is modified in place:
            the column "component_labels" is (re-)initialized to 0 and then filled.
        custom_dict: Custom dictionary featuring post-processing parameters.
            Requires the keys "min_size_global" and "label_dicts"; "missing_ids" is optional.

    Returns:
        Pandas dataframe featuring labeled components (the same object as `tsv_table`).

    Raises:
        ValueError: If an entry of `label_dicts` has a different number of
            `label_params` and `components`.
    """
    min_size = custom_dict["min_size_global"]
    # Every instance starts as background (0).
    tsv_table.loc[:, "component_labels"] = 0

    for custom_comp, label_dict in custom_dict["label_dicts"].items():
        label_params = label_dict["label_params"]
        label_components = label_dict["components"]
        # zip() would silently drop the surplus entries, so fail loudly on a bad configuration.
        if len(label_params) != len(label_components):
            raise ValueError(
                f"Component '{custom_comp}': got {len(label_params)} entries in 'label_params' "
                f"but {len(label_components)} entries in 'components'. Both must have the same length."
            )

        combined_label_ids = []
        for comp, other_kwargs in zip(label_components, label_params):
            # Run the post-processing on a copy so that the runs do not affect each other's input.
            tsv_table_tmp = label_components_ihc(tsv_table.copy(), **other_kwargs)
            selected = tsv_table_tmp["component_labels"].isin(comp)
            combined_label_ids.extend(tsv_table_tmp.loc[selected, "label_id"])
            print(f"{comp}", len(combined_label_ids))

        # The same label ID can be selected by several runs; keep each one once.
        combined_label_ids = list(set(combined_label_ids))
        in_component = tsv_table["label_id"].isin(combined_label_ids)
        tsv_table.loc[in_component, "component_labels"] = int(custom_comp)

    # Global size filter, applied to the merged result of all configurations.
    tsv_table.loc[tsv_table["n_pixels"] < min_size, "component_labels"] = 0

    # Explicitly requested IDs are added last so that they bypass the size filter.
    missing_ids = custom_dict.get("missing_ids", [])
    tsv_table.loc[tsv_table["label_id"].isin(missing_ids), "component_labels"] = 1

    return tsv_table
| 737 | + |
| 738 | + |
def label_components_single(
    table_path: str,
    out_path: str,
    force_overwrite: bool = False,
    cell_type: str = "sgn",
    component_list: Optional[List[int]] = None,
    max_edge_distance: float = 30,
    min_component_length: int = 50,
    min_size: int = 1000,
    s3: bool = False,
    s3_credentials: Optional[str] = None,
    s3_bucket_name: Optional[str] = None,
    s3_service_endpoint: Optional[str] = None,
    custom_dic: Optional[dict] = None,
    **_
):
    """Process a single cochlea using one set of parameters or a custom dictionary.

    The cochlea is analyzed using graph-connected components
    to label segmentation instances that are closer than a given maximal edge distance.
    This process acts on an input segmentation table to which a "component_labels" column is added.
    Each entry in this column refers to the index of a connected component.
    The largest connected component has an index of 1; the others follow in decreasing order.

    If the output file already exists and overwriting is not requested, the function
    returns early without reading the input table. Writing back to the (local) input
    file always counts as overwriting.

    Args:
        table_path: File path to segmentation table.
        out_path: Output path to segmentation table with new column "component_labels".
        force_overwrite: Forcefully overwrite existing output path.
        cell_type: Cell type of the segmentation. Currently supports "sgn" and "ihc".
            Only used to select the labeling function if `custom_dic` is None.
        component_list: List of components. Can be passed to obtain the number of instances
            within the component list. Defaults to [1], i.e. the largest component.
        max_edge_distance: Maximal edge distance between graph nodes to create an edge between nodes.
        min_component_length: Minimal length of nodes of connected component. Filtered out if lower.
        min_size: Minimal number of pixels for filtering small instances.
        s3: Use S3 bucket. If set, `table_path` is resolved via `get_s3_path`.
        s3_credentials: Passed to `get_s3_path` as `credential_file`
            (presumably the path to an S3 credential file).
        s3_bucket_name: Passed to `get_s3_path` as `bucket_name`.
        s3_service_endpoint: Passed to `get_s3_path` as `service_endpoint`.
        custom_dic: Custom dictionary which allows multiple post-processing configurations and combines the
            results into final components.

    Raises:
        ValueError: If `out_path` is a directory or if `cell_type` is not supported.
    """
    if os.path.isdir(out_path):
        raise ValueError(f"Output path {out_path} is a directory. Provide a path to a single output file.")

    # Resolve the default here instead of using a mutable default argument.
    if component_list is None:
        component_list = [1]

    # overwrite input file
    if not s3 and os.path.realpath(out_path) == os.path.realpath(table_path):
        force_overwrite = True

    # Check for an existing result before loading the table to avoid needless (S3) reads.
    if os.path.isfile(out_path) and not force_overwrite:
        print(f"Skipping {out_path}. Table already exists.")
        return

    if s3:
        tsv_path, fs = get_s3_path(table_path, bucket_name=s3_bucket_name,
                                   service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
        with fs.open(tsv_path, "r") as f:
            table = pd.read_csv(f, sep="\t")
    else:
        table = pd.read_csv(table_path, sep="\t")

    if custom_dic is not None:
        # use multiple post-processing configurations
        tsv_table = label_custom_components(table, custom_dic)
    elif cell_type == "sgn":
        tsv_table = label_components_sgn(table, min_size=min_size,
                                         min_component_length=min_component_length,
                                         max_edge_distance=max_edge_distance)
    elif cell_type == "ihc":
        tsv_table = label_components_ihc(table, min_size=min_size,
                                         min_component_length=min_component_length,
                                         max_edge_distance=max_edge_distance)
    else:
        raise ValueError("Choose a supported cell type. Either 'sgn' or 'ihc'.")

    # Report the number of instances in the requested component(s).
    custom_comp = len(tsv_table[tsv_table["component_labels"].isin(component_list)])
    print(f"Total {cell_type.upper()}s: {len(tsv_table)}")
    if component_list == [1]:
        print(f"Largest component has {custom_comp} {cell_type.upper()}s.")
    else:
        for comp in component_list:
            num_instances = len(tsv_table[tsv_table["component_labels"] == comp])
            print(f"Component {comp} has {num_instances} instances.")
        print(f"Custom component(s) have {custom_comp} {cell_type.upper()}s.")

    tsv_table.to_csv(out_path, sep="\t", index=False)
0 commit comments