From 20239967971d0cac8f55738ce49cd5377bf0f73b Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Tue, 18 Mar 2025 17:42:20 +0100 Subject: [PATCH 01/20] perf implement rust co-occurrence statistics --- pyproject.toml | 3 + src/squidpy/gr/_ppatterns.py | 201 ++++++++++++++++++++++++++-------- tests/graph/test_ppatterns.py | 31 ++++++ 3 files changed, 189 insertions(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1143cab94..c97919602 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,9 @@ docs = [ "myst-nb>=0.17.1", "sphinx_copybutton>=0.5.0", ] +co_occurrence = [ + "ststat-rs", +] [project.urls] Homepage = "https://github.com/scverse/squidpy" diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index a61fed47e..ad72d384a 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -2,6 +2,8 @@ from __future__ import annotations +from importlib.util import find_spec + from collections.abc import Iterable, Sequence from itertools import chain from typing import TYPE_CHECKING, Any, Literal @@ -325,6 +327,73 @@ def _occur_count( return out +def _co_occurrence_origin ( + spatial: NDArrayA, + original_clust: pd.Series, + interval: NDArrayA, + copy: bool = False, + n_splits: int | None = None, + n_jobs: int | None = None, + backend: str = "loky", + show_progress_bar: bool = True +) -> NDArrayA: + + # annotate cluster idx + clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} + labs = np.array([clust_map[c] for c in original_clust], dtype=ip) + labs_unique = np.array(list(clust_map.values()), dtype=ip) + + # create intervals thresholds + if isinstance(interval, int): + thresh_min, thresh_max = _find_min_max(spatial) + interval = np.linspace(thresh_min, thresh_max, num=interval, dtype=fp) + else: + interval = np.array(sorted(interval), dtype=fp, copy=True) + if len(interval) <= 1: + raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") + + n_obs = spatial.shape[0] + if n_splits is None: + size_arr = (n_obs**2 * spatial.itemsize) / 1024 / 1024 # calc expected mem usage + n_splits = 1 + if size_arr > 2000: + while (n_obs / n_splits) > 2048: + n_splits += 1 + logg.warning( + f"`n_splits` was automatically set to `{n_splits}` to " + f"prevent `{n_obs}x{n_obs}` distance matrix from being created" + ) + n_splits = int(max(min(n_splits, n_obs), 1)) + + # split array and labels + spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s)) + labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s)) + # create idx array including unique combinations and self-comparison + x, y = np.triu_indices_from(np.empty((n_splits, n_splits))) + idx_splits = list(zip(x, y, strict=False)) + + n_jobs = _get_n_cores(n_jobs) + start = logg.info( + f"Calculating co-occurrence probabilities for `{len(interval)}` intervals " + f"`{len(idx_splits)}` split combinations using `{n_jobs}` core(s)" + ) + + out_lst = parallelize( + _co_occurrence_helper, + collection=idx_splits, + extractor=chain.from_iterable, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + )( + spatial_splits=spatial_splits, + labs_splits=labs_splits, + labs_unique=labs_unique, + interval=interval, + ) + out = list(out_lst)[0] if len(idx_splits) == 1 else sum(list(out_lst)) / len(idx_splits) + + return out def _co_occurrence_helper( idx_splits: Iterable[tuple[int, int]], @@ -352,6 +421,55 @@ def _co_occurrence_helper( return out_lst +def _co_occurrence_rs( + spatial: NDArrayA, + original_clust: pd.Series, + interval: NDArrayA +) -> NDArrayA | None: + + from ststat_rs import co_occur_count + + clust = original_clust.cat.codes.astype(np.int32) + + co_occur_3d = co_occur_count( + spatial[:, 0], + spatial[:, 1], + interval, clust + ) + + ## for each dimension + # for i in range(co_occur.shape[2]): + num = co_occur_3d.shape[0] + labs_unique = range(co_occur_3d.shape[0]) + out = np.zeros((num, num, interval.shape[0] - 1), dtype=fp) + + interval_seq = np.arange(interval.shape[0] - 2, -1, -1) + interval_seq.shape[0] + + + for i_interval in interval_seq: + # print(i_interval) + co_occur = co_occur_3d[:, :, i_interval] + + probs_matrix = co_occur / np.sum(co_occur) if np.sum(co_occur) != 0 else np.zeros((num, num), dtype=fp) + probs = np.sum(probs_matrix, axis=0) + probs_con = np.zeros((num, num), dtype=fp) + + for c in labs_unique: + probs_conditional = ( + co_occur[c] / np.sum(co_occur[c]) if np.sum(co_occur[c]) != 0 else np.zeros(num, dtype=fp) + ) + probs_con[c, :] = np.zeros(num, dtype=fp) + for i in range(num): + if probs[i] == 0: + probs_con[c, i] = 0 + else: + probs_con[c, i] = probs_conditional[i] / probs[i] + + # print(interval_seq.shape[0] - 1 - i_interval) + out[:, :, interval_seq.shape[0] - 1 - i_interval] = probs_con + + return out @d.dedent def co_occurrence( @@ -364,6 +482,7 @@ def co_occurrence( n_jobs: int | None = None, backend: str = "loky", show_progress_bar: bool = True, + use_rust: bool = False ) -> tuple[NDArrayA, NDArrayA] | None: """ Compute co-occurrence probability of clusters. @@ -381,6 +500,9 @@ def co_occurrence( Number of splits in which to divide the spatial coordinates in :attr:`anndata.AnnData.obsm` ``['{spatial_key}']``. %(parallelize)s + use_rust + Whether to use the rust implementation of the function. If ``True``, the rust implementation will be used. + If ``False``, the python implementation will. Returns ------- @@ -393,6 +515,12 @@ def co_occurrence( - :attr:`anndata.AnnData.uns` ``['{cluster_key}_co_occurrence']['interval']`` - the distance thresholds computed at ``interval``. """ + + + if use_rust and not find_spec("ststat_rs"): + ## issue an error and ask user to install ststat_rs + raise ImportError("ststat_rs not found. Please install ststat_rs to use the rust version of co-occurrence function: `pip install ststat-rs`") + if isinstance(adata, SpatialData): adata = adata.table _assert_categorical_obs(adata, key=cluster_key) @@ -401,11 +529,6 @@ def co_occurrence( spatial = adata.obsm[spatial_key].astype(fp) original_clust = adata.obs[cluster_key] - # annotate cluster idx - clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} - labs = np.array([clust_map[c] for c in original_clust], dtype=ip) - labs_unique = np.array(list(clust_map.values()), dtype=ip) - # create intervals thresholds if isinstance(interval, int): thresh_min, thresh_max = _find_min_max(spatial) @@ -415,46 +538,32 @@ def co_occurrence( if len(interval) <= 1: raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") - n_obs = spatial.shape[0] - if n_splits is None: - size_arr = (n_obs**2 * spatial.itemsize) / 1024 / 1024 # calc expected mem usage - n_splits = 1 - if size_arr > 2000: - while (n_obs / n_splits) > 2048: - n_splits += 1 - logg.warning( - f"`n_splits` was automatically set to `{n_splits}` to " - f"prevent `{n_obs}x{n_obs}` distance matrix from being created" - ) - n_splits = int(max(min(n_splits, n_obs), 1)) - - # split array and labels - spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s)) - labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s)) - # create idx array including unique combinations and self-comparison - x, y = np.triu_indices_from(np.empty((n_splits, n_splits))) - idx_splits = list(zip(x, y, strict=False)) - - n_jobs = _get_n_cores(n_jobs) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals " - f"`{len(idx_splits)}` split combinations using `{n_jobs}` core(s)" - ) - - out_lst = parallelize( - _co_occurrence_helper, - collection=idx_splits, - extractor=chain.from_iterable, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - )( - spatial_splits=spatial_splits, - labs_splits=labs_splits, - labs_unique=labs_unique, - interval=interval, - ) - out = list(out_lst)[0] if len(idx_splits) == 1 else sum(list(out_lst)) / len(idx_splits) + if use_rust: + out = _co_occurrence_rs( + spatial, + original_clust, + interval + ) + start = logg.info( + f"Calculating co-occurrence probabilities for `{len(interval)}` intervals" + ) + else: + # logg.info("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") + print("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") + + out = _co_occurrence_origin( + spatial, + original_clust, + interval, + copy=copy, + n_splits=n_splits, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar + ) + start = logg.info( + f"Calculating co-occurrence probabilities for `{len(interval)}` intervals using `{n_jobs}` core(s) and `{n_splits}` splits" + ) if copy: logg.info("Finish", time=start) @@ -465,7 +574,7 @@ def co_occurrence( attr="uns", key=Key.uns.co_occurrence(cluster_key), data={"occ": out, "interval": interval}, - time=start, + time=start ) diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 6752b1548..91502ba2f 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -130,6 +130,24 @@ def test_co_occurrence(adata: AnnData): assert arr.shape[2] == 49 assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] +def test_co_occurrence_rs(adata: AnnData): + """ + check co_occurrence score and shape (rust implementation) + """ + co_occurrence(adata, cluster_key="leiden", use_rust=True) + + # assert occurrence in adata.uns + assert "leiden_co_occurrence" in adata.uns.keys() + assert "occ" in adata.uns["leiden_co_occurrence"].keys() + assert "interval" in adata.uns["leiden_co_occurrence"].keys() + + # assert shapes + arr = adata.uns["leiden_co_occurrence"]["occ"] + assert arr.ndim == 3 + assert arr.shape[2] == 49 + assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] + + # @pytest.mark.parametrize(("ys", "xs"), [(10, 10), (None, None), (10, 20)]) @pytest.mark.parametrize(("n_jobs", "n_splits"), [(1, 2), (2, 2)]) @@ -155,6 +173,19 @@ def test_co_occurrence_explicit_interval(adata: AnnData, size: int): assert interval is not interval_1 np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 +@pytest.mark.parametrize("size", [1, 3]) +def test_co_occurrence_rs_explicit_interval(adata: AnnData, size: int): + minn, maxx = _find_min_max(adata.obsm[Key.obsm.spatial]) + interval = np.linspace(minn, maxx, size) + if size == 1: + with pytest.raises(ValueError, match=r"Expected interval to be of length"): + _ = co_occurrence(adata, cluster_key="leiden", copy=True, interval=interval, use_rust=True) + else: + _, interval_1 = co_occurrence_rs(adata, cluster_key="leiden", copy=True, interval=interval, use_rust=True) + + assert interval is not interval_1 + np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 + def test_use_raw(dummy_adata: AnnData): var_names = [str(i) for i in range(10)] From e51f5467ac867613c8103bdab06b71c9c17c402a Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Tue, 18 Mar 2025 22:11:44 +0100 Subject: [PATCH 02/20] misc: change rust-py deps --- pyproject.toml | 2 +- src/squidpy/gr/_ppatterns.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c97919602..6183a4577 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,7 +103,7 @@ docs = [ "sphinx_copybutton>=0.5.0", ] co_occurrence = [ - "ststat-rs", + "scstat-rs", ] [project.urls] diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index ad72d384a..5104cce52 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -427,7 +427,7 @@ def _co_occurrence_rs( interval: NDArrayA ) -> NDArrayA | None: - from ststat_rs import co_occur_count + from scstat_rs import co_occur_count clust = original_clust.cat.codes.astype(np.int32) From a5b82265371c47f6f8293803f1b8cb986c168c01 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Tue, 18 Mar 2025 22:35:34 +0100 Subject: [PATCH 03/20] doc: improve the documentation --- src/squidpy/gr/_ppatterns.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5104cce52..601367c6f 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -274,6 +274,7 @@ def _score_helper( return score_perms +## The _occur_count() function is needed for the _co_occurrence_origin implementation @njit( ft[:, :, :](tt(it[:], 2), ft[:, :], it[:], ft[:], bl), parallel=False, @@ -395,6 +396,8 @@ def _co_occurrence_origin ( return out + +## The _co_occurrence_helper() function is needed for the _co_occurrence_origin implementation def _co_occurrence_helper( idx_splits: Iterable[tuple[int, int]], spatial_splits: Sequence[NDArrayA], @@ -501,8 +504,11 @@ def co_occurrence( :attr:`anndata.AnnData.obsm` ``['{spatial_key}']``. %(parallelize)s use_rust - Whether to use the rust implementation of the function. If ``True``, the rust implementation will be used. - If ``False``, the python implementation will. + Whether to use the rust implementation of the co-occurrence probability. + If ``True``, the rust implementation will be used. + If ``False``, the python implementation will be used. + For rust implementation, the `scstat-rs` package is required. + You can install it via `pip install scstat-rs`. Returns ------- @@ -517,9 +523,9 @@ def co_occurrence( """ - if use_rust and not find_spec("ststat_rs"): - ## issue an error and ask user to install ststat_rs - raise ImportError("ststat_rs not found. Please install ststat_rs to use the rust version of co-occurrence function: `pip install ststat-rs`") + if use_rust and not find_spec("scstat_rs"): + ## issue an error and ask user to install scstat_rs + raise ImportError("scstat_rs not found. Please install scstat_rs to use the rust version of co-occurrence function: `pip install scstat-rs`") if isinstance(adata, SpatialData): adata = adata.table From f7ff2936c9126d2b3f9464351189b62da4975496 Mon Sep 17 00:00:00 2001 From: Daniele Lucarelli <108922919+MDLDan@users.noreply.github.com> Date: Wed, 19 Mar 2025 09:13:02 +0100 Subject: [PATCH 04/20] add python re-implementation --- src/squidpy/gr/_ppatterns.py | 301 +++++++++++++++++------------------ 1 file changed, 144 insertions(+), 157 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 601367c6f..3ebdd54de 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -12,7 +12,7 @@ import numpy as np import pandas as pd from anndata import AnnData -from numba import njit +from numba import njit, prange from numpy.random import default_rng from scanpy import logging as logg from scanpy.get import _get_obs_rep @@ -274,155 +274,147 @@ def _score_helper( return score_perms -## The _occur_count() function is needed for the _co_occurrence_origin implementation -@njit( - ft[:, :, :](tt(it[:], 2), ft[:, :], it[:], ft[:], bl), - parallel=False, - fastmath=True, -) +@njit(parallel=True, fastmath=True) def _occur_count( - clust: tuple[NDArrayA, NDArrayA], - pw_dist: NDArrayA, - labs_unique: NDArrayA, - interval: NDArrayA, - same_split: bool, -) -> NDArrayA: - num = labs_unique.shape[0] - out = np.zeros((num, num, interval.shape[0] - 1), dtype=ft) - - for idx in range(interval.shape[0] - 1): - co_occur = np.zeros((num, num), dtype=ft) - probs_con = np.zeros((num, num), dtype=ft) - - thres_max = interval[idx + 1] - clust_x, clust_y = clust - - # Modified to compute co-occurrence probability ratio over increasing radii sizes as opposed to discrete interval bins - # Need pw_dist > 0 to avoid counting a cell with itself as co-occurrence - idx_x, idx_y = np.nonzero((pw_dist <= thres_max) & (pw_dist > 0)) - x = clust_x[idx_x] - y = clust_y[idx_y] - # Treat computing co-occurrence using the same split and different splits differently - # Pairwise distance matrix for between the same split is symmetric and therefore only needs to be counted once - for i, j in zip(x, y): # noqa: B905 # cannot use strict=False because of numba - co_occur[i, j] += 1 - if not same_split: - co_occur[j, i] += 1 - - # Prevent divison by zero errors when we have low cell counts/small intervals - probs_matrix = co_occur / np.sum(co_occur) if np.sum(co_occur) != 0 else np.zeros((num, num), dtype=ft) - probs = np.sum(probs_matrix, axis=0) - - for c in labs_unique: - probs_conditional = ( - co_occur[c] / np.sum(co_occur[c]) if np.sum(co_occur[c]) != 0 else np.zeros(num, dtype=ft) - ) - probs_con[c, :] = np.zeros(num, dtype=ft) - for i in range(num): - if probs[i] == 0: - probs_con[c, i] = 0 - else: - probs_con[c, i] = probs_conditional[i] / probs[i] - - out[:, :, idx] = probs_con - - return out - -def _co_occurrence_origin ( - spatial: NDArrayA, - original_clust: pd.Series, - interval: NDArrayA, - copy: bool = False, - n_splits: int | None = None, - n_jobs: int | None = None, - backend: str = "loky", - show_progress_bar: bool = True -) -> NDArrayA: - - # annotate cluster idx - clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} - labs = np.array([clust_map[c] for c in original_clust], dtype=ip) - labs_unique = np.array(list(clust_map.values()), dtype=ip) - - # create intervals thresholds - if isinstance(interval, int): - thresh_min, thresh_max = _find_min_max(spatial) - interval = np.linspace(thresh_min, thresh_max, num=interval, dtype=fp) - else: - interval = np.array(sorted(interval), dtype=fp, copy=True) - if len(interval) <= 1: - raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") + spatial_x: NDArrayA, + spatial_y:NDArrayA, + thresholds: NDArrayA, + label_idx: NDArrayA, + n: int, + k: int, + l_val: int +)-> NDArrayA: + + # Allocate a 2D array to store a flat local result per point. + local_results = np.zeros((n, l_val * k * k), dtype=np.int32) + for i in prange(n): + local_counts = np.zeros(l_val * k * k, dtype=np.int32) + for j in range(n): + if i == j: + continue + dx = spatial_x[i] - spatial_x[j] + dy = spatial_y[i] - spatial_y[j] + dist_sq = dx * dx + dy * dy + for r in range(l_val): + thresh = thresholds[r] + if dist_sq <= thresh: + index = r * (k * k) + label_idx[i] * k + label_idx[j] + local_counts[index] += 1 + #else: + # break # If this threshold fails, smaller ones will also fail. + for m in range(l_val * k * k): + local_results[i, m] = local_counts[m] + + # Reduce over all points: sum the local counts. + result_flat = np.zeros(l_val * k * k, dtype=np.int32) + for m in range(l_val * k * k): + s = 0 + for i in range(n): + s += local_results[i, m] + result_flat[m] = s + + # Reshape to a 3D array with shape (interval, k, k) + counts = np.empty((l_val, k, k), dtype=np.int32) + for r in range(l_val): + for i in range(k): + for j in range(k): + counts[r, i, j] = result_flat[r * (k * k) + i * k + j] + + # Rearrange axes to match the original convention: (k, k, interval) + result = np.empty((k, k, l_val), dtype=np.int32) + for i in range(k): + for j in range(k): + for r in range(l_val): + result[i, j, r] = counts[r, i, j] + return result - n_obs = spatial.shape[0] - if n_splits is None: - size_arr = (n_obs**2 * spatial.itemsize) / 1024 / 1024 # calc expected mem usage - n_splits = 1 - if size_arr > 2000: - while (n_obs / n_splits) > 2048: - n_splits += 1 - logg.warning( - f"`n_splits` was automatically set to `{n_splits}` to " - f"prevent `{n_obs}x{n_obs}` distance matrix from being created" - ) - n_splits = int(max(min(n_splits, n_obs), 1)) - - # split array and labels - spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s)) - labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s)) - # create idx array including unique combinations and self-comparison - x, y = np.triu_indices_from(np.empty((n_splits, n_splits))) - idx_splits = list(zip(x, y, strict=False)) - - n_jobs = _get_n_cores(n_jobs) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals " - f"`{len(idx_splits)}` split combinations using `{n_jobs}` core(s)" - ) - - out_lst = parallelize( - _co_occurrence_helper, - collection=idx_splits, - extractor=chain.from_iterable, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - )( - spatial_splits=spatial_splits, - labs_splits=labs_splits, - labs_unique=labs_unique, - interval=interval, - ) - out = list(out_lst)[0] if len(idx_splits) == 1 else sum(list(out_lst)) / len(idx_splits) - return out -## The _co_occurrence_helper() function is needed for the _co_occurrence_origin implementation def _co_occurrence_helper( - idx_splits: Iterable[tuple[int, int]], - spatial_splits: Sequence[NDArrayA], - labs_splits: Sequence[NDArrayA], - labs_unique: NDArrayA, - interval: NDArrayA, - queue: SigQueue | None = None, -) -> pd.DataFrame: - out_lst = [] - for t in idx_splits: - idx_x, idx_y = t - labs_x = labs_splits[idx_x] - labs_y = labs_splits[idx_y] - dist = pairwise_distances(spatial_splits[idx_x], spatial_splits[idx_y]) - - out = _occur_count((labs_x, labs_y), dist, labs_unique, interval, idx_x == idx_y) - out_lst.append(out) - - if queue is not None: - queue.put(Signal.UPDATE) - - if queue is not None: - queue.put(Signal.FINISH) - - return out_lst + v_x: NDArrayA, + v_y: NDArrayA, + v_radium: NDArrayA, + labs: NDArrayA +)-> NDArrayA: + """ + Fast co-occurrence probability computation using the new numba-accelerated counting. + + Parameters + ---------- + v_x : np.ndarray, float64 + x–coordinates. + v_y : np.ndarray, float64 + y–coordinates. + v_radium : np.ndarray, float64 + Distance thresholds (in ascending order). + labs : np.ndarray + Cluster labels (as integers). + + Returns + ------- + occ_prob : np.ndarray + A 3D array of shape (k, k, len(v_radium)-1) containing the co-occurrence probabilities. + labs_unique : np.ndarray + Array of unique labels. + """ + n = len(v_x) + labs_unique = np.unique(labs) + k = len(labs_unique) + # l_val is the number of bins; here we assume the thresholds come from v_radium[1:]. + l_val = len(v_radium) - 1 + # Compute squared thresholds from the interval (skip the first value) + thresholds = (v_radium[1:]) ** 2 + + # Compute cco-occurence ounts. + counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) + + # Compute co-occurrence probabilities for each threshold bin. + occ_prob = np.empty((k, k, l_val), dtype=np.float32) + for r in range(l_val): + co_occur = counts[:, :, r].astype(np.float32) + + # Compute the total count for this threshold. + total = 0.0 + for i in range(k): + for j in range(k): + total += co_occur[i, j] + + # Compute the normalized probability matrix. + probs_matrix = np.zeros((k, k), dtype=np.float32) + if total != 0.0: + for i in range(k): + for j in range(k): + probs_matrix[i, j] = co_occur[i, j] / total + + probs = np.zeros(k, dtype=np.float32) + for j in range(k): + s = 0.0 + for i in range(k): + s += probs_matrix[i, j] + probs[j] = s + + # Compute conditional probabilities. + probs_con = np.zeros((k, k), dtype=np.float32) + for c in range(k): + row_sum = 0.0 + for j in range(k): + row_sum += co_occur[c, j] + for i in range(k): + cond = 0.0 + if row_sum != 0.0: + cond = co_occur[c, i] / row_sum + if probs[i] == 0.0: + probs_con[c, i] = 0.0 + else: + probs_con[c, i] = cond / probs[i] + + # Transpose to match (k, k, interval). + for i in range(k): + for c in range(k): + occ_prob[i, c, r] = probs_con[c, i] + + return occ_prob def _co_occurrence_rs( spatial: NDArrayA, @@ -451,7 +443,6 @@ def _co_occurrence_rs( for i_interval in interval_seq: - # print(i_interval) co_occur = co_occur_3d[:, :, i_interval] probs_matrix = co_occur / np.sum(co_occur) if np.sum(co_occur) != 0 else np.zeros((num, num), dtype=fp) @@ -472,7 +463,7 @@ def _co_occurrence_rs( # print(interval_seq.shape[0] - 1 - i_interval) out[:, :, interval_seq.shape[0] - 1 - i_interval] = probs_con - return out + return out, labs_unique @d.dedent def co_occurrence( @@ -534,7 +525,9 @@ def co_occurrence( spatial = adata.obsm[spatial_key].astype(fp) original_clust = adata.obs[cluster_key] - + clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} + labs = np.array([clust_map[c] for c in original_clust], dtype=ip) + # create intervals thresholds if isinstance(interval, int): thresh_min, thresh_max = _find_min_max(spatial) @@ -556,17 +549,11 @@ def co_occurrence( else: # logg.info("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") print("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") + spatial_x = spatial[:, 0] + spatial_y = spatial[:, 1] - out = _co_occurrence_origin( - spatial, - original_clust, - interval, - copy=copy, - n_splits=n_splits, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar - ) + # Compute co-occurrence probabilities using the fast numba routine. + out = _co_occurrence_helper(spatial_x, spatial_y, interval, labs) start = logg.info( f"Calculating co-occurrence probabilities for `{len(interval)}` intervals using `{n_jobs}` core(s) and `{n_splits}` splits" ) From 9af42521903c82021f9f253b23083b2def4efd82 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Mar 2025 09:50:26 +0000 Subject: [PATCH 05/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 92 ++++++++++++----------------------- tests/graph/test_ppatterns.py | 3 +- 2 files changed, 33 insertions(+), 62 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 3ebdd54de..e48d0671b 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -2,9 +2,8 @@ from __future__ import annotations -from importlib.util import find_spec - from collections.abc import Iterable, Sequence +from importlib.util import find_spec from itertools import chain from typing import TYPE_CHECKING, Any, Literal @@ -276,15 +275,8 @@ def _score_helper( @njit(parallel=True, fastmath=True) def _occur_count( - spatial_x: NDArrayA, - spatial_y:NDArrayA, - thresholds: NDArrayA, - label_idx: NDArrayA, - n: int, - k: int, - l_val: int -)-> NDArrayA: - + spatial_x: NDArrayA, spatial_y: NDArrayA, thresholds: NDArrayA, label_idx: NDArrayA, n: int, k: int, l_val: int +) -> NDArrayA: # Allocate a 2D array to store a flat local result per point. local_results = np.zeros((n, l_val * k * k), dtype=np.int32) for i in prange(n): @@ -300,11 +292,11 @@ def _occur_count( if dist_sq <= thresh: index = r * (k * k) + label_idx[i] * k + label_idx[j] local_counts[index] += 1 - #else: + # else: # break # If this threshold fails, smaller ones will also fail. for m in range(l_val * k * k): local_results[i, m] = local_counts[m] - + # Reduce over all points: sum the local counts. result_flat = np.zeros(l_val * k * k, dtype=np.int32) for m in range(l_val * k * k): @@ -319,7 +311,7 @@ def _occur_count( for i in range(k): for j in range(k): counts[r, i, j] = result_flat[r * (k * k) + i * k + j] - + # Rearrange axes to match the original convention: (k, k, interval) result = np.empty((k, k, l_val), dtype=np.int32) for i in range(k): @@ -329,17 +321,10 @@ def _occur_count( return result - - -def _co_occurrence_helper( - v_x: NDArrayA, - v_y: NDArrayA, - v_radium: NDArrayA, - labs: NDArrayA -)-> NDArrayA: +def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. - + Parameters ---------- v_x : np.ndarray, float64 @@ -350,7 +335,7 @@ def _co_occurrence_helper( Distance thresholds (in ascending order). labs : np.ndarray Cluster labels (as integers). - + Returns ------- occ_prob : np.ndarray @@ -365,35 +350,35 @@ def _co_occurrence_helper( l_val = len(v_radium) - 1 # Compute squared thresholds from the interval (skip the first value) thresholds = (v_radium[1:]) ** 2 - + # Compute cco-occurence ounts. counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) - + # Compute co-occurrence probabilities for each threshold bin. occ_prob = np.empty((k, k, l_val), dtype=np.float32) for r in range(l_val): co_occur = counts[:, :, r].astype(np.float32) - + # Compute the total count for this threshold. total = 0.0 for i in range(k): for j in range(k): total += co_occur[i, j] - + # Compute the normalized probability matrix. probs_matrix = np.zeros((k, k), dtype=np.float32) if total != 0.0: for i in range(k): for j in range(k): probs_matrix[i, j] = co_occur[i, j] / total - + probs = np.zeros(k, dtype=np.float32) for j in range(k): s = 0.0 for i in range(k): s += probs_matrix[i, j] probs[j] = s - + # Compute conditional probabilities. probs_con = np.zeros((k, k), dtype=np.float32) for c in range(k): @@ -408,29 +393,21 @@ def _co_occurrence_helper( probs_con[c, i] = 0.0 else: probs_con[c, i] = cond / probs[i] - + # Transpose to match (k, k, interval). for i in range(k): for c in range(k): occ_prob[i, c, r] = probs_con[c, i] - + return occ_prob -def _co_occurrence_rs( - spatial: NDArrayA, - original_clust: pd.Series, - interval: NDArrayA -) -> NDArrayA | None: +def _co_occurrence_rs(spatial: NDArrayA, original_clust: pd.Series, interval: NDArrayA) -> NDArrayA | None: from scstat_rs import co_occur_count clust = original_clust.cat.codes.astype(np.int32) - co_occur_3d = co_occur_count( - spatial[:, 0], - spatial[:, 1], - interval, clust - ) + co_occur_3d = co_occur_count(spatial[:, 0], spatial[:, 1], interval, clust) ## for each dimension # for i in range(co_occur.shape[2]): @@ -441,7 +418,6 @@ def _co_occurrence_rs( interval_seq = np.arange(interval.shape[0] - 2, -1, -1) interval_seq.shape[0] - for i_interval in interval_seq: co_occur = co_occur_3d[:, :, i_interval] @@ -465,6 +441,7 @@ def _co_occurrence_rs( return out, labs_unique + @d.dedent def co_occurrence( adata: AnnData | SpatialData, @@ -476,7 +453,7 @@ def co_occurrence( n_jobs: int | None = None, backend: str = "loky", show_progress_bar: bool = True, - use_rust: bool = False + use_rust: bool = False, ) -> tuple[NDArrayA, NDArrayA] | None: """ Compute co-occurrence probability of clusters. @@ -513,10 +490,11 @@ def co_occurrence( computed at ``interval``. """ - if use_rust and not find_spec("scstat_rs"): ## issue an error and ask user to install scstat_rs - raise ImportError("scstat_rs not found. Please install scstat_rs to use the rust version of co-occurrence function: `pip install scstat-rs`") + raise ImportError( + "scstat_rs not found. Please install scstat_rs to use the rust version of co-occurrence function: `pip install scstat-rs`" + ) if isinstance(adata, SpatialData): adata = adata.table @@ -527,7 +505,7 @@ def co_occurrence( original_clust = adata.obs[cluster_key] clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} labs = np.array([clust_map[c] for c in original_clust], dtype=ip) - + # create intervals thresholds if isinstance(interval, int): thresh_min, thresh_max = _find_min_max(spatial) @@ -538,17 +516,13 @@ def co_occurrence( raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") if use_rust: - out = _co_occurrence_rs( - spatial, - original_clust, - interval - ) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals" - ) + out = _co_occurrence_rs(spatial, original_clust, interval) + start = logg.info(f"Calculating co-occurrence probabilities for `{len(interval)}` intervals") else: # logg.info("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") - print("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") + print( + "Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`" + ) spatial_x = spatial[:, 0] spatial_y = spatial[:, 1] @@ -563,11 +537,7 @@ def co_occurrence( return out, interval _save_data( - adata, - attr="uns", - key=Key.uns.co_occurrence(cluster_key), - data={"occ": out, "interval": interval}, - time=start + adata, attr="uns", key=Key.uns.co_occurrence(cluster_key), data={"occ": out, "interval": interval}, time=start ) diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 91502ba2f..997dcaf4f 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -130,6 +130,7 @@ def test_co_occurrence(adata: AnnData): assert arr.shape[2] == 49 assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] + def test_co_occurrence_rs(adata: AnnData): """ check co_occurrence score and shape (rust implementation) @@ -148,7 +149,6 @@ def test_co_occurrence_rs(adata: AnnData): assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] - # @pytest.mark.parametrize(("ys", "xs"), [(10, 10), (None, None), (10, 20)]) @pytest.mark.parametrize(("n_jobs", "n_splits"), [(1, 2), (2, 2)]) def test_co_occurrence_reproducibility(adata: AnnData, n_jobs: int, n_splits: int): @@ -173,6 +173,7 @@ def test_co_occurrence_explicit_interval(adata: AnnData, size: int): assert interval is not interval_1 np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 + @pytest.mark.parametrize("size", [1, 3]) def test_co_occurrence_rs_explicit_interval(adata: AnnData, size: int): minn, maxx = _find_min_max(adata.obsm[Key.obsm.spatial]) From 9457767553cd58919d656031667561ccb533c102 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Wed, 19 Mar 2025 11:01:33 +0100 Subject: [PATCH 06/20] Clean the tests and dependencies --- pyproject.toml | 3 - src/squidpy/gr/_ppatterns.py | 136 ++++++++-------------------------- tests/graph/test_ppatterns.py | 31 -------- 3 files changed, 31 insertions(+), 139 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6183a4577..1143cab94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,9 +102,6 @@ docs = [ "myst-nb>=0.17.1", "sphinx_copybutton>=0.5.0", ] -co_occurrence = [ - "scstat-rs", -] [project.urls] Homepage = "https://github.com/scverse/squidpy" diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 3ebdd54de..962fa0ca9 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -2,8 +2,6 @@ from __future__ import annotations -from importlib.util import find_spec - from collections.abc import Iterable, Sequence from itertools import chain from typing import TYPE_CHECKING, Any, Literal @@ -276,12 +274,12 @@ def _score_helper( @njit(parallel=True, fastmath=True) def _occur_count( - spatial_x: NDArrayA, - spatial_y:NDArrayA, - thresholds: NDArrayA, - label_idx: NDArrayA, - n: int, - k: int, + spatial_x: NDArrayA, + spatial_y:NDArrayA, + thresholds: NDArrayA, + label_idx: NDArrayA, + n: int, + k: int, l_val: int )-> NDArrayA: @@ -304,7 +302,7 @@ def _occur_count( # break # If this threshold fails, smaller ones will also fail. for m in range(l_val * k * k): local_results[i, m] = local_counts[m] - + # Reduce over all points: sum the local counts. result_flat = np.zeros(l_val * k * k, dtype=np.int32) for m in range(l_val * k * k): @@ -319,7 +317,7 @@ def _occur_count( for i in range(k): for j in range(k): counts[r, i, j] = result_flat[r * (k * k) + i * k + j] - + # Rearrange axes to match the original convention: (k, k, interval) result = np.empty((k, k, l_val), dtype=np.int32) for i in range(k): @@ -332,14 +330,14 @@ def _occur_count( def _co_occurrence_helper( - v_x: NDArrayA, - v_y: NDArrayA, - v_radium: NDArrayA, + v_x: NDArrayA, + v_y: NDArrayA, + v_radium: NDArrayA, labs: NDArrayA )-> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. - + Parameters ---------- v_x : np.ndarray, float64 @@ -350,7 +348,7 @@ def _co_occurrence_helper( Distance thresholds (in ascending order). labs : np.ndarray Cluster labels (as integers). - + Returns ------- occ_prob : np.ndarray @@ -365,35 +363,35 @@ def _co_occurrence_helper( l_val = len(v_radium) - 1 # Compute squared thresholds from the interval (skip the first value) thresholds = (v_radium[1:]) ** 2 - + # Compute cco-occurence ounts. counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) - + # Compute co-occurrence probabilities for each threshold bin. occ_prob = np.empty((k, k, l_val), dtype=np.float32) for r in range(l_val): co_occur = counts[:, :, r].astype(np.float32) - + # Compute the total count for this threshold. total = 0.0 for i in range(k): for j in range(k): total += co_occur[i, j] - + # Compute the normalized probability matrix. probs_matrix = np.zeros((k, k), dtype=np.float32) if total != 0.0: for i in range(k): for j in range(k): probs_matrix[i, j] = co_occur[i, j] / total - + probs = np.zeros(k, dtype=np.float32) for j in range(k): s = 0.0 for i in range(k): s += probs_matrix[i, j] probs[j] = s - + # Compute conditional probabilities. probs_con = np.zeros((k, k), dtype=np.float32) for c in range(k): @@ -408,62 +406,13 @@ def _co_occurrence_helper( probs_con[c, i] = 0.0 else: probs_con[c, i] = cond / probs[i] - + # Transpose to match (k, k, interval). for i in range(k): for c in range(k): occ_prob[i, c, r] = probs_con[c, i] - - return occ_prob -def _co_occurrence_rs( - spatial: NDArrayA, - original_clust: pd.Series, - interval: NDArrayA -) -> NDArrayA | None: - - from scstat_rs import co_occur_count - - clust = original_clust.cat.codes.astype(np.int32) - - co_occur_3d = co_occur_count( - spatial[:, 0], - spatial[:, 1], - interval, clust - ) - - ## for each dimension - # for i in range(co_occur.shape[2]): - num = co_occur_3d.shape[0] - labs_unique = range(co_occur_3d.shape[0]) - out = np.zeros((num, num, interval.shape[0] - 1), dtype=fp) - - interval_seq = np.arange(interval.shape[0] - 2, -1, -1) - interval_seq.shape[0] - - - for i_interval in interval_seq: - co_occur = co_occur_3d[:, :, i_interval] - - probs_matrix = co_occur / np.sum(co_occur) if np.sum(co_occur) != 0 else np.zeros((num, num), dtype=fp) - probs = np.sum(probs_matrix, axis=0) - probs_con = np.zeros((num, num), dtype=fp) - - for c in labs_unique: - probs_conditional = ( - co_occur[c] / np.sum(co_occur[c]) if np.sum(co_occur[c]) != 0 else np.zeros(num, dtype=fp) - ) - probs_con[c, :] = np.zeros(num, dtype=fp) - for i in range(num): - if probs[i] == 0: - probs_con[c, i] = 0 - else: - probs_con[c, i] = probs_conditional[i] / probs[i] - - # print(interval_seq.shape[0] - 1 - i_interval) - out[:, :, interval_seq.shape[0] - 1 - i_interval] = probs_con - - return out, labs_unique + return occ_prob @d.dedent def co_occurrence( @@ -475,8 +424,7 @@ def co_occurrence( n_splits: int | None = None, n_jobs: int | None = None, backend: str = "loky", - show_progress_bar: bool = True, - use_rust: bool = False + show_progress_bar: bool = True ) -> tuple[NDArrayA, NDArrayA] | None: """ Compute co-occurrence probability of clusters. @@ -494,12 +442,6 @@ def co_occurrence( Number of splits in which to divide the spatial coordinates in :attr:`anndata.AnnData.obsm` ``['{spatial_key}']``. %(parallelize)s - use_rust - Whether to use the rust implementation of the co-occurrence probability. - If ``True``, the rust implementation will be used. - If ``False``, the python implementation will be used. - For rust implementation, the `scstat-rs` package is required. - You can install it via `pip install scstat-rs`. Returns ------- @@ -514,10 +456,6 @@ def co_occurrence( """ - if use_rust and not find_spec("scstat_rs"): - ## issue an error and ask user to install scstat_rs - raise ImportError("scstat_rs not found. Please install scstat_rs to use the rust version of co-occurrence function: `pip install scstat-rs`") - if isinstance(adata, SpatialData): adata = adata.table _assert_categorical_obs(adata, key=cluster_key) @@ -527,7 +465,7 @@ def co_occurrence( original_clust = adata.obs[cluster_key] clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} labs = np.array([clust_map[c] for c in original_clust], dtype=ip) - + # create intervals thresholds if isinstance(interval, int): thresh_min, thresh_max = _find_min_max(spatial) @@ -537,26 +475,14 @@ def co_occurrence( if len(interval) <= 1: raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") - if use_rust: - out = _co_occurrence_rs( - spatial, - original_clust, - interval - ) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals" - ) - else: - # logg.info("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") - print("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") - spatial_x = spatial[:, 0] - spatial_y = spatial[:, 1] - - # Compute co-occurrence probabilities using the fast numba routine. - out = _co_occurrence_helper(spatial_x, spatial_y, interval, labs) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals using `{n_jobs}` core(s) and `{n_splits}` splits" - ) + spatial_x = spatial[:, 0] + spatial_y = spatial[:, 1] + + # Compute co-occurrence probabilities using the fast numba routine. + out = _co_occurrence_helper(spatial_x, spatial_y, interval, labs) + start = logg.info( + f"Calculating co-occurrence probabilities for `{len(interval)}` intervals using `{n_jobs}` core(s) and `{n_splits}` splits" + ) if copy: logg.info("Finish", time=start) diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 91502ba2f..6752b1548 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -130,24 +130,6 @@ def test_co_occurrence(adata: AnnData): assert arr.shape[2] == 49 assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] -def test_co_occurrence_rs(adata: AnnData): - """ - check co_occurrence score and shape (rust implementation) - """ - co_occurrence(adata, cluster_key="leiden", use_rust=True) - - # assert occurrence in adata.uns - assert "leiden_co_occurrence" in adata.uns.keys() - assert "occ" in adata.uns["leiden_co_occurrence"].keys() - assert "interval" in adata.uns["leiden_co_occurrence"].keys() - - # assert shapes - arr = adata.uns["leiden_co_occurrence"]["occ"] - assert arr.ndim == 3 - assert arr.shape[2] == 49 - assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] - - # @pytest.mark.parametrize(("ys", "xs"), [(10, 10), (None, None), (10, 20)]) @pytest.mark.parametrize(("n_jobs", "n_splits"), [(1, 2), (2, 2)]) @@ -173,19 +155,6 @@ def test_co_occurrence_explicit_interval(adata: AnnData, size: int): assert interval is not interval_1 np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 -@pytest.mark.parametrize("size", [1, 3]) -def test_co_occurrence_rs_explicit_interval(adata: AnnData, size: int): - minn, maxx = _find_min_max(adata.obsm[Key.obsm.spatial]) - interval = np.linspace(minn, maxx, size) - if size == 1: - with pytest.raises(ValueError, match=r"Expected interval to be of length"): - _ = co_occurrence(adata, cluster_key="leiden", copy=True, interval=interval, use_rust=True) - else: - _, interval_1 = co_occurrence_rs(adata, cluster_key="leiden", copy=True, interval=interval, use_rust=True) - - assert interval is not interval_1 - np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 - def test_use_raw(dummy_adata: AnnData): var_names = [str(i) for i in range(10)] From 92f3da5a633020dbb81218452530590a96d383c1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:03:00 +0000 Subject: [PATCH 07/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 20 +++++--------------- tests/graph/test_ppatterns.py | 1 + 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 36cb4e4ec..e6c992d58 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -275,14 +275,8 @@ def _score_helper( @njit(parallel=True, fastmath=True) def _occur_count( - spatial_x: NDArrayA, - spatial_y:NDArrayA, - thresholds: NDArrayA, - label_idx: NDArrayA, - n: int, - k: int, - l_val: int -)-> NDArrayA: + spatial_x: NDArrayA, spatial_y: NDArrayA, thresholds: NDArrayA, label_idx: NDArrayA, n: int, k: int, l_val: int +) -> NDArrayA: # Allocate a 2D array to store a flat local result per point. local_results = np.zeros((n, l_val * k * k), dtype=np.int32) for i in prange(n): @@ -327,12 +321,7 @@ def _occur_count( return result -def _co_occurrence_helper( - v_x: NDArrayA, - v_y: NDArrayA, - v_radium: NDArrayA, - labs: NDArrayA -)-> NDArrayA: +def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. @@ -412,6 +401,7 @@ def _co_occurrence_helper( return occ_prob + @d.dedent def co_occurrence( adata: AnnData | SpatialData, @@ -422,7 +412,7 @@ def co_occurrence( n_splits: int | None = None, n_jobs: int | None = None, backend: str = "loky", - show_progress_bar: bool = True + show_progress_bar: bool = True, ) -> tuple[NDArrayA, NDArrayA] | None: """ Compute co-occurrence probability of clusters. diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index c0b6b45fe..6752b1548 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -155,6 +155,7 @@ def test_co_occurrence_explicit_interval(adata: AnnData, size: int): assert interval is not interval_1 np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 + def test_use_raw(dummy_adata: AnnData): var_names = [str(i) for i in range(10)] raw = dummy_adata[:, dummy_adata.var_names[: len(var_names)]].copy() From 52a5fae7e8ae040b1828a2733e25f26858172eea Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 14:44:51 +0200 Subject: [PATCH 08/20] Optimize memory access pattern & cache kernel --- src/squidpy/gr/_ppatterns.py | 62 ++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5ada2a694..9a18bbaf5 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -266,54 +266,46 @@ def _score_helper( return score_perms - @njit(parallel=True, fastmath=True) def _occur_count( - spatial_x: NDArrayA, spatial_y: NDArrayA, thresholds: NDArrayA, label_idx: NDArrayA, n: int, k: int, l_val: int + spatial_x: NDArrayA, + spatial_y: NDArrayA, + thresholds: NDArrayA, + label_idx: NDArrayA, + n: int, + k: int, + l_val: int ) -> NDArrayA: # Allocate a 2D array to store a flat local result per point. - local_results = np.zeros((n, l_val * k * k), dtype=np.int32) + k2 = k * k + local_results = np.zeros((n, l_val * k2), dtype=np.int32) + for i in prange(n): - local_counts = np.zeros(l_val * k * k, dtype=np.int32) + local_counts = np.zeros(l_val * k2, dtype=np.int32) + for j in range(n): if i == j: continue dx = spatial_x[i] - spatial_x[j] dy = spatial_y[i] - spatial_y[j] - dist_sq = dx * dx + dy * dy - for r in range(l_val): - thresh = thresholds[r] - if dist_sq <= thresh: - index = r * (k * k) + label_idx[i] * k + label_idx[j] - local_counts[index] += 1 - # else: - # break # If this threshold fails, smaller ones will also fail. - for m in range(l_val * k * k): - local_results[i, m] = local_counts[m] - - # Reduce over all points: sum the local counts. - result_flat = np.zeros(l_val * k * k, dtype=np.int32) - for m in range(l_val * k * k): - s = 0 - for i in range(n): - s += local_results[i, m] - result_flat[m] = s - - # Reshape to a 3D array with shape (interval, k, k) - counts = np.empty((l_val, k, k), dtype=np.int32) - for r in range(l_val): - for i in range(k): - for j in range(k): - counts[r, i, j] = result_flat[r * (k * k) + i * k + j] + d2 = dx * dx + dy * dy + + pair = label_idx[i] * k + label_idx[j] # fixed in r–loop + base = pair * l_val # first cell for that pair - # Rearrange axes to match the original convention: (k, k, interval) - result = np.empty((k, k, l_val), dtype=np.int32) - for i in range(k): - for j in range(k): for r in range(l_val): - result[i, j, r] = counts[r, i, j] - return result + if d2 <= thresholds[r]: + local_counts[base + r] += 1 + local_results[i, :] = local_counts + # for m in range(l_val * k * k): + # local_results[i, m] = local_counts[m] + + # reduction and reshape stay the same + result_flat = local_results.sum(axis=0) + result = result_flat.reshape(k, k, l_val) + + return result def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: """ From cee646ae2d72f8d7f685058c836f7afded835789 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 15:06:41 +0200 Subject: [PATCH 09/20] jit the outer function and parallelize --- src/squidpy/gr/_ppatterns.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 9a18bbaf5..cdc882bd8 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -266,7 +266,7 @@ def _score_helper( return score_perms -@njit(parallel=True, fastmath=True) +@njit(parallel=True, fastmath=True, cache=True) def _occur_count( spatial_x: NDArrayA, spatial_y: NDArrayA, @@ -296,10 +296,8 @@ def _occur_count( for r in range(l_val): if d2 <= thresholds[r]: local_counts[base + r] += 1 - local_results[i, :] = local_counts - # for m in range(l_val * k * k): - # local_results[i, m] = local_counts[m] + local_results[i, :] = local_counts # reduction and reshape stay the same result_flat = local_results.sum(axis=0) @@ -307,7 +305,12 @@ def _occur_count( return result -def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: +@njit(parallel=True, fastmath=True, cache=True) +def _co_occurrence_helper( + v_x: NDArrayA, + v_y: NDArrayA, + v_radium: NDArrayA, + labs: NDArrayA) -> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. @@ -342,7 +345,7 @@ def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs # Compute co-occurrence probabilities for each threshold bin. occ_prob = np.empty((k, k, l_val), dtype=np.float32) - for r in range(l_val): + for r in prange(l_val): co_occur = counts[:, :, r].astype(np.float32) # Compute the total count for this threshold. From 05dd724cb166137e525db3fa5a26ae7d4463a4be Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Jun 2025 13:08:08 +0000 Subject: [PATCH 10/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index cdc882bd8..873b757b6 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -266,15 +266,10 @@ def _score_helper( return score_perms + @njit(parallel=True, fastmath=True, cache=True) def _occur_count( - spatial_x: NDArrayA, - spatial_y: NDArrayA, - thresholds: NDArrayA, - label_idx: NDArrayA, - n: int, - k: int, - l_val: int + spatial_x: NDArrayA, spatial_y: NDArrayA, thresholds: NDArrayA, label_idx: NDArrayA, n: int, k: int, l_val: int ) -> NDArrayA: # Allocate a 2D array to store a flat local result per point. k2 = k * k @@ -290,8 +285,8 @@ def _occur_count( dy = spatial_y[i] - spatial_y[j] d2 = dx * dx + dy * dy - pair = label_idx[i] * k + label_idx[j] # fixed in r–loop - base = pair * l_val # first cell for that pair + pair = label_idx[i] * k + label_idx[j] # fixed in r–loop + base = pair * l_val # first cell for that pair for r in range(l_val): if d2 <= thresholds[r]: @@ -305,12 +300,9 @@ def _occur_count( return result + @njit(parallel=True, fastmath=True, cache=True) -def _co_occurrence_helper( - v_x: NDArrayA, - v_y: NDArrayA, - v_radium: NDArrayA, - labs: NDArrayA) -> NDArrayA: +def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. From c490d45d57ccea292c7b5848f3958a9994f4cdc7 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 15:16:56 +0200 Subject: [PATCH 11/20] Fix Mypy checking Typing error --- src/squidpy/gr/_ppatterns.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index cdc882bd8..cbcf700a3 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -5,7 +5,8 @@ from collections.abc import Iterable, Sequence from importlib.util import find_spec from itertools import chain -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, cast +from numpy.typing import NDArray import numba.types as nt import numpy as np @@ -303,7 +304,7 @@ def _occur_count( result_flat = local_results.sum(axis=0) result = result_flat.reshape(k, k, l_val) - return result + return cast(NDArray[np.int32], result) @njit(parallel=True, fastmath=True, cache=True) def _co_occurrence_helper( From d0884ca02df45b1df92db4357b21c827097119bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Jun 2025 13:19:46 +0000 Subject: [PATCH 12/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5d2993e54..244471352 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -6,7 +6,6 @@ from importlib.util import find_spec from itertools import chain from typing import TYPE_CHECKING, Any, Literal, cast -from numpy.typing import NDArray import numba.types as nt import numpy as np @@ -14,6 +13,7 @@ from anndata import AnnData from numba import njit, prange from numpy.random import default_rng +from numpy.typing import NDArray from scanpy import logging as logg from scanpy.get import _get_obs_rep from scanpy.metrics._gearys_c import _gearys_c From 6e5d1df6e584328f05c6db8cf25ceea6e1b01397 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 15:47:53 +0200 Subject: [PATCH 13/20] Disable the cast typing in jit --- src/squidpy/gr/_ppatterns.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5d2993e54..3db050c50 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -5,8 +5,7 @@ from collections.abc import Iterable, Sequence from importlib.util import find_spec from itertools import chain -from typing import TYPE_CHECKING, Any, Literal, cast -from numpy.typing import NDArray +from typing import TYPE_CHECKING, Any, Literal import numba.types as nt import numpy as np @@ -297,9 +296,7 @@ def _occur_count( # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - result = result_flat.reshape(k, k, l_val) - - return cast(NDArray[np.int32], result) + return result_flat.reshape(k, k, l_val).copy() @njit(parallel=True, fastmath=True, cache=True) @@ -337,9 +334,9 @@ def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) # Compute co-occurrence probabilities for each threshold bin. - occ_prob = np.empty((k, k, l_val), dtype=np.float32) + occ_prob = np.empty((k, k, l_val), dtype=np.float64) for r in prange(l_val): - co_occur = counts[:, :, r].astype(np.float32) + co_occur = counts[:, :, r].astype(np.float64) # Compute the total count for this threshold. total = 0.0 @@ -348,7 +345,7 @@ def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs total += co_occur[i, j] # Compute the normalized probability matrix. - probs_matrix = np.zeros((k, k), dtype=np.float32) + probs_matrix = np.zeros((k, k), dtype=np.float64) if total != 0.0: for i in range(k): for j in range(k): From a62128b3d2d84731ae07ddf76eb401a2d42bf4a5 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 16:20:02 +0200 Subject: [PATCH 14/20] Try fix typing check error by mypy --- src/squidpy/gr/_ppatterns.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 3db050c50..19a3e78fb 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -296,7 +296,9 @@ def _occur_count( # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - return result_flat.reshape(k, k, l_val).copy() + result = result_flat.reshape(k, k, l_val).astype(np.int32) + + return result @njit(parallel=True, fastmath=True, cache=True) From 0aab85671732ce0f1ddb89b11942122bafb8037a Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Sat, 14 Jun 2025 20:58:20 +0200 Subject: [PATCH 15/20] Try: fix typing check error by mypy --- src/squidpy/gr/_ppatterns.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 19a3e78fb..17b1b5578 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -276,7 +276,7 @@ def _occur_count( local_results = np.zeros((n, l_val * k2), dtype=np.int32) for i in prange(n): - local_counts = np.zeros(l_val * k2, dtype=np.int32) + local_counts: NDArrayA = np.zeros(l_val * k2, dtype=np.int32) for j in range(n): if i == j: @@ -292,11 +292,11 @@ def _occur_count( if d2 <= thresholds[r]: local_counts[base + r] += 1 - local_results[i, :] = local_counts + local_results[i] = local_counts # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - result = result_flat.reshape(k, k, l_val).astype(np.int32) + result = result_flat.reshape(k, k, l_val).copy() return result From 9ac269392846ecc043c81085dda9160ba16d162d Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Sat, 14 Jun 2025 21:02:28 +0200 Subject: [PATCH 16/20] Try: fix typing check error by mypy --- src/squidpy/gr/_ppatterns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 17b1b5578..6e3230be0 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -296,7 +296,7 @@ def _occur_count( # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - result = result_flat.reshape(k, k, l_val).copy() + result: NDArrayA = result_flat.reshape(k, k, l_val).copy() return result From 27a6d70eb3c406e570151483f998b8bde191853b Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Fri, 11 Jul 2025 13:44:42 +0200 Subject: [PATCH 17/20] removed copy cleanup the code --- src/squidpy/gr/_ppatterns.py | 59 ++++++------------------------------ 1 file changed, 10 insertions(+), 49 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 6e3230be0..be826279b 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -276,8 +276,6 @@ def _occur_count( local_results = np.zeros((n, l_val * k2), dtype=np.int32) for i in prange(n): - local_counts: NDArrayA = np.zeros(l_val * k2, dtype=np.int32) - for j in range(n): if i == j: continue @@ -290,13 +288,11 @@ def _occur_count( for r in range(l_val): if d2 <= thresholds[r]: - local_counts[base + r] += 1 - - local_results[i] = local_counts + local_results[i][base + r] += 1 # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - result: NDArrayA = result_flat.reshape(k, k, l_val).copy() + result: NDArrayA = result_flat.reshape(k, k, l_val) return result @@ -332,57 +328,22 @@ def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs # Compute squared thresholds from the interval (skip the first value) thresholds = (v_radium[1:]) ** 2 - # Compute cco-occurence ounts. + # Compute co-occurence counts. counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) - # Compute co-occurrence probabilities for each threshold bin. - occ_prob = np.empty((k, k, l_val), dtype=np.float64) - for r in prange(l_val): - co_occur = counts[:, :, r].astype(np.float64) - - # Compute the total count for this threshold. - total = 0.0 - for i in range(k): - for j in range(k): - total += co_occur[i, j] - - # Compute the normalized probability matrix. - probs_matrix = np.zeros((k, k), dtype=np.float64) - if total != 0.0: - for i in range(k): - for j in range(k): - probs_matrix[i, j] = co_occur[i, j] / total - - probs = np.zeros(k, dtype=np.float32) - for j in range(k): - s = 0.0 - for i in range(k): - s += probs_matrix[i, j] - probs[j] = s + occ_prob = np.zeros((k, k, l_val), dtype=np.float64) + row_sums = counts.sum(axis=0) + totals = row_sums.sum(axis=0) - # Compute conditional probabilities. - probs_con = np.zeros((k, k), dtype=np.float32) + for r in prange(l_val): + probs = row_sums[:, r] / totals[r] for c in range(k): - row_sum = 0.0 - for j in range(k): - row_sum += co_occur[c, j] for i in range(k): - cond = 0.0 - if row_sum != 0.0: - cond = co_occur[c, i] / row_sum - if probs[i] == 0.0: - probs_con[c, i] = 0.0 - else: - probs_con[c, i] = cond / probs[i] - - # Transpose to match (k, k, interval). - for i in range(k): - for c in range(k): - occ_prob[i, c, r] = probs_con[c, i] + if probs[i] != 0.0 and row_sums[c, r] != 0.0: + occ_prob[i, c, r] = (counts[c, i, r] / row_sums[c, r]) / probs[i] return occ_prob - @d.dedent def co_occurrence( adata: AnnData | SpatialData, From 2a11c48e86a2c5c034851c23c24e2b12a101ba9c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Jul 2025 13:47:10 +0000 Subject: [PATCH 18/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index be826279b..0286a63f2 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -344,6 +344,7 @@ def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs return occ_prob + @d.dedent def co_occurrence( adata: AnnData | SpatialData, From fa6eb24afe8f74240111ae2c463f35251b890811 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 31 Jul 2025 15:58:58 +0200 Subject: [PATCH 19/20] remove unused "type ignore" --- src/squidpy/im/_io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/squidpy/im/_io.py b/src/squidpy/im/_io.py index 80a330bb6..3f092470b 100644 --- a/src/squidpy/im/_io.py +++ b/src/squidpy/im/_io.py @@ -25,7 +25,7 @@ def _assert_dims_present(dims: tuple[str, ...], include_z: bool = True) -> None: # modification of `skimage`'s `pil_to_ndarray`: # https://github.com/scikit-image/scikit-image/blob/main/skimage/io/_plugins/pil_plugin.py#L55 -def _infer_shape_dtype(fname: str) -> tuple[tuple[int, ...], np.dtype]: # type: ignore[type-arg] +def _infer_shape_dtype(fname: str) -> tuple[tuple[int, ...], np.dtype]: def _palette_is_grayscale(pil_image: Image.Image) -> bool: # get palette as an array with R, G, B columns palette = np.asarray(pil_image.getpalette()).reshape((256, 3)) @@ -81,7 +81,7 @@ def _palette_is_grayscale(pil_image: Image.Image) -> bool: raise ValueError(f"Unable to infer image dtype for image mode `{image.mode}`.") -def _get_image_shape_dtype(fname: str) -> tuple[tuple[int, ...], np.dtype]: # type: ignore[type-arg] +def _get_image_shape_dtype(fname: str) -> tuple[tuple[int, ...], np.dtype]: try: return _infer_shape_dtype(fname) except Image.UnidentifiedImageError as e: @@ -101,7 +101,7 @@ def _get_image_shape_dtype(fname: str) -> tuple[tuple[int, ...], np.dtype]: # t def _infer_dimensions( obj: NDArrayA | xr.DataArray | str, infer_dimensions: InferDimensions | tuple[str, ...] = InferDimensions.DEFAULT, -) -> tuple[tuple[int, ...], tuple[str, ...], np.dtype, tuple[int, ...]]: # type: ignore[type-arg] +) -> tuple[tuple[int, ...], tuple[str, ...], np.dtype, tuple[int, ...]]: """ Infer dimension names of an array. From 7575052efb87301f2f511171051d2eeb0a1f785f Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 31 Jul 2025 19:01:09 +0200 Subject: [PATCH 20/20] chore: trigger CI check