|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import warnings |
| 4 | + |
| 5 | +import geopandas as gpd |
| 6 | +import numpy as np |
| 7 | +import spatialdata as sd |
| 8 | +from scipy.spatial import cKDTree |
| 9 | +from spatialdata.models import get_axes_names |
| 10 | +from spatialdata.transformations import get_transformation |
| 11 | + |
| 12 | +from squidpy._validators import assert_key_in_sdata |
| 13 | + |
| 14 | +__all__ = ["derive_mpp_from_shapes"] |
| 15 | + |
| 16 | +_ANISOTROPY_TOL = 1e-3 |
| 17 | +_PITCH_MAX_SAMPLES = 5000 |
| 18 | +_SQUARENESS_SAMPLE = 10 |
| 19 | +_SQUARENESS_TOL = 0.98 |
| 20 | + |
| 21 | + |
| 22 | +def derive_mpp_from_shapes( |
| 23 | + sdata: sd.SpatialData, |
| 24 | + shapes_key: str, |
| 25 | + coordinate_system: str, |
| 26 | + *, |
| 27 | + um_between_centers: float | None = None, |
| 28 | + um_diameter: float | None = None, |
| 29 | + um_square_edge: float | None = None, |
| 30 | +) -> float: |
| 31 | + """ |
| 32 | + Derive microns-per-pixel for a coordinate system from a shapes element with a known physical scale. |
| 33 | +
|
| 34 | + Given a shapes element (e.g. Visium spots, Visium HD bins) whose physical spacing, spot diameter, |
| 35 | + or bin edge length is known, this function returns the microns-per-pixel of |
| 36 | + ``coordinate_system`` by measuring the corresponding geometric quantity in the target coordinate |
| 37 | + system and dividing the physical value by it. |
| 38 | +
|
| 39 | + Exactly one of ``um_between_centers``, ``um_diameter``, or ``um_square_edge`` must be provided. |
| 40 | + Prefer ``um_between_centers`` when the technology's canonical pitch is known: it averages over |
| 41 | + thousands of centroid pairs in the realised grid and is robust to per-spot calibration noise in |
| 42 | + the stored radius/area. ``um_diameter`` and ``um_square_edge`` depend on a single stored scalar |
| 43 | + per shape and can disagree by a fraction of a percent on real Visium data where the radius and |
| 44 | + grid pitch are calibrated separately. |
| 45 | +
|
| 46 | + Each physical input is geometry-specific: |
| 47 | +
|
| 48 | + - ``um_diameter`` applies only to Point geometries (uses the ``radius`` column). |
| 49 | + - ``um_square_edge`` applies only to Polygon geometries and assumes square or rectangular |
| 50 | + polygons; a sample of polygons is checked for squareness and the call raises if any are not. |
| 51 | + - ``um_between_centers`` works for any 2D geometry. |
| 52 | +
|
| 53 | + The function requires the transformation between the shapes' native frame and ``coordinate_system`` |
| 54 | + to be a similarity (uniform scale plus optional rotation and translation). Non-uniform scales, |
| 55 | + shear, or other anisotropy raise ``ValueError``: a single scalar microns-per-pixel is not |
| 56 | + well-defined in that case. |
| 57 | +
|
| 58 | + Parameters |
| 59 | + ---------- |
| 60 | + sdata |
| 61 | + SpatialData object containing the shapes element. |
| 62 | + shapes_key |
| 63 | + Key of the shapes element in ``sdata.shapes``. |
| 64 | + coordinate_system |
| 65 | + Name of the target coordinate system (pixel grid) to derive microns-per-pixel for. Must be |
| 66 | + one of the coordinate systems the shapes element is registered against. |
| 67 | + um_between_centers |
| 68 | + Known physical center-to-center distance of neighbouring shapes, in microns. For Visium v1 |
| 69 | + this is 100 (hex grid); for Visium HD it equals the bin size in microns. Works for any 2D |
| 70 | + geometry. |
| 71 | + um_diameter |
| 72 | + Known physical diameter of a circular spot, in microns. Point geometries only (uses the |
| 73 | + ``radius`` column). For Visium v1 this is 55. |
| 74 | + um_square_edge |
| 75 | + Known physical edge length of a square/rectangular bin, in microns. Polygon geometries only. |
| 76 | + For Visium HD this equals the bin size in microns. Non-rectangular polygons (e.g. hex bins, |
| 77 | + circular approximations) are rejected. |
| 78 | +
|
| 79 | + Returns |
| 80 | + ------- |
| 81 | + float |
| 82 | + Microns per pixel of ``coordinate_system``. |
| 83 | +
|
| 84 | + Raises |
| 85 | + ------ |
| 86 | + ValueError |
| 87 | + If not exactly one of the three physical inputs is given; if ``coordinate_system`` is not |
| 88 | + registered for the element; if shapes are 3D or contain ``MultiPolygon`` geometries; if |
| 89 | + ``um_diameter`` is paired with Polygons or ``um_square_edge`` with Points; if |
| 90 | + ``um_square_edge`` is paired with non-rectangular polygons; if ``um_between_centers`` is |
| 91 | + given with only one shape; or if the transformation to ``coordinate_system`` is not a |
| 92 | + similarity. |
| 93 | + """ |
| 94 | + n_given = sum(x is not None for x in (um_between_centers, um_diameter, um_square_edge)) |
| 95 | + if n_given != 1: |
| 96 | + raise ValueError("Provide exactly one of `um_between_centers`, `um_diameter`, or `um_square_edge`.") |
| 97 | + |
| 98 | + assert_key_in_sdata(sdata, shapes_key, attr="shapes") |
| 99 | + gdf = sdata.shapes[shapes_key] |
| 100 | + |
| 101 | + if len(gdf) == 0: |
| 102 | + raise ValueError(f"Shapes element '{shapes_key}' is empty; cannot derive mpp.") |
| 103 | + |
| 104 | + axes = get_axes_names(gdf) |
| 105 | + if "z" in axes: |
| 106 | + raise ValueError(f"Shapes element '{shapes_key}' is 3D (axes={axes}); only 2D shapes are supported.") |
| 107 | + |
| 108 | + all_transforms = get_transformation(gdf, get_all=True) |
| 109 | + if coordinate_system not in all_transforms: |
| 110 | + raise ValueError( |
| 111 | + f"Coordinate system '{coordinate_system}' is not registered for shapes element " |
| 112 | + f"'{shapes_key}'. Available: {sorted(all_transforms)}." |
| 113 | + ) |
| 114 | + |
| 115 | + geom_types = set(gdf.geometry.geom_type.unique()) |
| 116 | + if "MultiPolygon" in geom_types: |
| 117 | + raise ValueError( |
| 118 | + f"Shapes element '{shapes_key}' contains MultiPolygon geometries; only Point and Polygon are supported." |
| 119 | + ) |
| 120 | + |
| 121 | + affine = np.asarray(all_transforms[coordinate_system].to_affine_matrix(("x", "y"), ("x", "y"))) |
| 122 | + A = affine[:2, :2] |
| 123 | + t = affine[:2, 2] |
| 124 | + |
| 125 | + sv = np.linalg.svd(A, compute_uv=False) |
| 126 | + s1, s2 = float(sv[0]), float(sv[1]) |
| 127 | + if abs(s1 - s2) / max(s1, s2) > _ANISOTROPY_TOL: |
| 128 | + physical = next(x for x in (um_between_centers, um_diameter, um_square_edge) if x is not None) |
| 129 | + raise ValueError( |
| 130 | + f"Transformation from shapes '{shapes_key}' to coordinate system " |
| 131 | + f"'{coordinate_system}' is anisotropic (singular values {s1:.6g}, {s2:.6g}). " |
| 132 | + f"A single scalar microns-per-pixel is not well-defined; per-axis values would be " |
| 133 | + f"{physical / s1:.6g} and {physical / s2:.6g}." |
| 134 | + ) |
| 135 | + |
| 136 | + if um_between_centers is not None: |
| 137 | + return _mpp_from_pitch(gdf, A, t, um_between_centers) |
| 138 | + if um_diameter is not None: |
| 139 | + if geom_types != {"Point"}: |
| 140 | + raise ValueError( |
| 141 | + f"`um_diameter` requires Point geometries; got {sorted(geom_types)}. " |
| 142 | + "For square/rectangular polygons use `um_square_edge`." |
| 143 | + ) |
| 144 | + return _mpp_from_diameter(gdf, A, um_diameter) |
| 145 | + assert um_square_edge is not None |
| 146 | + if geom_types != {"Polygon"}: |
| 147 | + raise ValueError( |
| 148 | + f"`um_square_edge` requires Polygon geometries; got {sorted(geom_types)}. " |
| 149 | + "For circular Point spots use `um_diameter`." |
| 150 | + ) |
| 151 | + return _mpp_from_square_edge(gdf, A, um_square_edge) |
| 152 | + |
| 153 | + |
| 154 | +def _mpp_from_pitch(gdf: gpd.GeoDataFrame, A: np.ndarray, t: np.ndarray, um_between_centers: float) -> float: |
| 155 | + n = len(gdf) |
| 156 | + if n < 2: |
| 157 | + raise ValueError("Pitch is undefined for a single shape; pass `um_diameter` or `um_square_edge` instead.") |
| 158 | + centroids = gdf.geometry.centroid |
| 159 | + xy_native = np.column_stack([centroids.x.to_numpy(), centroids.y.to_numpy()]) |
| 160 | + xy_target = xy_native @ A.T + t |
| 161 | + query_xy = xy_target |
| 162 | + if n > _PITCH_MAX_SAMPLES: |
| 163 | + rng = np.random.default_rng(0) |
| 164 | + query_xy = xy_target[rng.choice(n, size=_PITCH_MAX_SAMPLES, replace=False)] |
| 165 | + nn_dist = cKDTree(xy_target).query(query_xy, k=2)[0][:, 1] |
| 166 | + return um_between_centers / float(np.median(nn_dist)) |
| 167 | + |
| 168 | + |
| 169 | +def _mpp_from_diameter(gdf: gpd.GeoDataFrame, A: np.ndarray, um_diameter: float) -> float: |
| 170 | + if "radius" not in gdf.columns: |
| 171 | + raise ValueError("Point shapes element is missing the 'radius' column required for diameter-based mpp.") |
| 172 | + scale = float(np.sqrt(abs(np.linalg.det(A)))) |
| 173 | + diam_target = float(np.median(2.0 * gdf["radius"].to_numpy())) * scale |
| 174 | + return um_diameter / diam_target |
| 175 | + |
| 176 | + |
| 177 | +def _mpp_from_square_edge(gdf: gpd.GeoDataFrame, A: np.ndarray, um_square_edge: float) -> float: |
| 178 | + _assert_polygons_are_square(gdf) |
| 179 | + det = float(abs(np.linalg.det(A))) |
| 180 | + edge_target = float(np.sqrt(np.median(gdf.geometry.area.to_numpy()) * det)) |
| 181 | + return um_square_edge / edge_target |
| 182 | + |
| 183 | + |
| 184 | +def _assert_polygons_are_square(gdf: gpd.GeoDataFrame) -> None: |
| 185 | + n = len(gdf) |
| 186 | + sample_size = min(_SQUARENESS_SAMPLE, n) |
| 187 | + rng = np.random.default_rng(0) |
| 188 | + sample = gdf.geometry.iloc[rng.choice(n, size=sample_size, replace=False)] |
| 189 | + # shapely's oriented_envelope emits benign divide-by-zero RuntimeWarnings on axis-aligned |
| 190 | + # rectangles; result is correct, so we silence them here only. |
| 191 | + with warnings.catch_warnings(): |
| 192 | + warnings.filterwarnings("ignore", category=RuntimeWarning, module=r"shapely\..*") |
| 193 | + mrr_area = sample.minimum_rotated_rectangle().area.to_numpy() |
| 194 | + ratios = sample.area.to_numpy() / mrr_area |
| 195 | + if np.any(ratios < _SQUARENESS_TOL): |
| 196 | + raise ValueError( |
| 197 | + f"`um_square_edge` requires square/rectangular polygons; sampled {sample_size} polygons " |
| 198 | + f"and found area/minimum-rotated-rectangle ratio below {_SQUARENESS_TOL} " |
| 199 | + f"(min={float(ratios.min()):.4f}). For non-rectangular geometries use `um_between_centers`." |
| 200 | + ) |
0 commit comments