Skip to content

Commit f3eab97

Browse files
Merge branch 'main' into feat/spatial_neighbours
2 parents a0426e6 + 093217d commit f3eab97

4 files changed

Lines changed: 419 additions & 2 deletions

File tree

src/squidpy/experimental/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@
66

77
from __future__ import annotations
88

9-
from . import im, pl
9+
from . import im, pl, utils
1010

11-
__all__ = ["im", "pl"]
11+
__all__ = ["im", "pl", "utils"]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from __future__ import annotations
2+
3+
from ._derive_mpp import derive_mpp_from_shapes
4+
5+
__all__ = ["derive_mpp_from_shapes"]
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
from __future__ import annotations
2+
3+
import warnings
4+
5+
import geopandas as gpd
6+
import numpy as np
7+
import spatialdata as sd
8+
from scipy.spatial import cKDTree
9+
from spatialdata.models import get_axes_names
10+
from spatialdata.transformations import get_transformation
11+
12+
from squidpy._validators import assert_key_in_sdata
13+
14+
__all__ = ["derive_mpp_from_shapes"]
15+
16+
_ANISOTROPY_TOL = 1e-3
17+
_PITCH_MAX_SAMPLES = 5000
18+
_SQUARENESS_SAMPLE = 10
19+
_SQUARENESS_TOL = 0.98
20+
21+
22+
def derive_mpp_from_shapes(
23+
sdata: sd.SpatialData,
24+
shapes_key: str,
25+
coordinate_system: str,
26+
*,
27+
um_between_centers: float | None = None,
28+
um_diameter: float | None = None,
29+
um_square_edge: float | None = None,
30+
) -> float:
31+
"""
32+
Derive microns-per-pixel for a coordinate system from a shapes element with a known physical scale.
33+
34+
Given a shapes element (e.g. Visium spots, Visium HD bins) whose physical spacing, spot diameter,
35+
or bin edge length is known, this function returns the microns-per-pixel of
36+
``coordinate_system`` by measuring the corresponding geometric quantity in the target coordinate
37+
system and dividing the physical value by it.
38+
39+
Exactly one of ``um_between_centers``, ``um_diameter``, or ``um_square_edge`` must be provided.
40+
Prefer ``um_between_centers`` when the technology's canonical pitch is known: it averages over
41+
thousands of centroid pairs in the realised grid and is robust to per-spot calibration noise in
42+
the stored radius/area. ``um_diameter`` and ``um_square_edge`` depend on a single stored scalar
43+
per shape and can disagree by a fraction of a percent on real Visium data where the radius and
44+
grid pitch are calibrated separately.
45+
46+
Each physical input is geometry-specific:
47+
48+
- ``um_diameter`` applies only to Point geometries (uses the ``radius`` column).
49+
- ``um_square_edge`` applies only to Polygon geometries and assumes square or rectangular
50+
polygons; a sample of polygons is checked for squareness and the call raises if any are not.
51+
- ``um_between_centers`` works for any 2D geometry.
52+
53+
The function requires the transformation between the shapes' native frame and ``coordinate_system``
54+
to be a similarity (uniform scale plus optional rotation and translation). Non-uniform scales,
55+
shear, or other anisotropy raise ``ValueError``: a single scalar microns-per-pixel is not
56+
well-defined in that case.
57+
58+
Parameters
59+
----------
60+
sdata
61+
SpatialData object containing the shapes element.
62+
shapes_key
63+
Key of the shapes element in ``sdata.shapes``.
64+
coordinate_system
65+
Name of the target coordinate system (pixel grid) to derive microns-per-pixel for. Must be
66+
one of the coordinate systems the shapes element is registered against.
67+
um_between_centers
68+
Known physical center-to-center distance of neighbouring shapes, in microns. For Visium v1
69+
this is 100 (hex grid); for Visium HD it equals the bin size in microns. Works for any 2D
70+
geometry.
71+
um_diameter
72+
Known physical diameter of a circular spot, in microns. Point geometries only (uses the
73+
``radius`` column). For Visium v1 this is 55.
74+
um_square_edge
75+
Known physical edge length of a square/rectangular bin, in microns. Polygon geometries only.
76+
For Visium HD this equals the bin size in microns. Non-rectangular polygons (e.g. hex bins,
77+
circular approximations) are rejected.
78+
79+
Returns
80+
-------
81+
float
82+
Microns per pixel of ``coordinate_system``.
83+
84+
Raises
85+
------
86+
ValueError
87+
If not exactly one of the three physical inputs is given; if ``coordinate_system`` is not
88+
registered for the element; if shapes are 3D or contain ``MultiPolygon`` geometries; if
89+
``um_diameter`` is paired with Polygons or ``um_square_edge`` with Points; if
90+
``um_square_edge`` is paired with non-rectangular polygons; if ``um_between_centers`` is
91+
given with only one shape; or if the transformation to ``coordinate_system`` is not a
92+
similarity.
93+
"""
94+
n_given = sum(x is not None for x in (um_between_centers, um_diameter, um_square_edge))
95+
if n_given != 1:
96+
raise ValueError("Provide exactly one of `um_between_centers`, `um_diameter`, or `um_square_edge`.")
97+
98+
assert_key_in_sdata(sdata, shapes_key, attr="shapes")
99+
gdf = sdata.shapes[shapes_key]
100+
101+
if len(gdf) == 0:
102+
raise ValueError(f"Shapes element '{shapes_key}' is empty; cannot derive mpp.")
103+
104+
axes = get_axes_names(gdf)
105+
if "z" in axes:
106+
raise ValueError(f"Shapes element '{shapes_key}' is 3D (axes={axes}); only 2D shapes are supported.")
107+
108+
all_transforms = get_transformation(gdf, get_all=True)
109+
if coordinate_system not in all_transforms:
110+
raise ValueError(
111+
f"Coordinate system '{coordinate_system}' is not registered for shapes element "
112+
f"'{shapes_key}'. Available: {sorted(all_transforms)}."
113+
)
114+
115+
geom_types = set(gdf.geometry.geom_type.unique())
116+
if "MultiPolygon" in geom_types:
117+
raise ValueError(
118+
f"Shapes element '{shapes_key}' contains MultiPolygon geometries; only Point and Polygon are supported."
119+
)
120+
121+
affine = np.asarray(all_transforms[coordinate_system].to_affine_matrix(("x", "y"), ("x", "y")))
122+
A = affine[:2, :2]
123+
t = affine[:2, 2]
124+
125+
sv = np.linalg.svd(A, compute_uv=False)
126+
s1, s2 = float(sv[0]), float(sv[1])
127+
if abs(s1 - s2) / max(s1, s2) > _ANISOTROPY_TOL:
128+
physical = next(x for x in (um_between_centers, um_diameter, um_square_edge) if x is not None)
129+
raise ValueError(
130+
f"Transformation from shapes '{shapes_key}' to coordinate system "
131+
f"'{coordinate_system}' is anisotropic (singular values {s1:.6g}, {s2:.6g}). "
132+
f"A single scalar microns-per-pixel is not well-defined; per-axis values would be "
133+
f"{physical / s1:.6g} and {physical / s2:.6g}."
134+
)
135+
136+
if um_between_centers is not None:
137+
return _mpp_from_pitch(gdf, A, t, um_between_centers)
138+
if um_diameter is not None:
139+
if geom_types != {"Point"}:
140+
raise ValueError(
141+
f"`um_diameter` requires Point geometries; got {sorted(geom_types)}. "
142+
"For square/rectangular polygons use `um_square_edge`."
143+
)
144+
return _mpp_from_diameter(gdf, A, um_diameter)
145+
assert um_square_edge is not None
146+
if geom_types != {"Polygon"}:
147+
raise ValueError(
148+
f"`um_square_edge` requires Polygon geometries; got {sorted(geom_types)}. "
149+
"For circular Point spots use `um_diameter`."
150+
)
151+
return _mpp_from_square_edge(gdf, A, um_square_edge)
152+
153+
154+
def _mpp_from_pitch(gdf: gpd.GeoDataFrame, A: np.ndarray, t: np.ndarray, um_between_centers: float) -> float:
155+
n = len(gdf)
156+
if n < 2:
157+
raise ValueError("Pitch is undefined for a single shape; pass `um_diameter` or `um_square_edge` instead.")
158+
centroids = gdf.geometry.centroid
159+
xy_native = np.column_stack([centroids.x.to_numpy(), centroids.y.to_numpy()])
160+
xy_target = xy_native @ A.T + t
161+
query_xy = xy_target
162+
if n > _PITCH_MAX_SAMPLES:
163+
rng = np.random.default_rng(0)
164+
query_xy = xy_target[rng.choice(n, size=_PITCH_MAX_SAMPLES, replace=False)]
165+
nn_dist = cKDTree(xy_target).query(query_xy, k=2)[0][:, 1]
166+
return um_between_centers / float(np.median(nn_dist))
167+
168+
169+
def _mpp_from_diameter(gdf: gpd.GeoDataFrame, A: np.ndarray, um_diameter: float) -> float:
170+
if "radius" not in gdf.columns:
171+
raise ValueError("Point shapes element is missing the 'radius' column required for diameter-based mpp.")
172+
scale = float(np.sqrt(abs(np.linalg.det(A))))
173+
diam_target = float(np.median(2.0 * gdf["radius"].to_numpy())) * scale
174+
return um_diameter / diam_target
175+
176+
177+
def _mpp_from_square_edge(gdf: gpd.GeoDataFrame, A: np.ndarray, um_square_edge: float) -> float:
178+
_assert_polygons_are_square(gdf)
179+
det = float(abs(np.linalg.det(A)))
180+
edge_target = float(np.sqrt(np.median(gdf.geometry.area.to_numpy()) * det))
181+
return um_square_edge / edge_target
182+
183+
184+
def _assert_polygons_are_square(gdf: gpd.GeoDataFrame) -> None:
185+
n = len(gdf)
186+
sample_size = min(_SQUARENESS_SAMPLE, n)
187+
rng = np.random.default_rng(0)
188+
sample = gdf.geometry.iloc[rng.choice(n, size=sample_size, replace=False)]
189+
# shapely's oriented_envelope emits benign divide-by-zero RuntimeWarnings on axis-aligned
190+
# rectangles; result is correct, so we silence them here only.
191+
with warnings.catch_warnings():
192+
warnings.filterwarnings("ignore", category=RuntimeWarning, module=r"shapely\..*")
193+
mrr_area = sample.minimum_rotated_rectangle().area.to_numpy()
194+
ratios = sample.area.to_numpy() / mrr_area
195+
if np.any(ratios < _SQUARENESS_TOL):
196+
raise ValueError(
197+
f"`um_square_edge` requires square/rectangular polygons; sampled {sample_size} polygons "
198+
f"and found area/minimum-rotated-rectangle ratio below {_SQUARENESS_TOL} "
199+
f"(min={float(ratios.min()):.4f}). For non-rectangular geometries use `um_between_centers`."
200+
)

0 commit comments

Comments
 (0)