
Commit 953da00

srivarra and ieivanov authored
Added Position.compute_pyramids (#348)
* added Position.compute_pyramid
* moved import to compute_pyramid to avoid circular import issues
* added tensorstore optional dep to docs GHA workflow
* added tensorstore in pr.yml
* tensorstore not optional?
* implemented requested changes
* fixed example
* adjusted TIFFConverter._gen_chunks, had a Dask chunk alignment problem
* added tensorstore optional dep to docs and pr GHA workflows
* added comma
* added tensorstore in pip_args in docs/poly.py
* undid changes
* Fix chunk validation order in `_gen_chunks`, updated tests, consolidated checking chunks for divisibility
* added delete and adjusted tests
* move _adjust_chunks_for_divisibility into ngff.utils
* reuse chunk limiter in utils
* updated delete pyramid to make sure to delete the metadata
* replace [:1] with [0]
* update deletion
* updated _limit_zyx_chunk_size to include chunks as an optional param
* keep shape and chunks as tuples at input to _adjust_chunks_for_divisibility
* make initialize_pyramid public, cleaned up a test for initializing the pyramid
* Add pyramid sharding (#362)

  * wip: add pyramid sharding
  * check if array is actually sharded or not

  Signed-off-by: Sricharan Reddy Varra <[email protected]>

  ---------

  Signed-off-by: Sricharan Reddy Varra <[email protected]>
  Co-authored-by: Sricharan Reddy Varra <[email protected]>

* style: running formatting

Signed-off-by: Sricharan Reddy Varra <[email protected]>

---------

Signed-off-by: Sricharan Reddy Varra <[email protected]>
Co-authored-by: Ivan Ivanov <[email protected]>
1 parent 9ab8508 commit 953da00

10 files changed: +494 -138 lines

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+"""
+Multiscale Pyramid Creation
+============================
+
+Create and compute a multiscale pyramid for efficient visualization.
+"""
+
+import os
+from tempfile import TemporaryDirectory
+
+import numpy as np
+
+from iohub import open_ome_zarr
+
+# Create temporary directory and sample data
+tmp_dir = TemporaryDirectory()
+store_path = os.path.join(tmp_dir.name, "pyramid.zarr")
+
+data = np.random.randint(0, 255, size=(1, 2, 32, 256, 256), dtype=np.uint16)
+print(f"Original data shape: {data.shape}\n")
+
+# Create position with data and compute pyramid
+with open_ome_zarr(
+    store_path, layout="fov", mode="a", channel_names=["DAPI", "GFP"]
+) as position:
+    # Create full resolution image
+    position.create_image("0", data)
+
+    # Fill pyramid with downsampled data
+    position.compute_pyramid(levels=3, method="mean")
+
+    # Print results
+    print("Pyramid levels:")
+    for level in range(3):
+        level_array = position[str(level)]
+        scale = position.get_effective_scale(str(level))
+        print(f"  Level {level}: {level_array.shape}, scale={scale[-3:]}")
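As a follow-up to the example above (reusing its `position` handle, still inside the `with` block), the docstring added in this commit requires deleting an existing pyramid before recomputing with a different level count; a minimal sketch:

    # Recompute the same 3-level pyramid with a different reducer:
    position.compute_pyramid(method="max")

    # Changing the level count on an existing pyramid raises ValueError,
    # so drop levels 1+ (arrays and multiscales metadata) first:
    position.delete_pyramid()
    position.compute_pyramid(levels=4, method="mean")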

docs/poly.py

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@
 SOURCE_DIR = "docs/"

 #: Arguments to pass to `pip install`
-PIP_ARGS = [".[doc]"]
+PIP_ARGS = [".[doc,tensorstore]"]

 #: Mock data used for building local version
 MOCK_DATA = {

iohub/_deprecated/upti.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ def __init__(self, folder: str, extract_data: bool = False):
         self.files = glob.glob(os.path.join(folder, "*.tif"))
         info_img = tiff.imread(self.files[0])
         self.dtype = info_img.dtype
-        (self.height, self.width) = info_img.shape
+        self.height, self.width = info_img.shape
         self.positions = 1
         self.frames = 1
         self.patterns = 0

iohub/convert.py

Lines changed: 23 additions & 31 deletions
@@ -12,6 +12,10 @@

 from iohub.ngff.models import TransformationMeta
 from iohub.ngff.nodes import Position, open_ome_zarr
+from iohub.ngff.utils import (
+    _adjust_chunks_for_divisibility,
+    _limit_zyx_chunk_size,
+)
 from iohub.reader import MMStack, NDTiffDataset, read_images

 __all__ = ["TIFFConverter"]
@@ -20,23 +24,6 @@
 MAX_CHUNK_SIZE = 500e6  # in bytes


-def _adjust_chunks_for_divisibility(
-    chunks: list[int], dims: list[int]
-) -> list[int]:
-    """Adjust chunks to divide evenly into dimensions for Dask."""
-    adjusted = []
-    for chunk, dim in zip(chunks, dims):
-        if chunk > dim:
-            adjusted.append(dim)
-        elif dim % chunk != 0:
-            while chunk > 1 and dim % chunk != 0:
-                chunk -= 1
-            adjusted.append(chunk)
-        else:
-            adjusted.append(chunk)
-    return adjusted
-
-
 def _create_grid_from_coordinates(
     xy_coords: list[tuple[float, float]], rows: int, columns: int
 ):
@@ -250,15 +237,19 @@ def _get_pos_names(self):
     def _gen_chunks(self, input_chunks):
         """Generate valid chunk sizes for the output Zarr array.

-        First limits chunk size to MAX_CHUNK_SIZE by halving Z, then adjusts
-        chunks to divide evenly into dimensions. Order matters because halving
-        Z (e.g. 10 -> 5 -> 2) may no longer divide evenly, so we validate
-        divisibility last.
+        input_chunks may be a string ("XY", "XYZ") or a tuple of chunk
+        dimensions. Chunk size will be limited to MAX_CHUNK_SIZE and adjusted
+        to divide evenly into dimensions.
         """
         if not input_chunks:
             _logger.debug("No chunk size specified, using ZYX.")
             chunks = [1, 1, self.z, self.y, self.x]
         elif isinstance(input_chunks, tuple):
+            if not len(input_chunks) == 5:
+                raise ValueError(
+                    "Input chunks must be a tuple of 5 dimensions, got "
+                    f"{len(input_chunks)} dimensions."
+                )
             chunks = list(input_chunks)
         elif isinstance(input_chunks, str):
             if input_chunks.lower() == "xy":
@@ -272,19 +263,20 @@ def _gen_chunks(self, input_chunks):
                 f"Chunk type {type(input_chunks)} is not supported."
             )

+        shape = (self.t, self.c, self.z, self.y, self.x)
+        original_chunks = chunks.copy()
+
+        # Limit chunk size to MAX_CHUNK_SIZE by halving Z
         bytes_per_pixel = np.dtype(self.reader.dtype).itemsize
-        while (
-            chunks[-3] > 1
-            and np.prod(chunks, dtype=np.int64) * bytes_per_pixel
-            > MAX_CHUNK_SIZE
-        ):
-            chunks[-3] = int(np.ceil(chunks[-3] / 2))
+        chunk_zyx_shape = _limit_zyx_chunk_size(
+            shape, bytes_per_pixel, MAX_CHUNK_SIZE, chunks=chunks
+        )
+        chunks[-3:] = list(chunk_zyx_shape)

-        data_dims = [self.t, self.c, self.z, self.y, self.x]
-        original_chunks = chunks.copy()
-        chunks = _adjust_chunks_for_divisibility(chunks, data_dims)
+        # Adjust chunks to divide evenly into dimensions
+        chunks = _adjust_chunks_for_divisibility(shape, chunks)
         for i, (orig, adj, dim) in enumerate(
-            zip(original_chunks, chunks, data_dims)
+            zip(original_chunks, chunks, shape)
         ):
             if orig != adj:
                 _logger.warning(
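For reference, `_adjust_chunks_for_divisibility` now lives in `iohub.ngff.utils` but keeps the logic deleted above; a standalone sketch of that behavior (the body is copied from the removed helper, and note the relocated version is called as `(shape, chunks)` rather than `(chunks, dims)`):

def adjust_chunks_for_divisibility(chunks, dims):
    """Shrink each chunk until it divides its dimension evenly
    (logic copied from the helper deleted in this diff)."""
    adjusted = []
    for chunk, dim in zip(chunks, dims):
        if chunk > dim:
            adjusted.append(dim)  # cap oversized chunks at the dimension
        elif dim % chunk != 0:
            while chunk > 1 and dim % chunk != 0:
                chunk -= 1  # step down to the nearest divisor
            adjusted.append(chunk)
        else:
            adjusted.append(chunk)
    return adjusted

# A Z chunk of 5 does not divide Z=32 evenly, so it steps down to 4:
print(adjust_chunks_for_divisibility([1, 1, 5, 256, 256], [1, 2, 32, 256, 256]))
# [1, 1, 4, 256, 256]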

iohub/ngff/nodes.py

Lines changed: 113 additions & 2 deletions
@@ -207,8 +207,7 @@ def __setitem__(self, key, value):
     def __delitem__(self, key):
         """.. Warning: this does NOT clean up metadata!"""
         key = normalize_path(str(key))
-        if key in self._member_names:
-            del self[key]
+        del self._group[key]

     def __contains__(self, key):
         key = normalize_path(str(key))
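Aside on the `__delitem__` fix above: the old body deleted through `del self[key]`, which re-invokes `__delitem__` on the node itself, so any existing key recursed forever instead of being removed; delegating to the backing zarr group breaks the cycle. A minimal reproduction of the old failure mode, with a hypothetical stand-in class:

class Node:
    def __delitem__(self, key):
        # Same shape as the old iohub code: deleting through `self`
        # re-enters __delitem__ and never reaches the underlying store.
        del self[key]

node = Node()
del node["0"]  # raises RecursionError: maximum recursion depth exceeded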
@@ -1068,6 +1067,14 @@ def initialize_pyramid(self, levels: int) -> None:
                 _scale_integers(array.chunks, factor), len(shape)
             )

+            if array.shards is not None:
+                shards = array.shards[:-3] + _scale_integers(
+                    array.shards[-3:], factor
+                )
+                shards_ratio = tuple(s // c for c, s in zip(chunks, shards))
+            else:
+                shards_ratio = None
+
             transforms = deepcopy(
                 self.metadata.multiscales[0]
                 .datasets[0]
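The branch just added only emits a shard shape when the base array is actually sharded: the ZYX shard dims are scaled like the chunks, and `shards_ratio` becomes the elementwise shard/chunk quotient handed to the array-creation call in the next hunk (visible there as `shards_ratio=shards_ratio`). A standalone sketch of the arithmetic with invented shapes, approximating the private `_scale_integers` helper by ceil-division (an assumption about its behavior):

from math import ceil

def scale_integers(values, factor):
    # Assumed behavior of iohub's private _scale_integers helper:
    # shrink each extent by the downsampling factor, rounding up.
    return tuple(max(1, ceil(v / factor)) for v in values)

# Invented example: TCZYX arrays with 2x spatial downsampling per level.
chunks = (1, 1, 8, 64, 64)          # chunk shape already scaled for level 1
base_shards = (1, 1, 16, 256, 256)  # shard shape of the level 0 array
factor = 2

# Mirror the diff: keep non-spatial dims, scale the ZYX shard dims, then
# take the elementwise shards/chunks ratio.
shards = base_shards[:-3] + scale_integers(base_shards[-3:], factor)
shards_ratio = tuple(s // c for c, s in zip(chunks, shards))
print(shards, shards_ratio)  # (1, 1, 8, 128, 128) (1, 1, 1, 2, 2)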
@@ -1083,9 +1090,113 @@ def initialize_pyramid(self, levels: int) -> None:
                 shape=shape,
                 dtype=array.dtype,
                 chunks=chunks,
+                shards_ratio=shards_ratio,
                 transform=transforms,
             )

+    def compute_pyramid(
+        self,
+        levels: int | None = None,
+        method: str = "mean",
+    ) -> None:
+        """Compute pyramid by downsampling from source level.
+
+        Creates pyramid structure if none exists. Uses cascade downsampling
+        where each level is derived from the previous level to prevent
+        aliasing artifacts and chunk boundary issues.
+
+        Parameters
+        ----------
+        levels : int, optional
+            Number of pyramid levels. If None, uses existing pyramid structure.
+        method : str, optional
+            Downsampling method: "mean", "median", "mode", "min", "max",
+            "stride". By default "mean".
+
+        Raises
+        ------
+        ValueError
+            If level 0 array doesn't exist, pyramid structure is invalid,
+            or if a pyramid already exists with a different number of levels.
+
+        Examples
+        --------
+        >>> # Create and compute pyramid with 4 levels
+        >>> pos.compute_pyramid(levels=4, method="mean")
+
+        >>> # Recompute existing pyramid structure
+        >>> pos.compute_pyramid(method="median")
+
+        >>> # Change pyramid levels (must delete first)
+        >>> pos.delete_pyramid()
+        >>> pos.compute_pyramid(levels=3, method="mean")
+        """
+        from iohub.ngff.utils import _downsample_tensorstore
+
+        num_arrays = len(self.array_keys())
+        if num_arrays == 0:
+            raise ValueError(
+                "No level 0 array exists. Create base array before computing "
+                "pyramid."
+            )
+
+        if levels is None:
+            if num_arrays == 1:
+                raise ValueError(
+                    "Pyramid structure doesn't exist and levels=None. "
+                    "Specify 'levels' parameter to create pyramid."
+                )
+            levels = num_arrays
+
+        if num_arrays == 1:
+            self.initialize_pyramid(levels=levels)
+        elif num_arrays != levels:
+            raise ValueError(
+                f"Pyramid structure exists with {num_arrays} levels but "
+                f"{levels} requested. Call delete_pyramid() first to remove "
+                "existing pyramid."
+            )
+
+        # Compute pyramid data via cascade downsampling
+        for level in range(1, levels):
+            previous_level_array = self[str(level - 1)]
+            current_level_array = self[str(level)]
+
+            previous_ts = previous_level_array.tensorstore()
+            current_ts = current_level_array.tensorstore()
+
+            current_scale = self.get_effective_scale(str(level))
+            previous_scale = self.get_effective_scale(str(level - 1))
+
+            downsample_factors = [
+                int(round(current_scale[i] / previous_scale[i]))
+                for i in range(len(current_scale))
+            ]
+
+            _downsample_tensorstore(
+                source_ts=previous_ts,
+                target_ts=current_ts,
+                downsample_factors=downsample_factors,
+                method=method,
+            )
+
+    def delete_pyramid(self) -> None:
+        """Delete all dataset pyramid levels except the base (level 0) array.
+
+        Use this before calling compute_pyramid() with different levels
+        on a position that already has a pyramid structure.
+
+        This method removes both the zarr arrays and updates the OME-NGFF
+        multiscales metadata to reflect the deletion.
+        """
+        _multiscale = self.metadata.multiscales[0]
+
+        for dataset in _multiscale.datasets[1:]:
+            del self[dataset.path]
+
+        _multiscale.datasets = _multiscale.datasets[:1]
+        self.dump_meta()
+
     @property
     def scale(self) -> list[float]:
         """

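One detail worth pulling out of `compute_pyramid` above: the per-level downsample factors are not hard-coded but derived from the ratio of effective scales between adjacent levels, which is what lets the cascade follow whatever factor the multiscales metadata records. A standalone illustration of that arithmetic with invented scale values:

# Invented effective scales for levels 0 and 1 of a TCZYX dataset,
# mirroring the list comprehension inside compute_pyramid:
previous_scale = [1.0, 1.0, 2.0, 0.5, 0.5]  # level 0 (T, C, Z, Y, X)
current_scale = [1.0, 1.0, 4.0, 1.0, 1.0]   # level 1: ZYX spacing doubled

downsample_factors = [
    int(round(current_scale[i] / previous_scale[i]))
    for i in range(len(current_scale))
]
print(downsample_factors)  # [1, 1, 2, 2, 2]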