Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: fractal-analytics-platform/fractal-helper-tasks
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v0.1.4
Choose a base ref
...
head repository: fractal-analytics-platform/fractal-helper-tasks
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: main
Choose a head ref
  • 17 commits
  • 10 files changed
  • 1 contributor

Commits on Jan 7, 2025

  1. Add initial rechunking task

    jluethi committed Jan 7, 2025
    Copy the full SHA
    b5b6cca View commit details
  2. Copy the full SHA
    9e7b1cf View commit details
  3. Cleanup

    jluethi committed Jan 7, 2025
    Copy the full SHA
    144873c View commit details
  4. Copy the full SHA
    f5d8f51 View commit details
  5. Deprecate Python 3.9 support

    jluethi committed Jan 7, 2025
    Copy the full SHA
    fb30ade View commit details
  6. Copy the full SHA
    19060da View commit details

Commits on Jan 28, 2025

  1. Copy the full SHA
    de7c200 View commit details
  2. Copy the full SHA
    9ab60cd View commit details
  3. Copy the full SHA
    9cef7b3 View commit details

Commits on Feb 3, 2025

  1. Copy the full SHA
    25c9f08 View commit details
  2. Copy the full SHA
    841019f View commit details
  3. Cleanup task function

    jluethi committed Feb 3, 2025
    Copy the full SHA
    6c907b5 View commit details
  4. Update 2D to 3D docstring

    jluethi committed Feb 3, 2025
    Copy the full SHA
    e1701d4 View commit details
  5. Copy the full SHA
    2dce4b8 View commit details
  6. Copy the full SHA
    9be7b01 View commit details
  7. Update ngio version

    jluethi committed Feb 3, 2025
    Copy the full SHA
    188b7fb View commit details
  8. Copy the full SHA
    5bb67c8 View commit details
4 changes: 1 addition & 3 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
@@ -17,10 +17,8 @@ jobs:
strategy:
matrix:
os: [ubuntu-22.04, macos-latest]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]
exclude:
- os: macos-latest
python-version: '3.9'
- os: macos-latest
python-version: '3.10'
name: "Core, Python ${{ matrix.python-version }}, ${{ matrix.os }}"
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -25,9 +25,9 @@ authors = [
]

# Required Python version and dependencies
requires-python = ">=3.9"
requires-python = ">=3.10"
dependencies = [
"fractal-tasks-core==1.3.4"
"fractal-tasks-core==1.4.2","ngio==0.1.6",
]

# Optional dependencies (e.g. for `pip install -e ".[dev]"`, see
77 changes: 76 additions & 1 deletion src/fractal_helper_tasks/__FRACTAL_MANIFEST__.json
Original file line number Diff line number Diff line change
@@ -33,7 +33,7 @@
"default": false,
"title": "Overwrite Input",
"type": "boolean",
"description": "Whether"
"description": "Whether the existing iamge should be overwritten with the new OME-Zarr without the T dimension."
}
},
"required": [
@@ -49,6 +49,9 @@
"input_types": {
"is_3D": false
},
"output_types": {
"is_3D": true
},
"tags": [
"Mixed modality",
"2D to 3D workflows"
@@ -113,6 +116,11 @@
"type": "string",
"description": "If the image name between 2D & 3D don't match, this is the suffix that should be added to the 3D image. If the 2D image is in \"/path/to/my_plate_mip.zarr/B/03/0\" and the 3D image is in \"/path/to/my_plate.zarr/B/03/0_illum_corr\", the value should be \"_illum_corr\"."
},
"z_chunks": {
"title": "Z Chunks",
"type": "integer",
"description": "Chunking for the Z dimension. Set this parameter if you want the label image to be chunked differently from the 3D image in the Z dimension."
},
"overwrite": {
"default": false,
"title": "Overwrite",
@@ -128,6 +136,73 @@
"title": "Convert2dSegmentationTo3d"
},
"docs_link": "https://github.com/jluethi/fractal-helper-tasks"
},
{
"name": "Rechunk OME-Zarr",
"tags": [
"Rechunking",
"Many files"
],
"docs_info": "### Purpose\n- Rechunks OME-Zarr to new chunking parameters: Changes whether the array is stored as many small files or few larger files.\n- Optionally applies the same rechunking to label images.\n\n### Outputs\n- A **new Zarr image** that is rechunked.\n",
"executable_parallel": "rechunk_zarr.py",
"meta_parallel": {
"cpus_per_task": 1,
"mem": 4000
},
"args_schema_parallel": {
"additionalProperties": false,
"properties": {
"zarr_url": {
"title": "Zarr Url",
"type": "string",
"description": "Path or url to the individual OME-Zarr image to be processed. (standard argument for Fractal tasks, managed by Fractal server)."
},
"chunk_sizes": {
"additionalProperties": {
"type": "integer"
},
"title": "Chunk Sizes",
"type": "object",
"description": "Dictionary of chunk sizes to adapt. One can set any of the t, c, z, y, x axes that exist in the input image to be resized to a different chunk size. For example, {\"y\": 4000, \"x\": 4000} will set a new x & y chunking while maintaining the other chunk sizes. {\"z\": 10} will just change the Z chunking while keeping all other chunk sizes the same as the input."
},
"suffix": {
"default": "rechunked",
"title": "Suffix",
"type": "string",
"description": "Suffix of the rechunked image."
},
"rechunk_labels": {
"default": true,
"title": "Rechunk Labels",
"type": "boolean",
"description": "Whether to apply the same rechunking to all label images of the OME-Zarr as well."
},
"rebuild_pyramids": {
"default": true,
"title": "Rebuild Pyramids",
"type": "boolean",
"description": "Whether pyramids are built fresh in the rechunked image. This has a small performance overhead, but ensures that this task is save against off-by-one issues when pyramid levels aren't easily downsampled by 2."
},
"overwrite_input": {
"default": true,
"title": "Overwrite Input",
"type": "boolean",
"description": "Whether the old image without rechunking should be overwritten (to avoid duplicating the data needed)."
},
"overwrite": {
"default": false,
"title": "Overwrite",
"type": "boolean",
"description": "Whether to overwrite potential pre-existing output with the name zarr_url_suffix."
}
},
"required": [
"zarr_url"
],
"type": "object",
"title": "RechunkZarr"
},
"docs_link": "https://github.com/jluethi/fractal-helper-tasks"
}
],
"has_args_schemas": true,
27 changes: 21 additions & 6 deletions src/fractal_helper_tasks/convert_2D_segmentation_to_3D.py
Original file line number Diff line number Diff line change
@@ -99,6 +99,7 @@ def convert_2D_segmentation_to_3D(
plate_suffix: str = "_mip",
image_suffix_2D_to_remove: Optional[str] = None,
image_suffix_3D_to_add: Optional[str] = None,
z_chunks: Optional[int] = None,
overwrite: bool = False,
) -> None:
"""Convert 2D segmentation to 3D segmentation.
@@ -141,6 +142,9 @@ def convert_2D_segmentation_to_3D(
If the 2D image is in "/path/to/my_plate_mip.zarr/B/03/0" and the
3D image is in "/path/to/my_plate.zarr/B/03/0_illum_corr", the
value should be "_illum_corr".
z_chunks: Chunking for the Z dimension. Set this parameter if you want
the label image to be chunked differently from the 3D image in
the Z dimension.
overwrite: If `True`, overwrite existing label and ROI tables in the
3D OME-Zarr
"""
@@ -173,12 +177,20 @@ def convert_2D_segmentation_to_3D(

# 1a) Load a 2D label image
label_img = da.from_zarr(f"{zarr_url}/labels/{label_name}/{level}")
chunks = label_img.chunksize
chunks = list(label_img.chunksize)

# 1b) Get number z planes & Z spacing from 3D OME-Zarr file
with zarr.open(zarr_3D_url, mode="rw+") as zarr_img:
zarr_3D = da.from_zarr(zarr_img[0])
new_z_planes = zarr_3D.shape[-3]
z_chunk_3d = zarr_3D.chunksize[-3]

# TODO: Improve axis detection in ngio refactor?
if z_chunks:
chunks[-3] = z_chunks
else:
chunks[-3] = z_chunk_3d
chunks = tuple(chunks)

image_meta = load_NgffImageMeta(zarr_3D_url)
z_pixel_size = image_meta.get_pixel_sizes_zyx(level=0)[0]
@@ -249,12 +261,15 @@ def convert_2D_segmentation_to_3D(

logger.info("Finished 2D to 3D conversion")

output_dict = dict(
filters=dict(
types=dict(is_3D=True),
)
# Give the 3D image as an output so that filters are applied correctly
image_list_updates = dict(
image_list_updates=[
dict(
zarr_url=zarr_3D_url,
)
]
)
return output_dict
return image_list_updates


if __name__ == "__main__":
6 changes: 6 additions & 0 deletions src/fractal_helper_tasks/dev/docs_info/rechunk_zarr.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
### Purpose
- Rechunks OME-Zarr to new chunking parameters: Changes whether the array is stored as many small files or few larger files.
- Optionally applies the same rechunking to label images.

### Outputs
- A **new Zarr image** that is rechunked.
11 changes: 11 additions & 0 deletions src/fractal_helper_tasks/dev/task_list.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@
),
ParallelTask(
input_types=dict(is_3D=False),
output_types=dict(is_3D=True),
name="Convert 2D segmentation to 3D",
executable="convert_2D_segmentation_to_3D.py",
meta={"cpus_per_task": 2, "mem": 8000},
@@ -22,4 +23,14 @@
],
docs_info="file:docs_info/2d_to_3d.md",
),
ParallelTask(
name="Rechunk OME-Zarr",
executable="rechunk_zarr.py",
meta={"cpus_per_task": 1, "mem": 4000},
tags=[
"Rechunking",
"Many files",
],
docs_info="file:docs_info/rechunk_zarr.md",
),
]
3 changes: 2 additions & 1 deletion src/fractal_helper_tasks/drop_t_dimension.py
Original file line number Diff line number Diff line change
@@ -53,7 +53,8 @@ def drop_t_dimension(
(standard argument for Fractal tasks, managed by Fractal server).
suffix: Suffix to be used for the new Zarr image. If overwrite_input
is True, this file is only temporary.
overwrite_input: Whether
overwrite_input: Whether the existing image should be overwritten with
the new OME-Zarr without the T dimension.
"""
# Normalize zarr_url
zarr_url_old = zarr_url.rstrip("/")
144 changes: 144 additions & 0 deletions src/fractal_helper_tasks/rechunk_zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Copyright 2025 (C) BioVisionCenter, University of Zurich
#
# Original authors:
# Joel Lüthi <joel.luethi@uzh.ch>
"""Rechunk an existing Zarr."""

import logging
import os
import shutil
from typing import Any, Optional

import ngio
from pydantic import validate_call

from fractal_helper_tasks.utils import normalize_chunk_size_dict, rechunk_label

logger = logging.getLogger(__name__)


@validate_call
def rechunk_zarr(
    *,
    zarr_url: str,
    chunk_sizes: Optional[dict[str, Optional[int]]] = None,
    suffix: str = "rechunked",
    rechunk_labels: bool = True,
    rebuild_pyramids: bool = True,
    overwrite_input: bool = True,
    overwrite: bool = False,
) -> Optional[dict[str, Any]]:
    """Rechunk an OME-Zarr image to new chunk sizes.

    Args:
        zarr_url: Path or url to the individual OME-Zarr image to be processed.
            (standard argument for Fractal tasks, managed by Fractal server).
        chunk_sizes: Dictionary of chunk sizes to adapt. One can set any of
            the t, c, z, y, x axes that exist in the input image to be resized
            to a different chunk size. For example, {"y": 4000, "x": 4000}
            will set a new x & y chunking while maintaining the other chunk
            sizes. {"z": 10} will just change the Z chunking while keeping
            all other chunk sizes the same as the input.
        suffix: Suffix of the rechunked image.
        rechunk_labels: Whether to apply the same rechunking to all label
            images of the OME-Zarr as well.
        rebuild_pyramids: Whether pyramids are built fresh in the rechunked
            image. This has a small performance overhead, but ensures that
            this task is safe against off-by-one issues when pyramid levels
            aren't easily downsampled by 2.
        overwrite_input: Whether the old image without rechunking should be
            overwritten (to avoid duplicating the data needed).
        overwrite: Whether to overwrite potential pre-existing output with the
            name zarr_url_suffix.

    Returns:
        `None` if `overwrite_input` is set (the rechunked image replaces the
        input in place). Otherwise a dictionary with an `image_list_updates`
        entry describing the new rechunked image and its origin.

    Raises:
        NotImplementedError: If `chunk_sizes` names an axis that is not
            present in the OME-Zarr image.
    """
    # Normalize zarr_url: with a trailing slash, appending "_{suffix}" would
    # place the rechunked store *inside* the input store.
    zarr_url = zarr_url.rstrip("/")
    logger.info(f"Running `rechunk_zarr` on {zarr_url=} with {chunk_sizes=}.")

    chunk_sizes = normalize_chunk_size_dict(chunk_sizes)

    rechunked_zarr_url = f"{zarr_url}_{suffix}"
    ngff_image = ngio.NgffImage(zarr_url)
    pyramid_paths = ngff_image.levels_paths
    highest_res_img = ngff_image.get_image()
    axes_names = highest_res_img.dataset.on_disk_axes_names
    chunks = highest_res_img.on_disk_dask_array.chunks
    logger.info(f"Initial chunk sizes were: {chunks}")

    # Reject unknown axes before any chunk size is computed or data written.
    for axis in chunk_sizes:
        if axis not in axes_names:
            raise NotImplementedError(
                f"Rechunking with {axis=} is specified, but the OME-Zarr only "
                f"has the following axes: {axes_names}"
            )

    # Start from the current chunk size of each axis (first chunk of the
    # dask chunks tuple) and override the axes the user asked for.
    new_chunksize = [c[0] for c in chunks]
    for i, axis in enumerate(axes_names):
        requested_size = chunk_sizes.get(axis)
        if requested_size is not None:
            new_chunksize[i] = requested_size

    logger.info(f"Chunk sizes after rechunking will be: {new_chunksize=}")

    # Labels are only copied as-is when they are not rechunked separately
    # further below.
    new_ngff_image = ngff_image.derive_new_image(
        store=rechunked_zarr_url,
        name=ngff_image.image_meta.name,
        overwrite=overwrite,
        copy_labels=not rechunk_labels,
        copy_tables=True,
        chunks=new_chunksize,
    )

    # NOTE(review): the input image is re-opened here as in the original
    # implementation; presumably to get a fresh handle after deriving the
    # new image - confirm whether this is still required.
    ngff_image = ngio.NgffImage(zarr_url)

    if rebuild_pyramids:
        # Set the highest resolution, then consolidate to build a new pyramid
        new_ngff_image.get_image(highest_resolution=True).set_array(
            ngff_image.get_image(highest_resolution=True).on_disk_dask_array
        )
        new_ngff_image.get_image(highest_resolution=True).consolidate()
    else:
        # Copy every existing pyramid level one-to-one.
        for path in pyramid_paths:
            new_ngff_image.get_image(path=path).set_array(
                ngff_image.get_image(path=path).on_disk_dask_array
            )

    # Rechunk labels
    if rechunk_labels:
        # The channel entry is reset to None for labels. Build a separate
        # dict so the image chunk sizes are not mutated.
        label_chunk_sizes = {**chunk_sizes, "c": None}
        for label in ngff_image.labels.list():
            rechunk_label(
                orig_ngff_image=ngff_image,
                new_ngff_image=new_ngff_image,
                label=label,
                chunk_sizes=label_chunk_sizes,
                overwrite=overwrite,
                rebuild_pyramids=rebuild_pyramids,
            )

    if overwrite_input:
        # Swap the rechunked image into the place of the input. The input is
        # moved aside first and only deleted once the swap has succeeded.
        backup_url = f"{zarr_url}_tmp"
        os.rename(zarr_url, backup_url)
        os.rename(rechunked_zarr_url, zarr_url)
        shutil.rmtree(backup_url)
        return None

    return dict(
        image_list_updates=[
            dict(
                zarr_url=rechunked_zarr_url,
                origin=zarr_url,
                types=dict(rechunked=True),
            )
        ],
    )


if __name__ == "__main__":
    # Script entry point: the import is kept local so that importing this
    # module as a library does not pull in the task-runner utilities.
    from fractal_tasks_core.tasks import _utils as _fractal_task_utils

    # Hand the task function and this module's logger name to the runner
    # (presumably it handles argument parsing - confirm in
    # fractal-tasks-core).
    _fractal_task_utils.run_fractal_task(
        task_function=rechunk_zarr, logger_name=logger.name
    )
Loading