Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
5718cba
add basic support and remote SpatialData tests
berombau Jan 24, 2025
a8be620
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 24, 2025
ce686e4
fix pre-commit
LucaMarconato Jan 31, 2025
e7fa020
Revert "Update pyproject.toml"
LucaMarconato Jan 31, 2025
a9c0801
removed 3.13 from test ci
LucaMarconato Jan 31, 2025
5cfdaec
Merge branch 'no_python_313' into remote2
LucaMarconato Jan 31, 2025
4fe6a47
fix
LucaMarconato Jan 31, 2025
d97a1d2
uploading sdata to local s3 storage
LucaMarconato Jan 31, 2025
5e26b5e
add _open_zarr_store
berombau Jan 31, 2025
5794871
revert changing write function signature
berombau Jan 31, 2025
0207ff7
update _open_zarr_store with StoreLike
berombau Jan 31, 2025
7e497ff
read image element from base store
berombau Jan 31, 2025
c674281
clean up remote mock tests, focus only on reading raster elements
berombau Feb 1, 2025
fb953a0
improve remote http test, add alternative
berombau Feb 1, 2025
52bb5fc
add support for consolidated metadata store in util function, add _cr…
berombau Feb 1, 2025
ca82493
allow for groups as store input
berombau Feb 1, 2025
ecea0e6
handle consolidated metadata with upath
berombau Feb 1, 2025
734eb45
split remote reading tests between http and http with consolidated me…
berombau Feb 1, 2025
c0ffb1c
remove f_store_path, support remote raster types fully and keep local…
berombau Feb 1, 2025
d60bd85
Fix metadata_key bug now that store is not always FSStore. Add extra …
berombau Feb 1, 2025
c3fa8cf
add mypy fixes
berombau Feb 1, 2025
d16a638
Merge branch 'main' into remote2
ap-- Mar 17, 2025
23f4a89
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 17, 2025
a80588c
Fix linting errors
ap-- Mar 17, 2025
020810b
fixed majority of tests
ap-- Mar 17, 2025
ba25564
spatialdata._io._utils: _open_zarr_store has to set dimension_separat…
ap-- Mar 17, 2025
b2ff8f8
stay in sync with ome zarr format
ap-- Mar 17, 2025
70480ce
spatialdata._io.io_raster: support remote stores
ap-- Mar 17, 2025
10cef3f
prevent crashing tests on 3.10
ap-- Mar 17, 2025
d9e4eac
Merge branch 'spatial-data-crash310' into remote2
ap-- Mar 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,22 @@ dependencies = [
"click",
"dask-image",
"dask>=2024.4.1,<=2024.11.2",
"fsspec",
"fsspec[s3,http]",
"geopandas>=0.14",
"multiscale_spatial_image>=2.0.2",
"networkx",
"numba>=0.55.0",
"numpy",
"ome_zarr>=0.8.4",
"universal_pathlib>=0.2.6",
"pandas",
"pooch",
"pyarrow",
"rich",
"setuptools",
"shapely>=2.0.1",
"spatial_image>=1.1.0",
"spatial_image>=1.2.1",
"xarray-dataclasses>=1.9.1",
"scikit-image",
"scipy",
"typing_extensions>=4.8.0",
Expand All @@ -58,6 +60,7 @@ test = [
"pytest-cov",
"pytest-mock",
"torch",
"moto[s3,server]"
]
docs = [
"sphinx>=4.5",
Expand Down
42 changes: 22 additions & 20 deletions src/spatialdata/_core/spatialdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,7 @@
)
from spatialdata._logging import logger
from spatialdata._types import ArrayLike, Raster_T
from spatialdata._utils import (
_deprecation_alias,
_error_message_add_element,
)
from spatialdata._utils import _deprecation_alias, _error_message_add_element
from spatialdata.models import (
Image2DModel,
Image3DModel,
Expand Down Expand Up @@ -621,9 +618,9 @@ def _get_groups_for_element(
-------
either the existing Zarr subgroup or a new one.
"""
if not isinstance(zarr_path, Path):
raise ValueError("zarr_path should be a Path object")
store = parse_url(zarr_path, mode="r+").store
from spatialdata._io._utils import _open_zarr_store

store = _open_zarr_store(zarr_path, mode="r+")
root = zarr.group(store=store)
if element_type not in ["images", "labels", "points", "polygons", "shapes", "tables"]:
raise ValueError(f"Unknown element type {element_type}")
Expand Down Expand Up @@ -1376,7 +1373,7 @@ def delete_element_from_disk(self, element_name: str | list[str]) -> None:
self.delete_element_from_disk(name)
return

from spatialdata._io._utils import _backed_elements_contained_in_path
from spatialdata._io._utils import _backed_elements_contained_in_path, _open_zarr_store

if self.path is None:
raise ValueError("The SpatialData object is not backed by a Zarr store.")
Expand Down Expand Up @@ -1417,7 +1414,7 @@ def delete_element_from_disk(self, element_name: str | list[str]) -> None:
)

# delete the element
store = parse_url(self.path, mode="r+").store
store = _open_zarr_store(self.path)
root = zarr.group(store=store)
root[element_type].pop(element_name)
store.close()
Expand All @@ -1438,15 +1435,24 @@ def _check_element_not_on_disk_with_different_type(self, element_type: str, elem
)

def write_consolidated_metadata(self) -> None:
store = parse_url(self.path, mode="r+").store
# consolidate metadata to more easily support remote reading bug in zarr. In reality, 'zmetadata' is written
# instead of '.zmetadata' see discussion https://github.com/zarr-developers/zarr-python/issues/1121
zarr.consolidate_metadata(store, metadata_key=".zmetadata")
from spatialdata._io._utils import _open_zarr_store

store = _open_zarr_store(self.path)
# Note that the store can be local (which does not have the zmetadata bug)
# or a remote FSStore (which has the bug).
# Consolidate metadata to more easily support remote reading bug in zarr.
# We write 'zmetadata' instead of the standard '.zmetadata' to avoid the FSStore bug.
# See discussion https://github.com/zarr-developers/zarr-python/issues/1121
zarr.consolidate_metadata(store, metadata_key="zmetadata")
store.close()

def has_consolidated_metadata(self) -> bool:
from spatialdata._io._utils import _open_zarr_store

return_value = False
store = parse_url(self.path, mode="r").store
store = _open_zarr_store(self.path)
# Note that the store can be local (which does not have the zmetadata bug)
# or a remote FSStore (which has the bug).
if "zmetadata" in store:
return_value = True
store.close()
Expand Down Expand Up @@ -1575,15 +1581,11 @@ def write_transformations(self, element_name: str | None = None) -> None:
)
axes = get_axes_names(element)
if isinstance(element, DataArray | DataTree):
from spatialdata._io._utils import (
overwrite_coordinate_transformations_raster,
)
from spatialdata._io._utils import overwrite_coordinate_transformations_raster

overwrite_coordinate_transformations_raster(group=element_group, axes=axes, transformations=transformations)
elif isinstance(element, DaskDataFrame | GeoDataFrame | AnnData):
from spatialdata._io._utils import (
overwrite_coordinate_transformations_non_raster,
)
from spatialdata._io._utils import overwrite_coordinate_transformations_non_raster

overwrite_coordinate_transformations_non_raster(
group=element_group, axes=axes, transformations=transformations
Expand Down
62 changes: 56 additions & 6 deletions src/spatialdata/_io/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,17 @@
from enum import Enum
from functools import singledispatch
from pathlib import Path
from typing import Any, Literal
from typing import Any, Literal, TypeAlias

import zarr
import zarr.storage
from anndata import AnnData
from dask.array import Array as DaskArray
from dask.dataframe import DataFrame as DaskDataFrame
from geopandas import GeoDataFrame
from upath import UPath
from upath.implementations.local import PosixUPath, WindowsUPath
from xarray import DataArray, DataTree
from zarr.storage import FSStore

from spatialdata._core.spatialdata import SpatialData
from spatialdata._utils import get_pyramid_levels
Expand All @@ -31,10 +34,7 @@
_validate_mapping_to_coordinate_system_type,
)
from spatialdata.transformations.ngff.ngff_transformations import NgffBaseTransformation
from spatialdata.transformations.transformations import (
BaseTransformation,
_get_current_output_axes,
)
from spatialdata.transformations.transformations import BaseTransformation, _get_current_output_axes


# suppress logger debug from ome_zarr with context manager
Expand Down Expand Up @@ -388,6 +388,56 @@ def save_transformations(sdata: SpatialData) -> None:
sdata.write_transformations()


StoreLike: TypeAlias = str | Path | UPath | zarr.storage.StoreLike | zarr.Group


def _open_zarr_store(path: StoreLike, **kwargs: Any) -> zarr.storage.BaseStore:
    """Normalize a ``StoreLike`` input to an open zarr store.

    Local filesystem paths are returned as a ``DirectoryStore``; every other
    input (remote ``UPath``, existing store, or ``zarr.Group``) is wrapped in
    an ``FSStore`` so local and remote locations can be read uniformly.

    Parameters
    ----------
    path
        A ``str``, ``Path``, ``UPath``, zarr store, or ``zarr.Group``
        identifying the Zarr data.
    kwargs
        Extra keyword arguments forwarded to the ``FSStore`` constructor
        (e.g. ``mode``).

    Returns
    -------
    An open zarr store pointing at the given location.

    Raises
    ------
    ValueError
        If ``path`` is a ``zarr.Group`` backed by an unsupported store type.
    TypeError
        If ``path`` is of an unsupported type altogether.
    """
    # TODO: ensure kwargs like mode are enforced everywhere and passed correctly to the store
    if isinstance(path, str | Path):
        # if the input is str or Path, map it to UPath
        path = UPath(path)
    if isinstance(path, PosixUPath | WindowsUPath):
        # if the input is a local path, use DirectoryStore
        # NOTE(review): kwargs are not forwarded on this branch, so e.g. ``mode`` is
        # silently ignored for local paths — confirm this is intended (see TODO above).
        return zarr.storage.DirectoryStore(path.path, dimension_separator="/")
    if isinstance(path, zarr.Group):
        # if the input is a zarr.Group, wrap it with a store
        if isinstance(path.store, zarr.storage.DirectoryStore):
            # create a simple FSStore if the store is a DirectoryStore with just the path
            return FSStore(os.path.join(path.store.path, path.path), **kwargs)
        if isinstance(path.store, FSStore):
            # if the store within the zarr.Group is an FSStore, return it
            # but extend the path of the store with that of the zarr.Group
            return FSStore(path.store.path + "/" + path.path, fs=path.store.fs, **kwargs)
        if isinstance(path.store, zarr.storage.ConsolidatedMetadataStore):
            # TODO: find a way to check if the consolidated metadata is still used. Probably best to wait for Zarr v3.
            # if the store is a ConsolidatedMetadataStore, just return it
            # get the FSStore url path from store and append it with the path from the Group StoreLike object
            # NOTE(review): unlike the FSStore branch above, no "/" separator is inserted
            # between the two path components — verify that the inner store's path always
            # ends with a separator, otherwise the two segments would be fused.
            url = UPath(path.store.store.path + path.path)
            # same as UPath option
            return FSStore(url.path, fs=url.fs, **kwargs)
        raise ValueError(f"Unsupported store type or zarr.Group: {type(path.store)}")
    if isinstance(path, zarr.storage.StoreLike):
        # if the input already a store, wrap it in an FSStore
        return FSStore(path, **kwargs)
    if isinstance(path, UPath):
        # if input is a remote UPath, map it to an FSStore
        return FSStore(path.path, fs=path.fs, **kwargs)
    raise TypeError(f"Unsupported type: {type(path)}")


def _create_upath(path: StoreLike) -> UPath:
    """Best-effort conversion of a ``StoreLike`` input to a ``UPath``.

    Parameters
    ----------
    path
        A ``str``, ``Path``, ``UPath``, zarr store, or ``zarr.Group``
        identifying the Zarr data.

    Returns
    -------
    A ``UPath`` pointing at the location described by ``path``.
    """
    # try to create a UPath from the input
    if isinstance(path, str | Path):
        # use UPath (not pathlib.Path) so the declared return type holds for plain
        # strings/paths as well; for local paths UPath yields a pathlib.Path subclass,
        # so callers relying on Path behavior are unaffected
        return UPath(path)
    if hasattr(path, "store") and isinstance(path.store, zarr.storage.ConsolidatedMetadataStore):
        # create a url from the ConsolidatedMetadataStore and append it with the path from the Group StoreLike object
        return UPath(path.store.store.path) / path.path
    if isinstance(path, zarr.storage.BaseStore):
        return UPath(path.path)
    # best effort to create a UPath
    return UPath(path)


class BadFileHandleMethod(Enum):
ERROR = "error"
WARN = "warn"
Expand Down
4 changes: 1 addition & 3 deletions src/spatialdata/_io/io_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ def _read_points(
store: str | Path | MutableMapping | zarr.Group, # type: ignore[type-arg]
) -> DaskDataFrame:
"""Read points from a zarr store."""
assert isinstance(store, str | Path)
f = zarr.open(store, mode="r")

f = zarr.open(store, mode="r") if isinstance(store, str | Path | MutableMapping) else store
version = _parse_version(f, expect_attrs_key=True)
assert version is not None
format = PointsFormats[version]
Expand Down
24 changes: 10 additions & 14 deletions src/spatialdata/_io/io_raster.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path
from typing import Any, Literal

import dask.array as da
import numpy as np
import zarr
import zarr.storage
from ome_zarr.format import Format
from ome_zarr.io import ZarrLocation
from ome_zarr.reader import Label, Multiscales, Node, Reader
Expand All @@ -13,18 +13,14 @@
from ome_zarr.writer import write_labels as write_labels_ngff
from ome_zarr.writer import write_multiscale as write_multiscale_ngff
from ome_zarr.writer import write_multiscale_labels as write_multiscale_labels_ngff
from upath import UPath
from xarray import DataArray, Dataset, DataTree

from spatialdata._io._utils import (
_get_transformations_from_ngff_dict,
overwrite_coordinate_transformations_raster,
)
from spatialdata._io.format import (
CurrentRasterFormat,
RasterFormats,
RasterFormatV01,
_parse_version,
)
from spatialdata._io.format import CurrentRasterFormat, RasterFormats, RasterFormatV01, _parse_version
from spatialdata._utils import get_pyramid_levels
from spatialdata.models._utils import get_channel_names
from spatialdata.models.models import ATTRS_KEY
Expand All @@ -36,19 +32,19 @@
)


def _read_multiscale(store: str | Path, raster_type: Literal["image", "labels"]) -> DataArray | DataTree:
assert isinstance(store, str | Path)
def _read_multiscale(store: zarr.storage.BaseStore, raster_type: Literal["image", "labels"]) -> DataArray | DataTree:
assert raster_type in ["image", "labels"]

f = zarr.open(store, mode="r")
version = _parse_version(f, expect_attrs_key=True)
if isinstance(store, str | UPath):
raise NotImplementedError("removed in this PR")
group = zarr.group(store=store)
version = _parse_version(group, expect_attrs_key=True)
# old spatialdata datasets don't have format metadata for raster elements; this line ensure backwards compatibility,
# interpreting the lack of such information as the presence of the format v01
format = RasterFormatV01() if version is None else RasterFormats[version]
f.store.close()
store.close()

nodes: list[Node] = []
image_loc = ZarrLocation(store)
image_loc = ZarrLocation(store, fmt=format)
if image_loc.exists():
image_reader = Reader(image_loc)()
image_nodes = list(image_reader)
Expand Down
16 changes: 3 additions & 13 deletions src/spatialdata/_io/io_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,16 @@
_write_metadata,
overwrite_coordinate_transformations_non_raster,
)
from spatialdata._io.format import (
CurrentShapesFormat,
ShapesFormats,
ShapesFormatV01,
ShapesFormatV02,
_parse_version,
)
from spatialdata._io.format import CurrentShapesFormat, ShapesFormats, ShapesFormatV01, ShapesFormatV02, _parse_version
from spatialdata.models import ShapesModel, get_axes_names
from spatialdata.transformations._utils import (
_get_transformations,
_set_transformations,
)
from spatialdata.transformations._utils import _get_transformations, _set_transformations


def _read_shapes(
store: str | Path | MutableMapping | zarr.Group, # type: ignore[type-arg]
) -> GeoDataFrame:
"""Read shapes from a zarr store."""
assert isinstance(store, str | Path)
f = zarr.open(store, mode="r")
f = zarr.open(store, mode="r") if isinstance(store, str | Path | MutableMapping) else store
version = _parse_version(f, expect_attrs_key=True)
assert version is not None
format = ShapesFormats[version]
Expand Down
8 changes: 2 additions & 6 deletions src/spatialdata/_io/io_table.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import os
from json import JSONDecodeError
from typing import Literal

Expand Down Expand Up @@ -48,22 +47,19 @@ def _read_table(
count = 0
for table_name in subgroup:
f_elem = subgroup[table_name]
f_elem_store = os.path.join(zarr_store_path, f_elem.path)

with handle_read_errors(
on_bad_files=on_bad_files,
location=f"{subgroup.path}/{table_name}",
exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError),
):
tables[table_name] = read_anndata_zarr(f_elem_store)
tables[table_name] = read_anndata_zarr(f_elem)

f = zarr.open(f_elem_store, mode="r")
version = _parse_version(f, expect_attrs_key=False)
version = _parse_version(f_elem, expect_attrs_key=False)
assert version is not None
# since have just one table format, we currently read it but do not use it; if we ever change the format
# we can rename the two _ to format and implement the per-format read logic (as we do for shapes)
_ = TablesFormats[version]
f.store.close()

# # replace with format from above
# version = "0.1"
Expand Down
Loading
Loading