Skip to content

Commit 8ada4aa

Browse files
authored
Merge pull request #3 from JaneliaSciComp/pydantic_ome_ngff_0.4.0
wild overhaul
2 parents ddbf55b + 213633e commit 8ada4aa

20 files changed

+3466
-1486
lines changed

.pre-commit-config.yaml

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,29 @@ default_stages: [commit, push]
55
default_language_version:
66
python: python3
77
repos:
8+
- repo: https://github.com/astral-sh/ruff-pre-commit
9+
# Ruff version.
10+
rev: 'v0.3.4'
11+
hooks:
12+
- id: ruff
813
- repo: https://github.com/psf/black
9-
rev: 22.12.0
14+
rev: 24.3.0
1015
hooks:
1116
- id: black
12-
language_version: python3.9
17+
- repo: https://github.com/codespell-project/codespell
18+
rev: v2.2.6
19+
hooks:
20+
- id: codespell
1321
- repo: https://github.com/pre-commit/pre-commit-hooks
14-
rev: v4.4.0
22+
rev: v4.5.0
1523
hooks:
1624
- id: check-yaml
17-
- repo: https://github.com/charliermarsh/ruff-pre-commit
18-
# Ruff version.
19-
rev: 'v0.0.245'
25+
- repo: https://github.com/pre-commit/mirrors-mypy
26+
rev: v1.9.0
2027
hooks:
21-
- id: ruff
22-
args: [--fix, --exit-non-zero-on-fix]
28+
- id: mypy
29+
files: zarr
30+
args: []
31+
additional_dependencies:
32+
- types-redis
33+
- types-setuptools

README.md

Lines changed: 100 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,137 +1,118 @@
11
# xarray-ome-ngff
2-
Integration between xarray and the ome-ngff data model.
32

4-
At present (February, 2023) this is a partial implementation of the [OME-NGFF spec](https://ngff.openmicroscopy.org/latest/#implementations). Specifically, *only* the [`multiscales`](https://ngff.openmicroscopy.org/latest/#multiscale-md) and specs required by `multiscales` are implemented. Complete support for the spec would be welcome.
3+
Integrating [Xarray](https://docs.xarray.dev/en/stable/) with [OME-NGFF](https://ngff.openmicroscopy.org/).
54

6-
## How it works
7-
This library depends on [`pydantic-ome-ngff`](https://github.com/JaneliaSciComp/pydantic-ome-ngff) which implements OME-NGFF metadata as [pydantic](https://docs.pydantic.dev/) models.
8-
[`Axes`](https://ngff.openmicroscopy.org/latest/#axes-md) metadata is inferred from a DataArray by iterating over the dimensions of the array and checking for `units` and `type` properties in the attributes of the `coords` assigned to each dimension. Dimensions without coordinates will raise an exception. Scale and translation `CoordinateTransforms` are inferred by inspecting the values of the coordinates for each dimension. Be advised that no attempt is made to verify that arrays are sampled on a regular grid.
5+
## Help
6+
7+
See the [documentation](https://janeliascicomp.github.io/xarray-ome-ngff) for more details.
98

109
## Usage
1110

12-
Generate `multiscales` metadata from a multiscale collection of DataArrays.
11+
### Read OME-NGFF data
1312

1413
```python
15-
from xarray import DataArray
16-
import numpy as np
17-
from xarray_ome_ngff import create_multiscale_metadata
18-
import json
19-
coords = {'z' : DataArray(np.arange(100), attrs={'units': 'nm', 'type': 'space'}, dims=('z',)),
20-
'y' : DataArray(np.arange(300) * 2.2, attrs={'units': 'nm', 'type': 'space'}, dims=('y')),
21-
'x' : DataArray((np.arange(300) * .5) + 1, attrs={'units': 'nm', 'type': 'space'}, dims=('x',))}
14+
import zarr
15+
from xarray_ome_ngff import read_multiscale_group, DaskArrayWrapper
16+
group = zarr.open_group("https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr")
2217

23-
s0 = DataArray(data=0, coords=coords, dims=('z','y','x'), name='s0')
24-
s1 = s0.coarsen({dim: 2 for dim in s0.dims}).mean()
25-
s1.name = 's1'
26-
# create a small multiscale pyramid
27-
multiscale = [s0, s1]
28-
metadata = create_multiscale_metadata(name='test', type='yes', arrays=multiscale)
29-
print(metadata.json(indent=2))
30-
```
31-
```json
32-
{
33-
"version": "0.5-dev",
34-
"name": "test",
35-
"type": "yes",
36-
"metadata": null,
37-
"datasets": [
38-
{
39-
"path": "s0",
40-
"coordinateTransformations": [
41-
{
42-
"type": "scale",
43-
"scale": [
44-
1.0,
45-
2.2,
46-
0.5
47-
]
48-
},
49-
{
50-
"type": "translation",
51-
"translation": [
52-
0.0,
53-
0.0,
54-
1.0
55-
]
56-
}
57-
]
58-
},
59-
{
60-
"path": "s1",
61-
"coordinateTransformations": [
62-
{
63-
"type": "scale",
64-
"scale": [
65-
2.0,
66-
4.4,
67-
1.0
68-
]
69-
},
70-
{
71-
"type": "translation",
72-
"translation": [
73-
0.5,
74-
1.1,
75-
1.25
76-
]
77-
}
78-
]
79-
}
80-
],
81-
"axes": [
82-
{
83-
"name": "z",
84-
"type": "space",
85-
"units": null
86-
},
87-
{
88-
"name": "y",
89-
"type": "space",
90-
"units": null
91-
},
92-
{
93-
"name": "x",
94-
"type": "space",
95-
"units": null
96-
}
97-
],
98-
"coordinateTransformations": [
99-
{
100-
"type": "scale",
101-
"scale": [
102-
1.0,
103-
1.0,
104-
1.0
105-
]
106-
}
107-
]
108-
}
18+
# this ensures that we create a Dask array, which gives us lazy loading
19+
array_wrapper = DaskArrayWrapper(chunks=10)
20+
arrays = read_multiscale_group(group, array_wrapper=array_wrapper)
21+
print(arrays)
22+
"""
23+
{'0': <xarray.DataArray 'array-bb42996937dbff7600e0481e2b1572cc' (c: 2, z: 236,
24+
y: 275, x: 271)>
25+
dask.array<array, shape=(2, 236, 275, 271), dtype=uint16, chunksize=(2, 10, 10, 10), chunktype=numpy.ndarray>
26+
Coordinates:
27+
* c (c) float64 0.0 1.0
28+
* z (z) float64 0.0 0.5002 1.0 1.501 2.001 ... 116.0 116.5 117.0 117.5
29+
* y (y) float64 0.0 0.3604 0.7208 1.081 ... 97.67 98.03 98.39 98.75
30+
* x (x) float64 0.0 0.3604 0.7208 1.081 ... 96.23 96.59 96.95 97.31, '1': <xarray.DataArray 'array-2bfe6d4a6d289444ca93aa84fcb36342' (c: 2, z: 236,
31+
y: 137, x: 135)>
32+
dask.array<array, shape=(2, 236, 137, 135), dtype=uint16, chunksize=(2, 10, 10, 10), chunktype=numpy.ndarray>
33+
Coordinates:
34+
* c (c) float64 0.0 1.0
35+
* z (z) float64 0.0 0.5002 1.0 1.501 2.001 ... 116.0 116.5 117.0 117.5
36+
* y (y) float64 0.0 0.7208 1.442 2.162 ... 95.87 96.59 97.31 98.03
37+
* x (x) float64 0.0 0.7208 1.442 2.162 ... 94.42 95.15 95.87 96.59, '2': <xarray.DataArray 'array-80c5fc67c0c57909c0a050656a5ab630' (c: 2, z: 236,
38+
y: 68, x: 67)>
39+
dask.array<array, shape=(2, 236, 68, 67), dtype=uint16, chunksize=(2, 10, 10, 10), chunktype=numpy.ndarray>
40+
Coordinates:
41+
* c (c) float64 0.0 1.0
42+
* z (z) float64 0.0 0.5002 1.0 1.501 2.001 ... 116.0 116.5 117.0 117.5
43+
* y (y) float64 0.0 1.442 2.883 4.325 5.766 ... 92.26 93.7 95.15 96.59
44+
* x (x) float64 0.0 1.442 2.883 4.325 5.766 ... 90.82 92.26 93.7 95.15}
45+
"""
10946
```
11047

111-
It is not possible to create a DataArray from OME-NGFF metadata, but together the OME-NGFF [`Axes`](https://ngff.openmicroscopy.org/latest/#axes-md) and [`CoordinateTransformations`](https://ngff.openmicroscopy.org/latest/#trafo-md) metadata are sufficient to create _coordinates_ for a DataArray, provided you know the shape of the data. The function `create_coords` performs this operation:
48+
### Create OME-NGFF data
11249

11350
```python
114-
from xarray_ome_ngff import create_coords
115-
from pydantic_ome_ngff.v05.coordinateTransformations import VectorScaleTransform, VectorTranslationTransform
116-
from pydantic_ome_ngff.v05.axes import Axis
51+
import numpy as np
52+
from xarray import DataArray
53+
from xarray_ome_ngff import create_multiscale_group
54+
from zarr import MemoryStore
55+
56+
base_array = DataArray(
57+
np.zeros((10,10), dtype='uint8'),
58+
coords={
59+
'x': DataArray(np.arange(-5,5) * 3, dims=('x',), attrs={'units': 'meter'}),
60+
'y': DataArray(np.arange(-10, 0) * 3, dims=('y',), attrs={'units': 'meter'})
61+
})
62+
63+
# create a little multiscale pyramid
64+
arrays = {
65+
's0': base_array,
66+
's1': base_array.coarsen({'x': 2, 'y': 2}, boundary='trim').mean().astype(base_array.dtype)
67+
}
11768

69+
# This example uses in-memory storage, but you can use a
70+
# different store class from `zarr`
71+
store = MemoryStore()
11872

119-
shape = (3, 3)
120-
axes = [Axis(name='a', units="meter", type="space"), Axis(name='b', units="meter", type="space")]
73+
group = create_multiscale_group(store=store, path='my_group', arrays=arrays)
74+
print(group.attrs.asdict())
75+
"""
76+
{
77+
'multiscales': (
78+
{
79+
'version': '0.4',
80+
'name': None,
81+
'type': None,
82+
'metadata': None,
83+
'datasets': (
84+
{
85+
'path': 's0',
86+
'coordinateTransformations': (
87+
{'type': 'scale', 'scale': (3.0, 3.0)},
88+
{'type': 'translation', 'translation': (-15.0, -30.0)},
89+
),
90+
},
91+
{
92+
'path': 's1',
93+
'coordinateTransformations': (
94+
{'type': 'scale', 'scale': (6.0, 6.0)},
95+
{'type': 'translation', 'translation': (-13.5, -28.5)},
96+
),
97+
},
98+
),
99+
'axes': (
100+
{'name': 'x', 'type': 'space', 'unit': 'meter'},
101+
{'name': 'y', 'type': 'space', 'unit': 'meter'},
102+
),
103+
'coordinateTransformations': None,
104+
},
105+
)
106+
}
107+
"""
121108

122-
transforms = [VectorScaleTransform(scale=[1, .5]), VectorTranslationTransform(translation=[1, 2])]
109+
# check that the arrays are there
110+
print(tuple(group.arrays()))
111+
"""
112+
(('s0', <zarr.core.Array '/my_group/s0' (10, 10) uint8>), ('s1', <zarr.core.Array '/my_group/s1' (5, 5) uint8>))
113+
"""
123114

124-
coords = create_coords(axes, transforms, shape)
125-
print(coords)
126-
'''
127-
{'a': <xarray.DataArray (a: 3)>
128-
array([1., 2., 3.])
129-
Dimensions without coordinates: a
130-
Attributes:
131-
units: meter, 'b': <xarray.DataArray (b: 3)>
132-
array([2. , 2.5, 3. ])
133-
Dimensions without coordinates: b
134-
Attributes:
135-
units: meter}
136-
'''
115+
# write data to the arrays
116+
for path, array in arrays.items():
117+
group[path][:] = array.data
137118
```

docs/api/array_wrap.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
::: xarray_ome_ngff.array_wrap

docs/api/v04/multiscale.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
::: xarray_ome_ngff.v04.multiscale

0 commit comments

Comments
 (0)