|
1 | 1 | # xarray-ome-ngff |
2 | | -Integration between xarray and the ome-ngff data model. |
3 | 2 |
|
4 | | -At present (February, 2023) this is a partial implementation of the [OME-NGFF spec](https://ngff.openmicroscopy.org/latest/#implementations). Specifcally, *only* the [`multiscales`](https://ngff.openmicroscopy.org/latest/#multiscale-md) and specs required by `multiscales` are implemented. Complete support for the spec would be welcome. |
| 3 | +Integrating [Xarray](https://docs.xarray.dev/en/stable/) with [OME-NGFF](https://ngff.openmicroscopy.org/). |
5 | 4 |
|
6 | | -## How it works |
7 | | -This library depends on [`pydantic-ome-ngff`](https://github.com/JaneliaSciComp/pydantic-ome-ngff) which implements OME-NGFF metadata as [pydantic](https://docs.pydantic.dev/) models. |
8 | | -[`Axes`](https://ngff.openmicroscopy.org/latest/#axes-md) metadata is inferred from a DataArray by iterating over the dimensions of the array and checking for `units` and `type` properties in the attributes of the `coords` assigned to each dimension. Dimensions without coordinates will raise an exception. Scale and translation `CoordinateTransforms` are inferred by inspecting the values of the coordinates for each dimension. Be advised that no attempt is made to verify that arrays are sampled on a regular grid. |
| 5 | +## Help |
| 6 | + |
| 7 | +See the [documentation](https://janeliascicomp.github.io/xarray-ome-ngff) for more details. |
9 | 8 |
|
10 | 9 | ## Usage |
11 | 10 |
|
12 | | -Generate `multiscales` metadata from a multiscale collection of DataArrays. |
| 11 | +### Read OME-NGFF data |
13 | 12 |
|
14 | 13 | ```python |
15 | | -from xarray import DataArray |
16 | | -import numpy as np |
17 | | -from xarray_ome_ngff import create_multiscale_metadata |
18 | | -import json |
19 | | -coords = {'z' : DataArray(np.arange(100), attrs={'units': 'nm', 'type': 'space'}, dims=('z',)), |
20 | | - 'y' : DataArray(np.arange(300) * 2.2, attrs={'units': 'nm', 'type': 'space'}, dims=('y')), |
21 | | - 'x' : DataArray((np.arange(300) * .5) + 1, attrs={'units': 'nm', 'type': 'space'}, dims=('x',))} |
| 14 | +import zarr |
| 15 | +from xarray_ome_ngff import read_multiscale_group, DaskArrayWrapper |
| 16 | +group = zarr.open_group("https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr") |
22 | 17 |
|
23 | | -s0 = DataArray(data=0, coords=coords, dims=('z','y','x'), name='s0') |
24 | | -s1 = s0.coarsen({dim: 2 for dim in s0.dims}).mean() |
25 | | -s1.name = 's1' |
26 | | -# create a small multiscale pyramid |
27 | | -multiscale = [s0, s1] |
28 | | -metadata = create_multiscale_metadata(name='test', type='yes', arrays=multiscale) |
29 | | -print(metadata.json(indent=2)) |
30 | | -``` |
31 | | -```json |
32 | | -{ |
33 | | - "version": "0.5-dev", |
34 | | - "name": "test", |
35 | | - "type": "yes", |
36 | | - "metadata": null, |
37 | | - "datasets": [ |
38 | | - { |
39 | | - "path": "s0", |
40 | | - "coordinateTransformations": [ |
41 | | - { |
42 | | - "type": "scale", |
43 | | - "scale": [ |
44 | | - 1.0, |
45 | | - 2.2, |
46 | | - 0.5 |
47 | | - ] |
48 | | - }, |
49 | | - { |
50 | | - "type": "translation", |
51 | | - "translation": [ |
52 | | - 0.0, |
53 | | - 0.0, |
54 | | - 1.0 |
55 | | - ] |
56 | | - } |
57 | | - ] |
58 | | - }, |
59 | | - { |
60 | | - "path": "s1", |
61 | | - "coordinateTransformations": [ |
62 | | - { |
63 | | - "type": "scale", |
64 | | - "scale": [ |
65 | | - 2.0, |
66 | | - 4.4, |
67 | | - 1.0 |
68 | | - ] |
69 | | - }, |
70 | | - { |
71 | | - "type": "translation", |
72 | | - "translation": [ |
73 | | - 0.5, |
74 | | - 1.1, |
75 | | - 1.25 |
76 | | - ] |
77 | | - } |
78 | | - ] |
79 | | - } |
80 | | - ], |
81 | | - "axes": [ |
82 | | - { |
83 | | - "name": "z", |
84 | | - "type": "space", |
85 | | - "units": null |
86 | | - }, |
87 | | - { |
88 | | - "name": "y", |
89 | | - "type": "space", |
90 | | - "units": null |
91 | | - }, |
92 | | - { |
93 | | - "name": "x", |
94 | | - "type": "space", |
95 | | - "units": null |
96 | | - } |
97 | | - ], |
98 | | - "coordinateTransformations": [ |
99 | | - { |
100 | | - "type": "scale", |
101 | | - "scale": [ |
102 | | - 1.0, |
103 | | - 1.0, |
104 | | - 1.0 |
105 | | - ] |
106 | | - } |
107 | | - ] |
108 | | -} |
| 18 | +# this ensures that we create a Dask array, which gives us lazy loading |
| 19 | +array_wrapper = DaskArrayWrapper(chunks=10) |
| 20 | +arrays = read_multiscale_group(group, array_wrapper=array_wrapper) |
| 21 | +print(arrays) |
| 22 | +""" |
| 23 | +{'0': <xarray.DataArray 'array-bb42996937dbff7600e0481e2b1572cc' (c: 2, z: 236, |
| 24 | + y: 275, x: 271)> |
| 25 | +dask.array<array, shape=(2, 236, 275, 271), dtype=uint16, chunksize=(2, 10, 10, 10), chunktype=numpy.ndarray> |
| 26 | +Coordinates: |
| 27 | + * c (c) float64 0.0 1.0 |
| 28 | + * z (z) float64 0.0 0.5002 1.0 1.501 2.001 ... 116.0 116.5 117.0 117.5 |
| 29 | + * y (y) float64 0.0 0.3604 0.7208 1.081 ... 97.67 98.03 98.39 98.75 |
| 30 | + * x (x) float64 0.0 0.3604 0.7208 1.081 ... 96.23 96.59 96.95 97.31, '1': <xarray.DataArray 'array-2bfe6d4a6d289444ca93aa84fcb36342' (c: 2, z: 236, |
| 31 | + y: 137, x: 135)> |
| 32 | +dask.array<array, shape=(2, 236, 137, 135), dtype=uint16, chunksize=(2, 10, 10, 10), chunktype=numpy.ndarray> |
| 33 | +Coordinates: |
| 34 | + * c (c) float64 0.0 1.0 |
| 35 | + * z (z) float64 0.0 0.5002 1.0 1.501 2.001 ... 116.0 116.5 117.0 117.5 |
| 36 | + * y (y) float64 0.0 0.7208 1.442 2.162 ... 95.87 96.59 97.31 98.03 |
| 37 | + * x (x) float64 0.0 0.7208 1.442 2.162 ... 94.42 95.15 95.87 96.59, '2': <xarray.DataArray 'array-80c5fc67c0c57909c0a050656a5ab630' (c: 2, z: 236, |
| 38 | + y: 68, x: 67)> |
| 39 | +dask.array<array, shape=(2, 236, 68, 67), dtype=uint16, chunksize=(2, 10, 10, 10), chunktype=numpy.ndarray> |
| 40 | +Coordinates: |
| 41 | + * c (c) float64 0.0 1.0 |
| 42 | + * z (z) float64 0.0 0.5002 1.0 1.501 2.001 ... 116.0 116.5 117.0 117.5 |
| 43 | + * y (y) float64 0.0 1.442 2.883 4.325 5.766 ... 92.26 93.7 95.15 96.59 |
| 44 | + * x (x) float64 0.0 1.442 2.883 4.325 5.766 ... 90.82 92.26 93.7 95.15} |
| 45 | +""" |
109 | 46 | ``` |
110 | 47 |
|
111 | | -It is not possible to create a DataArray from OME-NGFF metadata, but together the OME-NGFF [`Axes`](https://ngff.openmicroscopy.org/latest/#axes-md) and [`CoordinateTransformations`](https://ngff.openmicroscopy.org/latest/#trafo-md) metadata are sufficient to create _coordinates_ for a DataArray, provided you know the shape of the data. The function `create_coords` performs this operation: |
| 48 | +### Create OME-NGFF data |
112 | 49 |
|
113 | 50 | ```python |
114 | | -from xarray_ome_ngff import create_coords |
115 | | -from pydantic_ome_ngff.v05.coordinateTransformations import VectorScaleTransform, VectorTranslationTransform |
116 | | -from pydantic_ome_ngff.v05.axes import Axis |
| 51 | +import numpy as np |
| 52 | +from xarray import DataArray |
| 53 | +from xarray_ome_ngff import create_multiscale_group |
| 54 | +from zarr import MemoryStore |
| 55 | + |
| 56 | +base_array = DataArray( |
| 57 | + np.zeros((10,10), dtype='uint8'), |
| 58 | + coords={ |
| 59 | + 'x': DataArray(np.arange(-5,5) * 3, dims=('x',), attrs={'units': 'meter'}), |
| 60 | + 'y': DataArray(np.arange(-10, 0) * 3, dims=('y',), attrs={'units': 'meter'}) |
| 61 | + }) |
| 62 | + |
| 63 | +# create a little multiscale pyramid |
| 64 | +arrays = { |
| 65 | + 's0': base_array, |
| 66 | + 's1': base_array.coarsen({'x': 2, 'y': 2}, boundary='trim').mean().astype(base_array.dtype) |
| 67 | +} |
117 | 68 |
|
| 69 | +# This example uses in-memory storage, but you can use a |
| 70 | +# different store class from `zarr` |
| 71 | +store = MemoryStore() |
118 | 72 |
|
119 | | -shape = (3, 3) |
120 | | -axes = [Axis(name='a', units="meter", type="space"), Axis(name='b', units="meter", type="space")] |
| 73 | +group = create_multiscale_group(store=store, path='my_group', arrays=arrays) |
| 74 | +print(group.attrs.asdict()) |
| 75 | +""" |
| 76 | +{ |
| 77 | + 'multiscales': ( |
| 78 | + { |
| 79 | + 'version': '0.4', |
| 80 | + 'name': None, |
| 81 | + 'type': None, |
| 82 | + 'metadata': None, |
| 83 | + 'datasets': ( |
| 84 | + { |
| 85 | + 'path': 's0', |
| 86 | + 'coordinateTransformations': ( |
| 87 | + {'type': 'scale', 'scale': (3.0, 3.0)}, |
| 88 | + {'type': 'translation', 'translation': (-15.0, -30.0)}, |
| 89 | + ), |
| 90 | + }, |
| 91 | + { |
| 92 | + 'path': 's1', |
| 93 | + 'coordinateTransformations': ( |
| 94 | + {'type': 'scale', 'scale': (6.0, 6.0)}, |
| 95 | + {'type': 'translation', 'translation': (-13.5, -28.5)}, |
| 96 | + ), |
| 97 | + }, |
| 98 | + ), |
| 99 | + 'axes': ( |
| 100 | + {'name': 'x', 'type': 'space', 'unit': 'meter'}, |
| 101 | + {'name': 'y', 'type': 'space', 'unit': 'meter'}, |
| 102 | + ), |
| 103 | + 'coordinateTransformations': None, |
| 104 | + }, |
| 105 | + ) |
| 106 | +} |
| 107 | +""" |
121 | 108 |
|
122 | | -transforms = [VectorScaleTransform(scale=[1, .5]), VectorTranslationTransform(translation=[1, 2])] |
| 109 | +# check that the arrays are there |
| 110 | +print(tuple(group.arrays())) |
| 111 | +""" |
| 112 | +(('s0', <zarr.core.Array '/my_group/s0' (10, 10) uint8>), ('s1', <zarr.core.Array '/my_group/s1' (5, 5) uint8>)) |
| 113 | +""" |
123 | 114 |
|
124 | | -coords = create_coords(axes, transforms, shape) |
125 | | -print(coords) |
126 | | -''' |
127 | | -{'a': <xarray.DataArray (a: 3)> |
128 | | -array([1., 2., 3.]) |
129 | | -Dimensions without coordinates: a |
130 | | -Attributes: |
131 | | - units: meter, 'b': <xarray.DataArray (b: 3)> |
132 | | -array([2. , 2.5, 3. ]) |
133 | | -Dimensions without coordinates: b |
134 | | -Attributes: |
135 | | - units: meter} |
136 | | -''' |
| 115 | +# write data to the arrays |
| 116 | +for path, array in arrays.items(): |
| 117 | + group[path][:] = array.data |
137 | 118 | ``` |
0 commit comments