Skip to content

Commit d4fe3d0

Browse files
authored
feat: add support for extra attributes in zarr.json writing (#43)
1 parent f768344 commit d4fe3d0

4 files changed

Lines changed: 201 additions & 18 deletions

File tree

src/yaozarrs/_zarr.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ class ZarrJsonGroupV3(BaseModel):
124124
class OMEAttributesV5(BaseModel):
125125
"""The attributes field of a zarr.json document that usually appears nested."""
126126

127+
model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow")
128+
127129
ome: v05.OMEMetadata
128130

129131

src/yaozarrs/v05/_zarr_json.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@
272272

273273
from typing import Annotated, Any, Literal, TypeAlias
274274

275-
from pydantic import BaseModel, Discriminator, Tag
275+
from pydantic import BaseModel, ConfigDict, Discriminator, Tag
276276

277277
from yaozarrs._base import ZarrGroupModel, _BaseModel
278278
from yaozarrs.v05._bf2raw import Bf2Raw
@@ -335,6 +335,8 @@ def _discriminate_ome_v05_metadata(v: Any) -> str | None:
335335
class OMEAttributes(_BaseModel):
336336
"""The attributes field of a `zarr.json` document in an ome-zarr group."""
337337

338+
model_config = ConfigDict(extra="allow")
339+
338340
ome: OMEMetadata
339341

340342

src/yaozarrs/write/v05/_write.py

Lines changed: 82 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ def write_image(
166166
datasets: ArrayOrPyramid,
167167
*,
168168
labels: Mapping[str, tuple[LabelImage, ArrayOrPyramid]] | None = None,
169+
extra_attributes: dict[str, Any] | None = None,
169170
writer: ZarrWriter = "auto",
170171
overwrite: bool = False,
171172
chunks: tuple[int, ...] | Literal["auto"] | None = "auto",
@@ -204,6 +205,10 @@ def write_image(
204205
Optional label images to write alongside the image. Keys are label names
205206
(e.g., "cells", "nuclei"), values are (LabelImage, datasets) tuples.
206207
Labels will be written to `dest/labels/{name}/`. Default is None.
208+
extra_attributes : dict[str, Any] | None, optional
209+
Additional attributes to write alongside "ome" in zarr.json.
210+
For example, `{"custom": {...}}` will produce
211+
`attributes: {"ome": {...}, "custom": {...}}`.
207212
writer : "zarr" | "tensorstore" | "auto" | CreateArrayFunc, optional
208213
Backend to use for writing arrays. "auto" prefers tensorstore if
209214
available, otherwise falls back to zarr-python. Pass a custom function
@@ -303,6 +308,7 @@ def write_image(
303308
dest,
304309
image,
305310
specs,
311+
extra_attributes=extra_attributes,
306312
chunks=chunks,
307313
shards=shards,
308314
writer=writer,
@@ -337,6 +343,7 @@ def write_plate(
337343
images: Mapping[tuple[str, str, str], ImageWithDatasets],
338344
*,
339345
plate: Plate | dict[str, Any] | None = None,
346+
extra_attributes: dict[str, Any] | None = None,
340347
writer: ZarrWriter = "auto",
341348
overwrite: bool = False,
342349
chunks: tuple[int, ...] | Literal["auto"] | None = "auto",
@@ -386,6 +393,8 @@ def write_plate(
386393
- Plate: Use as-is (must match images dict)
387394
Common dict keys: 'name', 'acquisitions', 'field_count'.
388395
Auto-generated: 'rows', 'columns', 'wells'.
396+
extra_attributes : dict[str, Any] | None, optional
397+
Additional attributes to write alongside "ome" in zarr.json.
389398
writer : "zarr" | "tensorstore" | "auto" | CreateArrayFunc, optional
390399
Backend to use for writing arrays. Default is "auto".
391400
overwrite : bool, optional
@@ -469,6 +478,7 @@ def write_plate(
469478
builder = PlateBuilder(
470479
dest,
471480
plate=plate_obj,
481+
extra_attributes=extra_attributes,
472482
writer=writer,
473483
chunks=chunks,
474484
shards=shards,
@@ -494,6 +504,7 @@ def write_bioformats2raw(
494504
images: Mapping[str, ImageWithDatasets],
495505
*,
496506
ome_xml: str | None = None,
507+
extra_attributes: dict[str, Any] | None = None,
497508
writer: ZarrWriter = "auto",
498509
overwrite: bool = False,
499510
chunks: tuple[int, ...] | Literal["auto"] | None = "auto",
@@ -548,6 +559,8 @@ def write_bioformats2raw(
548559
ome_xml : str | None, optional
549560
OME-XML string to store as `OME/METADATA.ome.xml`.
550561
Useful for preserving full metadata from converted files.
562+
extra_attributes : dict[str, Any] | None, optional
563+
Additional attributes to write alongside "ome" in zarr.json.
551564
writer : "zarr" | "tensorstore" | "auto" | CreateArrayFunc, optional
552565
Backend to use for writing arrays.
553566
overwrite : bool, optional
@@ -617,6 +630,7 @@ def write_bioformats2raw(
617630
builder = Bf2RawBuilder(
618631
dest,
619632
ome_xml=ome_xml,
633+
extra_attributes=extra_attributes,
620634
writer=writer,
621635
chunks=chunks,
622636
shards=shards,
@@ -639,6 +653,7 @@ def prepare_image(
639653
image: Image,
640654
datasets: ShapeAndDTypeOrPyramid,
641655
*,
656+
extra_attributes: dict[str, Any] | None = ...,
642657
writer: Literal["zarr"],
643658
chunks: tuple[int, ...] | Literal["auto"] | None = ...,
644659
shards: tuple[int, ...] | None = ...,
@@ -651,6 +666,7 @@ def prepare_image(
651666
image: Image,
652667
datasets: ShapeAndDTypeOrPyramid,
653668
*,
669+
extra_attributes: dict[str, Any] | None = ...,
654670
writer: Literal["tensorstore"],
655671
chunks: tuple[int, ...] | Literal["auto"] | None = ...,
656672
shards: tuple[int, ...] | None = ...,
@@ -663,6 +679,7 @@ def prepare_image(
663679
image: Image,
664680
datasets: ShapeAndDTypeOrPyramid,
665681
*,
682+
extra_attributes: dict[str, Any] | None = ...,
666683
writer: Literal["auto"] | CreateArrayFunc = ...,
667684
chunks: tuple[int, ...] | Literal["auto"] | None = ...,
668685
shards: tuple[int, ...] | None = ...,
@@ -674,6 +691,7 @@ def prepare_image(
674691
image: Image,
675692
datasets: ShapeAndDTypeOrPyramid,
676693
*,
694+
extra_attributes: dict[str, Any] | None = None,
677695
chunks: tuple[int, ...] | Literal["auto"] | None = "auto",
678696
shards: tuple[int, ...] | None = None,
679697
writer: ZarrWriter = "auto",
@@ -709,6 +727,8 @@ def prepare_image(
709727
- Sequence of `(shape, dtype)`: For multiple datasets (multiscale pyramid)
710728
711729
Must match the number and order of `image.multiscales[0].datasets`.
730+
extra_attributes : dict[str, Any] | None, optional
731+
Additional attributes to write alongside "ome" in zarr.json.
712732
chunks : tuple[int, ...] | "auto" | None, optional
713733
Chunk shape. See `write_image` for details.
714734
shards : tuple[int, ...] | None, optional
@@ -800,7 +820,7 @@ def prepare_image(
800820

801821
# Create zarr group with Image metadata
802822
dest_path = Path(dest)
803-
_create_zarr3_group(dest_path, image, overwrite)
823+
_create_zarr3_group(dest_path, image, overwrite, extra_attributes=extra_attributes)
804824

805825
dimension_names = [ax.name for ax in multiscale.axes]
806826

@@ -921,6 +941,7 @@ def __init__(
921941
dest: str | PathLike,
922942
*,
923943
ome_xml: str | None = None,
944+
extra_attributes: dict[str, Any] | None = None,
924945
writer: ZarrWriter = "auto",
925946
chunks: ShapeLike | Literal["auto"] | None = "auto",
926947
shards: ShapeLike | None = None,
@@ -929,6 +950,7 @@ def __init__(
929950
) -> None:
930951
self._dest = Path(dest)
931952
self._ome_xml = ome_xml
953+
self._extra_attributes = extra_attributes
932954
self._writer: ZarrWriter = writer
933955
self._chunks: ShapeLike | Literal["auto"] | None = chunks
934956
self._shards = shards
@@ -1084,8 +1106,13 @@ def prepare(self) -> tuple[Path, dict[str, Any]]:
10841106
raise ValueError("No series added. Use add_series() before prepare().")
10851107

10861108
# Create root zarr.json with bioformats2raw.layout
1087-
bf2raw = Bf2Raw(bioformats2raw_layout=3) # ty: ignore[missing-argument,unknown-argument]
1088-
_create_zarr3_group(self._dest, bf2raw, self._overwrite)
1109+
bf2raw = Bf2Raw(bioformats2raw_layout=3) # type: ignore
1110+
_create_zarr3_group(
1111+
self._dest,
1112+
bf2raw,
1113+
self._overwrite,
1114+
extra_attributes=self._extra_attributes,
1115+
)
10891116

10901117
# Create OME/zarr.json with series list
10911118
ome_path = self._dest / "OME"
@@ -1133,8 +1160,13 @@ def _ensure_initialized(self) -> None:
11331160
return
11341161

11351162
# Create root zarr.json with bioformats2raw.layout
1136-
bf2raw = Bf2Raw(bioformats2raw_layout=3) # ty: ignore[missing-argument,unknown-argument]
1137-
_create_zarr3_group(self._dest, bf2raw, self._overwrite)
1163+
bf2raw = Bf2Raw(bioformats2raw_layout=3) # type: ignore
1164+
_create_zarr3_group(
1165+
self._dest,
1166+
bf2raw,
1167+
self._overwrite,
1168+
extra_attributes=self._extra_attributes,
1169+
)
11381170

11391171
# Create OME directory and write METADATA.ome.xml if provided
11401172
ome_path = self._dest / "OME"
@@ -1152,16 +1184,23 @@ def _update_ome_series(self, series_name: str) -> None:
11521184

11531185
self._written_series.append(series_name)
11541186
series_model = Series(series=self._written_series)
1187+
zarr_json_path = self._dest / "OME" / "zarr.json"
1188+
# Preserve existing extra attributes if present
1189+
existing_extra: dict[str, Any] = {}
1190+
if zarr_json_path.exists():
1191+
existing = json.loads(zarr_json_path.read_text())
1192+
existing_extra = {
1193+
k: v for k, v in existing.get("attributes", {}).items() if k != "ome"
1194+
}
11551195
zarr_json = {
11561196
"zarr_format": 3,
11571197
"node_type": "group",
11581198
"attributes": {
11591199
"ome": series_model.model_dump(mode="json", exclude_none=True),
1200+
**existing_extra,
11601201
},
11611202
}
1162-
(self._dest / "OME" / "zarr.json").write_text(
1163-
json.dumps(zarr_json, indent=self._indent)
1164-
)
1203+
zarr_json_path.write_text(json.dumps(zarr_json, indent=self._indent))
11651204

11661205

11671206
class PlateBuilder:
@@ -1275,6 +1314,7 @@ def __init__(
12751314
dest: str | PathLike,
12761315
*,
12771316
plate: Plate | None = None,
1317+
extra_attributes: dict[str, Any] | None = None,
12781318
writer: ZarrWriter = "auto",
12791319
chunks: ShapeLike | Literal["auto"] | None = "auto",
12801320
shards: ShapeLike | None = None,
@@ -1283,6 +1323,7 @@ def __init__(
12831323
) -> None:
12841324
self._dest = Path(dest)
12851325
self._user_plate = plate # Store user-provided plate (if any)
1326+
self._extra_attributes = extra_attributes
12861327
self._writer: ZarrWriter = writer
12871328
self._chunks: ShapeLike | Literal["auto"] | None = chunks
12881329
self._shards = shards
@@ -1482,7 +1523,12 @@ def prepare(self) -> tuple[Path, dict[str, Any]]:
14821523
plate = _merge_plate_metadata(self._get_images_dict(), self._user_plate)
14831524

14841525
# Create plate zarr.json
1485-
_create_zarr3_group(self._dest, plate, self._overwrite)
1526+
_create_zarr3_group(
1527+
self._dest,
1528+
plate,
1529+
self._overwrite,
1530+
extra_attributes=self._extra_attributes,
1531+
)
14861532

14871533
# Create arrays for each well/field combination
14881534
all_arrays: dict[str, Any] = {}
@@ -1593,6 +1639,7 @@ def _update_plate_metadata(self) -> None:
15931639
"node_type": "group",
15941640
"attributes": {
15951641
"ome": plate.model_dump(mode="json", exclude_none=True),
1642+
**(self._extra_attributes or {}),
15961643
},
15971644
}
15981645
(self._dest / "zarr.json").write_text(json.dumps(zarr_json, indent=2))
@@ -1987,14 +2034,23 @@ def _update_labels_group(self, label_name: str) -> None:
19872034
# If label exists and we're in overwrite mode, it's already in the list
19882035

19892036
labels_group = LabelsGroup(labels=all_labels)
2037+
zarr_json_path = self._dest / "zarr.json"
2038+
# Preserve existing extra attributes if present
2039+
existing_extra: dict[str, Any] = {}
2040+
if zarr_json_path.exists():
2041+
existing = json.loads(zarr_json_path.read_text())
2042+
existing_extra = {
2043+
k: v for k, v in existing.get("attributes", {}).items() if k != "ome"
2044+
}
19902045
zarr_json = {
19912046
"zarr_format": 3,
19922047
"node_type": "group",
19932048
"attributes": {
19942049
"ome": labels_group.model_dump(mode="json", exclude_none=True),
2050+
**existing_extra,
19952051
},
19962052
}
1997-
(self._dest / "zarr.json").write_text(json.dumps(zarr_json, indent=2))
2053+
zarr_json_path.write_text(json.dumps(zarr_json, indent=2))
19982054

19992055

20002056
# ##############################################################################
@@ -2222,6 +2278,7 @@ def _create_zarr3_group(
22222278
ome_model: OMEMetadata | None = None,
22232279
overwrite: bool = False,
22242280
indent: int = 2,
2281+
extra_attributes: dict[str, Any] | None = None,
22252282
) -> None:
22262283
"""Create a zarr group directory with optional OME metadata in zarr.json."""
22272284
zarr_json_path = dest_path / "zarr.json"
@@ -2246,17 +2303,21 @@ def _create_zarr3_group(
22462303
"zarr_format": 3,
22472304
"node_type": "group",
22482305
}
2249-
if ome_model is not None:
2250-
zarr_json["attributes"] = {
2251-
"ome": ome_model.model_dump(mode="json", exclude_none=True),
2252-
}
2306+
if ome_model is not None or extra_attributes:
2307+
attrs: dict[str, Any] = {}
2308+
if ome_model is not None:
2309+
attrs["ome"] = ome_model.model_dump(mode="json", exclude_none=True)
2310+
if extra_attributes:
2311+
attrs.update(extra_attributes)
2312+
zarr_json["attributes"] = attrs
22532313
zarr_json_path.write_text(json.dumps(zarr_json, indent=indent))
22542314

22552315

22562316
def _update_zarr3_group(
22572317
dest_path: Path,
22582318
ome_model: OMEMetadata,
22592319
indent: int = 2,
2320+
extra_attributes: dict[str, Any] | None = None,
22602321
) -> None:
22612322
"""Update the ome metadata in an existing zarr group."""
22622323
zarr_json_path = dest_path / "zarr.json"
@@ -2266,9 +2327,13 @@ def _update_zarr3_group(
22662327
with open(zarr_json_path) as f:
22672328
zarr_json = json.load(f)
22682329

2269-
zarr_json["attributes"] = {
2270-
"ome": ome_model.model_dump(mode="json", exclude_none=True),
2271-
}
2330+
# Preserve existing extra attributes (non-ome keys)
2331+
existing_attrs = zarr_json.get("attributes", {})
2332+
attrs: dict[str, Any] = {k: v for k, v in existing_attrs.items() if k != "ome"}
2333+
attrs["ome"] = ome_model.model_dump(mode="json", exclude_none=True)
2334+
if extra_attributes:
2335+
attrs.update(extra_attributes)
2336+
zarr_json["attributes"] = attrs
22722337
zarr_json_path.write_text(json.dumps(zarr_json, indent=indent))
22732338

22742339

0 commit comments

Comments
 (0)