Skip to content
This repository was archived by the owner on Apr 2, 2025. It is now read-only.

Commit 066513b

Browse files
committed
Added odp-utils package. Added gridded on RAW to utils. added py.typed files.
1 parent 6b39134 commit 066513b

File tree

15 files changed

+845
-2193
lines changed

15 files changed

+845
-2193
lines changed

.github/workflows/pypi-publish.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ jobs:
4141
package_path:
4242
- "sdk"
4343
- "dto"
44+
- "utils"
4445

4546
steps:
4647
- name: Download build artifacts
@@ -62,4 +63,4 @@ jobs:
6263
with:
6364
password: ${{ secrets.PYPI_TOKEN }}
6465
packages-dir: "./build/${{ matrix.package_path }}/dist"
65-
verbose: true
66+
verbose: true

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ GIT := git
88
PYTHON := python3
99

1010
# Subprojects
11-
SUBPROJECTS := src/sdk src/dto
11+
SUBPROJECTS := src/sdk src/dto src/utils
1212
DIST_DIRS := $(SUBPROJECTS:%=%/dist)
1313
PYPROJECTS := $(SUBPROJECTS:%=%/pyproject.toml)
1414
MD5S := $(DIST_DIRS:%=%/md5.published)
@@ -43,7 +43,7 @@ CURRENT_VERSION := $(shell $(GIT) describe --tags --abbrev=0)
4343

4444
# Update the version in all subprojects
4545
version: $(VERSIONS)
46-
$(POETRY) update odp-sdk odp-dto
46+
$(POETRY) update odp-sdk odp-dto odp-utils
4747

4848
# Build all subprojects
4949
build: $(DIST_DIRS)

poetry.lock

Lines changed: 573 additions & 2187 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ package-mode = false
1010
python = "^3.9"
1111
odp-dto = { path = "./src/dto", develop = true }
1212
odp-sdk = { path = "./src/sdk", develop = true }
13-
jupyter = "^1.0.0"
13+
odp-utils = { path = "./src/utils", develop = true }
1414

1515
[tool.poetry.group.dev.dependencies]
1616
python-dotenv = "^1.0.1"

src/dto/odp/dto/py.typed

Whitespace-only changes.

src/dto/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ license = "MIT"
77
readme = "README.md"
88
packages = [
99
{include="odp"},
10+
{include="odp/dto/py.typed"},
1011
]
1112

1213
[tool.poetry.dependencies]
@@ -15,4 +16,4 @@ pydantic = "^2.4.2"
1516

1617
[build-system]
1718
requires = ["poetry-core>=1.0.0"]
18-
build-backend = "poetry.core.masonry.api"
19+
build-backend = "poetry.core.masonry.api"

src/sdk/odp/client/py.typed

Whitespace-only changes.

src/sdk/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ license = "MIT"
77
readme = "README.md"
88
packages = [
99
{include="odp"},
10+
{include="odp/client/py.typed"},
1011
{include="odp_sdk"},
1112
]
1213

@@ -26,4 +27,4 @@ validators = "^0.28.3"
2627

2728
[build-system]
2829
requires = ["poetry-core>=1.0.0"]
29-
build-backend = "poetry.core.masonry.api"
30+
build-backend = "poetry.core.masonry.api"

src/utils/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# ODP Python Utilities
2+
3+
Package containing utilities related to the Ocean Data Platform SDK
4+
5+
## Documentation
6+
7+
https://docs.hubocea.earth
8+
9+
## Installation
10+
11+
```shell
12+
pip install odp-utils
13+
```

src/utils/odp/utils/__init__.py

Whitespace-only changes.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import warnings

# Package initialisation for `odp.utils`.
#
# Tries to enable gridded (zarr) support and expose `OdpStoreV3` from this package. If zarr (or anything
# the gridded module imports) cannot be imported, a warning is emitted and the package is still usable
# without gridded support.
try:
    import os

    # The flag is set before zarr is imported: the error message below documents that the Zarr V3 API is
    # only available if this module is imported before 'zarr' or if the env-variable is already set.
    os.environ["ZARR_V3_EXPERIMENTAL_API"] = "1"
    warnings.filterwarnings("ignore", message=r".*The experimental Zarr V3.*")

    from zarr._storage.store import assert_zarr_v3_api_available

    # Raises NotImplementedError (handled below) when the V3 API is unavailable
    assert_zarr_v3_api_available()

    from .gridded_on_raw import OdpStoreV3  # noqa: F401, F841

except ImportError:
    warnings.warn("Zarr not installed. Gridded support not enabled")
except NotImplementedError as e:
    raise ImportError(
        "Zarr V3 is unavailable. "
        "Make sure to import this module before importing the 'zarr'-package "
        "or ensure that the env-variable 'ZARR_V3_EXPERIMENTAL_API'='1' is set."
    ) from e
finally:
    # Delete package references to prevent them from being imported from here.
    # Done in `finally` (and tolerant of missing names) so the helpers are removed on the
    # failure paths as well, not only when every import succeeded.
    globals().pop("assert_zarr_v3_api_available", None)
    globals().pop("os", None)

# Delete package reference to prevent it from being imported from here
del warnings
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
from pathlib import Path
2+
from typing import Iterable, Iterator, List, Tuple, Union
3+
from uuid import UUID
4+
5+
from odp.client import OdpClient
6+
from odp.client.dto.file_dto import FileMetadataDto
7+
from odp.client.exc import OdpFileAlreadyExistsError, OdpFileNotFoundError
8+
from odp.dto import ResourceDto
9+
from zarr._storage.store import Store, StoreV3
10+
from zarr._storage.v3 import RmdirV3
11+
12+
13+
class OdpStoreV3(RmdirV3, StoreV3, Store):
    """Zarr storage implementation for storing gridded data on ODP RAW using Zarr

    Every zarr key is stored as one file in the RAW dataset. The original zarr key is kept in the file
    metadata under `FINFO_ZARR_KEY`, which is what `keys()` reads back when listing the store.

    Examples:
        Read a gridded dataset:

        ```python
        import xarray as xr
        from odp.client import OdpClient

        client = OdpClient()
        dataset = ...  # get or create dataset

        store = OdpStoreV3(client, dataset)

        # Load dataset from ODP RAW into an Xarray dataset
        ds = xr.open_zarr(store)

        # Do XArray stuff
        ...
        ```

        Write a gridded dataset:

        ```python
        import xarray as xr
        from odp.client import OdpClient

        client = OdpClient()
        dataset = ...  # get or create dataset

        store = OdpStoreV3(client, dataset)

        ds = ...  # Create or load an XArray dataset

        # Write dataset to ODP RAW
        ds.to_zarr(store)
        ```
    """

    # Metadata key under which the original zarr key is stored on each RAW file
    FINFO_ZARR_KEY = "io.hubocean.raw.gridded/key"

    def __init__(self, client: OdpClient, dataset: Union[UUID, str, ResourceDto]):
        """Create a store backed by a RAW dataset

        Args:
            client: Client used for all catalog and RAW calls
            dataset: Either the dataset resource itself, or a reference (UUID or string) that is
                resolved through the catalog the first time the dataset is needed
        """
        self._client = client

        # Always define the cache attribute. Without this, passing a UUID/str reference left
        # `_dataset` unset and the `dataset` property raised AttributeError on first access.
        self._dataset: Union[ResourceDto, None] = None

        if isinstance(dataset, ResourceDto):
            self._dataset = dataset
            self._dataset_ref = dataset.metadata.uuid or f"{dataset.kind}/{dataset.metadata.name}"
        else:
            self._dataset_ref = dataset

    @property
    def dataset(self) -> ResourceDto:
        """Dataset resource backing this store

        The resource is fetched from the catalog when none is cached yet, or when the cached
        resource has no UUID. The fetched resource replaces the cached one.
        """
        if self._dataset is None or self._dataset.metadata.uuid is None:
            self._dataset = self._client.catalog.get(self._dataset_ref)
        return self._dataset

    def _dir_fname_tuple(self, key: str) -> Tuple[str, str, str]:
        """Simple function to split a path into directory name, file name and file suffix"""
        pth = Path(key)
        return str(pth.parent), pth.name, pth.suffix

    def _key_from_finfo(self, finfo: FileMetadataDto) -> str:
        """Convert from `FileMetadataDto`-object to zarr key using `io.hubocean.raw.gridded/key`-key from
        `finfo.metadata`.

        Args:
            finfo: FileMetadataDto

        Returns:
            Zarr key from `finfo`

        Raises:
            KeyError: Zarr key does not exist
        """
        try:
            if not finfo.metadata:
                raise ValueError(f"Missing zarr key from metadata: {self.FINFO_ZARR_KEY}")
            return finfo.metadata[self.FINFO_ZARR_KEY]
        except (ValueError, KeyError) as e:
            # Both "no metadata at all" and "metadata without the zarr key" surface as KeyError
            raise KeyError("Unable to get zarr key from file metadata") from e

    def _key_obj(self, key) -> FileMetadataDto:
        """Convert from a zarr key to `FileMetadataDto`

        Converts an incoming key by splitting key into its directory and filename components and returning
        `FileMetadataDto`. The original key is also added to the `metadata`-member of the returned object.

        Args:
            key: Key to be converted

        Returns:
            `FileMetadataDto` based on `key`
        """
        dirname, fname, suffix = self._dir_fname_tuple(key)
        if not suffix:
            # Keys without a file suffix are stored with a `.bin` suffix
            fname = f"{fname}.bin"

        return FileMetadataDto(name=f"{dirname}/{fname}", metadata={self.FINFO_ZARR_KEY: key})

    def __getitem__(self, key: str) -> bytes:
        """Get a value associated with a given key

        Args:
            key: Key to look up

        Returns:
            Value associated with `key` if it exists

        Raises:
            KeyError: If key does not exist
        """
        try:
            return self._client.raw.download_file(self.dataset, self._key_obj(key))
        except OdpFileNotFoundError as e:
            raise KeyError(f"Key not found: '{key}'") from e

    def __setitem__(self, key: str, value: bytes):
        """Set a key-value pair

        Args:
            key: Key to be set
            value: Value associated with `key`
        """
        self._validate_key(key)
        key_obj = self._key_obj(key)

        try:
            self._client.raw.create_file(self.dataset, key_obj, value)
            return
        except OdpFileAlreadyExistsError:
            pass

        # File already exists - simply overwrite it
        self._client.raw.upload_file(self.dataset, key_obj, value, overwrite=True)

    def __delitem__(self, key: str):
        """Delete a key-value pair by key

        Args:
            key: Key to be deleted

        Raises:
            KeyError: Key does not exist
        """
        try:
            self._client.raw.delete_file(self.dataset, self._key_obj(key))
        except OdpFileNotFoundError as e:
            raise KeyError(f"Key not found: '{key}'") from e

    def keys(self) -> Iterable[str]:
        """Iterate keys in store

        Files in the dataset that carry no zarr key in their metadata are skipped.

        Yields:
            Keys in store
        """
        for finfo in self._client.raw.list(self.dataset):
            try:
                yield self._key_from_finfo(finfo)
            except KeyError:
                # Not a file written by this store - ignore it
                pass

    def list(self) -> List[str]:
        """List keys in store

        Returns:
            All keys in store as a list
        """
        return list(self.keys())

    def __iter__(self) -> Iterator[str]:
        """Iterate keys in store

        Yields:
            Keys in store
        """
        yield from self.keys()

    def __len__(self) -> int:
        """Get number of keys in store

        Returns:
            Number of keys in store
        """
        return len(self.list())

    def clear(self):
        """Delete all keys in store

        Every file returned by listing the dataset is deleted, including files that carry no
        zarr key in their metadata.
        """
        for finfo in self._client.raw.list(self.dataset):
            self._client.raw.delete_file(self.dataset, finfo)

src/utils/odp/utils/py.typed

Whitespace-only changes.

src/utils/pyproject.toml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[tool.poetry]
2+
name = "odp-utils"
3+
version = "0.4.10"
4+
description = "ODP Python Utilities"
5+
authors = ["Thomas Li Fredriksen <[email protected]>"]
6+
license = "MIT"
7+
readme = "README.md"
8+
packages = [
9+
{include="odp"},
10+
{include="odp/utils/py.typed"},
11+
]
12+
13+
14+
[tool.poetry.dependencies]
15+
python = "^3.9"
16+
odp-sdk = { path = "../sdk", develop = true }
17+
zarr = { version = "^2.18.3", python = "^3.10" }
18+
19+
20+
[build-system]
21+
requires = ["poetry-core>=1.0.0"]
22+
build-backend = "poetry.core.masonry.api"

src/utils/tests/test_utils/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)