Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0d436e3
🎈 *writes data to get teh party started*
Kezzsim Jul 17, 2025
152b886
🌫️ *anxiously adds more cloud providers*
Kezzsim Jul 17, 2025
4e42fb0
Resolve mypy errors
Kezzsim Jul 17, 2025
1c0164b
👍️ Resolve minio https error preventing us from writing `zarr.json`
Kezzsim Aug 26, 2025
6f8ab0f
🚮 Experiment with writing (sloppy) data
Kezzsim Aug 27, 2025
9db8cca
🪲 DEBUG: problems with `write`
Kezzsim Aug 28, 2025
3a4e81c
🕶️ Review : Add missing prefix
Kezzsim Sep 11, 2025
99480e2
✍️ Write regex helper function
Kezzsim Sep 22, 2025
f54cc7f
🧽 refactor to clean up repeated code
Kezzsim Sep 23, 2025
d102d11
✍️ Add Blobs to writing tests
Kezzsim Oct 17, 2025
a4d56c7
Merge branch 'bluesky:main' into obtsor
Kezzsim Oct 21, 2025
76b8dfa
✍️ Rewrite `get_storage` to be a router for buckets
Kezzsim Oct 21, 2025
6df1ecb
Merge branch 'bluesky:main' into obtsor
Kezzsim Oct 23, 2025
3be96c0
refactor ObjectStorage
genematx Oct 23, 2025
d61e11a
Merge pull request #1 from genematx/obstore-eugene
Kezzsim Oct 23, 2025
65bebe8
🐋 Add minio container to CI for testing
Kezzsim Oct 23, 2025
0e200a9
🧪 Make `TILED_TEST_BUCKET` env var for advanced testing
Kezzsim Oct 23, 2025
bd7007b
More refactoring of Storage
genematx Oct 24, 2025
401e432
FIX: look up registered storages instead of recreating them
genematx Oct 24, 2025
ef5e07d
Simplify test config
genematx Oct 24, 2025
4d8afb9
TST: fix test_writing + more refactoring
genematx Oct 24, 2025
aae6b13
MNT: add minio dependency for server
genematx Oct 24, 2025
5a270f5
ENH: generalize asset deletion
genematx Oct 24, 2025
8e71719
Merge pull request #2 from genematx/obstore-eugene
Kezzsim Oct 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ jobs:
shell: bash -l {0}
run: source continuous_integration/scripts/start_redis.sh

- name: Start Minio service in container.
#TODO: This product is leaving open-source container distribution
# Find a new image or product to use
# https://github.com/minio/minio/issues/21647#issuecomment-3418675115
shell: bash -l {0}
run: source continuous_integration/scripts/start_minio.sh

- name: Ensure example data is migrated to current catalog database schema.
# The example data is expected to be kept up to date to the latest Tiled
Expand All @@ -84,6 +90,7 @@ jobs:
# Provide test suite with PostgreSQL and Redis databases to use.
TILED_TEST_POSTGRESQL_URI: postgresql://postgres:secret@localhost:5432
TILED_TEST_REDIS: redis://localhost:6379
TILED_TEST_BUCKET: http://minioadmin:minioadmin@localhost:9000/buck
# TODO Reinstate after finding a new image to use
# https://github.com/bluesky/tiled/issues/1109
# # Opt in to LDAPAuthenticator tests.
Expand Down
29 changes: 29 additions & 0 deletions continuous_integration/docker-configs/minio-docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
version: "3.2"
services:
minio:
image: minio/minio:latest
ports:
- 9000:9000
- 9001:9001
volumes:
- minio-data:/data
environment:
MINIO_ROOT_USER: "minioadmin"
MINIO_ROOT_PASSWORD: "minioadmin"
command: server /data --console-address :9001
restart: unless-stopped
create-bucket:
image: minio/mc:latest
environment:
MC_HOST_minio: http://minioadmin:minioadmin@minio:9000
entrypoint:
- sh
- -c
- |
until mc ls minio > /dev/null 2>&1; do
sleep 0.5
done

mc mb --ignore-existing minio/buck
volumes:
minio-data:
7 changes: 7 additions & 0 deletions continuous_integration/scripts/start_minio.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
set -e

# Start MinIO server in docker container
docker pull minio/minio:latest
docker compose -f continuous_integration/docker-configs/minio-docker-compose.yml up -d
docker ps
17 changes: 17 additions & 0 deletions example_configs/bucket_storage/bucket_storage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
authentication:
allow_anonymous_access: false
trees:
- path: /
tree: catalog
args:
uri: "sqlite:///storage/catalog.db"
writable_storage:
- provider: s3
uri: "http://localhost:9000"
config:
access_key_id: "minioadmin"
secret_access_key: "minioadmin"
bucket: "buck"
virtual_hosted_style_request: False
client_options: {"allow_http": True}
init_if_not_exists: true
29 changes: 29 additions & 0 deletions example_configs/bucket_storage/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
version: "3.2"
services:
minio:
image: minio/minio:latest
ports:
- 9000:9000
- 9001:9001
volumes:
- minio-data:/data
environment:
MINIO_ROOT_USER: "minioadmin"
MINIO_ROOT_PASSWORD: "minioadmin"
command: server /data --console-address :9001
restart: unless-stopped
create-bucket:
image: minio/mc:latest
environment:
MC_HOST_minio: http://minioadmin:minioadmin@minio:9000
entrypoint:
- sh
- -c
- |
until mc ls minio > /dev/null 2>&1; do
sleep 0.5
done

mc mb --ignore-existing minio/buck
volumes:
minio-data:
18 changes: 18 additions & 0 deletions example_configs/bucket_storage/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Create a local bucket for testing access to BLOBS

In this example there exists:
- A `docker-compose.yml` file capable of instantiating and running a [Minio](https://min.io/) container.
- A configuration YAML file `bucket_storage.yml` which contains information Tiled needs to authenticate with the bucket storage system and write / read Binary Large Objects (BLOBs) through the Zarr adapter.

## How to run this example:
1. In one terminal window, navigate to the directory where the `docker-compose.yml` and `bucket_storage.yml` are.
2. Run `docker compose up` with adequate permissions.
3. Open another terminal window in the same location and run `tiled serve config bucket_storage.yml --api-key secret`
4. You will need to create a `storage` directory in `/example_configs/bucket_storage` for the sqlite database.
5. Create an `ipython` session and run the following commands to write array data as a BLOB in a bucket:
```python
from tiled.client import from_uri
c = from_uri('http://localhost:8000', api_key='secret')
c.write_array([1,2,3])
```
6. You will be able to see the written data in the bucket if you log in to the MinIO container, exposed on your machine at `http://localhost:9001/login`. <br/> Use testing credentials `minioadmin` for both fields.
1 change: 1 addition & 0 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ flake8 = "*"
ipython = "*"
ldap3 = "*"
matplotlib = "*"
minio = "*"
mistune = "*"
myst-parser = "*"
numpydoc = "*"
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,12 @@ all = [
"jinja2",
"jmespath",
"lz4",
"minio",
"ndindex",
"numcodecs",
"numpy",
"numba >=0.59.0", # indirect, pinned to assist uv solve
"obstore",
"openpyxl",
"packaging",
"pandas",
Expand Down Expand Up @@ -156,6 +158,7 @@ dev = [
"ldap3",
"locust",
"matplotlib",
"minio",
"mistune",
"myst-parser",
"numpydoc",
Expand Down Expand Up @@ -246,6 +249,7 @@ server = [
"numba >=0.59.0", # indirect, pinned to assist uv solve
"numcodecs",
"numpy",
"obstore",
"openpyxl",
"packaging",
"pandas",
Expand Down
35 changes: 35 additions & 0 deletions tiled/_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tempfile
from pathlib import Path
from typing import Any
from urllib.parse import urlparse

import asyncpg
import pytest
Expand Down Expand Up @@ -313,6 +314,40 @@ def redis_uri():
raise pytest.skip("No TILED_TEST_REDIS configured")


@pytest.fixture
def minio_uri():
    """Reset (or create) the MinIO test bucket and return its URI.

    Reads ``TILED_TEST_BUCKET``, e.g.
    ``http://minioadmin:minioadmin@localhost:9000/buck``.
    Skips the test when the variable is not set.

    Returns
    -------
    str
        The original ``TILED_TEST_BUCKET`` URI, so tests do not need to
        re-read the environment variable.
    """
    uri = os.getenv("TILED_TEST_BUCKET")
    if not uri:
        pytest.skip("No TILED_TEST_BUCKET configured")

    from minio import Minio
    from minio.deleteobjects import DeleteObject

    # For convenience, the bucket name rides in the URL path; split it
    # off so the remainder is the service endpoint.
    url = urlparse(uri)
    bucket = url.path.lstrip("/")

    # minio.Minio expects a bare "host:port" endpoint -- the scheme is
    # selected via the `secure` flag, so passing a full URL (with
    # "http://") would produce an invalid endpoint.
    client = Minio(
        "{}:{}".format(url.hostname, url.port),
        access_key=url.username,
        secret_key=url.password,
        secure=url.scheme == "https",
    )

    # Reset the state of the bucket before each test that uses it.
    if client.bucket_exists(bucket):
        delete_object_list = map(
            lambda obj: DeleteObject(obj.object_name),
            client.list_objects(bucket, recursive=True),
        )
        # remove_objects is lazy; iterate to actually perform deletions.
        for error in client.remove_objects(bucket, delete_object_list):
            print("error occurred when deleting object", error)
    else:
        client.make_bucket(bucket)

    return uri


@pytest.fixture(scope="function")
def tiled_websocket_context(tmpdir, redis_uri):
"""Fixture that provides a Tiled context with websocket support."""
Expand Down
22 changes: 21 additions & 1 deletion tiled/_tests/test_writing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
"""

import base64
import os
import threading
import uuid
from datetime import datetime
from urllib.parse import urlparse

import awkward
import dask.dataframe
Expand Down Expand Up @@ -48,11 +50,29 @@

@pytest.fixture
def tree(tmpdir):
    """Build an in-memory catalog with file, DuckDB and (optionally) S3 storage.

    When ``TILED_TEST_BUCKET`` is set (e.g.
    ``http://minioadmin:minioadmin@localhost:9000/buck``), an S3/MinIO
    writable-storage entry is appended alongside the default file and
    DuckDB storages.
    """
    writable_storage = [
        f"file://localhost{str(tmpdir / 'data')}",
        f"duckdb:///{tmpdir / 'data.duckdb'}",
    ]
    if uri := os.getenv("TILED_TEST_BUCKET"):
        url = urlparse(uri)
        # The bucket name is carried in the URL path; the remainder is
        # the object-store endpoint.
        bucket = url.path.lstrip("/")
        endpoint = url._replace(
            netloc="{}:{}".format(url.hostname, url.port), path=""
        )
        # NOTE: list.append() returns None, so never pass its result to
        # in_memory(); mutate the list first, then hand the list over.
        writable_storage.append(
            {
                "provider": "s3",
                "uri": endpoint.geturl(),
                "config": {
                    "access_key_id": url.username,
                    "secret_access_key": url.password,
                    "bucket": bucket,
                    "virtual_hosted_style_request": False,
                    "client_options": {"allow_http": True},
                },
            }
        )
    return in_memory(writable_storage=writable_storage)


Expand Down
66 changes: 45 additions & 21 deletions tiled/adapters/zarr.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# mypy: ignore-errors
import builtins
import copy
import os
from importlib.metadata import version
from typing import Any, Iterator, List, Optional, Set, Tuple, Union, cast
from urllib.parse import quote_plus
from urllib.parse import quote_plus, urlparse

import zarr
from numpy._typing import NDArray
Expand All @@ -17,7 +18,7 @@
from ..catalog.orm import Node
from ..iterviews import ItemsView, KeysView, ValuesView
from ..ndslice import NDSlice
from ..storage import FileStorage, Storage
from ..storage import FileStorage, ObjectStorage, Storage, get_storage
from ..structures.array import ArrayStructure
from ..structures.core import Spec, StructureFamily
from ..structures.data_source import Asset, DataSource
Expand All @@ -30,8 +31,10 @@
from zarr.storage import DirectoryStore as LocalStore
from zarr.storage import init_array as create_array
else:
from obstore.store import AzureStore, GCSStore, S3Store
from zarr import create_array
from zarr.storage import LocalStore
from zarr.storage import LocalStore, ObjectStore


INLINED_DEPTH = int(os.getenv("TILED_HDF5_INLINED_CONTENTS_MAX_DEPTH", "7"))

Expand Down Expand Up @@ -60,29 +63,33 @@ def init_storage(
path_parts: List[str],
) -> DataSource[ArrayStructure]:
data_source = copy.deepcopy(data_source) # Do not mutate caller input.
data_uri = storage.uri + "".join(
f"/{quote_plus(segment)}" for segment in path_parts
)

# Zarr requires evenly-sized chunks within each dimension.
# Use the first chunk along each dimension.
zarr_chunks = tuple(dim[0] for dim in data_source.structure.chunks)
shape = tuple(dim[0] * len(dim) for dim in data_source.structure.chunks)
directory = path_from_uri(data_uri)
directory.mkdir(parents=True, exist_ok=True)
store = LocalStore(str(directory))
create_array(
store,
shape=shape,
chunks=zarr_chunks,
dtype=data_source.structure.data_type.to_numpy_dtype(),
)

data_path = "/".join(quote_plus(segment) for segment in path_parts)
if isinstance(storage, ObjectStorage):
zarr_store = ObjectStore(store=storage.get_object_store(prefix=data_path))

if isinstance(storage, FileStorage):
directory = path_from_uri(storage.uri + data_path)
directory.mkdir(parents=True, exist_ok=True)
zarr_store = LocalStore(str(directory))
data_source.assets.append(
Asset(
data_uri=data_uri,
data_uri=storage.uri + data_path,
is_directory=True,
parameter="data_uri",
)
)
create_array(
zarr_store,
shape=shape,
chunks=zarr_chunks,
dtype=data_source.structure.data_type.to_numpy_dtype(),
)
return data_source

@property
Expand Down Expand Up @@ -205,7 +212,7 @@ async def patch(

@classmethod
def supported_storage(cls) -> Set[type[Storage]]:
return {FileStorage}
return {FileStorage, ObjectStorage}


class ZarrGroupAdapter(
Expand Down Expand Up @@ -298,10 +305,27 @@ def from_catalog(
/,
**kwargs: Optional[Any],
) -> Union[ZarrGroupAdapter, ZarrArrayAdapter]:
zarr_obj = zarr.open(
path_from_uri(data_source.assets[0].data_uri)
) # Group or Array
if node.structure_family == StructureFamily.container:
is_container_type = node.structure_family == StructureFamily.container
uri = data_source.assets[0].data_uri

if urlparse(uri).scheme == "file":
# This is a file-based Zarr storage
zarr_obj = zarr.open(path_from_uri(uri))

elif urlparse(uri).scheme in ("s3", "http", "https"):
# This is an object-store-based Zarr storage
base_uri, blob_path = ObjectStorage.split_blob_uri(storage_uri)
storage = cast(ObjectStorage, get_storage(base_uri))
zarr_store = ObjectStore(store=storage.get_object_store())
if is_container_type:
zarr_obj = zarr.open_group(store=zarr_store, path=blob_path)
else:
zarr_obj = zarr.open_array(store=zarr_store, path=blob_path)

else:
raise TypeError(f"Unsupported URI scheme in {uri}")

if is_container_type:
return ZarrGroupAdapter(
zarr_obj,
metadata=node.metadata_,
Expand Down
Loading
Loading