Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/source/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ Use the `ome_zarr` command to view Zarr data in the https://ome.github.io/ome-ng

ome_zarr view 6001240.zarr/

finder
======

Use the `ome_zarr` command to display multiple OME-Zarr images in the BioFile Finder app
in a browser. This command searches the specified directory (including its subdirectories)
for OME-Zarr Images and Plates, lists them in a `biofile_finder.csv` file written into that
directory, and opens this file in the app, which allows you to browse thumbnails of all images::

ome_zarr finder /path/to/dir/

download
========

Expand Down
29 changes: 26 additions & 3 deletions ome_zarr/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .data import astronaut, coins, create_zarr
from .scale import Scaler
from .utils import download as zarr_download
from .utils import finder as bff_finder
from .utils import info as zarr_info
from .utils import view as zarr_view

Expand Down Expand Up @@ -37,6 +38,12 @@ def view(args: argparse.Namespace) -> None:
zarr_view(args.path, args.port)


def finder(args: argparse.Namespace) -> None:
    """Run the ``finder`` sub-command.

    Calls ``config_logging`` with ``logging.WARN`` and the parsed arguments,
    then forwards ``args.path`` and ``args.port`` to
    :func:`~ome_zarr.utils.finder`.
    """
    config_logging(logging.WARN, args)
    directory, port = args.path, args.port
    bff_finder(directory, port)


def download(args: argparse.Namespace) -> None:
"""Wrap the :func:`~ome_zarr.utils.download` method."""
config_logging(logging.WARN, args)
Expand Down Expand Up @@ -103,7 +110,7 @@ def main(args: Union[list[str], None] = None) -> None:

# info
parser_info = subparsers.add_parser("info")
parser_info.add_argument("path")
parser_info.add_argument("path", help="Path to image.zarr")
parser_info.add_argument("--stats", action="store_true")
parser_info.set_defaults(func=info)

Expand All @@ -115,10 +122,26 @@ def main(args: Union[list[str], None] = None) -> None:

# view (in ome-ngff-validator in a browser)
parser_view = subparsers.add_parser("view")
parser_view.add_argument("path")
parser_view.add_argument("--port", type=int, default=8000)
parser_view.add_argument(
"path",
help="Path to image.zarr to open in ome-ngff-validator",
)
parser_view.add_argument(
"--port", type=int, default=8000, help="Port to serve the data (default: 8000)"
)
parser_view.set_defaults(func=view)

# finder (open a dir of images in BioFile Finder in a browser)
parser_finder = subparsers.add_parser("finder")
parser_finder.add_argument(
"path",
help="Directory to open in BioFile Finder",
)
parser_finder.add_argument(
"--port", type=int, default=8000, help="Port to serve the data (default: 8000)"
)
parser_finder.set_defaults(func=finder)

# create
parser_create = subparsers.add_parser("create")
parser_create.add_argument(
Expand Down
212 changes: 206 additions & 6 deletions ome_zarr/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""Utility methods for ome_zarr access."""

import csv
import json
import logging
import os
import urllib
import webbrowser
import xml.etree.ElementTree as ET
from collections.abc import Iterator
from datetime import datetime
from http.server import ( # type: ignore[attr-defined]
HTTPServer,
SimpleHTTPRequestHandler,
Expand Down Expand Up @@ -52,12 +56,31 @@ def info(path: str, stats: bool = False) -> Iterator[Node]:
yield node


def view(input_path: str, port: int = 8000) -> None:
def view(input_path: str, port: int = 8000, dry_run: bool = False) -> None:
# serve the parent directory in a simple server with CORS. Open browser
# dry_run is for testing, so we don't open the browser or start the server

zarrs = []
if (Path(input_path) / ".zattrs").exists():
zarrs = find_multiscales(Path(input_path))
if len(zarrs) == 0:
print(
f"No OME-Zarr images found in {input_path}. "
f"Try $ ome_zarr finder {input_path}"
)
return

parent_dir, image_name = os.path.split(input_path)
if len(image_name) == 0:
parent_dir, image_name = os.path.split(parent_dir)
parent_dir = str(parent_dir)

# open ome-ngff-validator in a web browser...
url = (
f"https://ome.github.io/ome-ngff-validator/"
f"?source=http://localhost:{port}/{image_name}"
)

class CORSRequestHandler(SimpleHTTPRequestHandler):
def end_headers(self) -> None:
self.send_header("Access-Control-Allow-Origin", "*")
Expand All @@ -70,11 +93,188 @@ def translate_path(self, path: str) -> str:
super_path = super().translate_path(path)
return super_path

# open ome-ngff-validator in a web browser...
url = (
f"https://ome.github.io/ome-ngff-validator/"
f"?source=http://localhost:{port}/{image_name}"
)
# for testing
if dry_run:
return

# Open in browser...
webbrowser.open(url)

# ...then start serving content
test(CORSRequestHandler, HTTPServer, port=port)


def find_multiscales(path_to_zattrs: Path) -> list[list]:
    """Return the images described by ``path_to_zattrs / ".zattrs"``.

    Each image is a ``[path_to_zarr, name, dirname]`` list:

    * ``path_to_zarr`` -- path to the multiscales image,
      e.g. ``full/path/to/image.zarr/0``
    * ``name`` -- image name, e.g. ``"image.zarr Series:0"``
    * ``dirname`` -- directory containing the zarr, e.g. ``full/path/to``
      (callers use it for tags/folders)

    The ``.zattrs`` content is checked in this order:

    1. ``"plate"`` key present: a single entry pointing at field ``"0"`` of
       the first listed well, named after the plate directory.
    2. ``"bioformats2raw.layout" == 3``: one entry per top-level element of
       ``OME/METADATA.ome.xml`` whose tag ends with ``Image``, stored in
       consecutively numbered groups ``0/``, ``1/``, ...
    3. truthy ``"multiscales"``: a single entry for the group itself.

    :param path_to_zattrs: directory that contains a ``.zattrs`` file.
    :return: list of ``[path_to_zarr, name, dirname]`` entries; empty when
        nothing is recognised, the plate lists no wells, or the OME-XML
        metadata cannot be read.
    :raises OSError: if the ``.zattrs`` file cannot be opened.
    :raises json.JSONDecodeError: if the ``.zattrs`` file is not valid JSON.
    """
    # JSON text is UTF-8; don't depend on the platform default encoding.
    with open(path_to_zattrs / ".zattrs", encoding="utf-8") as f:
        zattrs = json.load(f)

    zarr_name = os.path.basename(path_to_zattrs)
    zarr_dirname = os.path.dirname(path_to_zattrs)

    if "plate" in zattrs:
        # A missing or null "wells" entry is treated like an empty list.
        wells = (zattrs["plate"] or {}).get("wells") or []
        if not wells:
            LOGGER.info("No wells found in plate %s", path_to_zattrs)
            return []
        # Only the first field of the first well represents the plate.
        field = "0"
        path_to_zarr = path_to_zattrs / wells[0].get("path") / field
        return [[path_to_zarr, zarr_name, zarr_dirname]]

    if zattrs.get("bioformats2raw.layout") == 3:
        try:
            tree = ET.parse(path_to_zattrs / "OME" / "METADATA.ome.xml")
        except (OSError, ET.ParseError) as ex:
            # Best effort: a missing or malformed metadata file should not
            # abort a scan of many directories.
            print(ex)
            return []
        # spec says "If the "series" attribute does not exist and no "plate" is
        # present, separate "multiscales" images MUST be stored in consecutively
        # numbered groups starting from 0 (i.e. "0/", "1/", "2/", "3/", ...)."
        # tag is eg. {http://www.openmicroscopy.org/Schemas/OME/2016-06}Image
        image_elements = [
            child for child in tree.getroot() if child.tag.endswith("Image")
        ]
        return [
            [
                path_to_zattrs / str(series),
                # Get Name from XML metadata, otherwise use path and Series
                element.attrib.get("Name", f"{zarr_name} Series:{series}"),
                zarr_dirname,
            ]
            for series, element in enumerate(image_elements)
        ]

    if zattrs.get("multiscales"):
        return [[path_to_zattrs, zarr_name, zarr_dirname]]

    return []


def splitall(path: "str | os.PathLike[str]") -> list:
    """Split ``path`` into the list of its components.

    ``os.path.split()`` is applied repeatedly, peeling one component off the
    right-hand side each time, until the remainder can no longer be split.

    :param path: path string, or any path-like object (converted with
        ``os.fspath`` so that it compares equal to the pieces returned by
        ``os.path.split``).
    :return: components in left-to-right order. For an absolute path the
        first component is the root; a trailing separator produces a final
        empty-string component.
    """
    # os.path.split() always returns plain strings; without this, a Path
    # argument would never match either sentinel on the first pass.
    path = os.fspath(path)

    # Components are collected right-to-left and reversed once at the end.
    reversed_parts = []
    while True:
        head, tail = os.path.split(path)
        if head == path:  # sentinel for absolute paths
            reversed_parts.append(head)
            break
        if tail == path:  # sentinel for relative paths
            reversed_parts.append(tail)
            break
        reversed_parts.append(tail)
        path = head
    reversed_parts.reverse()
    return reversed_parts


def finder(input_path: str, port: int = 8000, dry_run=False) -> None:
# serve the parent directory in a simple server with CORS. Open browser
# dry_run is for testing, so we don't open the browser or start the server
parent_path, server_dir = os.path.split(input_path)
# in case input_path had trailing slash, we go one level up...
if len(server_dir) == 0:
parent_path, server_dir = os.path.split(parent_path)

# 'input_path' is path passed to the script. To the data dir. E.g. "ZARR/data"
# 'parent_path', e.g. "ZARR" just for running http server
# 'server_dir' is the name of our top-level dir E.g. "data"

# We will be serving the data from last dir in /parent/dir/path
# so we need to use that as base for image URLs...

# walk the input path to find all .zattrs files...
def walk(path: Path):
if (path / ".zattrs").exists():
yield from find_multiscales(path)
else:
for p in path.iterdir():
if (p / ".zattrs").exists():
yield from find_multiscales(p)
elif p.is_dir():
yield from walk(p)
else:
continue

url = None
zarrs = list(walk(Path(input_path)))

# If we have just one zarr, open ome-ngff-validator in a web browser...
if len(zarrs) == 0:
print("No OME-Zarr files found in", input_path)
return
else:
# ...otherwise write to CSV file and open in BioFile Finder
col_names = ["File Path", "File Name", "Folders", "Uploaded"]
# write csv file into the dir we're serving from...
bff_csv = os.path.join(input_path, "biofile_finder.csv")

with open(bff_csv, "w", newline="") as csvfile:
writer = csv.writer(csvfile, delimiter=",")
writer.writerow(col_names)
for zarr_img in zarrs:
# zarr paths start with full path to img
# e.g. ZARR/data/to/img (from walk("ZARR/data"))
# but we want them to be from the server_dir to img, e.g "data/to/img".
# So we want relative /to/img path, from input_path -> to img
relpath = os.path.relpath(zarr_img[0], input_path)
# On Windows, we need to replace \\ with / in relpath for URL
rel_url = "/".join(splitall(relpath))
file_path = f"http://localhost:{port}/{server_dir}/{rel_url}"
name = zarr_img[1] or os.path.basename(zarr_img[0])
# folders is "f1,f2,f3" etc.
folders_path = os.path.relpath(zarr_img[2], input_path)
folders = ",".join(splitall(folders_path))
timestamp = ""
try:
mtime = os.path.getmtime(zarr_img[0])
# format mtime as "YYYY-MM-DD HH:MM:SS.Z"
timestamp = datetime.fromtimestamp(mtime).strftime(
"%Y-%m-%d %H:%M:%S.%Z"
)
except OSError:
pass
writer.writerow([file_path, name, folders, timestamp])

source = {
"uri": f"http://localhost:{port}/{server_dir}/biofile_finder.csv",
"type": "csv",
"name": "biofile_finder.csv",
}
s = urllib.parse.quote(json.dumps(source))
url = f"https://bff.allencell.org/app?source={s}"
# show small thumbnails view by default. (v=3 for big thumbnails)
url += "&v=2"

class CORSRequestHandler(SimpleHTTPRequestHandler):
def end_headers(self) -> None:
self.send_header("Access-Control-Allow-Origin", "*")
SimpleHTTPRequestHandler.end_headers(self)

def translate_path(self, path: str) -> str:
# Since we don't call the class constructor ourselves,
# we set the directory here instead
self.directory = parent_path
super_path = super().translate_path(path)
return super_path

# for testing
if dry_run:
return

# Open in browser...
webbrowser.open(url)

# ...then start serving content
Expand Down
46 changes: 45 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
from pathlib import Path

import pytest
import zarr

from ome_zarr.cli import main
from ome_zarr.utils import strip_common_prefix
from ome_zarr.utils import finder, strip_common_prefix, view
from ome_zarr.writer import write_plate_metadata


def directory_items(directory: Path):
Expand Down Expand Up @@ -102,3 +104,45 @@ def _rotate_and_test(self, *hierarchy: Path, reverse: bool = True):
secondpass: deque = deque(hierarchy)
secondpass.reverse()
self._rotate_and_test(*list(secondpass), reverse=False)

def test_view(self):
    """Create an astronaut image via the CLI, then call ``view`` on it."""
    image_path = f"{self.path}-4"
    main(["create", "--method=astronaut", image_path])
    # The CLI has no dry_run option yet, so ``view`` is called directly
    # rather than through ``main``; dry_run=True stops the test from
    # opening a browser or blocking on the HTTP server.
    view(image_path, port=8000, dry_run=True)

def test_finder(self):
    """Build a small tree of images and check the CSV written by ``finder``.

    The tree holds two images created through the CLI, a minimal
    bioformats2raw layout with OME-XML metadata, and a plate.
    """
    images_root = (self.path / "images").mkdir()
    nested_dir = (images_root / "dir2").mkdir()
    bf2raw_root = (images_root / "bf2raw.zarr").mkdir()

    astronaut_path = str(images_root / "astronaut")
    main(["create", "--method=astronaut", astronaut_path])
    coins_path = str(nested_dir / "coins")
    main(["create", "--method=coins", coins_path])
    (bf2raw_root / "OME").mkdir()

    # write minimal bioformats2raw and xml metadata
    with open(bf2raw_root / ".zattrs", "w") as attrs_file:
        attrs_file.write('{"bioformats2raw.layout" : 3}')
    ome_xml = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<OME><Image ID="Image:1" Name="test.fake"></Image></OME>\n'
    )
    with open(bf2raw_root / "OME" / "METADATA.ome.xml", "w") as xml_file:
        xml_file.write(ome_xml)

    # create a plate
    plate_root = (nested_dir / "plate").mkdir()
    plate_group = zarr.group(store=zarr.DirectoryStore(str(plate_root)))
    write_plate_metadata(plate_group, ["A"], ["1"], ["A/1"])

    # dry_run=True: write the CSV without opening a browser or serving
    finder(images_root, port=8000, dry_run=True)

    csv_file = images_root / "biofile_finder.csv"
    assert csv_file.exists()
    csv_text = (images_root / "biofile_finder.csv").read_text(encoding="utf-8")
    print(csv_text)
    expected_fragments = (
        "File Path,File Name,Folders,Uploaded",
        "dir2/plate/A/1/0,plate,dir2",
        "coins,dir2",
        "test.fake",
    )
    for fragment in expected_fragments:
        assert fragment in csv_text