def finder(args: argparse.Namespace) -> None:
    """Wrap the :func:`~ome_zarr.utils.finder` method.

    Logging is configured at warning level before the directory given on the
    command line is handed to the BioFile Finder helper.

    :param args: parsed command-line arguments; ``args.path`` is the directory
        to scan and ``args.port`` is the port used to serve the data.
    """
    config_logging(logging.WARNING, args)
    directory, port = args.path, args.port
    bff_finder(directory, port)
type=int, default=8000) + parser_view.add_argument( + "path", + help="Path to image.zarr to open in ome-ngff-validator", + ) + parser_view.add_argument( + "--port", type=int, default=8000, help="Port to serve the data (default: 8000)" + ) parser_view.set_defaults(func=view) + # finder (open a dir of images in BioFile Finder in a browser) + parser_finder = subparsers.add_parser("finder") + parser_finder.add_argument( + "path", + help="Directory to open in BioFile Finder", + ) + parser_finder.add_argument( + "--port", type=int, default=8000, help="Port to serve the data (default: 8000)" + ) + parser_finder.set_defaults(func=finder) + # create parser_create = subparsers.add_parser("create") parser_create.add_argument( diff --git a/ome_zarr/utils.py b/ome_zarr/utils.py index c929c283..dc0a0e62 100644 --- a/ome_zarr/utils.py +++ b/ome_zarr/utils.py @@ -1,10 +1,14 @@ """Utility methods for ome_zarr access.""" +import csv import json import logging import os +import urllib import webbrowser +import xml.etree.ElementTree as ET from collections.abc import Iterator +from datetime import datetime from http.server import ( # type: ignore[attr-defined] HTTPServer, SimpleHTTPRequestHandler, @@ -52,12 +56,31 @@ def info(path: str, stats: bool = False) -> Iterator[Node]: yield node -def view(input_path: str, port: int = 8000) -> None: +def view(input_path: str, port: int = 8000, dry_run: bool = False) -> None: # serve the parent directory in a simple server with CORS. Open browser + # dry_run is for testing, so we don't open the browser or start the server + + zarrs = [] + if (Path(input_path) / ".zattrs").exists(): + zarrs = find_multiscales(Path(input_path)) + if len(zarrs) == 0: + print( + f"No OME-Zarr images found in {input_path}. 
def find_multiscales(path_to_zattrs):
    """Return the images described by the ``.zattrs`` file of a zarr directory.

    Each image is a three-item list ``[path_to_zarr, name, dirname]``:

    * ``path_to_zarr`` - :class:`~pathlib.Path` of the multiscales image,
      e.g. ``full/path/to/image.zarr/0``
    * ``name`` - display name of the image
    * ``dirname`` - parent directory of ``path_to_zattrs`` as a string,
      e.g. ``full/path/to`` (callers use it for Tags)

    Three layouts are recognised, checked in this order:

    * ``plate``: a single entry pointing at field ``"0"`` of the first listed
      well, named after the plate directory.
    * ``bioformats2raw.layout == 3``: one entry per ``Image`` element found in
      ``OME/METADATA.ome.xml``, numbered consecutively from 0. The name is the
      element's ``Name`` attribute, or ``"<basename> Series:<n>"`` without one.
    * ``multiscales``: the directory itself.

    :param path_to_zattrs: directory containing the ``.zattrs`` file, given
        as a ``str`` or a path object.
    :return: list of images; empty when nothing is recognised, when a plate
        lists no wells, or when the bioformats2raw XML cannot be read.
    :raises OSError: if the ``.zattrs`` file cannot be opened.
    :raises json.JSONDecodeError: if ``.zattrs`` is not valid JSON.
    """
    # Accept plain strings too: the "/" joins below need a Path object.
    zarr_dir = Path(path_to_zattrs)
    name = os.path.basename(zarr_dir)
    dirname = os.path.dirname(zarr_dir)

    # JSON metadata is UTF-8; don't depend on the platform default encoding.
    with open(zarr_dir / ".zattrs", encoding="utf-8") as f:
        zattrs = json.load(f)

    if "plate" in zattrs:
        # Tolerate a missing/null "wells" entry instead of failing on len(None)
        wells = (zattrs.get("plate") or {}).get("wells") or []
        if not wells:
            LOGGER.info("No wells found in plate %s", zarr_dir)
            return []
        # The plate is represented by the first field of its first well
        field = "0"
        return [[zarr_dir / wells[0].get("path") / field, name, dirname]]

    if zattrs.get("bioformats2raw.layout") == 3:
        xml_path = zarr_dir / "OME" / "METADATA.ome.xml"
        try:
            root = ET.parse(xml_path).getroot()
        except (OSError, ET.ParseError) as ex:
            # Best effort: unreadable metadata means "no images here",
            # it must not abort a scan of the surrounding directories.
            LOGGER.warning("Could not read %s: %s", xml_path, ex)
            return []
        # spec says "If the "series" attribute does not exist and no "plate" is
        # present, separate "multiscales" images MUST be stored in consecutively
        # numbered groups starting from 0 (i.e. "0/", "1/", "2/", "3/", ...)."
        images = []
        series = 0
        for child in root:
            # tag is eg. {http://www.openmicroscopy.org/Schemas/OME/2016-06}Image
            if not child.tag.endswith("Image"):
                continue
            # Get Name from XML metadata, otherwise use path and Series
            img_name = child.attrib.get("Name", f"{name} Series:{series}")
            images.append([zarr_dir / str(series), img_name, dirname])
            series += 1
        return images

    if zattrs.get("multiscales"):
        return [[zarr_dir, name, dirname]]
    return []


def splitall(path):
    """Split ``path`` into the list of all its components.

    :func:`os.path.split` is applied repeatedly, so an absolute path keeps
    its root as the first item and a trailing separator yields a final
    empty string.

    :param path: path to split.
    :return: list of components, outermost first.
    """
    parts = []
    while True:
        head, tail = os.path.split(path)
        if head == path:  # sentinel for absolute paths
            parts.append(head)
            break
        if tail == path:  # sentinel for relative paths
            parts.append(tail)
            break
        parts.append(tail)
        path = head
    # Components were collected innermost-first; one reverse at the end
    # replaces a front insert on every iteration.
    parts.reverse()
    return parts
int = 8000, dry_run=False) -> None: + # serve the parent directory in a simple server with CORS. Open browser + # dry_run is for testing, so we don't open the browser or start the server + parent_path, server_dir = os.path.split(input_path) + # in case input_path had trailing slash, we go one level up... + if len(server_dir) == 0: + parent_path, server_dir = os.path.split(parent_path) + + # 'input_path' is path passed to the script. To the data dir. E.g. "ZARR/data" + # 'parent_path', e.g. "ZARR" just for running http server + # 'server_dir' is the name of our top-level dir E.g. "data" + + # We will be serving the data from last dir in /parent/dir/path + # so we need to use that as base for image URLs... + + # walk the input path to find all .zattrs files... + def walk(path: Path): + if (path / ".zattrs").exists(): + yield from find_multiscales(path) + else: + for p in path.iterdir(): + if (p / ".zattrs").exists(): + yield from find_multiscales(p) + elif p.is_dir(): + yield from walk(p) + else: + continue + + url = None + zarrs = list(walk(Path(input_path))) + + # If we have just one zarr, open ome-ngff-validator in a web browser... + if len(zarrs) == 0: + print("No OME-Zarr files found in", input_path) + return + else: + # ...otherwise write to CSV file and open in BioFile Finder + col_names = ["File Path", "File Name", "Folders", "Uploaded"] + # write csv file into the dir we're serving from... + bff_csv = os.path.join(input_path, "biofile_finder.csv") + + with open(bff_csv, "w", newline="") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerow(col_names) + for zarr_img in zarrs: + # zarr paths start with full path to img + # e.g. ZARR/data/to/img (from walk("ZARR/data")) + # but we want them to be from the server_dir to img, e.g "data/to/img". 
+ # So we want relative /to/img path, from input_path -> to img + relpath = os.path.relpath(zarr_img[0], input_path) + # On Windows, we need to replace \\ with / in relpath for URL + rel_url = "/".join(splitall(relpath)) + file_path = f"http://localhost:{port}/{server_dir}/{rel_url}" + name = zarr_img[1] or os.path.basename(zarr_img[0]) + # folders is "f1,f2,f3" etc. + folders_path = os.path.relpath(zarr_img[2], input_path) + folders = ",".join(splitall(folders_path)) + timestamp = "" + try: + mtime = os.path.getmtime(zarr_img[0]) + # format mtime as "YYYY-MM-DD HH:MM:SS.Z" + timestamp = datetime.fromtimestamp(mtime).strftime( + "%Y-%m-%d %H:%M:%S.%Z" + ) + except OSError: + pass + writer.writerow([file_path, name, folders, timestamp]) + + source = { + "uri": f"http://localhost:{port}/{server_dir}/biofile_finder.csv", + "type": "csv", + "name": "biofile_finder.csv", + } + s = urllib.parse.quote(json.dumps(source)) + url = f"https://bff.allencell.org/app?source={s}" + # show small thumbnails view by default. (v=3 for big thumbnails) + url += "&v=2" + + class CORSRequestHandler(SimpleHTTPRequestHandler): + def end_headers(self) -> None: + self.send_header("Access-Control-Allow-Origin", "*") + SimpleHTTPRequestHandler.end_headers(self) + + def translate_path(self, path: str) -> str: + # Since we don't call the class constructor ourselves, + # we set the directory here instead + self.directory = parent_path + super_path = super().translate_path(path) + return super_path + + # for testing + if dry_run: + return + + # Open in browser... 
def test_view(self):
    """Smoke-test ``view`` on a freshly created image."""
    filename = str(self.path) + "-4"
    main(["create", "--method=astronaut", filename])
    # The CLI doesn't support the dry_run option yet, so call the utility
    # directly: dry_run=True makes view() return before it opens the browser
    # or starts the (blocking) server.
    view(filename, 8000, True)


def test_finder(self):
    """Check the CSV that ``finder`` writes for a directory of mixed zarrs.

    The directory holds a plain image, an image in a sub-directory, a
    bioformats2raw layout and a plate.
    """
    # NOTE(review): relies on mkdir() returning the created directory
    # (presumably self.path is a py.path.local fixture) - confirm against
    # the class setup.
    img_dir = (self.path / "images").mkdir()
    img_dir2 = (img_dir / "dir2").mkdir()
    bf2raw_dir = (img_dir / "bf2raw.zarr").mkdir()
    main(["create", "--method=astronaut", (str(img_dir / "astronaut"))])
    main(["create", "--method=coins", (str(img_dir2 / "coins"))])
    (bf2raw_dir / "OME").mkdir()

    # write minimal bioformats2raw and xml metadata
    with open(bf2raw_dir / ".zattrs", "w") as f:
        f.write("""{"bioformats2raw.layout" : 3}""")
    with open(bf2raw_dir / "OME" / "METADATA.ome.xml", "w") as f:
        # The document must contain an Image named "test.fake": an empty
        # file cannot be parsed, and the final assertion below looks for
        # that name in the CSV.
        f.write(
            '<OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06">'
            '<Image ID="Image:0" Name="test.fake"></Image>'
            "</OME>"
        )

    # create a plate
    plate_dir = (img_dir2 / "plate").mkdir()
    store = zarr.DirectoryStore(str(plate_dir))
    root = zarr.group(store=store)
    write_plate_metadata(root, ["A"], ["1"], ["A/1"])

    # dry_run=True: write the CSV but skip the browser and the server
    finder(img_dir, 8000, True)

    assert (img_dir / "biofile_finder.csv").exists()
    csv_text = (img_dir / "biofile_finder.csv").read_text(encoding="utf-8")
    print(csv_text)
    assert "File Path,File Name,Folders,Uploaded" in csv_text
    assert "dir2/plate/A/1/0,plate,dir2" in csv_text
    assert "coins,dir2" in csv_text
    assert "test.fake" in csv_text