ome · will-moore · Apr 15, 2025 · Feb 13, 2025 · Feb 13, 2025 · Mar 7, 2025
diff --git a/docs/source/cli.rst b/docs/source/cli.rst
@@ -32,6 +32,16 @@ Use the `ome_zarr` command to view Zarr data in the https://ome.github.io/ome-ng
 
     ome_zarr view 6001240.zarr/
 
+finder
+======
+
+Use the `ome_zarr` command to display multiple OME-Zarr images in the BioFile Finder app
+in a browser. This command parses the specified directory to find all OME-Zarr Images
+and Plates, combines them into a `biofile_finder.csv` file and opens this in the
+app, which allows you to browse thumbnails of all images::
+
+    ome_zarr finder /path/to/dir/
+
 download
 ========
 

diff --git a/ome_zarr/cli.py b/ome_zarr/cli.py
@@ -9,6 +9,7 @@
 from .data import astronaut, coins, create_zarr
 from .scale import Scaler
 from .utils import download as zarr_download
+from .utils import finder as bff_finder
 from .utils import info as zarr_info
 from .utils import view as zarr_view
 
@@ -37,6 +38,12 @@ def view(args: argparse.Namespace) -> None:
     zarr_view(args.path, args.port)
 
 
+def finder(args: argparse.Namespace) -> None:
+    """Wrap the :func:`~ome_zarr.utils.finder` method."""
+    config_logging(logging.WARN, args)
+    bff_finder(args.path, args.port)
+
+
 def download(args: argparse.Namespace) -> None:
     """Wrap the :func:`~ome_zarr.utils.download` method."""
     config_logging(logging.WARN, args)
@@ -103,7 +110,7 @@ def main(args: Union[list[str], None] = None) -> None:
 
     # info
     parser_info = subparsers.add_parser("info")
-    parser_info.add_argument("path")
+    parser_info.add_argument("path", help="Path to image.zarr")
     parser_info.add_argument("--stats", action="store_true")
     parser_info.set_defaults(func=info)
 
@@ -115,10 +122,26 @@ def main(args: Union[list[str], None] = None) -> None:
 
     # view (in ome-ngff-validator in a browser)
     parser_view = subparsers.add_parser("view")
-    parser_view.add_argument("path")
-    parser_view.add_argument("--port", type=int, default=8000)
+    parser_view.add_argument(
+        "path",
+        help="Path to image.zarr to open in ome-ngff-validator",
+    )
+    parser_view.add_argument(
+        "--port", type=int, default=8000, help="Port to serve the data (default: 8000)"
+    )
     parser_view.set_defaults(func=view)
 
+    # finder (open a dir of images in BioFile Finder in a browser)
+    parser_finder = subparsers.add_parser("finder")
+    parser_finder.add_argument(
+        "path",
+        help="Directory to open in BioFile Finder",
+    )
+    parser_finder.add_argument(
+        "--port", type=int, default=8000, help="Port to serve the data (default: 8000)"
+    )
+    parser_finder.set_defaults(func=finder)
+
     # create
     parser_create = subparsers.add_parser("create")
     parser_create.add_argument(

diff --git a/ome_zarr/utils.py b/ome_zarr/utils.py
@@ -1,10 +1,14 @@
 """Utility methods for ome_zarr access."""
 
+import csv
 import json
 import logging
 import os
+import urllib
 import webbrowser
+import xml.etree.ElementTree as ET
 from collections.abc import Iterator
+from datetime import datetime
 from http.server import (  # type: ignore[attr-defined]
     HTTPServer,
     SimpleHTTPRequestHandler,
@@ -52,12 +56,31 @@ def info(path: str, stats: bool = False) -> Iterator[Node]:
         yield node
 
 
-def view(input_path: str, port: int = 8000) -> None:
+def view(input_path: str, port: int = 8000, dry_run: bool = False) -> None:
     # serve the parent directory in a simple server with CORS. Open browser
+    # dry_run is for testing, so we don't open the browser or start the server
+
+    zarrs = []
+    if (Path(input_path) / ".zattrs").exists():
+        zarrs = find_multiscales(Path(input_path))
+    if len(zarrs) == 0:
+        print(
+            f"No OME-Zarr images found in {input_path}. "
+            f"Try $ ome_zarr finder {input_path}"
+        )
+        return
 
     parent_dir, image_name = os.path.split(input_path)
+    if len(image_name) == 0:
+        parent_dir, image_name = os.path.split(parent_dir)
     parent_dir = str(parent_dir)
 
+    # open ome-ngff-validator in a web browser...
+    url = (
+        f"https://ome.github.io/ome-ngff-validator/"
+        f"?source=http://localhost:{port}/{image_name}"
+    )
+
     class CORSRequestHandler(SimpleHTTPRequestHandler):
         def end_headers(self) -> None:
             self.send_header("Access-Control-Allow-Origin", "*")
@@ -70,11 +93,188 @@ def translate_path(self, path: str) -> str:
             super_path = super().translate_path(path)
             return super_path
 
-    # open ome-ngff-validator in a web browser...
-    url = (
-        f"https://ome.github.io/ome-ngff-validator/"
-        f"?source=http://localhost:{port}/{image_name}"
-    )
+    # for testing
+    if dry_run:
+        return
+
+    # Open in browser...
+    webbrowser.open(url)
+
+    # ...then start serving content
+    test(CORSRequestHandler, HTTPServer, port=port)
+
+
+def find_multiscales(path_to_zattrs):
+    # return list of images. Each image is [path_to_zarr, name, dirname]
+    # We want full path to find the multiscales Image. e.g. full/path/to/image.zarr/0
+    # AND we want image Name, e.g. "image.zarr Series:0"
+    # AND we want the dir path to use for Tags e.g. full/path/to
+    with open(path_to_zattrs / ".zattrs") as f:
+        text = f.read()
+    zattrs = json.loads(text)
+    if "plate" in zattrs:
+        plate = zattrs.get("plate")
+        wells = plate.get("wells")
+        field = "0"
+        if len(wells) > 0:
+            path_to_zarr = path_to_zattrs / wells[0].get("path") / field
+            plate_name = os.path.basename(path_to_zattrs)
+            return [[path_to_zarr, plate_name, os.path.dirname(path_to_zattrs)]]
+        else:
+            LOGGER.info(f"No wells found in plate{path_to_zattrs}")
+            return []
+    elif zattrs.get("bioformats2raw.layout") == 3:
+        # Open OME/METADATA.ome.xml
+        try:
+            tree = ET.parse(path_to_zattrs / "OME" / "METADATA.ome.xml")
+            root = tree.getroot()
+            # spec says "If the "series" attribute does not exist and no "plate" is
+            # present, separate "multiscales" images MUST be stored in consecutively
+            # numbered groups starting from 0 (i.e. "0/", "1/", "2/", "3/", ...)."
+            series = 0
+            images = []
+            for child in root:
+                # tag is eg. {http://www.openmicroscopy.org/Schemas/OME/2016-06}Image
+                if child.tag.endswith("Image"):
+                    img_name = (
+                        os.path.basename(path_to_zattrs) + " Series:" + str(series)
+                    )
+                    # Get Name from XML metadata, otherwise use path and Series
+                    img_name = child.attrib.get("Name", img_name)
+                    images.append(
+                        [
+                            path_to_zattrs / str(series),
+                            img_name,
+                            os.path.dirname(path_to_zattrs),
+                        ]
+                    )
+                    series += 1
+            return images
+        except Exception as ex:
+            print(ex)
+    elif zattrs.get("multiscales"):
+        return [
+            [
+                path_to_zattrs,
+                os.path.basename(path_to_zattrs),
+                os.path.dirname(path_to_zattrs),
+            ]
+        ]
+    return []
+
+
+def splitall(path):
+    # Use os.path.split() repeatedly to split path into dirs
+    allparts = []
+    while 1:
+        parts = os.path.split(path)
+        if parts[0] == path:  # sentinel for absolute paths
+            allparts.insert(0, parts[0])
+            break
+        elif parts[1] == path:  # sentinel for relative paths
+            allparts.insert(0, parts[1])
+            break
+        else:
+            path = parts[0]
+            allparts.insert(0, parts[1])
+    return allparts
+
+
+def finder(input_path: str, port: int = 8000, dry_run=False) -> None:
+    # serve the parent directory in a simple server with CORS. Open browser
+    # dry_run is for testing, so we don't open the browser or start the server
+    parent_path, server_dir = os.path.split(input_path)
+    # in case input_path had trailing slash, we go one level up...
+    if len(server_dir) == 0:
+        parent_path, server_dir = os.path.split(parent_path)
+
+    # 'input_path' is the path to the data dir passed to the script, e.g. "ZARR/data"
+    # 'parent_path', e.g. "ZARR" just for running http server
+    # 'server_dir' is the name of our top-level dir E.g. "data"
+
+    # We will be serving the data from last dir in /parent/dir/path
+    # so we need to use that as base for image URLs...
+
+    # walk the input path to find all .zattrs files...
+    def walk(path: Path):
+        if (path / ".zattrs").exists():
+            yield from find_multiscales(path)
+        else:
+            for p in path.iterdir():
+                if (p / ".zattrs").exists():
+                    yield from find_multiscales(p)
+                elif p.is_dir():
+                    yield from walk(p)
+                else:
+                    continue
+
+    url = None
+    zarrs = list(walk(Path(input_path)))
+
+    # If no OME-Zarr images were found, there is nothing to serve...
+    if len(zarrs) == 0:
+        print("No OME-Zarr files found in", input_path)
+        return
+    else:
+        # ...otherwise write to CSV file and open in BioFile Finder
+        col_names = ["File Path", "File Name", "Folders", "Uploaded"]
+        # write csv file into the dir we're serving from...
+        bff_csv = os.path.join(input_path, "biofile_finder.csv")
+
+        with open(bff_csv, "w", newline="") as csvfile:
+            writer = csv.writer(csvfile, delimiter=",")
+            writer.writerow(col_names)
+            for zarr_img in zarrs:
+                # zarr paths start with full path to img
+                # e.g. ZARR/data/to/img (from walk("ZARR/data"))
+                # but we want them to be from the server_dir to img, e.g "data/to/img".
+                # So we want relative /to/img path, from input_path -> to img
+                relpath = os.path.relpath(zarr_img[0], input_path)
+                # On Windows, we need to replace \\ with / in relpath for URL
+                rel_url = "/".join(splitall(relpath))
+                file_path = f"http://localhost:{port}/{server_dir}/{rel_url}"
+                name = zarr_img[1] or os.path.basename(zarr_img[0])
+                # folders is "f1,f2,f3" etc.
+                folders_path = os.path.relpath(zarr_img[2], input_path)
+                folders = ",".join(splitall(folders_path))
+                timestamp = ""
+                try:
+                    mtime = os.path.getmtime(zarr_img[0])
+                    # "YYYY-MM-DD HH:MM:SS." (%Z is empty: datetime is naive)
+                    timestamp = datetime.fromtimestamp(mtime).strftime(
+                        "%Y-%m-%d %H:%M:%S.%Z"
+                    )
+                except OSError:
+                    pass
+                writer.writerow([file_path, name, folders, timestamp])
+
+        source = {
+            "uri": f"http://localhost:{port}/{server_dir}/biofile_finder.csv",
+            "type": "csv",
+            "name": "biofile_finder.csv",
+        }
+        s = urllib.parse.quote(json.dumps(source))
+        url = f"https://bff.allencell.org/app?source={s}"
+        # show small thumbnails view by default. (v=3 for big thumbnails)
+        url += "&v=2"
+
+    class CORSRequestHandler(SimpleHTTPRequestHandler):
+        def end_headers(self) -> None:
+            self.send_header("Access-Control-Allow-Origin", "*")
+            SimpleHTTPRequestHandler.end_headers(self)
+
+        def translate_path(self, path: str) -> str:
+            # Since we don't call the class constructor ourselves,
+            # we set the directory here instead
+            self.directory = parent_path
+            super_path = super().translate_path(path)
+            return super_path
+
+    # for testing
+    if dry_run:
+        return
+
+    # Open in browser...
     webbrowser.open(url)
 
     # ...then start serving content

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -3,9 +3,11 @@
 from pathlib import Path
 
 import pytest
+import zarr
 
 from ome_zarr.cli import main
-from ome_zarr.utils import strip_common_prefix
+from ome_zarr.utils import finder, strip_common_prefix, view
+from ome_zarr.writer import write_plate_metadata
 
 
 def directory_items(directory: Path):
@@ -102,3 +104,45 @@ def _rotate_and_test(self, *hierarchy: Path, reverse: bool = True):
             secondpass: deque = deque(hierarchy)
             secondpass.reverse()
             self._rotate_and_test(*list(secondpass), reverse=False)
+
+    def test_view(self):
+        filename = str(self.path) + "-4"
+        main(["create", "--method=astronaut", filename])
+        # CLI doesn't support the dry_run option yet
+        # main(["view", filename, "8000"])
+        # we need dry_run to be True to avoid blocking the test with server
+        view(filename, 8000, True)
+
+    def test_finder(self):
+        img_dir = (self.path / "images").mkdir()
+        img_dir2 = (img_dir / "dir2").mkdir()
+        bf2raw_dir = (img_dir / "bf2raw.zarr").mkdir()
+        main(["create", "--method=astronaut", (str(img_dir / "astronaut"))])
+        main(["create", "--method=coins", (str(img_dir2 / "coins"))])
+        (bf2raw_dir / "OME").mkdir()
+
+        # write minimal bioformats2raw and xml metadata
+        with open(bf2raw_dir / ".zattrs", "w") as f:
+            f.write("""{"bioformats2raw.layout" : 3}""")
+        with open(bf2raw_dir / "OME" / "METADATA.ome.xml", "w") as f:
+            f.write(
+                """<?xml version="1.0" encoding="UTF-8"?>
+                <OME><Image ID="Image:1" Name="test.fake"></Image></OME>
+                """
+            )
+
+        # create a plate
+        plate_dir = (img_dir2 / "plate").mkdir()
+        store = zarr.DirectoryStore(str(plate_dir))
+        root = zarr.group(store=store)
+        write_plate_metadata(root, ["A"], ["1"], ["A/1"])
+
+        finder(img_dir, 8000, True)
+
+        assert (img_dir / "biofile_finder.csv").exists()
+        csv_text = (img_dir / "biofile_finder.csv").read_text(encoding="utf-8")
+        print(csv_text)
+        assert "File Path,File Name,Folders,Uploaded" in csv_text
+        assert "dir2/plate/A/1/0,plate,dir2" in csv_text
+        assert "coins,dir2" in csv_text
+        assert "test.fake" in csv_text