Skip to content

Commit 542cbe4

Browse files
authored
Merge pull request #436 from will-moore/view_in_biofile_finder
View in biofile finder
2 parents 7d1ae35 + 786602d commit 542cbe4

File tree

4 files changed

+287
-10
lines changed

4 files changed

+287
-10
lines changed

docs/source/cli.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,16 @@ Use the `ome_zarr` command to view Zarr data in the https://ome.github.io/ome-ng
3232

3333
ome_zarr view 6001240.zarr/
3434

35+
finder
36+
======
37+
38+
Use the `ome_zarr` command to display multiple OME-Zarr images in the BioFile Finder app
39+
in a browser. This command parses the specified directory to find all OME-Zarr Images
40+
and Plates, combines them into a `biofile_finder.csv` file and opens this in the
41+
app, which allows you to browse thumbnails of all images::
42+
43+
ome_zarr finder /path/to/dir/
44+
3545
download
3646
========
3747

ome_zarr/cli.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .data import astronaut, coins, create_zarr
99
from .scale import Scaler
1010
from .utils import download as zarr_download
11+
from .utils import finder as bff_finder
1112
from .utils import info as zarr_info
1213
from .utils import view as zarr_view
1314

@@ -36,6 +37,12 @@ def view(args: argparse.Namespace) -> None:
3637
zarr_view(args.path, args.port)
3738

3839

40+
def finder(args: argparse.Namespace) -> None:
    """Wrap the :func:`~ome_zarr.utils.finder` method.

    Configures logging with a WARNING base level, then passes ``args.path``
    and ``args.port`` to :func:`~ome_zarr.utils.finder`.
    """
    # Use the documented level name, matching the ``download`` wrapper;
    # ``logging.WARN`` is only a legacy alias for the same value.
    config_logging(logging.WARNING, args)
    bff_finder(args.path, args.port)
44+
45+
3946
def download(args: argparse.Namespace) -> None:
4047
"""Wrap the :func:`~ome_zarr.utils.download` method."""
4148
config_logging(logging.WARNING, args)
@@ -102,7 +109,7 @@ def main(args: list[str] | None = None) -> None:
102109

103110
# info
104111
parser_info = subparsers.add_parser("info")
105-
parser_info.add_argument("path")
112+
parser_info.add_argument("path", help="Path to image.zarr")
106113
parser_info.add_argument("--stats", action="store_true")
107114
parser_info.set_defaults(func=info)
108115

@@ -114,10 +121,26 @@ def main(args: list[str] | None = None) -> None:
114121

115122
# view (in ome-ngff-validator in a browser)
116123
parser_view = subparsers.add_parser("view")
117-
parser_view.add_argument("path")
118-
parser_view.add_argument("--port", type=int, default=8000)
124+
parser_view.add_argument(
125+
"path",
126+
help="Path to image.zarr to open in ome-ngff-validator",
127+
)
128+
parser_view.add_argument(
129+
"--port", type=int, default=8000, help="Port to serve the data (default: 8000)"
130+
)
119131
parser_view.set_defaults(func=view)
120132

133+
# finder (open a dir of images in BioFile Finder in a browser)
134+
parser_finder = subparsers.add_parser("finder")
135+
parser_finder.add_argument(
136+
"path",
137+
help="Directory to open in BioFile Finder",
138+
)
139+
parser_finder.add_argument(
140+
"--port", type=int, default=8000, help="Port to serve the data (default: 8000)"
141+
)
142+
parser_finder.set_defaults(func=finder)
143+
121144
# create
122145
parser_create = subparsers.add_parser("create")
123146
parser_create.add_argument(

ome_zarr/utils.py

Lines changed: 206 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
"""Utility methods for ome_zarr access."""
22

3+
import csv
34
import json
45
import logging
56
import os
7+
import urllib
68
import webbrowser
9+
import xml.etree.ElementTree as ET
710
from collections.abc import Iterator
11+
from datetime import datetime
812
from http.server import ( # type: ignore[attr-defined]
913
HTTPServer,
1014
SimpleHTTPRequestHandler,
@@ -63,12 +67,31 @@ def info(path: str, stats: bool = False) -> Iterator[Node]:
6367
yield node
6468

6569

66-
def view(input_path: str, port: int = 8000) -> None:
70+
def view(input_path: str, port: int = 8000, dry_run: bool = False) -> None:
6771
# serve the parent directory in a simple server with CORS. Open browser
72+
# dry_run is for testing, so we don't open the browser or start the server
73+
74+
zarrs = []
75+
if (Path(input_path) / ".zattrs").exists():
76+
zarrs = find_multiscales(Path(input_path))
77+
if len(zarrs) == 0:
78+
print(
79+
f"No OME-Zarr images found in {input_path}. "
80+
f"Try $ ome_zarr finder {input_path}"
81+
)
82+
return
6883

6984
parent_dir, image_name = os.path.split(input_path)
85+
if len(image_name) == 0:
86+
parent_dir, image_name = os.path.split(parent_dir)
7087
parent_dir = str(parent_dir)
7188

89+
# open ome-ngff-validator in a web browser...
90+
url = (
91+
f"https://ome.github.io/ome-ngff-validator/"
92+
f"?source=http://localhost:{port}/{image_name}"
93+
)
94+
7295
class CORSRequestHandler(SimpleHTTPRequestHandler):
7396
def end_headers(self) -> None:
7497
self.send_header("Access-Control-Allow-Origin", "*")
@@ -81,11 +104,188 @@ def translate_path(self, path: str) -> str:
81104
super_path = super().translate_path(path)
82105
return super_path
83106

84-
# open ome-ngff-validator in a web browser...
85-
url = (
86-
f"https://ome.github.io/ome-ngff-validator/"
87-
f"?source=http://localhost:{port}/{image_name}"
88-
)
107+
# for testing
108+
if dry_run:
109+
return
110+
111+
# Open in browser...
112+
webbrowser.open(url)
113+
114+
# ...then start serving content
115+
test(CORSRequestHandler, HTTPServer, port=port)
116+
117+
118+
def find_multiscales(path_to_zattrs) -> list:
    """Find the multiscales images described by a Zarr group's ``.zattrs``.

    Three layouts are recognised, checked in this order:

    * a plate (``"plate"`` key): a single entry pointing at field ``"0"`` of
      the first listed well, named after the plate directory;
    * a bioformats2raw container (``"bioformats2raw.layout" == 3``): one
      entry per ``Image`` element found in ``OME/METADATA.ome.xml``, mapped
      to consecutively numbered groups ``0``, ``1``, ...;
    * a single image (non-empty ``"multiscales"`` key).

    :param path_to_zattrs: directory that contains the ``.zattrs`` file, as a
        ``str`` or path-like object.
    :return: list of ``[path_to_zarr, name, dirname]`` entries, where
        ``path_to_zarr`` is the path to the multiscales image (e.g.
        ``full/path/to/image.zarr/0``), ``name`` is the image name and
        ``dirname`` is the parent directory of ``path_to_zattrs`` (used for
        tags). Empty when nothing is recognised, when a plate lists no wells
        or when the bioformats2raw XML metadata cannot be read.
    """
    # Accept plain strings as well as Path objects.
    zarr_dir = Path(path_to_zattrs)

    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(zarr_dir / ".zattrs", encoding="utf-8") as f:
        zattrs = json.load(f)

    zarr_name = os.path.basename(zarr_dir)
    parent_dir = os.path.dirname(zarr_dir)

    if "plate" in zattrs:
        # Tolerate a null "plate" or a missing/null "wells" entry.
        wells = (zattrs.get("plate") or {}).get("wells") or []
        if not wells:
            LOGGER.info("No wells found in plate %s", zarr_dir)
            return []
        # The first field of the first well stands in for the whole plate.
        field = "0"
        return [[zarr_dir / wells[0].get("path") / field, zarr_name, parent_dir]]

    if zattrs.get("bioformats2raw.layout") == 3:
        metadata_xml = zarr_dir / "OME" / "METADATA.ome.xml"
        # Only the parse can fail for expected reasons (missing or malformed
        # file), so keep the try body to that single call.
        try:
            root = ET.parse(metadata_xml).getroot()
        except (OSError, ET.ParseError) as ex:
            LOGGER.warning("Could not read %s: %s", metadata_xml, ex)
            return []

        # spec says "If the "series" attribute does not exist and no "plate" is
        # present, separate "multiscales" images MUST be stored in consecutively
        # numbered groups starting from 0 (i.e. "0/", "1/", "2/", "3/", ...)."
        images = []
        for child in root:
            # tag is eg. {http://www.openmicroscopy.org/Schemas/OME/2016-06}Image
            if not child.tag.endswith("Image"):
                continue
            series = len(images)
            # Get Name from XML metadata, otherwise use path and Series
            fallback_name = zarr_name + " Series:" + str(series)
            images.append(
                [
                    zarr_dir / str(series),
                    child.attrib.get("Name", fallback_name),
                    parent_dir,
                ]
            )
        return images

    if zattrs.get("multiscales"):
        return [[zarr_dir, zarr_name, parent_dir]]

    return []
175+
176+
177+
def splitall(path):
    """Split ``path`` into a list of its components.

    Applies :func:`os.path.split` repeatedly, peeling one component off the
    end each time, until the remainder stops changing.

    :param path: the path to split.
    :return: list of components in order; for an absolute path the first
        element is the root.
    """
    pieces = []
    remaining = path
    while True:
        head, tail = os.path.split(remaining)
        if head == remaining:
            # Nothing left to peel off an absolute path: head is the root.
            pieces.append(head)
            break
        if tail == remaining:
            # Nothing left to peel off a relative path: tail is the first part.
            pieces.append(tail)
            break
        pieces.append(tail)
        remaining = head
    # Components were collected last-to-first.
    pieces.reverse()
    return pieces
192+
193+
194+
def finder(input_path: str, port: int = 8000, dry_run=False) -> None:
195+
# serve the parent directory in a simple server with CORS. Open browser
196+
# dry_run is for testing, so we don't open the browser or start the server
197+
parent_path, server_dir = os.path.split(input_path)
198+
# in case input_path had trailing slash, we go one level up...
199+
if len(server_dir) == 0:
200+
parent_path, server_dir = os.path.split(parent_path)
201+
202+
# 'input_path' is path passed to the script. To the data dir. E.g. "ZARR/data"
203+
# 'parent_path', e.g. "ZARR" just for running http server
204+
# 'server_dir' is the name of our top-level dir E.g. "data"
205+
206+
# We will be serving the data from last dir in /parent/dir/path
207+
# so we need to use that as base for image URLs...
208+
209+
# walk the input path to find all .zattrs files...
210+
def walk(path: Path):
211+
if (path / ".zattrs").exists():
212+
yield from find_multiscales(path)
213+
else:
214+
for p in path.iterdir():
215+
if (p / ".zattrs").exists():
216+
yield from find_multiscales(p)
217+
elif p.is_dir():
218+
yield from walk(p)
219+
else:
220+
continue
221+
222+
url = None
223+
zarrs = list(walk(Path(input_path)))
224+
225+
# If we have just one zarr, open ome-ngff-validator in a web browser...
226+
if len(zarrs) == 0:
227+
print("No OME-Zarr files found in", input_path)
228+
return
229+
else:
230+
# ...otherwise write to CSV file and open in BioFile Finder
231+
col_names = ["File Path", "File Name", "Folders", "Uploaded"]
232+
# write csv file into the dir we're serving from...
233+
bff_csv = os.path.join(input_path, "biofile_finder.csv")
234+
235+
with open(bff_csv, "w", newline="") as csvfile:
236+
writer = csv.writer(csvfile, delimiter=",")
237+
writer.writerow(col_names)
238+
for zarr_img in zarrs:
239+
# zarr paths start with full path to img
240+
# e.g. ZARR/data/to/img (from walk("ZARR/data"))
241+
# but we want them to be from the server_dir to img, e.g "data/to/img".
242+
# So we want relative /to/img path, from input_path -> to img
243+
relpath = os.path.relpath(zarr_img[0], input_path)
244+
# On Windows, we need to replace \\ with / in relpath for URL
245+
rel_url = "/".join(splitall(relpath))
246+
file_path = f"http://localhost:{port}/{server_dir}/{rel_url}"
247+
name = zarr_img[1] or os.path.basename(zarr_img[0])
248+
# folders is "f1,f2,f3" etc.
249+
folders_path = os.path.relpath(zarr_img[2], input_path)
250+
folders = ",".join(splitall(folders_path))
251+
timestamp = ""
252+
try:
253+
mtime = os.path.getmtime(zarr_img[0])
254+
# format mtime as "YYYY-MM-DD HH:MM:SS.Z"
255+
timestamp = datetime.fromtimestamp(mtime).strftime(
256+
"%Y-%m-%d %H:%M:%S.%Z"
257+
)
258+
except OSError:
259+
pass
260+
writer.writerow([file_path, name, folders, timestamp])
261+
262+
source = {
263+
"uri": f"http://localhost:{port}/{server_dir}/biofile_finder.csv",
264+
"type": "csv",
265+
"name": "biofile_finder.csv",
266+
}
267+
s = urllib.parse.quote(json.dumps(source))
268+
url = f"https://bff.allencell.org/app?source={s}"
269+
# show small thumbnails view by default. (v=3 for big thumbnails)
270+
url += "&v=2"
271+
272+
class CORSRequestHandler(SimpleHTTPRequestHandler):
273+
def end_headers(self) -> None:
274+
self.send_header("Access-Control-Allow-Origin", "*")
275+
SimpleHTTPRequestHandler.end_headers(self)
276+
277+
def translate_path(self, path: str) -> str:
278+
# Since we don't call the class constructor ourselves,
279+
# we set the directory here instead
280+
self.directory = parent_path
281+
super_path = super().translate_path(path)
282+
return super_path
283+
284+
# for testing
285+
if dry_run:
286+
return
287+
288+
# Open in browser...
89289
webbrowser.open(url)
90290

91291
# ...then start serving content

tests/test_cli.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
from pathlib import Path
44

55
import pytest
6+
import zarr
67

78
from ome_zarr.cli import main
8-
from ome_zarr.utils import strip_common_prefix
9+
from ome_zarr.utils import finder, strip_common_prefix, view
10+
from ome_zarr.writer import write_plate_metadata
911

1012

1113
def directory_items(directory: Path):
@@ -102,3 +104,45 @@ def _rotate_and_test(self, *hierarchy: Path, reverse: bool = True):
102104
secondpass: deque = deque(hierarchy)
103105
secondpass.reverse()
104106
self._rotate_and_test(*list(secondpass), reverse=False)
107+
108+
def test_view(self):
    """Create a sample image and run ``view`` on it in dry-run mode."""
    image_path = str(self.path) + "-4"
    main(["create", "--method=astronaut", image_path])
    # The CLI doesn't expose the dry_run option, so call view() directly.
    # dry_run must be True, otherwise the server would block the test.
    view(image_path, port=8000, dry_run=True)
115+
116+
def test_finder(self):
    """Check that ``finder`` writes a CSV listing the images under a directory.

    The directory tree holds two plain images, a minimal bioformats2raw
    container and a plate. ``finder`` runs in dry-run mode, so no browser
    is opened and no server is started.
    """
    # NOTE(review): relies on mkdir() returning the created directory, which
    # suggests self.path is a py.path-style object -- confirm against fixture.
    images_dir = (self.path / "images").mkdir()
    nested_dir = (images_dir / "dir2").mkdir()
    bf2raw_dir = (images_dir / "bf2raw.zarr").mkdir()

    # two regular images, one at the top level and one in a sub-directory
    main(["create", "--method=astronaut", str(images_dir / "astronaut")])
    main(["create", "--method=coins", str(nested_dir / "coins")])

    # write minimal bioformats2raw and xml metadata
    (bf2raw_dir / "OME").mkdir()
    with open(bf2raw_dir / ".zattrs", "w") as zattrs_file:
        zattrs_file.write("""{"bioformats2raw.layout" : 3}""")
    with open(bf2raw_dir / "OME" / "METADATA.ome.xml", "w") as xml_file:
        xml_file.write(
            """<?xml version="1.0" encoding="UTF-8"?>
<OME><Image ID="Image:1" Name="test.fake"></Image></OME>
"""
        )

    # create a plate
    plate_dir = (nested_dir / "plate").mkdir()
    plate_group = zarr.group(store=zarr.DirectoryStore(str(plate_dir)))
    write_plate_metadata(plate_group, ["A"], ["1"], ["A/1"])

    finder(images_dir, port=8000, dry_run=True)

    csv_path = images_dir / "biofile_finder.csv"
    assert csv_path.exists()
    csv_text = csv_path.read_text(encoding="utf-8")
    print(csv_text)
    assert "File Path,File Name,Folders,Uploaded" in csv_text
    assert "dir2/plate/A/1/0,plate,dir2" in csv_text
    assert "coins,dir2" in csv_text
    assert "test.fake" in csv_text

0 commit comments

Comments
 (0)