11"""Utility methods for ome_zarr access."""
22
3+ import csv
34import json
45import logging
56import os
7+ import urllib
68import webbrowser
9+ import xml .etree .ElementTree as ET
710from collections .abc import Iterator
11+ from datetime import datetime
812from http .server import ( # type: ignore[attr-defined]
913 HTTPServer ,
1014 SimpleHTTPRequestHandler ,
@@ -63,12 +67,31 @@ def info(path: str, stats: bool = False) -> Iterator[Node]:
6367 yield node
6468
6569
66- def view (input_path : str , port : int = 8000 ) -> None :
70+ def view (input_path : str , port : int = 8000 , dry_run : bool = False ) -> None :
6771 # serve the parent directory in a simple server with CORS. Open browser
72+ # dry_run is for testing, so we don't open the browser or start the server
73+
74+ zarrs = []
75+ if (Path (input_path ) / ".zattrs" ).exists ():
76+ zarrs = find_multiscales (Path (input_path ))
77+ if len (zarrs ) == 0 :
78+ print (
79+ f"No OME-Zarr images found in { input_path } . "
80+ f"Try $ ome_zarr finder { input_path } "
81+ )
82+ return
6883
6984 parent_dir , image_name = os .path .split (input_path )
85+ if len (image_name ) == 0 :
86+ parent_dir , image_name = os .path .split (parent_dir )
7087 parent_dir = str (parent_dir )
7188
89+ # open ome-ngff-validator in a web browser...
90+ url = (
91+ f"https://ome.github.io/ome-ngff-validator/"
92+ f"?source=http://localhost:{ port } /{ image_name } "
93+ )
94+
7295 class CORSRequestHandler (SimpleHTTPRequestHandler ):
7396 def end_headers (self ) -> None :
7497 self .send_header ("Access-Control-Allow-Origin" , "*" )
@@ -81,11 +104,188 @@ def translate_path(self, path: str) -> str:
81104 super_path = super ().translate_path (path )
82105 return super_path
83106
84- # open ome-ngff-validator in a web browser...
85- url = (
86- f"https://ome.github.io/ome-ngff-validator/"
87- f"?source=http://localhost:{ port } /{ image_name } "
88- )
107+ # for testing
108+ if dry_run :
109+ return
110+
111+ # Open in browser...
112+ webbrowser .open (url )
113+
114+ # ...then start serving content
115+ test (CORSRequestHandler , HTTPServer , port = port )
116+
117+
def find_multiscales(path_to_zattrs):
    """Return the multiscales images described by ``<path_to_zattrs>/.zattrs``.

    Each image is a 3-item list ``[path_to_zarr, name, dirname]``:

    * ``path_to_zarr`` - full path to the multiscales image,
      e.g. ``full/path/to/image.zarr/0``
    * ``name`` - display name, e.g. ``"image.zarr Series:0"``
    * ``dirname`` - parent directory of ``path_to_zattrs`` (used for tags),
      e.g. ``full/path/to``

    Three layouts are recognised, checked in this order:

    1. ``"plate"`` key present: the first field (``"0"``) of the first well
       is returned as a single image.
    2. ``"bioformats2raw.layout" == 3``: one image per ``Image`` element
       found in ``OME/METADATA.ome.xml``, numbered from 0.
    3. truthy ``"multiscales"``: the directory itself is the image.

    :param path_to_zattrs: directory containing a ``.zattrs`` file
        (``pathlib.Path`` or ``str``).
    :return: list of ``[path_to_zarr, name, dirname]`` lists; empty when no
        image is found or the OME-XML metadata cannot be read.
    :raises OSError: if ``.zattrs`` cannot be opened.
    :raises json.JSONDecodeError: if ``.zattrs`` is not valid JSON.
    """
    # Accept plain strings too: the "/" joins below require a Path.
    path_to_zattrs = Path(path_to_zattrs)

    # JSON is UTF-8; don't depend on the platform's locale encoding.
    with open(path_to_zattrs / ".zattrs", encoding="utf-8") as f:
        zattrs = json.load(f)

    name = os.path.basename(path_to_zattrs)
    dirname = os.path.dirname(path_to_zattrs)

    if "plate" in zattrs:
        # Tolerate a null/missing "plate" or "wells" instead of raising.
        wells = (zattrs.get("plate") or {}).get("wells") or []
        if not wells:
            LOGGER.info(f"No wells found in plate{path_to_zattrs}")
            return []
        # Represent the whole plate by the first field of its first well.
        field = "0"
        path_to_zarr = path_to_zattrs / wells[0].get("path") / field
        return [[path_to_zarr, name, dirname]]

    if zattrs.get("bioformats2raw.layout") == 3:
        # Only the parse itself is best-effort; a missing or malformed XML
        # file is reported and treated as "no images".
        try:
            tree = ET.parse(path_to_zattrs / "OME" / "METADATA.ome.xml")
        except (OSError, ET.ParseError) as ex:
            print(ex)
            return []

        # spec says "If the "series" attribute does not exist and no "plate" is
        # present, separate "multiscales" images MUST be stored in consecutively
        # numbered groups starting from 0 (i.e. "0/", "1/", "2/", "3/", ...)."
        images = []
        series = 0
        for child in tree.getroot():
            # tag is eg. {http://www.openmicroscopy.org/Schemas/OME/2016-06}Image
            if not child.tag.endswith("Image"):
                continue
            # Get Name from XML metadata, otherwise use path and Series
            fallback_name = f"{name} Series:{series}"
            images.append(
                [
                    path_to_zattrs / str(series),
                    child.attrib.get("Name", fallback_name),
                    dirname,
                ]
            )
            series += 1
        return images

    if zattrs.get("multiscales"):
        return [[path_to_zattrs, name, dirname]]

    return []
175+
176+
def splitall(path):
    """Split ``path`` into a list of its individual components.

    ``os.path.split()`` is applied repeatedly, peeling one component off
    the right-hand end each time, e.g. ``"a/b/c"`` -> ``["a", "b", "c"]``.
    For an absolute path the root is kept as the first item, e.g.
    ``"/a/b"`` -> ``["/", "a", "b"]`` on POSIX.

    :param path: path to split (``str`` or ``os.PathLike``).
    :return: list of path components, left-most first.
    """
    # Normalise PathLike input first: the sentinel checks below compare
    # against the strings os.path.split() returns, and a Path object never
    # compares equal to a str, which would yield a spurious "" component.
    path = os.fspath(path)

    # Components are collected right-to-left, then reversed once at the end
    # (cheaper and simpler than inserting at index 0 on every step).
    reversed_parts = []
    while True:
        head, tail = os.path.split(path)
        if head == path:  # sentinel for absolute paths
            reversed_parts.append(head)
            break
        if tail == path:  # sentinel for relative paths
            reversed_parts.append(tail)
            break
        path = head
        reversed_parts.append(tail)

    reversed_parts.reverse()
    return reversed_parts
192+
193+
194+ def finder (input_path : str , port : int = 8000 , dry_run = False ) -> None :
195+ # serve the parent directory in a simple server with CORS. Open browser
196+ # dry_run is for testing, so we don't open the browser or start the server
197+ parent_path , server_dir = os .path .split (input_path )
198+ # in case input_path had trailing slash, we go one level up...
199+ if len (server_dir ) == 0 :
200+ parent_path , server_dir = os .path .split (parent_path )
201+
202+ # 'input_path' is path passed to the script. To the data dir. E.g. "ZARR/data"
203+ # 'parent_path', e.g. "ZARR" just for running http server
204+ # 'server_dir' is the name of our top-level dir E.g. "data"
205+
206+ # We will be serving the data from last dir in /parent/dir/path
207+ # so we need to use that as base for image URLs...
208+
209+ # walk the input path to find all .zattrs files...
210+ def walk (path : Path ):
211+ if (path / ".zattrs" ).exists ():
212+ yield from find_multiscales (path )
213+ else :
214+ for p in path .iterdir ():
215+ if (p / ".zattrs" ).exists ():
216+ yield from find_multiscales (p )
217+ elif p .is_dir ():
218+ yield from walk (p )
219+ else :
220+ continue
221+
222+ url = None
223+ zarrs = list (walk (Path (input_path )))
224+
225+ # If we have just one zarr, open ome-ngff-validator in a web browser...
226+ if len (zarrs ) == 0 :
227+ print ("No OME-Zarr files found in" , input_path )
228+ return
229+ else :
230+ # ...otherwise write to CSV file and open in BioFile Finder
231+ col_names = ["File Path" , "File Name" , "Folders" , "Uploaded" ]
232+ # write csv file into the dir we're serving from...
233+ bff_csv = os .path .join (input_path , "biofile_finder.csv" )
234+
235+ with open (bff_csv , "w" , newline = "" ) as csvfile :
236+ writer = csv .writer (csvfile , delimiter = "," )
237+ writer .writerow (col_names )
238+ for zarr_img in zarrs :
239+ # zarr paths start with full path to img
240+ # e.g. ZARR/data/to/img (from walk("ZARR/data"))
241+ # but we want them to be from the server_dir to img, e.g "data/to/img".
242+ # So we want relative /to/img path, from input_path -> to img
243+ relpath = os .path .relpath (zarr_img [0 ], input_path )
244+ # On Windows, we need to replace \\ with / in relpath for URL
245+ rel_url = "/" .join (splitall (relpath ))
246+ file_path = f"http://localhost:{ port } /{ server_dir } /{ rel_url } "
247+ name = zarr_img [1 ] or os .path .basename (zarr_img [0 ])
248+ # folders is "f1,f2,f3" etc.
249+ folders_path = os .path .relpath (zarr_img [2 ], input_path )
250+ folders = "," .join (splitall (folders_path ))
251+ timestamp = ""
252+ try :
253+ mtime = os .path .getmtime (zarr_img [0 ])
254+ # format mtime as "YYYY-MM-DD HH:MM:SS.Z"
255+ timestamp = datetime .fromtimestamp (mtime ).strftime (
256+ "%Y-%m-%d %H:%M:%S.%Z"
257+ )
258+ except OSError :
259+ pass
260+ writer .writerow ([file_path , name , folders , timestamp ])
261+
262+ source = {
263+ "uri" : f"http://localhost:{ port } /{ server_dir } /biofile_finder.csv" ,
264+ "type" : "csv" ,
265+ "name" : "biofile_finder.csv" ,
266+ }
267+ s = urllib .parse .quote (json .dumps (source ))
268+ url = f"https://bff.allencell.org/app?source={ s } "
269+ # show small thumbnails view by default. (v=3 for big thumbnails)
270+ url += "&v=2"
271+
272+ class CORSRequestHandler (SimpleHTTPRequestHandler ):
273+ def end_headers (self ) -> None :
274+ self .send_header ("Access-Control-Allow-Origin" , "*" )
275+ SimpleHTTPRequestHandler .end_headers (self )
276+
277+ def translate_path (self , path : str ) -> str :
278+ # Since we don't call the class constructor ourselves,
279+ # we set the directory here instead
280+ self .directory = parent_path
281+ super_path = super ().translate_path (path )
282+ return super_path
283+
284+ # for testing
285+ if dry_run :
286+ return
287+
288+ # Open in browser...
89289 webbrowser .open (url )
90290
91291 # ...then start serving content
0 commit comments