-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathextinfo_mkngff.py
More file actions
181 lines (150 loc) · 6.06 KB
/
extinfo_mkngff.py
File metadata and controls
181 lines (150 loc) · 6.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
"""
This script processes Projects or Screens and sets external info metadata on images
to reference the zarr URL. This only works for the "mkngff" images, because their
"getImportedImageFilePaths" contains the http:// URL to the zarr file!
"""
import sys
import argparse
import requests
from omero.cli import cli_login
from omero.gateway import BlitzGateway
from omero.model import ExternalInfoI
from omero.rtypes import rstring, rlong
# Module-level cache mapping a plate name to the base directory computed for it
# by get_filepaths_info(); it starts empty and lives for the whole run.
plate_cache = {}
def _format_well_path(well_pos):
    """Turn a well label such as "A1" or "AA12" into a "row/column" fragment.

    The row part is everything before the first digit, so multi-letter row
    labels such as "AA" are kept intact. If the label contains no digit, or
    starts with one, the split falls back to "after the first character".
    """
    split_at = next(
        (i for i, ch in enumerate(well_pos) if ch.isdigit()), 1
    ) or 1
    return f"{well_pos[:split_at]}/{well_pos[split_at:]}"


def get_images(conn, container_id, is_screen=False):
    """Iterate over images in an OMERO container.

    Args:
        conn: OMERO BlitzGateway connection
        container_id: ID of the container (Project or Screen)
        is_screen: If True, process as Screen; if False, process as Project

    Yields:
        tuple: (parent_name, position, image) where:
            - parent_name: plate name (for screens) or dataset name (for projects)
            - position: well position string like "A/123/0" (for screens),
              built as "<row>/<column>/<well sample index>", or "" (for projects)
            - image: OMERO image object

    Raises:
        ValueError: if no Project/Screen with the given ID can be loaded.
            Because this is a generator, the error surfaces on first iteration.
    """
    container_type = 'Screen' if is_screen else 'Project'
    container = conn.getObject(container_type, attributes={'id': container_id})
    if container is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f"{container_type}:{container_id} not found")

    if is_screen:
        for plate in container.listChildren():
            plate_name = plate.getName()
            for well in plate.listChildren():
                well_path = _format_well_path(well.getWellPos())
                for field in range(well.countWellSample()):
                    yield plate_name, f"{well_path}/{field}", well.getImage(field)
    else:
        for dataset in container.listChildren():
            dataset_name = dataset.getName()
            for image in dataset.listChildren():
                yield dataset_name, "", image
def get_filepaths_info(img, plate_name=None):
    """Return the directory part of an image's first imported client path.

    When a plate name is supplied, the result is memoised in the module-level
    ``plate_cache`` under that name, and later calls with the same name return
    the stored value without querying the image again.

    Args:
        img: OMERO image object
        plate_name: Optional plate name used as the cache key (screens only)

    Returns:
        str: The first client path with its last "/"-separated component removed
    """
    use_cache = bool(plate_name)
    if use_cache:
        try:
            return plate_cache[plate_name]
        except KeyError:
            pass  # first image seen for this plate; compute below

    first_client_path = img.getImportedImageFilePaths()["client_paths"][0]
    # Drop the trailing path component, keeping everything before the last "/".
    base_dir, _last_component = first_client_path.rsplit("/", 1)

    if use_cache:
        plate_cache[plate_name] = base_dir
    return base_dir
def set_ext_info(conn, img, path, skip_if_set=False):
    """Attach (or overwrite) the external info record of an OMERO image.

    The image is re-loaded through ``conn`` by its ID. Its external info is
    reused when present, otherwise a new ``ExternalInfoI`` is created. The
    record is then filled in and both the record and the image are saved via
    the update service.

    Args:
        conn: OMERO BlitzGateway connection
        img: OMERO image object (only its ID is used; the image is re-fetched)
        path: Path to the NGFF multiscales data, stored as the record's lsid
        skip_if_set: If True, return without changes when the image already
            has external info
    """
    image = conn.getObject('Image', img.getId())
    ext_info = image.getExternalInfo()
    already_set = bool(ext_info)

    if already_set and skip_if_set:
        return
    if not already_set:
        ext_info = ExternalInfoI()

    # NOTE(review): entityId is hard-coded to 3; its meaning is not evident here.
    ext_info.entityId = rlong(3)
    ext_info.entityType = rstring("com.glencoesoftware.ngff:multiscales")
    ext_info.lsid = rstring(path)
    image.details.externalInfo = ext_info

    update_service = conn.getUpdateService()
    # Save the record first, then the underlying image object that references it.
    update_service.saveAndReturnObject(ext_info)
    update_service.saveAndReturnObject(image._obj)
def _has_multiscales(url, timeout):
    """Return True if *url* answers 200 with a JSON object containing "multiscales"."""
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return False
    try:
        attrs = response.json()
    except ValueError:
        # Body is not valid JSON.
        return False
    # Guard against JSON scalars/null, where "in" would raise TypeError.
    return isinstance(attrs, dict) and "multiscales" in attrs


def check(path, timeout=30):
    """Verify that an NGFF path is valid by checking for multiscales in .zattrs.

    Two locations are probed in order: ``<path>/.zattrs`` (non-bioformats
    layout or plate image) and ``<path>/0/.zattrs`` (bioformats layout,
    series '0').

    Args:
        path: Path to check for NGFF data; only http:// and https:// URLs
            are probed, anything else is rejected without a request
        timeout: Seconds to wait for each HTTP request, so that a stalled
            server cannot block the run indefinitely

    Returns:
        str or None: ``path`` or ``f"{path}/0"``, whichever first exposes a
        "multiscales" entry; None if neither does

    Raises:
        requests.RequestException: on connection failures or timeouts
            (network errors are deliberately not swallowed)
    """
    if not path.startswith(("http://", "https://")):
        return None
    for candidate in (path, f"{path}/0"):
        if _has_multiscales(f"{candidate}/.zattrs", timeout):
            return candidate
    return None
def main(argv=None):
    """Command-line entry point: set NGFF external info on a container's images.

    Parses a ``<Type>:<ID>`` container specification, logs in through
    ``cli_login()``, walks every image of the Project or Screen, resolves the
    zarr URL for each and either prints what would be done (``--dry-run``) or
    stores it as external info.

    Args:
        argv: Optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when None (argparse behaviour)

    Raises:
        SystemExit: via ``parser.error`` when the container specification is
            not ``Project:<int>`` or ``Screen:<int>`` (type is case-insensitive)
    """
    parser = argparse.ArgumentParser(
        description="Process Projects or Screens and set external info metadata on images to reference zarr URLs.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="Example: %(prog)s Project:123\n %(prog)s Screen:456"
    )
    parser.add_argument(
        "container",
        help="Container specification in format <Type>:<ID> (e.g., Project:123 or Screen:456)"
    )
    parser.add_argument(
        "--skip-if-set",
        action="store_true",
        help="Skip setting external info if it already exists"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print info without setting external info (ignores --skip-if-set)"
    )
    args = parser.parse_args(argv)

    # Validate the specification up front: previously any type other than
    # "screen" (e.g. "Dataset:5") was silently processed as a Project.
    container, separator, raw_id = args.container.partition(":")
    container = container.lower()
    if not separator or container not in ("project", "screen"):
        parser.error(
            f"invalid container {args.container!r}: expected Project:<ID> or Screen:<ID>"
        )
    try:
        container_id = int(raw_id)
    except ValueError:
        parser.error(
            f"invalid container ID {raw_id!r}: expected an integer"
        )
    is_screen = container == "screen"

    with cli_login() as c:
        conn = BlitzGateway(client_obj=c.get_client())
        for parent_name, pos, img in get_images(conn, container_id, is_screen=is_screen):
            if is_screen:
                # Plate images share one base path; the well position selects the image.
                path = get_filepaths_info(img, parent_name)
                path = f"{path}/{pos}"
            else:
                path = get_filepaths_info(img)

            checked_path = check(path)
            if checked_path:
                if args.dry_run:
                    print(f"[DRY RUN] Would set extinfo for image {img.getName()}({img.getId()}) to {checked_path}")
                else:
                    set_ext_info(conn, img, checked_path, skip_if_set=args.skip_if_set)
                    print(f"Set extinfo for image {img.getName()}({img.getId()}) to {checked_path}")
            else:
                print(f"Could not resolve {path} for image {img.getName()}({img.getId()})")


if __name__ == "__main__":
    main()