Merge pull request #109 from ttngu207/main

kushalbakshi · web-flow · commit 95a51d184000 · 2024-05-22T15:12:04.000-04:00
Primarily fixing suite2p loader for missing ROI detection or trace extraction - pulling from staging for other minor updates
diff --git a/element_interface/dandi.py b/element_interface/dandi.py
@@ -1,7 +1,6 @@
 import os
 import subprocess
 
-from dandi.download import download
 from dandi.upload import upload
 
 
@@ -13,6 +12,8 @@ def upload_to_dandi(
     api_key: str = None,
     sync: bool = False,
     existing: str = "refresh",
+    validation: str = "required",
+    shell=True,  # without this param, subprocess interprets first arg as file/dir
 ):
     """Upload NWB files to DANDI Archive
 
@@ -27,6 +28,7 @@ def upload_to_dandi(
         sync (str, optional): If True, delete all files in archive that are not present
             in the local directory.
         existing (str, optional): see full description from `dandi upload --help`
+        validation (str, optional): [require|skip|ignore] see full description from `dandi upload --help`
     """
 
     working_directory = working_directory or os.path.curdir
@@ -38,29 +40,46 @@ def upload_to_dandi(
         working_directory, str(dandiset_id)
     )  # enforce str
 
-    dandiset_url = f"https://gui-staging.dandiarchive.org/#/dandiset/{dandiset_id}" if staging else f"https://dandiarchive.org/dandiset/{dandiset_id}/draft"
-
-    subprocess.run(
-        ["dandi", "download", "--download", "dandiset.yaml", "-o", working_directory, dandiset_url],
-        shell=True, 
+    dandiset_url = (
+        f"https://gui-staging.dandiarchive.org/#/dandiset/{dandiset_id}"
+        if staging
+        else f"https://dandiarchive.org/dandiset/{dandiset_id}/draft"
     )
 
     subprocess.run(
-        ["dandi", "organize", "-d", dandiset_directory, data_directory, "-f", "dry"],
-        shell=True,  # without this param, subprocess interprets first arg as file/dir
+        [
+            "dandi",
+            "download",
+            "--download",
+            "dandiset.yaml",
+            "-o",
+            working_directory,
+            dandiset_url,
+        ],
+        shell=shell,
     )
 
     subprocess.run(
-        ["dandi", "organize", "-d", dandiset_directory, data_directory], shell=True
+        [
+            "dandi",
+            "organize",
+            "-d",
+            dandiset_directory,
+            data_directory,
+            "--required-field",
+            "subject_id",
+            "--required-field",
+            "session_id",
+        ],
+        shell=shell,
     )
 
-    subprocess.run(
-        ["dandi", "validate", dandiset_directory], shell=True
-    )
+    subprocess.run(["dandi", "validate", dandiset_directory], shell=shell)
 
     upload(
         paths=[dandiset_directory],
         dandi_instance="dandi-staging" if staging else "dandi",
         existing=existing,
         sync=sync,
+        validation=validation,
     )
diff --git a/element_interface/prairie_view_loader.py b/element_interface/prairie_view_loader.py
@@ -1,89 +1,114 @@
 import pathlib
+from pathlib import Path
 import xml.etree.ElementTree as ET
 from datetime import datetime
-
 import numpy as np
 
 
-def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
-    """Extract metadata for scans generated by Prairie View acquisition software.
+class PrairieViewMeta:
 
-    The Prairie View software generates one `.ome.tif` imaging file per frame
-    acquired. The metadata for all frames is contained in one .xml file. This
-    function locates the .xml file and generates a dictionary necessary to
-    populate the DataJoint `ScanInfo` and `Field` tables. Prairie View works
-    with resonance scanners with a single field. Prairie View does not support
-    bidirectional x and y scanning. ROI information is not contained in the
-    `.xml` file. All images generated using Prairie View have square dimensions(e.g. 512x512).
+    def __init__(self, prairieview_dir: str):
+        """Initialize PrairieViewMeta loader class
 
-    Args:
-        ome_tif_filepath: An absolute path to the .ome.tif image file.
+        Scans `prairieview_dir` (non-recursively) for `.xml` files and keeps the
+        first one whose tree contains a non-empty `Sequence` element. The metadata
+        dict itself is not parsed here; it is built on first access of `meta`.
+
+        Args:
+            prairieview_dir (str): string, absolute file path to directory containing PrairieView dataset
+
+        Raises:
+            FileNotFoundError: No `.xml` file containing a `Sequence` element with
+                child elements was found directly inside `prairieview_dir`.
+        """
+        # ---- Search for the PrairieView metadata .xml file ----
+        # May return multiple xml files. Only need one that contains scan metadata.
+        self.prairieview_dir = Path(prairieview_dir)
 
-    Raises:
-        FileNotFoundError: No .xml file containing information about the acquired scan
-            was found at path in parent directory at `ome_tif_filepath`.
+        for file in self.prairieview_dir.glob("*.xml"):
+            xml_tree = ET.parse(file)
+            xml_root = xml_tree.getroot()
+            sequence = xml_root.find(".//Sequence")
+            # Explicit form of the former `if xml_root.find(...)` truth test:
+            # an Element is truthy only when it has children, and truth-testing
+            # an Element is deprecated (DeprecationWarning since Python 3.12).
+            if sequence is not None and len(sequence) > 0:
+                self.xml_file = file
+                self._xml_root = xml_root
+                break
+        else:
+            # for/else: reached only when no candidate file triggered `break`.
+            raise FileNotFoundError(
+                f"No PrarieView metadata .xml file found at {prairieview_dir}"
+            )
 
-    Returns:
-        metainfo: A dict mapping keys to corresponding metadata values fetched from the
-            .xml file.
-    """
+        # Lazily populated cache backing the `meta` property.
+        self._meta = None
 
-    # May return multiple xml files. Only need one that contains scan metadata.
-    xml_files_list = pathlib.Path(ome_tif_filepath).parent.glob("*.xml")
+    @property
+    def meta(self):
+        """Scan metadata extracted from `self.xml_file`.
+
+        The extraction runs on first access only; the result is stored in
+        `self._meta` and returned as-is on every later access.
+        """
+        if self._meta is not None:
+            return self._meta
+        self._meta = _extract_prairieview_metadata(self.xml_file)
+        return self._meta
 
-    for file in xml_files_list:
-        xml_tree = ET.parse(file)
-        xml_file = xml_tree.getroot()
-        if xml_file.find(".//Sequence"):
-            break
-    else:
-        raise FileNotFoundError(
-            f"No PrarieView metadata .xml file found at {pathlib.Path(ome_tif_filepath).parent}"
-        )
+    def get_prairieview_files(self, plane_idx=None, channel=None):
+        """Return the image filenames recorded for one plane and one channel.
+
+        Args:
+            plane_idx (optional): One of `self.meta['plane_indices']`. May be omitted
+                only when the scan has a single plane; that plane is then used.
+            channel (optional): One of `self.meta['channels']`. May be omitted only
+                when the scan has a single channel; that channel is then used.
+
+        Returns:
+            list: The `filename` attribute of every `File` element in the metadata
+                xml whose parent `Frame` has `index == plane_idx` and whose own
+                `channel` attribute equals `channel`.
+
+        Raises:
+            ValueError: `plane_idx` (or `channel`) was omitted although the scan has
+                more than one plane (or channel).
+            AssertionError: The given `plane_idx` or `channel` is not listed in the
+                metadata.
+        """
+        if plane_idx is None:
+            if self.meta['num_planes'] > 1:
+                raise ValueError(f"Please specify 'plane_idx' - Plane indices: {self.meta['plane_indices']}")
+            else:
+                plane_idx = self.meta['plane_indices'][0]
+        else:
+            assert plane_idx in self.meta['plane_indices'], f"Invalid 'plane_idx' - Plane indices: {self.meta['plane_indices']}"
+
+        if channel is None:
+            if self.meta['num_channels'] > 1:
+                raise ValueError(f"Please specify 'channel' - Channels: {self.meta['channels']}")
+            else:
+                # Fall back to the only available channel. This used to be assigned
+                # to `plane_idx`, which clobbered the selected plane and left
+                # `channel` as None, so the query below matched nothing.
+                channel = self.meta['channels'][0]
+        else:
+            assert channel in self.meta['channels'], f"Invalid 'channel' - Channels: {self.meta['channels']}"
+
+        # NOTE(review): for single-plane scans the metadata reports plane index 0;
+        # confirm that `Frame` elements actually carry index='0' in that case.
+        frames = self._xml_root.findall(f".//Sequence/Frame/[@index='{plane_idx}']/File/[@channel='{channel}']")
+        return [f.attrib['filename'] for f in frames]
+
+
+def _extract_prairieview_metadata(xml_filepath: str):
+    xml_filepath = Path(xml_filepath)
+    if not xml_filepath.exists():
+        raise FileNotFoundError(f"{xml_filepath} does not exist")
+    xml_tree = ET.parse(xml_filepath)
+    xml_root = xml_tree.getroot()
 
     bidirectional_scan = False  # Does not support bidirectional
     roi = 0
     n_fields = 1  # Always contains 1 field
-    recording_start_time = xml_file.find(".//Sequence/[@cycle='1']").attrib.get("time")
+    recording_start_time = xml_root.find(".//Sequence/[@cycle='1']").attrib.get("time")
 
     # Get all channels and find unique values
     channel_list = [
         int(channel.attrib.get("channel"))
-        for channel in xml_file.iterfind(".//Sequence/Frame/File/[@channel]")
+        for channel in xml_root.iterfind(".//Sequence/Frame/File/[@channel]")
     ]
-    n_channels = len(set(channel_list))
-    n_frames = len(xml_file.findall(".//Sequence/Frame"))
+    channels = set(channel_list)
+    n_channels = len(channels)
+    n_frames = len(xml_root.findall(".//Sequence/Frame"))
     framerate = 1 / float(
-        xml_file.findall('.//PVStateValue/[@key="framePeriod"]')[0].attrib.get("value")
+        xml_root.findall('.//PVStateValue/[@key="framePeriod"]')[0].attrib.get("value")
     )  # rate = 1/framePeriod
 
     usec_per_line = (
         float(
-            xml_file.findall(".//PVStateValue/[@key='scanLinePeriod']")[0].attrib.get(
+            xml_root.findall(".//PVStateValue/[@key='scanLinePeriod']")[0].attrib.get(
                 "value"
             )
         )
         * 1e6
     )  # Convert from seconds to microseconds
 
     scan_datetime = datetime.strptime(
-        xml_file.attrib.get("date"), "%m/%d/%Y %I:%M:%S %p"
+        xml_root.attrib.get("date"), "%m/%d/%Y %I:%M:%S %p"
     )
 
     total_scan_duration = float(
-        xml_file.findall(".//Sequence/Frame")[-1].attrib.get("relativeTime")
+        xml_root.findall(".//Sequence/Frame")[-1].attrib.get("relativeTime")
     )
 
     pixel_height = int(
-        xml_file.findall(".//PVStateValue/[@key='pixelsPerLine']")[0].attrib.get(
+        xml_root.findall(".//PVStateValue/[@key='pixelsPerLine']")[0].attrib.get(
             "value"
         )
     )
     # All PrairieView-acquired images have square dimensions (512 x 512; 1024 x 1024)
     pixel_width = pixel_height
 
     um_per_pixel = float(
-        xml_file.find(
+        xml_root.find(
             ".//PVStateValue/[@key='micronsPerPixel']/IndexedValue/[@index='XAxis']"
         ).attrib.get("value")
     )
@@ -92,43 +117,45 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
 
     # x and y coordinate values for the center of the field
     x_field = float(
-        xml_file.find(
+        xml_root.find(
             ".//PVStateValue/[@key='currentScanCenter']/IndexedValue/[@index='XAxis']"
         ).attrib.get("value")
     )
     y_field = float(
-        xml_file.find(
+        xml_root.find(
             ".//PVStateValue/[@key='currentScanCenter']/IndexedValue/[@index='YAxis']"
         ).attrib.get("value")
     )
+
     if (
-        xml_file.find(
+        xml_root.find(
             ".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']"
         )
         is None
     ):
         z_fields = np.float64(
-            xml_file.find(
+            xml_root.find(
                 ".//PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue"
             ).attrib.get("value")
         )
         n_depths = 1
+        plane_indices = {0}
         assert z_fields.size == n_depths
         bidirection_z = False
-
     else:
         bidirection_z = (
-            xml_file.find(".//Sequence").attrib.get("bidirectionalZ") == "True"
+            xml_root.find(".//Sequence").attrib.get("bidirectionalZ") == "True"
         )
 
         # One "Frame" per depth in the .xml file. Gets number of frames in first sequence
         planes = [
             int(plane.attrib.get("index"))
-            for plane in xml_file.findall(".//Sequence/[@cycle='1']/Frame")
+            for plane in xml_root.findall(".//Sequence/[@cycle='1']/Frame")
         ]
-        n_depths = len(set(planes))
+        plane_indices = set(planes)
+        n_depths = len(plane_indices)
 
-        z_controllers = xml_file.findall(
+        z_controllers = xml_root.findall(
             ".//Sequence/[@cycle='1']/Frame/[@index='1']/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue"
         )
 
@@ -137,13 +164,13 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
         # must change depths.
         if len(z_controllers) > 1:
             z_repeats = []
-            for controller in xml_file.findall(
+            for controller in xml_root.findall(
                 ".//Sequence/[@cycle='1']/Frame/[@index='1']/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/"
             ):
                 z_repeats.append(
                     [
                         float(z.attrib.get("value"))
-                        for z in xml_file.findall(
+                        for z in xml_root.findall(
                             ".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue/[@subindex='{0}']".format(
                                 controller.attrib.get("subindex")
                             )
@@ -163,7 +190,7 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
         else:
             z_fields = [
                 z.attrib.get("value")
-                for z in xml_file.findall(
+                for z in xml_root.findall(
                     ".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue/[@subindex='0']"
                 )
             ]
@@ -195,6 +222,47 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
         fieldY=y_field,
         fieldZ=z_fields,
         recording_time=recording_start_time,
+        channels=list(channels),
+        plane_indices=list(plane_indices),
     )
 
     return metainfo
+
+
+def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
+    """Extract metadata for scans generated by Prairie View acquisition software.
+
+    The Prairie View software generates one `.ome.tif` imaging file per frame
+    acquired. The metadata for all frames is contained in one .xml file. This
+    function locates the .xml file and generates a dictionary necessary to
+    populate the DataJoint `ScanInfo` and `Field` tables. Prairie View works
+    with resonance scanners with a single field. Prairie View does not support
+    bidirectional x and y scanning. ROI information is not contained in the
+    `.xml` file. All images generated using Prairie View have square dimensions(e.g. 512x512).
+
+    Args:
+        ome_tif_filepath: An absolute path to the .ome.tif image file.
+
+    Raises:
+        FileNotFoundError: No .xml file containing information about the acquired scan
+            was found at path in parent directory at `ome_tif_filepath`.
+
+    Returns:
+        metainfo: A dict mapping keys to corresponding metadata values fetched from the
+            .xml file.
+    """
+
+    # The metadata .xml is expected next to the image file.
+    scan_dir = pathlib.Path(ome_tif_filepath).parent
+
+    # May return multiple xml files. Only need one that contains scan metadata.
+    for file in scan_dir.glob("*.xml"):
+        sequence = ET.parse(file).getroot().find(".//Sequence")
+        # Explicit form of the former `if xml_file.find(...)` truth test:
+        # an Element is truthy only when it has children, and truth-testing
+        # an Element is deprecated (DeprecationWarning since Python 3.12).
+        if sequence is not None and len(sequence) > 0:
+            break
+    else:
+        # for/else: reached only when no candidate file triggered `break`.
+        raise FileNotFoundError(
+            f"No PrarieView metadata .xml file found at {scan_dir}"
+        )
+
+    # `file` is the path that triggered `break`; it is parsed again by the helper.
+    return _extract_prairieview_metadata(file)
diff --git a/element_interface/suite2p_loader.py b/element_interface/suite2p_loader.py
@@ -138,15 +138,6 @@ def __init__(self, suite2p_plane_dir: str):
             )
         self.creation_time = datetime.fromtimestamp(ops_fp.stat().st_ctime)
 
-        iscell_fp = self.fpath / "iscell.npy"
-        if not iscell_fp.exists():
-            raise FileNotFoundError(
-                'No "iscell.npy" found. Invalid suite2p plane folder: {}'.format(
-                    self.fpath
-                )
-            )
-        self.curation_time = datetime.fromtimestamp(iscell_fp.stat().st_ctime)
-
         # -- Initialize attributes --
         for s2p_type in _suite2p_ftypes:
             setattr(self, "_{}".format(s2p_type), None)
@@ -160,6 +151,11 @@ def __init__(self, suite2p_plane_dir: str):
 
     # -- load core files --
 
+    @property
+    def curation_time(self):
+        """Deprecated: returns `self.creation_time`.
+
+        Kept so that existing readers of `curation_time` keep working. Each
+        access emits a `DeprecationWarning` through the `warnings` module
+        instead of printing to stdout, so callers can filter or escalate it.
+        """
+        import warnings  # local import keeps this change self-contained
+
+        warnings.warn(
+            "'curation_time' is deprecated, set to be the same as 'creation time', no longer reliable.",
+            DeprecationWarning,
+            stacklevel=2,  # attribute the warning to the code reading the property
+        )
+        return self.creation_time
+
     @property
     def ops(self):
         if self._ops is None:
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,2 @@
-dandi
+dandi>=0.56.0
 numpy

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`		`-dandi`
	`1`	`+dandi>=0.56.0`
`2`	`2`	`numpy`