
Commit 2509cde

C-Achard, deruyter92, and Copilot authored
Add lazy_imread function to read list of images with mismatched extensions (#154)
* Update __init__.py
* Add `lazy_imread` function that accepts a list of images. Image reading is now more flexible: input can be a single path, a glob pattern, or a list of image paths. Images of different sizes are stacked using Dask delayed arrays (largely inspired by napari's own default implementation). The previous implementation only accepted a glob pattern when loading multiple images, so files with different extensions could not be loaded together. Fixes issue #3160 with multiple images that have different extensions.
* Update lazy_imread. Simplifies and unifies image reading from folders and lists by reintroducing the lazy_imread function and updating read_images to handle both cases. Improves extension filtering, error handling, and metadata construction.
* Update test_reader.py
* Update _reader.py
* Refactor image reading and error handling in _reader.py. Raises an OSError when an image cannot be read, refactors the Dask delayed lambda used for image normalization, and simplifies file path sorting and variable naming for clarity and consistency.
* Improve image and annotation file handling in reader. Refactors get_folder_parser to process .h5 files directly in the loop and improves error messages. read_images now raises errors for missing or multiple matches, ensuring only a single image is processed when one is expected.
* Refactor lazy image reading and normalization logic. Introduces helper functions for image reading and normalization, refactors lazy_imread to use these helpers and partial application, and makes minor docstring and metadata improvements throughout the file.
* Improve error handling and variable naming in reader. Replaces silent failures with explicit exceptions in lazy_imread, clarifies shape ordering for OpenCV streams, and renames variables for consistency. The return value of read_video now includes an additional "image" string for clarity.
* Clarify error message for multiple image matches. Improves the ValueError message in read_images to better explain the expectation when multiple files match a pattern for non-list path inputs.
* Expand and refactor video and image reader tests. Replaces and extends previous video-related tests with more comprehensive coverage, including property checks, frame reading, error handling, and output-structure validation. Adds new tests for image reading, metadata ordering, grayscale/RGBA handling, and lazy-loading behavior.
* Refactor lazy_imread and improve metadata handling. Renames internal variables in lazy_imread for clarity and updates delayed-array creation for readability. Metadata returned by _load_superkeypoints_diagram now includes the diagram name. Updates a test comment for cv2.imwrite to clarify behavior.
* Refactor image reading logic and improve path handling. Introduces _expand_image_paths to robustly resolve input paths, directories, and globs into valid image files. Replaces lazy_imread with _lazy_imread, updating the logic for Dask-based lazy loading and stacking. Refactors read_images to use the new path-expansion and loading functions, improving consistency and error handling for single and multiple image inputs.
* Add tests for mixed image extensions and glob patterns. Verifies that the image reader correctly handles lists, tuples, and glob patterns with mixed file extensions, ensuring only supported formats are included and order is preserved.
* Update src/napari_deeplabcut/_reader.py (Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>)
* Update src/napari_deeplabcut/_reader.py (Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>)
* Update test_reader.py
* Refactor lazy image reading and update tests. _lazy_imread now passes first_shape and first_dtype to make_delayed_array for improved clarity and consistency. Removes an unnecessary import and assertion in test_reader, and ensures tests properly check unsupported image extensions.
* Fix image extension filtering and update tests. _expand_image_paths now only includes directory files with supported extensions. Tests use a fake unsupported extension instead of .tif, ensuring unsupported files are correctly ignored.
* Use is_numeric_dtype for image_paths type check (pandas >= 3.0). Replaces np.issubdtype with pandas' is_numeric_dtype to more robustly check whether image_paths is numeric in read_hdf, improving compatibility with different pandas index types.
* Update _reader.py
* Revert "Update _reader.py" (reverts commit 8c276c5)
* Update _reader.py
* Revert: handle missing superkeypoints diagram with FileNotFoundError again. Wraps the image loading in a try/except block and raises a FileNotFoundError with a descriptive message if the diagram for the specified super_animal is not found.
* Fix directory extension check and update test variable names for clarity

Co-authored-by: Jaap de Ruyter <deruyter92@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
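The extension filtering described in the commit message can be sketched with plain pathlib. This is a minimal, self-contained illustration; `filter_extensions` and the `SUPPORTED_IMAGES` tuple here are stand-ins mirroring the plugin's helpers, not the actual module code:

```python
# Sketch: keep only paths whose suffix (case-insensitive) is a supported
# image format, so a mixed-extension list can be loaded together.
from pathlib import Path

# Assumed constant, mirroring the plugin's SUPPORTED_IMAGES
SUPPORTED_IMAGES = (".png", ".jpg", ".jpeg")

def filter_extensions(image_paths, valid_extensions=SUPPORTED_IMAGES):
    # Path.suffix.lower() normalizes case, so "img.JPG" is kept
    return [Path(p) for p in image_paths if Path(p).suffix.lower() in valid_extensions]

mixed = ["img01.png", "img02.JPG", "notes.txt", "img03.jpeg"]
kept = filter_extensions(mixed)
print([p.name for p in kept])  # ['img01.png', 'img02.JPG', 'img03.jpeg']
```

The previous glob-only approach (`*.png`) could never produce such a mixed list, which is the limitation this PR removes.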
1 parent f8bf1d3 commit 2509cde

File tree

2 files changed: +489 −50 lines changed


src/napari_deeplabcut/_reader.py

Lines changed: 212 additions & 40 deletions
@@ -1,5 +1,5 @@
 import json
-from collections.abc import Sequence
+from collections.abc import Callable, Sequence
 from pathlib import Path

 import cv2
@@ -58,57 +58,225 @@ def get_config_reader(path):
     return read_config


+def _filter_extensions(
+    image_paths: list[str | Path],
+    valid_extensions: tuple[str] = SUPPORTED_IMAGES,
+) -> list[Path]:
+    """
+    Filter image paths by valid extensions.
+    """
+    return [Path(p) for p in image_paths if Path(p).suffix.lower() in valid_extensions]
+
+
 def get_folder_parser(path):
     if not path or not Path(path).is_dir():
         return None
-
     layers = []
-    files = Path(path).iterdir()
-    images = ""
-    for file in files:
-        if any(file.name.lower().endswith(ext) for ext in SUPPORTED_IMAGES):
-            images = str(Path(path) / f"*{Path(file.name).suffix}")
-            break
+
+    images = _filter_extensions(Path(path).iterdir(), valid_extensions=SUPPORTED_IMAGES)
+
     if not images:
-        raise OSError(f"No supported images were found in {path}.")
+        raise OSError(f"No supported images were found in {path} with extensions {SUPPORTED_IMAGES}.")

-    layers.extend(read_images(images))
+    image_layer = read_images(images)
+    layers.extend(image_layer)
     for file in Path(path).iterdir():
         if file.name.endswith(".h5"):
-            layers.extend(read_hdf(str(file)))
-            break  # one h5 per annotated video
-
+            try:
+                layers.extend(read_hdf(str(file)))
+                break  # one h5 per annotated video
+            except Exception as e:
+                raise RuntimeError(f"Could not read annotation data from {file}") from e
     return lambda _: layers


-def read_images(path):
-    if isinstance(path, list):
-        first_path = Path(path[0])
-        suffixes = first_path.suffixes
-        ext = "".join(suffixes) if suffixes else ""
-        pattern = f"*{ext}" if ext else "*"
-        path = str(first_path.parent / pattern)
-    # Retrieve filepaths exactly as parsed by pims
-    filepaths = []
-    for filepath in Path(path).parent.glob(Path(path).name):
-        relpath = Path(filepath).parts[-3:]
-        filepaths.append(str(Path(*relpath)))
+# Helper functions for lazy image reading and normalization
+# NOTE: forced keyword-only arguments for clarity
+def _read_and_normalize(*, filepath: Path, normalize_func: Callable[[np.ndarray], np.ndarray]) -> np.ndarray:
+    arr = cv2.imread(str(filepath), cv2.IMREAD_UNCHANGED)
+    if arr is None:
+        raise OSError(f"Could not read image: {filepath}")
+    return normalize_func(arr)
+
+
+def _normalize_to_rgb(arr: np.ndarray) -> np.ndarray:
+    if arr.ndim == 2:
+        return cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
+    if arr.ndim == 3 and arr.shape[2] == 4:
+        return cv2.cvtColor(arr, cv2.COLOR_BGRA2RGB)
+    return cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
+
+
+def _expand_image_paths(path: str | Path | list[str | Path] | tuple[str | Path, ...]) -> list[Path]:
+    # Normalize input to list[Path]
+    raw_paths = [Path(p) for p in path] if isinstance(path, (list, tuple)) else [Path(path)]
+
+    expanded: list[Path] = []
+    for p in raw_paths:
+        if p.is_dir() and p.suffix.lower() != ".zarr":
+            file_matches: list[Path] = []
+            for ext in SUPPORTED_IMAGES:
+                file_matches.extend(p.glob(f"*{ext}"))
+            expanded.extend(x for x in natsorted(file_matches, key=str) if x.is_file())
+        else:
+            matches = list(p.parent.glob(p.name))
+            expanded.extend(matches or [p])
+
+    return [p for p in expanded if p.is_file() and p.suffix.lower() in SUPPORTED_IMAGES]
+
+
+# Lazy image reader that supports directories and lists of files
+def _lazy_imread(
+    filenames: str | Path | list[str | Path],
+    use_dask: bool | None = None,
+    stack: bool = True,
+) -> np.ndarray | da.Array | list[np.ndarray | da.Array]:
+    """Lazily reads one or more images with optional Dask support.
+
+    Resolves file paths using `_expand_image_paths`, ensuring consistent
+    handling of directories, glob patterns, and lists/tuples of paths.
+    Images are normalized to RGB and may be wrapped in Dask delayed
+    objects for lazy loading.
+
+    Behavior:
+        * If a single image is resolved:
+            - The image is read eagerly and returned as a NumPy array.
+        * If multiple images are resolved:
+            - The first image is read eagerly to determine shape and dtype.
+            - Subsequent images are loaded lazily via Dask unless
+              `use_dask=False`.
+            - Stacking behavior is controlled by `stack`.
+
+    Args:
+        filenames (str | Path | list[str | Path]):
+            File path(s), directory, or glob pattern(s) to load.
+        use_dask (bool | None, optional):
+            Whether to load images lazily using Dask.
+            Defaults to `True` when multiple files are found, otherwise
+            `False`.
+        stack (bool, optional):
+            If True, stack images along axis 0 into a single array.
+            If False, return a list of arrays or delayed arrays.
+            Defaults to True.
+
+    Returns:
+        np.ndarray | da.Array | list[np.ndarray | da.Array]:
+            Loaded image data. The return type depends on the number of
+            images found, the `use_dask` flag, and the `stack` option.
+
+    Raises:
+        ValueError: If no supported images are found.
+    """
+    expanded = _expand_image_paths(filenames)
+
+    if not expanded:
+        raise ValueError(f"No supported images were found for input: {filenames}")
+
+    if use_dask is None:
+        use_dask = len(expanded) > 1
+
+    images = []
+    first_shape = None
+    first_dtype = None
+
+    def make_delayed_array(fp: Path, first_shape: tuple[int, ...], first_dtype: np.dtype) -> da.Array:
+        """Create a dask array for a single file."""
+        return da.from_delayed(
+            delayed(_read_and_normalize)(filepath=fp, normalize_func=_normalize_to_rgb),
+            shape=first_shape,
+            dtype=first_dtype,
+        )
+
+    for fp in expanded:
+        if first_shape is None:
+            arr0 = _read_and_normalize(filepath=fp, normalize_func=_normalize_to_rgb)
+            first_shape = arr0.shape
+            first_dtype = arr0.dtype
+
+            if use_dask:
+                images.append(make_delayed_array(fp, first_shape, first_dtype))
+            else:
+                images.append(arr0)
+            continue
+
+        if use_dask:
+            images.append(make_delayed_array(fp, first_shape, first_dtype))
+        else:
+            images.append(_read_and_normalize(filepath=fp, normalize_func=_normalize_to_rgb))
+
+    if len(images) == 1:
+        return images[0]
+
+    try:
+        return da.stack(images) if use_dask and stack else (np.stack(images) if stack else images)
+    except ValueError as e:
+        raise ValueError(
+            "Cannot stack images with different shapes using NumPy. "
+            "Ensure all images have the same shape or set stack=False."
+        ) from e
+
+
+# Read images from a list of files or a glob/string path
+def read_images(path: str | Path | list[str | Path]):
+    """Reads one or multiple images and returns a napari Image layer.
+
+    Uses `_expand_image_paths` to resolve the input into a list of valid
+    image files. Supports single paths, glob expressions, directories,
+    and lists or tuples of such paths.
+
+    Behavior:
+        * If one file is found:
+            - Loaded using `dask_image.imread.imread`.
+        * If multiple files are found:
+            - Loaded lazily using `_lazy_imread` into a stacked image
+              layer.
+
+    Args:
+        path (str | Path | list[str | Path]):
+            Input path(s), directory, or glob pattern(s) to expand into
+            supported image files.
+
+    Returns:
+        list[LayerData]:
+            A list containing one napari layer tuple of the form
+            `(data, metadata, "image")`.
+
+    Raises:
+        OSError: If no supported images are found after expansion.
+    """
+    filepaths = _expand_image_paths(path)
+
+    if not filepaths:
+        raise OSError(f"No supported images were found in {path}")
+
+    filepaths = natsorted(filepaths, key=str)
+
+    # Multiple images → lazy-imread stack
+    if len(filepaths) > 1:
+        relative_paths = [str(Path(*fp.parts[-3:])) for fp in filepaths]
+        params = {
+            "name": "images",
+            "metadata": {
+                "paths": relative_paths,
+                "root": str(filepaths[0].parent),
+            },
+        }
+        data = _lazy_imread(filepaths, use_dask=True, stack=True)
+        return [(data, params, "image")]

+    # Single image → old behavior
+    image_path = filepaths[0]
     params = {
         "name": "images",
         "metadata": {
-            "paths": natsorted(filepaths),
-            "root": str(Path(path).parent),
+            "paths": [str(Path(*image_path.parts[-3:]))],
+            "root": str(image_path.parent),
         },
     }
-
-    # https://github.com/soft-matter/pims/issues/452
-    if len(filepaths) == 1:
-        path = next(Path(path).parent.glob(Path(path).name), None)
-        if path is None:
-            raise FileNotFoundError(f"No files found for pattern: {path}")
-    return [(imread(path), params, "image")]
+    return [(imread(str(image_path)), params, "image")]


+# Helper to populate keypoint layer metadata
 def _populate_metadata(
     header: misc.DLCHeader,
     *,
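The core pattern in `_lazy_imread` above — read the first image eagerly to learn shape and dtype, then wrap the rest in `dask.delayed` so pixels are only materialized on `compute()` — can be sketched without any file I/O. The in-memory `read_one` function and `FAKE_FILES` dict below are illustrative stand-ins for `cv2.imread` and real image files:

```python
# Sketch of the eager-first / lazy-rest Dask pattern from _lazy_imread.
import numpy as np
import dask.array as da
from dask import delayed

# Stand-in for files on disk: three uniform 4x5 RGB "images"
FAKE_FILES = {f"frame_{i}.png": np.full((4, 5, 3), i, dtype=np.uint8) for i in range(3)}

def read_one(name):
    # Stand-in for cv2.imread + normalization; only runs on compute()
    return FAKE_FILES[name]

names = sorted(FAKE_FILES)
first = read_one(names[0])  # eager read: supplies the shape/dtype template
lazy = [
    da.from_delayed(delayed(read_one)(n), shape=first.shape, dtype=first.dtype)
    for n in names
]
stack = da.stack(lazy)      # lazy (3, 4, 5, 3) stack; no pixels read yet
result = stack.compute()    # all reads happen here
```

Note the caveat this design implies: `da.from_delayed` trusts the declared shape/dtype, so if a later image differs from the first, the mismatch only surfaces at compute time — which is why the stacking step above is wrapped in a try/except with a descriptive error.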
@@ -175,6 +343,7 @@ def _load_config(config_path: str):
         return yaml.safe_load(file)


+# Read config file and create keypoint layer metadata
 def read_config(configname: str) -> list[LayerData]:
     config = _load_config(configname)
     header = misc.DLCHeader.from_config(config)
@@ -196,6 +365,7 @@ def read_config(configname: str) -> list[LayerData]:
     return [(None, metadata, "points")]


+# Read HDF file and create keypoint layers
 def read_hdf(filename: str) -> list[LayerData]:
     config_path = misc.find_project_config_path(filename)
     layers = []
@@ -228,7 +398,7 @@ def read_hdf(filename: str) -> list[LayerData]:
     nrows = df.shape[0]
     data = np.empty((nrows, 3))
     image_paths = df["level_0"]
-    if np.issubdtype(image_paths.dtype, np.number):
+    if pd.api.types.is_numeric_dtype(getattr(image_paths, "dtype", np.asarray(image_paths).dtype)):
         image_inds = image_paths.values
         paths2inds = []
     else:
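The hunk above swaps `np.issubdtype` for `pd.api.types.is_numeric_dtype`. The motivation, as the commit message notes, is robustness across pandas index/series dtypes: pandas extension dtypes (such as nullable `Int64`) are not plain NumPy dtypes, so a NumPy-only check can misbehave on them, while the pandas predicate handles both. A small sketch (the example series are illustrative):

```python
# is_numeric_dtype accepts both plain NumPy dtypes and pandas extension dtypes.
import pandas as pd

numeric = pd.Series([0, 1, 2])                    # int64 (NumPy dtype)
strings = pd.Series(["img0.png", "img1.png"])     # object dtype
nullable = pd.Series([0, 1, None], dtype="Int64") # pandas extension dtype

print(pd.api.types.is_numeric_dtype(numeric))   # True
print(pd.api.types.is_numeric_dtype(strings))   # False
print(pd.api.types.is_numeric_dtype(nullable))  # True
```

The `getattr(image_paths, "dtype", np.asarray(image_paths).dtype)` fallback in the diff additionally covers inputs that are not Series/Index objects at all.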
@@ -254,6 +424,7 @@ def read_hdf(filename: str) -> list[LayerData]:
     return layers


+# Video reader using OpenCV
 class Video:
     def __init__(self, video_path):
         if not Path(video_path).is_file():
@@ -297,13 +468,14 @@ def close(self):
 def read_video(filename: str, opencv: bool = True):
     if opencv:
         stream = Video(filename)
-        shape = stream.width, stream.height, 3
+        # NOTE construct output shape tuple in (H, W, C) order to match read_frame() data
+        shape = stream.height, stream.width, 3

         def _read_frame(ind):
             stream.set_to_frame(ind)
             return stream.read_frame()

-        lazy_imread = delayed(_read_frame)
+        lazy_reader = delayed(_read_frame)
     else:  # pragma: no cover
         from pims import PyAVReaderIndexed

@@ -313,9 +485,9 @@ def _read_frame(ind):
             raise ImportError("`pip install av` to use the PyAV video reader.") from None

         shape = stream.frame_shape
-        lazy_imread = delayed(stream.get_frame)
+        lazy_reader = delayed(stream.get_frame)

-    movie = da.stack([da.from_delayed(lazy_imread(i), shape=shape, dtype=np.uint8) for i in range(len(stream))])
+    movie = da.stack([da.from_delayed(lazy_reader(i), shape=shape, dtype=np.uint8) for i in range(len(stream))])
     elems = list(Path(filename).parts)
     elems[-2] = "labeled-data"
     elems[-1] = Path(elems[-1]).stem  # + Path(filename).suffix
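The `shape = stream.height, stream.width, 3` fix in the hunks above matters because decoded frames come back as NumPy arrays in (height, width, channels) order, while video properties report width and height as separate scalars. Declaring the transposed shape to `da.from_delayed` would produce a Dask array whose metadata disagrees with the actual frame data. A NumPy-only sketch (the 640×480 dimensions are illustrative):

```python
# Demonstrate why the declared shape must be (H, W, C), not (W, H, C).
import numpy as np

width, height = 640, 480
frame = np.zeros((height, width, 3), dtype=np.uint8)  # what a frame reader returns

declared_wrong = (width, height, 3)   # the old bug: (W, H, C)
declared_right = (height, width, 3)   # the fix:     (H, W, C)

print(frame.shape == declared_right)  # True
print(frame.shape == declared_wrong)  # False (unless the video happens to be square)
```

Because `da.from_delayed` never validates the declared shape against the computed chunk, this kind of mismatch only surfaces downstream, typically as confusing display or indexing errors.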
@@ -326,4 +498,4 @@ def _read_frame(ind):
             "root": root,
         },
     }
-    return [(movie, params)]
+    return [(movie, params, "image")]
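The final hunk makes `read_video` return a full three-element layer tuple. napari reader plugins yield `(data, metadata, layer_type)` tuples; omitting the third element leaves the layer type implicit. A dependency-free sketch of the contract (the array and dict contents are illustrative stand-ins for the dask movie stack and its params):

```python
# Sketch of the (data, metadata, layer_type) tuple the reader now returns.
import numpy as np

movie = np.zeros((10, 480, 640, 3), dtype=np.uint8)   # stand-in for the dask stack
params = {"name": "video", "metadata": {"root": "/data", "paths": []}}

layer = (movie, params, "image")  # explicit layer type, matching read_images
print(len(layer), layer[2])
```

Making the layer type explicit also keeps `read_video`'s output structurally identical to `read_images`', which simplifies the output-structure tests this PR adds.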
