Fix `convert_ids_to_logits()` not working with single-channel IDs, plus a number of related refactors and additions made along the way (#223)

ATATC · web-flow · commit 1732af20a14e · 2026-02-22T21:29:26.000-05:00
* Refactored `convert_ids_to_logits` and `convert_logits_to_ids` for enhanced flexibility and clarity. (#222)
* Refactored `logitfy_no_grad` to simplify its logic and enter the no-grad context only where necessary. (#222)
* Refactored `convert_ids_to_logits` to simplify its shape modification logic. (#222)
* Fixed output handling under deep supervision in the segmentation preset. (#222)
* Refactored the `forward` methods of the loss functions for improved readability and consistency. (#222)
* Refactored the dice metric to remove a double summation. (#222)
* Added a `folder` method to the `Dataset` class for retrieving the folder attribute. (#222)
* Added validation to `convert_ids_to_logits` to enforce non-negative integer class IDs. (#222)
* Refactored type hints in `inspection.py` to explicitly use tuples for shape definitions. (#222)
* Added logging of the worst validation case during training for improved tracking. (#222)
* Refactored the statistical shape computation in `inspection.py` for simplification and removed redundant caching. (#222)
* Refactored and renamed the statistical shape computation in `inspection.py` for improved clarity and consistency. (#222)
* Added the `TensorLoader` import to `__init__.py` in the `data` module. (#222)
* Added an `order` parameter to `JointTransform` for a configurable transform application sequence. (#222)
* Refactored `JointTransform` to use `Literal`-based type hints for `order` to enhance type safety. (#222)
* Refactored the `sanity_check` logic into a dedicated `sanity_check` method in `training.py` for improved modularity and code reuse. (#222)
* Refactored `sanity_check` to accept `template_model` as a parameter and adjusted its usage in `training.py` for improved flexibility and clarity. (#222)
diff --git a/mipcandy/common/optim/loss.py b/mipcandy/common/optim/loss.py
@@ -37,29 +37,11 @@ def __init__(self, num_classes: int, include_background: bool) -> None:
         self.include_background: bool = include_background
 
     def logitfy_no_grad(self, ids: torch.Tensor) -> torch.Tensor:
-        with torch.no_grad():
-            if self.num_classes != 1 and ids.shape[1] == 1:
-                if (d := ids.ndim - 2) not in (1, 2, 3):
-                    raise ValueError(f"Expected labels to be 1D, 2D, or 3D, got {d} spatial dimensions")
-                return convert_ids_to_logits(ids.int(), d, self.num_classes)
+        if self.num_classes != 1 and ids.shape[1] == 1:
+            with torch.no_grad():
+                return convert_ids_to_logits(ids.int(), self.num_classes)
         return ids.float()
 
-    def forward(self, outputs: torch.Tensor, labels: torch.Tensor) -> tuple[torch.Tensor, dict[str, float]]:
-        if not self.validation_mode:
-            return self._forward(outputs, labels)
-        with torch.no_grad():
-            c, metrics = self._forward(outputs, labels)
-            outputs = convert_logits_to_ids(outputs)
-            dice = 0
-            for i in range(0 if self.include_background else 1, self.num_classes):
-                class_dice = binary_dice(outputs == i, labels == i).item()
-                dice += class_dice
-                metrics[f"dice {i}"] = class_dice
-            metrics["dice"] = dice_similarity_coefficient(
-                self.logitfy_no_grad(outputs), self.logitfy_no_grad(labels)
-            ).item()
-            return c, metrics
-
 
 class DiceCELossWithLogits(_SegmentationLoss):
     def __init__(self, num_classes: int, *, lambda_ce: float = 1, lambda_soft_dice: float = 1,
@@ -81,6 +63,20 @@ def _forward(self, outputs: torch.Tensor, labels: torch.Tensor) -> tuple[torch.T
         c = self.lambda_ce * ce + self.lambda_soft_dice * (1 - dice)
         return c, metrics
 
+    def forward(self, outputs: torch.Tensor, labels: torch.Tensor) -> tuple[torch.Tensor, dict[str, float]]:
+        if not self.validation_mode:
+            return self._forward(outputs, labels)
+        with torch.no_grad():
+            c, metrics = self._forward(outputs, labels)
+            outputs = convert_logits_to_ids(outputs)
+            for i in range(0 if self.include_background else 1, self.num_classes):
+                class_dice = binary_dice(outputs == i, labels == i).item()
+                metrics[f"dice {i}"] = class_dice
+            metrics["dice"] = dice_similarity_coefficient(
+                self.logitfy_no_grad(outputs), self.logitfy_no_grad(labels)
+            ).item()
+            return c, metrics
+
 
 class DiceBCELossWithLogits(_SegmentationLoss):
     def __init__(self, *, lambda_bce: float = 1, lambda_soft_dice: float = 1,
@@ -99,3 +95,12 @@ def _forward(self, outputs: torch.Tensor, labels: torch.Tensor) -> tuple[torch.T
         metrics = {"soft dice": dice.item(), "bce loss": bce.item()}
         c = self.lambda_bce * bce + self.lambda_soft_dice * (1 - dice)
         return c, metrics
+
+    def forward(self, outputs: torch.Tensor, labels: torch.Tensor) -> tuple[torch.Tensor, dict[str, float]]:
+        if not self.validation_mode:
+            return self._forward(outputs, labels)
+        with torch.no_grad():
+            c, metrics = self._forward(outputs, labels)
+            outputs = convert_logits_to_ids(outputs).bool()
+            metrics["dice"] = binary_dice(outputs, labels.bool()).item()
+            return c, metrics
diff --git a/mipcandy/data/__init__.py b/mipcandy/data/__init__.py
@@ -1,6 +1,7 @@
 from mipcandy.data.convertion import convert_ids_to_logits, convert_logits_to_ids, auto_convert
-from mipcandy.data.dataset import Loader, UnsupervisedDataset, SupervisedDataset, DatasetFromMemory, MergedDataset, \
-    PathBasedUnsupervisedDataset, SimpleDataset, PathBasedSupervisedDataset, NNUNetDataset, BinarizedDataset
+from mipcandy.data.dataset import Loader, TensorLoader, UnsupervisedDataset, SupervisedDataset, DatasetFromMemory, \
+    MergedDataset, PathBasedUnsupervisedDataset, SimpleDataset, PathBasedSupervisedDataset, NNUNetDataset, \
+    BinarizedDataset
 from mipcandy.data.download import download_dataset
 from mipcandy.data.geometric import ensure_num_dimensions, orthographic_views, aggregate_orthographic_views, crop
 from mipcandy.data.inspection import InspectionAnnotation, InspectionAnnotations, load_inspection_annotations, \
diff --git a/mipcandy/data/convertion.py b/mipcandy/data/convertion.py
@@ -1,25 +1,30 @@
-from typing import Literal
-
 import torch
 
 from mipcandy.common import Normalize
 
 
-def convert_ids_to_logits(ids: torch.Tensor, d: Literal[1, 2, 3], num_classes: int) -> torch.Tensor:
-    if ids.dtype != torch.int or ids.min() < 0:
-        raise TypeError("`ids` should be positive integers")
-    d += 1
-    if ids.ndim != d:
-        if ids.ndim == d + 1 and ids.shape[1] == 1:
-            ids = ids.squeeze(1)
-        else:
-            raise ValueError(f"`ids` should be {d} dimensional or {d + 1} dimensional with single channel")
-    logits = torch.zeros((ids.shape[0], num_classes, *ids.shape[1:]), device=ids.device, dtype=torch.float32)
-    logits.scatter_(1, ids.unsqueeze(1).long(), 1)
+def convert_ids_to_logits(ids: torch.Tensor, num_classes: int, *, channel_dim: int = 1) -> torch.Tensor:
+    """
+    :param ids: class ids (..., 1, ...)
+    :param num_classes: number of classes
+    :param channel_dim: the index of the channel dimension
+    :return: logits (..., num_classes, ...)
+    """
+    if torch.is_floating_point(ids) or (ids < 0).any():
+        raise TypeError("Class ids must be positive integers")
+    shape = list(ids.shape)
+    shape[channel_dim] = num_classes
+    logits = torch.zeros(shape, device=ids.device, dtype=torch.float32)
+    logits.scatter_(channel_dim, ids.long(), 1)
     return logits
 
 
 def convert_logits_to_ids(logits: torch.Tensor, *, channel_dim: int = 1) -> torch.Tensor:
+    """
+    :param logits: logits (..., num_classes, ...)
+    :param channel_dim: the index of the channel dimension
+    :return: class ids (..., 1, ...)
+    """
     return logits.round().int() if logits.shape[channel_dim] < 2 else logits.argmax(channel_dim, keepdim=True)
 
 
diff --git a/mipcandy/data/dataset.py b/mipcandy/data/dataset.py
@@ -339,6 +339,9 @@ def __init__(self, folder: str | PathLike[str], *, split: str | Literal["Tr", "T
         self._prefix: str = prefix
         self._align_spacing: bool = align_spacing
 
+    def folder(self) -> str:
+        return self._folder
+
     @staticmethod
     def _create_subset(folder: str) -> None:
         if exists(folder) and len(listdir(folder)) > 0:
diff --git a/mipcandy/data/inspection.py b/mipcandy/data/inspection.py
@@ -90,7 +90,7 @@ def save(self, path: str | PathLike[str]) -> None:
             }, f)
 
     def _get_shapes(self, get_shape: Callable[[InspectionAnnotation], AmbiguousShape]) -> tuple[
-        AmbiguousShape | None, AmbiguousShape, AmbiguousShape]:
+        tuple[int, ...] | None, tuple[int, ...], tuple[int, ...]]:
         depths = []
         widths = []
         heights = []
@@ -105,26 +105,29 @@ def _get_shapes(self, get_shape: Callable[[InspectionAnnotation], AmbiguousShape
                 widths.append(shape[2])
         return tuple(depths) if depths else None, tuple(heights), tuple(widths)
 
-    def shapes(self) -> tuple[AmbiguousShape | None, AmbiguousShape, AmbiguousShape]:
+    def shapes(self) -> tuple[tuple[int, ...] | None, tuple[int, ...], tuple[int, ...]]:
         if self._shapes:
             return self._shapes
         self._shapes = self._get_shapes(lambda annotation: annotation.shape)
         return self._shapes
 
-    def foreground_shapes(self) -> tuple[AmbiguousShape | None, AmbiguousShape, AmbiguousShape]:
+    def statistical_shape(self, *, percentile: float = .95) -> Shape:
+        depths, heights, widths = self.shapes()
+        percentile *= 100
+        sfs = (round(np.percentile(heights, percentile)), round(np.percentile(widths, percentile)))
+        return (round(np.percentile(depths, percentile)),) + sfs if depths else sfs
+
+    def foreground_shapes(self) -> tuple[tuple[int, ...] | None, tuple[int, ...], tuple[int, ...]]:
         if self._foreground_shapes:
             return self._foreground_shapes
         self._foreground_shapes = self._get_shapes(lambda annotation: annotation.foreground_shape())
         return self._foreground_shapes
 
     def statistical_foreground_shape(self, *, percentile: float = .95) -> Shape:
-        if self._statistical_foreground_shape:
-            return self._statistical_foreground_shape
         depths, heights, widths = self.foreground_shapes()
         percentile *= 100
         sfs = (round(np.percentile(heights, percentile)), round(np.percentile(widths, percentile)))
-        self._statistical_foreground_shape = (round(np.percentile(depths, percentile)),) + sfs if depths else sfs
-        return self._statistical_foreground_shape
+        return (round(np.percentile(depths, percentile)),) + sfs if depths else sfs
 
     def crop_foreground(self, i: int, *, expand_ratio: float = 1) -> tuple[torch.Tensor, torch.Tensor]:
         image, label = self._dataset.image(i), self._dataset.label(i)
@@ -371,10 +374,10 @@ def __init__(self, annotations: InspectionAnnotations, batch_size: int, *, num_p
             self._images, self._labels = images, images.copy()
         self._batch_size: int = batch_size
         self._oversample_rate: float = oversample_rate
-        sfs = self._annotations.statistical_foreground_shape(percentile=self._percentile)
-        sfs = [ceil(s / min_factor) * min_factor for s in sfs]
-        self._roi_shape: Shape = (min(sfs[0], 2048), min(sfs[1], 2048)) if len(sfs) == 2 else (
-            min(sfs[0], 128), min(sfs[1], 128), min(sfs[2], 128))
+        median_shape = self._annotations.statistical_shape(percentile=self._percentile)
+        median_shape = [ceil(s / min_factor) * min_factor for s in median_shape]
+        self._roi_shape: Shape = (min(median_shape[0], 2048), min(median_shape[1], 2048)) if len(
+            median_shape) == 2 else (min(median_shape[0], 128), min(median_shape[1], 128), min(median_shape[2], 128))
 
     def convert_idx(self, idx: int) -> int:
         idx, idx2 = self._images[idx], self._labels[idx]
diff --git a/mipcandy/data/transform.py b/mipcandy/data/transform.py
@@ -1,27 +1,38 @@
+from typing import Literal
+
 import torch
 from torch import nn
 
 from mipcandy.types import Transform
 
+type _Order = Literal["transform", "image_only", "label_only"]
+
 
 class JointTransform(nn.Module):
     def __init__(self, *, transform: Transform | None = None, image_only: Transform | None = None,
-                 label_only: Transform | None = None, keys: tuple[str, str] = ("image", "label")) -> None:
+                 label_only: Transform | None = None, keys: tuple[str, str] = ("image", "label"),
+                 order: tuple[_Order, _Order, _Order] = ("transform", "image_only", "label_only")) -> None:
         super().__init__()
         self.transform: Transform | None = transform
         self.image_only: Transform | None = image_only
         self.label_only: Transform | None = label_only
         self._keys: tuple[str, str] = keys
+        self._order: tuple[_Order, _Order, _Order] = order
 
     def forward(self, image: torch.Tensor, label: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
         ik, lk = self._keys
         data = {ik: image, lk: label}
-        if self.transform:
-            data = self.transform(data)
-        if self.image_only:
-            data[ik] = self.image_only(data[ik])
-        if self.label_only:
-            data[lk] = self.label_only(data[lk])
+        for t in self._order:
+            transform = getattr(self, t)
+            if not transform:
+                continue
+            match t:
+                case "transform":
+                    data = transform(data)
+                case "image_only":
+                    data[ik] = transform(data[ik])
+                case "label_only":
+                    data[lk] = transform(data[lk])
         return data[ik], data[lk]
 
 
diff --git a/mipcandy/metrics.py b/mipcandy/metrics.py
@@ -79,8 +79,4 @@ def soft_dice(outputs: torch.Tensor, labels: torch.Tensor, *, smooth: float = 1,
     label_sum = labels.sum(axes)
     intersection = (outputs * labels).sum(axes)
     output_sum = outputs.sum(axes)
-    if batch_dice:
-        intersection = intersection.sum(0)
-        output_sum = output_sum.sum(0)
-        label_sum = label_sum.sum(0)
     return do_reduction((2 * intersection + smooth) / (label_sum + output_sum + smooth), reduction)
diff --git a/mipcandy/presets/segmentation.py b/mipcandy/presets/segmentation.py
@@ -158,7 +158,10 @@ def validate_case(self, idx: int, image: torch.Tensor, label: torch.Tensor, tool
         if self.deep_supervision:
             if not isinstance(toolbox.criterion, DeepSupervisionWrapper):
                 raise TypeError("Deep supervision is enabled but criterion is not a `DeepSupervisionWrapper`")
-            output = output[0] if isinstance(output, (list, tuple)) else output[:, 0]
+            if isinstance(output, (list, tuple)):
+                output = output[0]
+            elif output.ndim > label.ndim:
+                output = output[:, 0]
             loss, metrics = toolbox.criterion([output], [label])
         else:
             loss, metrics = toolbox.criterion(output, label)
diff --git a/mipcandy/training.py b/mipcandy/training.py
@@ -27,7 +27,7 @@
 from mipcandy.frontend import Frontend
 from mipcandy.layer import WithPaddingModule, WithNetwork
 from mipcandy.profiler import Profiler
-from mipcandy.sanity_check import sanity_check
+from mipcandy.sanity_check import sanity_check, SanityCheckResult
 from mipcandy.types import Params, Setting, AmbiguousShape
 
 
@@ -391,6 +391,12 @@ def empty_cache(self) -> None:
 
     # Training methods
 
+    def sanity_check(self, template_model: nn.Module, example_shape: AmbiguousShape) -> SanityCheckResult:
+        try:
+            return sanity_check(template_model, example_shape, device=self._device)
+        finally:
+            del template_model
+
     @abstractmethod
     def backward(self, images: torch.Tensor, labels: torch.Tensor, toolbox: TrainerToolbox) -> tuple[float, dict[
         str, float]]:
@@ -454,7 +460,7 @@ def train(self, num_epochs: int, *, note: str = "", num_checkpoints: int = 5, co
         template_model = self.build_network(example_shape)
         model_name = template_model.__class__.__name__
         self.log(f"Model: {model_name}")
-        sanity_check_result = sanity_check(template_model, example_shape, device=self._device)
+        sanity_check_result = self.sanity_check(template_model, example_shape)
         self.log(str(sanity_check_result))
         self.log(f"Example output shape: {tuple(sanity_check_result.output.shape)}")
         self.record_profiler()
@@ -467,7 +473,7 @@ def train(self, num_epochs: int, *, note: str = "", num_checkpoints: int = 5, co
         self._frontend.on_experiment_created(self._experiment_id, self._trainer_variant, model_name, note,
                                              sanity_check_result.num_macs, sanity_check_result.num_params, num_epochs,
                                              early_stop_tolerance)
-        del sanity_check_result, template_model, example_input
+        del sanity_check_result, example_input
         self.empty_cache()
         try:
             for epoch in range(self._tracker.epoch, self._tracker.epoch + num_epochs):
@@ -506,6 +512,7 @@ def train(self, num_epochs: int, *, note: str = "", num_checkpoints: int = 5, co
                     self.log(f"Estimated time of completion in {etc:.1f} seconds at {datetime.fromtimestamp(
                         time() + etc):%m-%d %H:%M:%S}")
                 self.show_metrics_per_case(epoch, metrics)
+                self.log(f"Validation worst case: {self._tracker.worst_case}")
                 self.show_metrics(epoch, metrics, "validation", lookup_prefix="val ")
                 if score > self._tracker.best_score:
                     copy(checkpoint_path("latest"), checkpoint_path("best"))