Skip to content

Commit 2ca781c

Browse files
committed
integrate volume sampler into semantic dataset
1 parent 2974070 commit 2ca781c

File tree

4 files changed

+55
-222
lines changed

4 files changed

+55
-222
lines changed

examples/mito.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
system:
2+
cpus: 1
3+
gpus: 1
4+
seed: 1
5+
6+
dataset:
7+
training:
8+
susumu100:
9+
images: ["file:///mnt/ceph/users/jwu/31_organelle/30_SM-WT1/84_susumu_inference/04_precomputed/image:mip=1",]
10+
label: "file:///mnt/ceph/users/jwu/31_organelle/30_SM-WT1/84_susumu_inference/04_precomputed/mito:mip=1"
11+
validation:
12+
susumu100:
13+
images: ["file:///mnt/ceph/users/jwu/31_organelle/30_SM-WT1/84_susumu_inference/04_precomputed/image:mip=1",]
14+
label: "file:///mnt/ceph/users/jwu/31_organelle/30_SM-WT1/84_susumu_inference/04_precomputed/mito:mip=1"
15+
model:
16+
in_channels: 1
17+
out_channels: 3
18+
19+
train:
20+
iter_start: 0
21+
iter_stop: 1000000
22+
class_rebalance: false
23+
# batch size per GPU
24+
# The dataprovider should provide nGPU*batch_size batches!
25+
batch_size: 1
26+
output_dir: "./"
27+
patch_size: [128, 128, 128]
28+
learning_rate: 0.001
29+
#training_interval: 200
30+
#validation_interval: 2000
31+
training_interval: 2
32+
validation_interval: 4

neutorch/data/dataset.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,16 @@ def __init__(self, samples: list):
123123
super().__init__(samples)
124124

125125
@classmethod
126-
def from_config(cls, cfg: CfgNode, is_train: bool):
126+
def from_config(cls, cfg: CfgNode, is_train: bool, **kwargs):
127+
"""Construct a semantic dataset with chunk or volume
128+
129+
Args:
130+
cfg (CfgNode): _description_
131+
is_train (bool): _description_
132+
133+
Returns:
134+
_type_: _description_
135+
"""
127136
if is_train:
128137
name2chunks = cfg.dataset.training
129138
else:
@@ -134,7 +143,8 @@ def from_config(cls, cfg: CfgNode, is_train: bool):
134143
sample = SemanticSample.from_explicit_dict(
135144
name2path,
136145
output_patch_size=cfg.train.patch_size,
137-
num_classes=cfg.model.out_channels)
146+
num_classes=cfg.model.out_channels,
147+
**kwargs)
138148
samples.append(sample)
139149

140150
return cls( samples )

neutorch/data/sample.py

Lines changed: 11 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55

66
import numpy as np
77

8-
from chunkflow.lib.cartesian_coordinate import BoundingBox, Cartesian, BoundingBoxes
9-
from chunkflow.chunk import Chunk, load_chunk
8+
from chunkflow.lib.cartesian_coordinate import BoundingBox, Cartesian
9+
from chunkflow.chunk import Chunk
10+
from chunkflow.chunk.utils import load_chunk_or_volume
1011
from chunkflow.lib.synapses import Synapses
1112

1213
from neutorch.data.patch import Patch
1314
from neutorch.data.transform import *
14-
15-
from cloudvolume import CloudVolume
15+
from chunkflow.volume import PrecomputedVolume
1616

1717
DEFAULT_PATCH_SIZE = Cartesian(128, 128, 128)
1818
DEFAULT_NUM_CLASSES = 1
@@ -48,7 +48,8 @@ def sampling_weight(self) -> int:
4848

4949
class Sample(AbstractSample):
5050
def __init__(self,
51-
images: List[Chunk], label: Union[np.ndarray, Chunk],
51+
images: List[Chunk, PrecomputedVolume],
52+
label: Union[Chunk, PrecomputedVolume],
5253
output_patch_size: Cartesian,
5354
forbbiden_distance_to_boundary: tuple = None) -> None:
5455
"""Image sample with ground truth annotations
@@ -351,13 +352,15 @@ def __init__(self,
351352
def from_explicit_path(cls,
352353
image_paths: list, label_path: str,
353354
output_patch_size: Cartesian,
354-
num_classes: int=DEFAULT_NUM_CLASSES):
355-
label = load_chunk(label_path)
355+
num_classes: int=DEFAULT_NUM_CLASSES,
356+
**kwargs,
357+
):
358+
label = load_chunk_or_volume(label_path, **kwargs)
356359
print(f'label path: {label_path} with size {label.shape}')
357360

358361
images = []
359362
for image_path in image_paths:
360-
image = load_chunk(image_path)
363+
image = load_chunk_or_volume(image_path, **kwargs)
361364
images.append(image)
362365
print(f'image path: {image_path} with size {image.shape}')
363366
return cls(images, label, output_patch_size, num_classes=num_classes)
@@ -438,76 +441,6 @@ def transform(self):
438441
# MissAlignment(),
439442
])
440443

441-
class PrecomputedVolumeSample(AbstractSample):
442-
def __init__(self,
443-
output_patch_size: Union[int, tuple, Cartesian],
444-
volume: Union[str, CloudVolume],
445-
mask: Chunk = None,
446-
forground_weight: int = None):
447-
"""Neuroglancer Precomputed Volume Dataset
448-
449-
Args:
450-
volume_path (str): cloudvolume precomputed path
451-
patch_size (Union[int, tuple], optional): patch size of network input. Defaults to volume block size.
452-
mask (Chunk, optional): forground mask. Defaults to None.
453-
forground_weight (int, optional): weight of bounding boxes containing forground voxels. Defaults to None.
454-
"""
455-
super.__init__(output_patch_size)
456-
457-
if isinstance(volume, str):
458-
self.vol = CloudVolume(
459-
volume,
460-
fill_missing=True,
461-
parallel=False,
462-
progress=False,
463-
green_threads = False,
464-
)
465-
elif isinstance(volume, CloudVolume):
466-
self.vol = volume
467-
else:
468-
raise ValueError("volume should be either an instance of CloudVolume or precomputed volume path.")
469-
470-
# self.voxel_size = tuple(self.vol.resolution)
471-
472-
self.bboxes = BoundingBoxes.from_manual_setup(
473-
self.output_patch_size,
474-
roi_start=(0, 0, 0),
475-
roi_stop=self.vol.bounds.maxpt[-3:][::-1],
476-
bounded=True,
477-
)
478-
print(f'found {len(self.bboxes)} bounding boxes in volume: {volume}')
479-
480-
if mask is not None:
481-
# find out bboxes containing forground voxels
482-
483-
if forground_weight is None:
484-
pass
485-
486-
def __getitem__(self, idx: int):
487-
bbox = self.bboxes[idx]
488-
xyz_slices = bbox.to_slices()[::-1]
489-
print('xyz slices: ', xyz_slices)
490-
image = self.vol[xyz_slices]
491-
image = np.asarray(image)
492-
image = np.transpose(image)
493-
# image = image.astype(np.float32)
494-
# image /= 255.
495-
# chunk = Chunk(arr, voxel_offset=bbox.minpt, voxel_size=self.voxel_size)
496-
# tensor = torch.Tensor(arr)
497-
target = deepcopy(image)
498-
patch = Patch(image, target)
499-
self.transform(patch)
500-
patch.to_tensor()
501-
patch.normalize()
502-
return patch.image, patch.target
503-
504-
@property
505-
def random_patch(self):
506-
idx = random.randrange(0, len(self.bboxes))
507-
return self.__getitem__(idx)
508-
509-
def __len__(self):
510-
return len(self.bboxes)
511444

512445
if __name__ == '__main__':
513446
import os

neutorch/data/volume.py

Lines changed: 0 additions & 142 deletions
This file was deleted.

0 commit comments

Comments (0)