IBM · blumenstiel · Dec 12, 2024 · Jan 8, 2025 · Jan 22, 2025 · Jan 22, 2025
diff --git a/examples/confs/multimae_sen1floods11.yaml b/examples/confs/multimae_sen1floods11.yaml
@@ -35,8 +35,7 @@ data:
     num_workers: 0
     modalities:
       - S2L2A
-      - S1
-      - LULC
+      - S1GRD
     rgb_modality: S2L2A # If not provided, uses first modality
     rgb_indices:
       - 3
@@ -45,18 +44,15 @@ data:
 
     train_data_root:
       S2L2A: data/sen1floods11/data/data/flood_events/HandLabeled/S2L2AHand
-      S1: data/sen1floods11/data/data/flood_events/HandLabeled/S1Hand
-      LULC: data/sen1floods11/data/data/flood_events/HandLabeled/LULCHand
+      S1GRD: data/sen1floods11/data/data/flood_events/HandLabeled/S1Hand
     train_label_data_root: data/sen1floods11/data/data/flood_events/HandLabeled/LabelHand
     val_data_root:
       S2L2A: data/sen1floods11/data/data/flood_events/HandLabeled/S2L2AHand
-      S1: data/sen1floods11/data/data/flood_events/HandLabeled/S1Hand
-      LULC: data/sen1floods11/data/data/flood_events/HandLabeled/LULCHand
+      S1GRD: data/sen1floods11/data/data/flood_events/HandLabeled/S1Hand
     val_label_data_root: data/sen1floods11/data/data/flood_events/HandLabeled/LabelHand
     test_data_root:
       S2L2A: data/sen1floods11/data/data/flood_events/HandLabeled/S2L2AHand
-      S1: data/sen1floods11/data/data/flood_events/HandLabeled/S1Hand
-      LULC: data/sen1floods11/data/data/flood_events/HandLabeled/LULCHand
+      S1GRD: data/sen1floods11/data/data/flood_events/HandLabeled/S1Hand
     test_label_data_root: data/sen1floods11/data/data/flood_events/HandLabeled/LabelHand
 
     train_split: data/sen1floods11/splits/splits/flood_handlabeled/dev_train.txt
@@ -66,8 +62,7 @@ data:
     allow_substring_file_names: True
     image_grep:
       S2L2A: "*_S2L2AHand.tif"
-      S1: "*_S1Hand.tif"
-      LULC: "*_LULCHand.npy"
+      S1GRD: "*_S1Hand.tif"
     label_grep: "*_LabelHand.tif"
     no_label_replace: -1
     no_data_replace: 0
@@ -86,7 +81,7 @@ data:
         - 3711.071
         - 3416.714
         - 2849.625
-      S1:
+      S1GRD:
         - -12.577
         - -20.265
 
@@ -104,17 +99,13 @@ data:
         - 1652.703
         - 1471.002
         - 1365.30
-      S1:
+      S1GRD:
         - 5.179
         - 5.872
 
     num_classes: 2
 
     train_transform:
-      - class_path: albumentations.RandomCrop
-        init_args:
-          height: 224
-          width: 224
       - class_path: albumentations.D4
       - class_path: ToTensorV2
 
@@ -127,35 +118,33 @@ model:
       backbone_pretrained: false
       backbone: multimae_base
       backbone_input_adapters:
-        - S1
+        - S1GRD
         - S2L2A
-        - LULC
-      decoder: FCNDecoder  # UperNetDecoder
-      decoder_num_convs: 4  # only for FCNDecoder
-      # decoder_scale_modules: True  # only for UperNetDecoder
-      decoder_channels: 256
-      num_classes: 2
+      backbone_merge_method: mean
+      necks:
+        - name: ReshapeTokensToImage
+          remove_cls_token: False  # Needs to be False because MultiMAE has no CLS token
+        - name: SelectIndices
+          indices: [2, 5, 8, 11]
+        - name: LearnedInterpolateToPyramidal
+      decoder: UNetDecoder
+      decoder_channels: [512, 256, 128, 64]
       head_dropout: 0.1
-      head_channel_list:
-        - 256
-    loss: ce
+      num_classes: 2
+    loss: dice
     ignore_index: -1
-    class_weights:
-      - 0.3
-      - 0.7
-    class_names:
-      - Others
-      - Flood
     freeze_backbone: false
     freeze_decoder: false
 
 optimizer:
   class_path: torch.optim.AdamW
   init_args:
-    lr: 6.e-5
+    lr: 1.e-4
     weight_decay: 0.05
 lr_scheduler:
   class_path: ReduceLROnPlateau
   init_args:
     monitor: val/loss
+    factor: 0.5
+    patience: 5
 
diff --git a/terratorch/datamodules/generic_multimodal_data_module.py b/terratorch/datamodules/generic_multimodal_data_module.py
@@ -23,6 +23,10 @@
 logger = logging.getLogger("terratorch")
 
 def collate_chunk_dicts(batch_list):
+    if isinstance(batch_list, dict):
+        # Input is already a dict rather than a list of dicts (batch size = 1); return it unchanged.
+        return batch_list
+
     batch = {}
     for key, value in batch_list[0].items():  # TODO: Handle missing modalities when allow_missing_modalities is set.
         if isinstance(value, torch.Tensor):
@@ -185,7 +189,7 @@ def __init__(
         image_modalities: list[str] | None = None,
         rgb_modality: str | None = None,
         rgb_indices: list[int] | None = None,
-        allow_substring_file_names: bool = False,
+        allow_substring_file_names: bool = True,
         class_names: list[str] | None = None,
         constant_scale: dict[float] = None,
         train_transform: dict | A.Compose | None | list[A.BasicTransform] = None,
@@ -439,7 +443,8 @@ def setup(self, stage: str) -> None:
                 expand_temporal_dimension=self.expand_temporal_dimension,
                 reduce_zero_label=self.reduce_zero_label,
                 channel_position=self.channel_position,
-                concat_bands=self.concat_bands ,
+                data_with_sample_dim = self.data_with_sample_dim,
+                concat_bands=self.concat_bands,
             )
             logger.info(f"Train dataset: {len(self.train_dataset)}")
         if stage in ["fit", "validate"]:
@@ -463,6 +468,7 @@ def setup(self, stage: str) -> None:
                 expand_temporal_dimension=self.expand_temporal_dimension,
                 reduce_zero_label=self.reduce_zero_label,
                 channel_position=self.channel_position,
+                data_with_sample_dim = self.data_with_sample_dim,
                 concat_bands=self.concat_bands,
             )
             logger.info(f"Val dataset: {len(self.val_dataset)}")
@@ -487,6 +493,7 @@ def setup(self, stage: str) -> None:
                 expand_temporal_dimension=self.expand_temporal_dimension,
                 reduce_zero_label=self.reduce_zero_label,
                 channel_position=self.channel_position,
+                data_with_sample_dim = self.data_with_sample_dim,
                 concat_bands=self.concat_bands,
             )
             logger.info(f"Test dataset: {len(self.test_dataset)}")
@@ -507,6 +514,7 @@ def setup(self, stage: str) -> None:
                 expand_temporal_dimension=self.expand_temporal_dimension,
                 reduce_zero_label=self.reduce_zero_label,
                 channel_position=self.channel_position,
+                data_with_sample_dim=self.data_with_sample_dim,
                 concat_bands=self.concat_bands,
             )
             logger.info(f"Predict dataset: {len(self.predict_dataset)}")