Sllambias
diff --git a/asparagus/functional/pos_embed.py b/asparagus/functional/pos_embed.py
Lines changed: 50 additions & 0 deletions
diff --git a/asparagus/modules/data_modules/training.py b/asparagus/modules/data_modules/training.py
Lines changed: 13 additions & 8 deletions
diff --git a/asparagus/modules/lightning_modules/__init__.py b/asparagus/modules/lightning_modules/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/asparagus/modules/lightning_modules/base_module.py b/asparagus/modules/lightning_modules/base_module.py
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,50 @@
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+
+
+def interpolate_patch_embed_3d(patch_embed, in_shape, out_shape):
+    """Resize patch embeddings on a 3D token grid with trilinear interpolation.
+
+    Copied from SSL3D_classification/models/eva_mae_openneuro.py
+
+    Args:
+        patch_embed: Tensor laid out as (B, N, C), where N equals the product
+            of the x, y and z entries of ``in_shape``; the token axis is
+            unfolded in x, y, z order.
+        in_shape: Mapping with keys "x", "y", "z" giving the source grid size.
+        out_shape: Mapping with keys "x", "y", "z" giving the target grid size.
+
+    Returns:
+        Tensor laid out as (B, N_out, C), where N_out is the product of the
+        x, y and z entries of ``out_shape``.
+    """
+    # Read the target grid by key rather than by dict iteration order, so the
+    # interpolation size always lines up with the x/y/z axes named in the
+    # rearrange patterns below, however the caller built the mapping.
+    out_size = [out_shape["x"], out_shape["y"], out_shape["z"]]
+
+    # (B, N, C) -> (B, C, N) -> (B, C, x, y, z): interpolate works on the
+    # trailing spatial axes, so channels have to come before them.
+    grid = rearrange(patch_embed.permute(0, 2, 1), "B C (x y z) -> B C x y z", **in_shape)
+    grid = F.interpolate(grid, size=out_size, mode="trilinear", align_corners=False)
+
+    # Flatten the resized grid and restore the (B, N, C) layout.
+    flat = rearrange(grid, "B C x y z -> B C (x y z)", **out_shape)
+    return flat.permute(0, 2, 1)
+
+
+def resize_pos_embed_3d(
+    ckpt_pos_embed, model_pos_embed, num_prefix_tokens, pretrained_target_size, target_size, patch_embed_size
+):
+    """Adapt a checkpoint pos_embed to the token grid of a different input size.
+
+    Leading prefix tokens (cls/register) are passed through untouched; the
+    remaining patch tokens are resampled with 3D trilinear interpolation and
+    the prefix is concatenated back in front.
+
+    Args:
+        ckpt_pos_embed: Positional embedding to resize, indexed along three
+            axes with the tokens on axis 1.
+        model_pos_embed: Not read by this function.
+        num_prefix_tokens: Number of leading tokens on axis 1 to keep as-is.
+            Values <= 0 mean every token is treated as a patch token.
+        pretrained_target_size: Three-element size; each entry is
+            floor-divided by the matching ``patch_embed_size`` entry to get
+            the source token grid.
+        target_size: Three-element size; converted the same way to get the
+            destination token grid.
+        patch_embed_size: Three-element patch size.
+
+    Returns:
+        The resized pos_embed; the interpolated part is cast back to the
+        dtype the patch tokens had on entry.
+    """
+
+    def _token_grid(volume_size):
+        # Tokens per axis: volume extent floor-divided by patch extent.
+        return {axis: volume_size[i] // patch_embed_size[i] for i, axis in enumerate("xyz")}
+
+    keep_prefix = num_prefix_tokens > 0
+    patch_tokens = ckpt_pos_embed[:, num_prefix_tokens:, :] if keep_prefix else ckpt_pos_embed
+
+    source_grid = _token_grid(pretrained_target_size)
+    destination_grid = _token_grid(target_size)
+
+    # Interpolate in float32, then cast back so the incoming dtype survives.
+    source_dtype = patch_tokens.dtype
+    resampled = interpolate_patch_embed_3d(patch_tokens.float(), source_grid, destination_grid)
+    resampled = resampled.to(source_dtype)
+
+    if not keep_prefix:
+        return resampled
+    return torch.cat([ckpt_pos_embed[:, :num_prefix_tokens, :], resampled], dim=1)
@@ -111,6 +111,7 @@ def __init__(
         val_transforms: Optional[Compose] = None,
         test_transforms: Optional[Compose] = None,
         test_samples: Optional[list] = [],
+        use_random_datasampler: Optional[bool] = True,
     ):
         super().__init__()
         self.batch_size = batch_size
@@ -121,6 +122,7 @@ def __init__(
         self.train_split = train_split
         self.val_split = val_split
         self.test_samples = test_samples
+        self.use_random_datasampler = use_random_datasampler
         logging.info(f"Using {self.num_workers} workers")
 
     def setup(self, stage: Literal["fit", "test", "predict"]):
@@ -149,9 +151,10 @@ def setup_test(self):
         )
 
     def train_dataloader(self):
-        sampler = RandomSampler(self.train_dataset, num_samples=999999, replacement=True)
-        if dist.is_initialized():
-            sampler = DistributedSamplerWrapper(sampler)
+        sampler = None
+        if self.use_random_datasampler:
+            sampler = RandomSampler(self.train_dataset, num_samples=999999, replacement=True)
+            sampler = DistributedSamplerWrapper(sampler) if dist.is_initialized() else sampler
 
         return DataLoader(
             self.train_dataset,
@@ -160,23 +163,25 @@ def train_dataloader(self):
             pin_memory=False,
             persistent_workers=True,
             drop_last=True,
+            shuffle=sampler is None,
             sampler=sampler,
         )
 
     def val_dataloader(self):
-        sampler = RandomSampler(self.val_dataset, num_samples=999999, replacement=True)
-        if dist.is_initialized():
-            sampler = DistributedSamplerWrapper(sampler)
+        # Build the validation DataLoader. When use_random_datasampler is set,
+        # indices come from a RandomSampler (replacement=True, 999999 samples),
+        # wrapped in DistributedSamplerWrapper if dist.is_initialized();
+        # otherwise no sampler is passed and shuffle=False applies.
+        sampler = None
+        if self.use_random_datasampler:
+            sampler = RandomSampler(self.val_dataset, num_samples=999999, replacement=True)
+            sampler = DistributedSamplerWrapper(sampler) if dist.is_initialized() else sampler
 
         return DataLoader(
             self.val_dataset,
             num_workers=self.num_workers // 2,
             batch_size=self.batch_size,
             pin_memory=False,
-            shuffle=False,
             persistent_workers=True,
-            drop_last=True,
+            drop_last=False,  # keep the final, possibly smaller, batch
             sampler=sampler,
+            shuffle=False,
         )
 
     def test_dataloader(self):
 
@@ -1,4 +1,5 @@
 from .clsreg_module import ClassificationModule, RegressionModule
+from .linear_probe_module import LinearProbeModule
 from .segmentation_module import SegmentationModule
 from .self_supervised import SelfSupervisedModule
 
@@ -7,4 +8,5 @@
     "ClassificationModule",
     "RegressionModule",
     "SelfSupervisedModule",
+    "LinearProbeModule",
 ]
@@ -10,6 +10,7 @@
     separate_encoder_decoder_weights,
     simple_warmup_cosine_decay_schedule,
 )
+from asparagus.functional.pos_embed import resize_pos_embed_3d
 from asparagus.functional.visualization import (
     get_logger_compatible_image_output_target,
     log_image_output_target_to_mlflow,
@@ -39,12 +40,16 @@ def __init__(
         nesterov: bool = True,
         momentum: float = 0.99,
         repeat_stem_weights: bool = True,
+        pretrained_target_size: Optional[tuple] = None,
+        target_size: Optional[tuple] = None,
     ):
         super().__init__()
         self.learning_rate = learning_rate
         self.train_transforms = train_transforms
         self.test_transforms = test_transforms
         self.val_transforms = val_transforms
+        self.pretrained_target_size = pretrained_target_size
+        self.target_size = target_size
 
         self.loss = None
         self.train_metrics = None
@@ -168,6 +173,24 @@ def load_state_dict(self, state_dict, load_decoder=True, *args, **kwargs):
                 print(f"Repeating stem weights from {pt_input_channels} to {ft_input_channels} channels for {stem_name}.")
                 state_dict[stem_name] = state_dict[stem_name].repeat(1, ft_input_channels, 1, 1, 1) / ft_input_channels
 
+        # Interpolate positional embeddings when spatial dimensions differ
+        if self.pretrained_target_size is not None and self.target_size is not None:
+            for key in list(state_dict.keys()):
+                if key not in old_params or old_params[key].shape == state_dict[key].shape:
+                    continue
+                if key.endswith("pos_embed"):
+                    num_prefix_tokens = getattr(self.model.eva, "num_prefix_tokens", 0)
+                    patch_embed_size = tuple(self.model.encoder.proj.weight.shape[2:])
+                    print(f"Interpolating {key}: {state_dict[key].shape} -> {old_params[key].shape}")
+                    state_dict[key] = resize_pos_embed_3d(
+                        state_dict[key],
+                        old_params[key],
+                        num_prefix_tokens=num_prefix_tokens,
+                        pretrained_target_size=self.pretrained_target_size,
+                        target_size=self.target_size,
+                        patch_embed_size=patch_embed_size,
+                    )
+
         # Filter out keys that are not in the old state dict or have different shapes
         def should_load_key(key, state_dict, old_params, load_decoder):
             # reject all decoder keys regardless of their shape