deploy changes

github-actions[bot] · github-actions[bot] · commit 8cbf45a7f4dd · 2026-04-09T09:16:18.000Z
diff --git a/.gitignore b/.gitignore
@@ -194,3 +194,4 @@ tail_log.bash
 CLAUDE.md
 
 .vscode/
+debug_logs
diff --git a/asparagus/functional/metrics/distribution.py b/asparagus/functional/metrics/distribution.py
@@ -102,7 +102,7 @@ def compute_alignment_uniformity(
     # Flatten spatial dimensions if present
     if features.dim() > 2:
         B = features.shape[0]
-        features = features.view(B, -1).float()  # (B, D)
+        features = features.reshape(B, -1).float()  # (B, D)
     else:
         features = features.float()
 
diff --git a/asparagus/functional/metrics/features.py b/asparagus/functional/metrics/features.py
@@ -7,6 +7,16 @@
 from typing import Dict
 
 
+def _to_channel_samples(features: torch.Tensor) -> torch.Tensor:
+    """Reshape [B, C, *spatial] → [B*N_spatial, C] for channel-space analysis.
+    2D inputs [B, C] are returned unchanged."""
+    if features.dim() <= 2:
+        return features
+    C = features.shape[1]
+    spatial_dims = list(range(2, features.dim()))
+    return features.permute(0, *spatial_dims, 1).reshape(-1, C)
+
+
 def compute_train(encoder_features: torch.Tensor) -> Dict[str, float]:
     """Metrics computed every training step."""
     return compute_embedding_metrics(encoder_features)
@@ -35,10 +45,7 @@ def compute_feature_covariance(features: torch.Tensor) -> Dict[str, float]:
     if features is None:
         return {}
 
-    # Flatten spatial dimensions if present
-    if features.dim() > 2:
-        B = features.shape[0]
-        features = features.view(B, -1)
+    features = _to_channel_samples(features)
 
     features_centered = features - features.mean(dim=0, keepdim=True)
     cov = torch.mm(features_centered.T, features_centered) / (features.shape[0] - 1)
@@ -84,9 +91,7 @@ def compute_collapse_score(features: torch.Tensor, eps: float = 1e-8) -> Dict[st
     if features is None or features.numel() == 0:
         return {}
 
-    if features.dim() > 2:
-        B = features.shape[0]
-        features = features.view(B, -1)
+    features = _to_channel_samples(features)
 
     dim_variance = features.var(dim=0, unbiased=False)
 
@@ -135,11 +140,7 @@ def compute_participation_ratio(features: torch.Tensor, k_values: list = [10, 50
     if features is None or features.numel() == 0:
         return {}
 
-    if features.dim() > 2:
-        B = features.shape[0]
-        features = features.view(B, -1).float()
-    else:
-        features = features.float()
+    features = _to_channel_samples(features).float()
 
     features_centered = features - features.mean(dim=0, keepdim=True)
 
@@ -183,11 +184,7 @@ def compute_whitening_diagnostics(features: torch.Tensor) -> Dict[str, float]:
     if features is None or features.numel() == 0:
         return {}
 
-    if features.dim() > 2:
-        B = features.shape[0]
-        features = features.view(B, -1).float()
-    else:
-        features = features.float()
+    features = _to_channel_samples(features).float()
 
     # Compute correlation matrix
     features_centered = features - features.mean(dim=0, keepdim=True)
diff --git a/asparagus/functional/metrics/stability.py b/asparagus/functional/metrics/stability.py
@@ -105,13 +105,13 @@ def compute_feature_stability(
     # Flatten spatial dimensions if present
     if current_features.dim() > 2:
         B = current_features.shape[0]
-        current_features = current_features.view(B, -1).float()
+        current_features = current_features.reshape(B, -1).float()
     else:
         current_features = current_features.float()
 
     if previous_features.dim() > 2:
         B = previous_features.shape[0]
-        previous_features = previous_features.view(B, -1).float()
+        previous_features = previous_features.reshape(B, -1).float()
     else:
         previous_features = previous_features.float()
 
diff --git a/asparagus/modules/networks/primus.py b/asparagus/modules/networks/primus.py
@@ -176,11 +176,11 @@ def freeze_backbone(self):
 
 
 @depends_on_timm()
-def primus_s(input_channels, output_channels, patch_size, patch_embed_size=(8, 8, 8), patch_drop_rate=0.0):
+def primus_s(input_channels, output_channels, patch_size, patch_embed_size=8, patch_drop_rate=0.0):
     model = Primus(
         input_channels=input_channels,
         embed_dim=396,
-        patch_embed_size=patch_embed_size,
+        patch_embed_size=(patch_embed_size,) * len(patch_size),
         num_classes=output_channels,
         eva_depth=12,
         eva_numheads=6,
@@ -193,11 +193,11 @@ def primus_s(input_channels, output_channels, patch_size, patch_embed_size=(8, 8
 
 
 @depends_on_timm()
-def primus_b(input_channels, output_channels, patch_size, patch_embed_size=(8, 8, 8), patch_drop_rate=0.0):
+def primus_b(input_channels, output_channels, patch_size, patch_embed_size=8, patch_drop_rate=0.0):
     model = Primus(
         input_channels=input_channels,
         embed_dim=792,
-        patch_embed_size=patch_embed_size,
+        patch_embed_size=(patch_embed_size,) * len(patch_size),
         num_classes=output_channels,
         eva_depth=12,
         eva_numheads=12,
@@ -211,11 +211,11 @@ def primus_b(input_channels, output_channels, patch_size, patch_embed_size=(8, 8
 
 
 @depends_on_timm()
-def primus_m(input_channels, output_channels, patch_size, patch_embed_size=(8, 8, 8), patch_drop_rate=0.0):
+def primus_m(input_channels, output_channels, patch_size, patch_embed_size=8, patch_drop_rate=0.0):
     model = Primus(
         input_channels=input_channels,
         embed_dim=864,
-        patch_embed_size=patch_embed_size,
+        patch_embed_size=(patch_embed_size,) * len(patch_size),
         num_classes=output_channels,
         eva_depth=16,
         eva_numheads=12,
@@ -229,11 +229,11 @@ def primus_m(input_channels, output_channels, patch_size, patch_embed_size=(8, 8
 
 
 @depends_on_timm()
-def primus_l(input_channels, output_channels, patch_size, patch_embed_size=(8, 8, 8), patch_drop_rate=0.0):
+def primus_l(input_channels, output_channels, patch_size, patch_embed_size=8, patch_drop_rate=0.0):
     model = Primus(
         input_channels=input_channels,
         embed_dim=1056,
-        patch_embed_size=patch_embed_size,
+        patch_embed_size=(patch_embed_size,) * len(patch_size),
         num_classes=output_channels,
         eva_depth=24,
         eva_numheads=16,
@@ -247,11 +247,11 @@ def primus_l(input_channels, output_channels, patch_size, patch_embed_size=(8, 8
 
 
 @depends_on_timm()
-def primus_h(input_channels, output_channels, patch_size, patch_embed_size=(8, 8, 8), patch_drop_rate=0.0):
+def primus_h(input_channels, output_channels, patch_size, patch_embed_size=8, patch_drop_rate=0.0):
     model = Primus(
         input_channels=input_channels,
         embed_dim=1248,
-        patch_embed_size=patch_embed_size,
+        patch_embed_size=(patch_embed_size,) * len(patch_size),
         num_classes=output_channels,
         eva_depth=32,
         eva_numheads=16,
@@ -265,11 +265,11 @@ def primus_h(input_channels, output_channels, patch_size, patch_embed_size=(8, 8
 
 
 @depends_on_timm()
-def primus_g(input_channels, output_channels, patch_size, patch_embed_size=(8, 8, 8), patch_drop_rate=0.0):
+def primus_g(input_channels, output_channels, patch_size, patch_embed_size=8, patch_drop_rate=0.0):
     model = Primus(
         input_channels=input_channels,
         embed_dim=1584,
-        patch_embed_size=patch_embed_size,
+        patch_embed_size=(patch_embed_size,) * len(patch_size),
         num_classes=output_channels,
         eva_depth=32,
         eva_numheads=24,
@@ -284,13 +284,13 @@ def primus_g(input_channels, output_channels, patch_size, patch_embed_size=(8, 8
 
 @depends_on_timm()
 def primus_m_clsreg(
-    input_channels, output_channels, patch_size, patch_embed_size=(8, 8, 8), dropout_rate=0.0, late_fusion: bool = False
+    input_channels, output_channels, patch_size, patch_embed_size=8, dropout_rate=0.0, late_fusion: bool = False
 ):
     return PrimusCLSREG(
         input_channels=input_channels,
         output_channels=output_channels,
         embed_dim=864,
-        patch_embed_size=patch_embed_size,
+        patch_embed_size=(patch_embed_size,) * len(patch_size),
         eva_depth=16,
         eva_numheads=12,
         input_shape=patch_size,
diff --git a/configs/default_pretrain.yaml b/configs/default_pretrain.yaml
@@ -25,7 +25,7 @@ training:
   accumulate_grad_batches: 1
   patch_size: [160, 160, 160]
   seed: ${random:0,1000000}
-  mask_patch_size: 4
+  mask_patch_size: ${model.patch_embed_size}
   mask_ratio: 0.6
   max_samples: 6_000_000
   warmup_ratio: 0.02
diff --git a/configs/model/core/primus.yaml b/configs/model/core/primus.yaml
@@ -2,27 +2,33 @@ _pretrain_net:
   _target_: asparagus.modules.networks.primus.${model.pretrain_net}
   patch_size: ${training.patch_size}
   patch_drop_rate: ${training.mask_ratio}
+  patch_embed_size: ${model.patch_embed_size}
 
 _seg_net:
   _target_: asparagus.modules.networks.primus.${model.seg_net}
   patch_size: ${training.patch_size}
   patch_drop_rate: 0.0
+  patch_embed_size: ${model.patch_embed_size}
 
 _cls_net:
   _target_: asparagus.modules.networks.primus.${model.cls_net}
-  patch_size: ${training.target_size}
+  patch_size: ${training.patch_size}
+  patch_drop_rate: 0.0
+  patch_embed_size: ${model.patch_embed_size}
 
 _plugin_seg_net:
   _target_: asparagus.modules.networks.primus.${model.plugin_seg_net}
   patch_size: ${training.patch_size}
   patch_drop_rate: 0.0
+  patch_embed_size: ${model.patch_embed_size}
 
 pretrain_optim: AdamW
 pretrain_lr: 3e-4
 train_optim: AdamW
 train_lr: 3e-4
 finetune_optim: AdamW
 finetune_lr: 3e-5
+patch_embed_size: 8
 
 weight_decay: 5e-2
 nesterov: False
diff --git a/configs/model/core/resenc_unet.yaml b/configs/model/core/resenc_unet.yaml
@@ -32,6 +32,7 @@ finetune_lr: 1e-3
 weight_decay: 3e-5
 nesterov: True
 momentum: 0.99
+patch_embed_size: 4
 
 min_test_patch_size: [96, 96, 96]
 deep_supervision: False
diff --git a/configs/model/core/unet.yaml b/configs/model/core/unet.yaml
@@ -26,6 +26,7 @@ finetune_lr: 1e-3
 weight_decay: 3e-5
 nesterov: True
 momentum: 0.99
+patch_embed_size: 4
 
 use_skip_connections: True
 deep_supervision: False
diff --git a/configs/projects/datapaper/pretrain/primus.yaml b/configs/projects/datapaper/pretrain/primus.yaml
@@ -0,0 +1,16 @@
+# @package _global_
+defaults:
+  - /default_pretrain
+  - /model/primus_m@model
+  - /hardware/1node8gpus@hardware
+  - _self_
+
+task: PT900_FOMO300K
+root: datapaper
+stem: pretrain
+
+checkpoint_run_id:
+
+training:
+  batch_size: 16
+  max_samples: 6_000_000
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,7 +16,7 @@ keywords = ['deep learning', 'medical image analysis','foundation models']
 
 
 dependencies = [
-    "gardening_tools>=0.2.0",
+    "gardening_tools>=0.3.2",
     "lightning==2.4.0",
     "nibabel>=5.3.2",
     "numpy>=1.23.1",
@@ -28,7 +28,7 @@ dependencies = [
     "torchmetrics>=1.7.2",
     "torchvision==0.21.0",
     "wandb>=0.23.0",
-    "huggingface-hub>=1.5.0"
+    "huggingface-hub>=1.5.0",
 ]
 
 
@@ -49,6 +49,7 @@ dcai = [
 [project.optional-dependencies]
 extras = [
     "mlflow>=3.0.0",
+    "timm<=1.0.14",
     "gardening_tools[extras]>=0.1.1",
 ]
 test = [
@@ -77,7 +78,6 @@ asp_test_reg = 'asparagus.pipeline.run.test_cls:main'
 asp_finetune_seg = 'asparagus.pipeline.run.finetune_seg:main'
 asp_finetune_cls = 'asparagus.pipeline.run.finetune_cls:main'
 asp_finetune_reg = 'asparagus.pipeline.run.finetune_reg:main'
-asp_linear_probe = 'asparagus.pipeline.run.linear_probe:main'
 
 asp_eval_box_run = 'asparagus.pipeline.run.eval_box:main'
 asp_eval_box_prepare_data = 'asparagus.pipeline.run.eval_box:prepare_data'

Original file line number	Diff line number	Diff line change
`@@ -194,3 +194,4 @@ tail_log.bash`
`194`	`194`	`CLAUDE.md`
`195`	`195`
`196`	`196`	`.vscode/`
	`197`	`+debug_logs`