deploy changes

github-actions[bot] · github-actions[bot] · commit ede86caec889 · 2026-05-06T19:37:41.000Z
diff --git a/asparagus/functional/lr_scheduling.py b/asparagus/functional/lr_scheduling.py
@@ -43,6 +43,7 @@ def sawtooth_warmup_cosine_decay_schedule(
     Phase 2: Both encoder and decoder warmup
     Phase 3: Cosine annealing for both
     """
+    assert max_epochs > 0 and steps_per_epoch > 0, "max_epochs and steps_per_epoch must be greater than 0"
     print(f"Using separate warmup: decoder for {decoder_warmup_epochs} epochs, then both for {warmup_epochs} epochs")
 
     decoder_warmup_steps = int(decoder_warmup_epochs * steps_per_epoch)
@@ -72,16 +73,24 @@ def decoder_phase1_lambda(step):
     )
 
 
-def simple_warmup_cosine_decay_schedule(optimizer, warmup_epochs, steps_per_epoch, cosine_period_ratio, max_epochs):
+def simple_warmup_cosine_decay_schedule(
+    optimizer, warmup_epochs, steps_per_epoch, cosine_period_ratio, max_epochs=-1, max_steps=-1
+):
     """
     Phase 1: Warmup for both encoder and decoder
     Phase 2: Cosine annealing for both
     """
-    print(f"Using warmup for {warmup_epochs} epochs")
+    assert warmup_epochs >= 0, "Warmup epochs must be greater than or equal to 0."
+    assert cosine_period_ratio > 0, "Cosine period ratio must be greater than 0."
+    assert steps_per_epoch > 0, "Steps per epoch must be greater than 0."
+    assert max_epochs > 0 or max_steps > 0, "Either max_epochs or max_steps must be greater than 0."
 
-    total_warmup_steps = int(warmup_epochs * steps_per_epoch)
     # cosine_half_period is from max to min
-    cosine_steps = int(cosine_period_ratio * (max_epochs * steps_per_epoch - total_warmup_steps))
+    if max_epochs > 0:
+        max_steps = max_epochs * steps_per_epoch
+
+    total_warmup_steps = int(warmup_epochs * steps_per_epoch)
+    cosine_steps = int(cosine_period_ratio * (max_steps - total_warmup_steps))
 
     cosine_scheduler = CosineAnnealingLR(optimizer, T_max=cosine_steps)
     warmup_scheduler = LinearLR(
@@ -90,17 +99,25 @@ def simple_warmup_cosine_decay_schedule(optimizer, warmup_epochs, steps_per_epoc
         total_iters=total_warmup_steps,
     )
 
+    print(f"Using warmup for {warmup_epochs} epochs ({total_warmup_steps} steps)")
+    print(f"Cosine decay for {cosine_steps} steps after warmup")
+    assert total_warmup_steps > 0, "Warmup steps must be greater than 0 for warmup schedule."
+    assert cosine_steps > 0, "Cosine steps must be greater than 0 for warmup cosine decay schedule."
+
     return SequentialLR(
         optimizer,
         schedulers=[warmup_scheduler, cosine_scheduler],
         milestones=[total_warmup_steps],
     )
 
 
-def cosine_decay_schedule(optimizer, steps_per_epoch, cosine_period_ratio, max_epochs):
+def cosine_decay_schedule(optimizer, steps_per_epoch, cosine_period_ratio, max_epochs=-1, max_steps=-1):
     """
     Phase 1: Cosine annealing for both encoder and decoder
     """
     # cosine_half_period is from max to min
-    cosine_steps = int(cosine_period_ratio * (max_epochs * steps_per_epoch))
+    if max_epochs > 0:
+        max_steps = max_epochs * steps_per_epoch
+    cosine_steps = int(cosine_period_ratio * max_steps)
+    assert cosine_steps > 0, "Cosine steps must be greater than 0 for cosine decay schedule."
     return CosineAnnealingLR(optimizer, T_max=cosine_steps)
diff --git a/asparagus/modules/lightning_modules/base_module.py b/asparagus/modules/lightning_modules/base_module.py
@@ -110,34 +110,42 @@ def configure_optimizers(self):
 
         print(f"Using optimizer {optimizer.__class__.__name__} with learning rate {self.learning_rate}")
 
-        steps_per_epoch = self.trainer.estimated_stepping_batches // self.trainer.max_epochs
+        # Calculate steps per epoch based on trainer configuration
+        # if max_epochs is *not* set (i.e., set to -1), we are probably using max_steps
+        # if max_epochs is set, we can calculate steps per epoch based on estimated_stepping_batches
+        if self.trainer.max_epochs <= 0:
+            optimizer_steps_per_epoch = self.trainer.limit_train_batches // self.trainer.accumulate_grad_batches
+        else:
+            optimizer_steps_per_epoch = self.trainer.estimated_stepping_batches // self.trainer.max_epochs
 
         # Scheduler option 1: Three-phase schedule with separate decoder/joint warmup
         if self.decoder_warmup_epochs > 0:
             scheduler = sawtooth_warmup_cosine_decay_schedule(
                 optimizer,
                 self.decoder_warmup_epochs,
                 self.warmup_epochs,
-                steps_per_epoch,
+                optimizer_steps_per_epoch,
                 self.cosine_period_ratio,
-                self.trainer.max_epochs,
+                self.trainer.max_epochs,  # may be -1, if using max_steps
             )
         # Scheduler option 2: Two-phase schedule with joint warmup
         elif self.warmup_epochs > 0:
             scheduler = simple_warmup_cosine_decay_schedule(
                 optimizer,
                 self.warmup_epochs,
-                steps_per_epoch,
+                optimizer_steps_per_epoch,
                 self.cosine_period_ratio,
-                self.trainer.max_epochs,
+                self.trainer.max_epochs,  # may be -1, if using max_steps
+                self.trainer.max_steps,  # may be -1, if using max_epochs
             )
         # Scheduler option 3: Just cosine annealing
         else:
             scheduler = cosine_decay_schedule(
                 optimizer,
-                steps_per_epoch,
+                optimizer_steps_per_epoch,
                 self.cosine_period_ratio,
-                self.trainer.max_epochs,
+                self.trainer.max_epochs,  # may be -1, if using max_steps
+                self.trainer.max_steps,  # may be -1, if using max_epochs
             )
 
         scheduler_config = {
diff --git a/asparagus/modules/transforms/__init__.py b/asparagus/modules/transforms/__init__.py
@@ -0,0 +1 @@
+from asparagus.modules.transforms.clamp import Torch_ClampTarget as Torch_ClampTarget
diff --git a/asparagus/modules/transforms/clamp.py b/asparagus/modules/transforms/clamp.py
@@ -0,0 +1,13 @@
+import torch
+
+
+class Torch_ClampTarget:
+    def __init__(self, clamp: bool = False, min_value: float = 0.0, max_value: float = 1.0):
+        self.clamp = clamp
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def __call__(self, data_dict: dict) -> dict:
+        if self.clamp and "label" in data_dict:
+            data_dict["label"] = torch.clamp(data_dict["label"], min=self.min_value, max=self.max_value)
+        return data_dict
diff --git a/asparagus/modules/transforms/presets/pretrain.py b/asparagus/modules/transforms/presets/pretrain.py
@@ -1,3 +1,4 @@
+from asparagus.modules.transforms import Torch_ClampTarget
 from gardening_tools.functional.transforms.spatial import get_max_rotated_size
 from gardening_tools.modules.transforms.bias_field import Torch_BiasField
 from gardening_tools.modules.transforms.blur import Torch_Blur
@@ -18,8 +19,9 @@ def CPU_val_transforms(patch_size):
     return transforms.Compose(
         [
             Torch_Normalize(normalize=True),
-            Torch_CropPad(patch_size=patch_size, p_oversample_foreground=0.4),
+            Torch_CropPad(patch_size=patch_size, p_oversample_foreground=0.0),
             Torch_CopyImageToLabel(copy=True),
+            Torch_ClampTarget(clamp=True, min_value=-2.0, max_value=4.0),
         ]
     )
 
@@ -36,7 +38,7 @@ def CPU_train_transforms(patch_size):
     return transforms.Compose(
         [
             Torch_Normalize(normalize=True),
-            Torch_CropPad(patch_size=pre_aug_patch_size, p_oversample_foreground=0.4),
+            Torch_CropPad(patch_size=pre_aug_patch_size, p_oversample_foreground=0.0),
             Torch_Spatial(
                 patch_size=patch_size,
                 p_deform_all_channel=0.0,
@@ -47,6 +49,7 @@ def CPU_train_transforms(patch_size):
                 skip_label=False,
             ),
             Torch_CopyImageToLabel(copy=True),
+            Torch_ClampTarget(clamp=True, min_value=-2.0, max_value=4.0),
         ]
     )
 

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+from asparagus.modules.transforms.clamp import Torch_ClampTarget as Torch_ClampTarget`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+from asparagus.modules.transforms import Torch_ClampTarget`
`1`	`2`	`from gardening_tools.functional.transforms.spatial import get_max_rotated_size`
`2`	`3`	`from gardening_tools.modules.transforms.bias_field import Torch_BiasField`
`3`	`4`	`from gardening_tools.modules.transforms.blur import Torch_Blur`
`@@ -18,8 +19,9 @@ def CPU_val_transforms(patch_size):`
`18`	`19`	`return transforms.Compose(`
`19`	`20`	`[`
`20`	`21`	`Torch_Normalize(normalize=True),`
`21`		`- Torch_CropPad(patch_size=patch_size, p_oversample_foreground=0.4),`
	`22`	`+ Torch_CropPad(patch_size=patch_size, p_oversample_foreground=0.0),`
`22`	`23`	`Torch_CopyImageToLabel(copy=True),`
	`24`	`+ Torch_ClampTarget(clamp=True, min_value=-2.0, max_value=4.0),`
`23`	`25`	`]`
`24`	`26`	`)`
`25`	`27`
`@@ -36,7 +38,7 @@ def CPU_train_transforms(patch_size):`
`36`	`38`	`return transforms.Compose(`
`37`	`39`	`[`
`38`	`40`	`Torch_Normalize(normalize=True),`
`39`		`- Torch_CropPad(patch_size=pre_aug_patch_size, p_oversample_foreground=0.4),`
	`41`	`+ Torch_CropPad(patch_size=pre_aug_patch_size, p_oversample_foreground=0.0),`
`40`	`42`	`Torch_Spatial(`
`41`	`43`	`patch_size=patch_size,`
`42`	`44`	`p_deform_all_channel=0.0,`
`@@ -47,6 +49,7 @@ def CPU_train_transforms(patch_size):`
`47`	`49`	`skip_label=False,`
`48`	`50`	`),`
`49`	`51`	`Torch_CopyImageToLabel(copy=True),`
	`52`	`+ Torch_ClampTarget(clamp=True, min_value=-2.0, max_value=4.0),`
`50`	`53`	`]`
`51`	`54`	`)`
`52`	`55`