Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def load_customized(
):
init_params["quant_config"] = nunchaku_config

# Load the model using FSDP loader
# Load the model using FSDP loader.
model = maybe_load_fsdp_model(
model_cls=model_cls,
init_params=init_params,
Expand Down
4 changes: 3 additions & 1 deletion python/sglang/multimodal_gen/runtime/loader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,13 +148,15 @@ def _list_safetensors_files(model_path: str) -> list[str]:
return sorted(glob.glob(os.path.join(str(model_path), "*.safetensors")))


BYTES_PER_GB = 1024**3


def get_memory_usage_of_component(module) -> float | None:
"""
returned value is in GB, rounded to 2 decimal digits
"""
if not isinstance(module, nn.Module):
return None
BYTES_PER_GB = 1024**3
if hasattr(module, "get_memory_footprint"):
usage = module.get_memory_footprint() / BYTES_PER_GB
else:
Expand Down
16 changes: 14 additions & 2 deletions python/sglang/multimodal_gen/runtime/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from sglang.multimodal_gen.runtime.layers.quantization.configs.nunchaku_config import (
NunchakuConfig,
)
from sglang.multimodal_gen.runtime.loader.utils import BYTES_PER_GB
from sglang.multimodal_gen.runtime.platforms import (
AttentionBackendEnum,
current_platform,
Expand Down Expand Up @@ -411,7 +412,18 @@ def _adjust_quant_config(self):
)

def _adjust_offload(self):
if self.pipeline_config.task_type.is_image_gen():
# TODO: to be handled by each platform
if current_platform.get_device_total_memory() / BYTES_PER_GB < 30:
logger.info("Enabling all offloading for GPU with low device memory")
if self.dit_cpu_offload is None:
self.dit_cpu_offload = True
if self.text_encoder_cpu_offload is None:
self.text_encoder_cpu_offload = True
if self.image_encoder_cpu_offload is None:
self.image_encoder_cpu_offload = True
if self.vae_cpu_offload is None:
self.vae_cpu_offload = True
Comment on lines +416 to +425
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The added logic for low-memory GPUs is nearly identical to the else block on lines 438-447, introducing code duplication that can make future modifications more error-prone.

Additionally, the value 30 is a magic number. Defining it as a named constant, for example LOW_GPU_MEMORY_THRESHOLD_GB = 30, would improve readability and make it easier to change.

Please consider refactoring the _adjust_offload method to eliminate this duplication.

Comment on lines +416 to +425
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The hardcoded value 30 for the memory threshold can be extracted into a named constant to improve readability. Additionally, the series of if statements to enable offloading for different components is repetitive. This block can be refactored into a loop to make the code more concise and easier to maintain.

Suggested change
if current_platform.get_device_total_memory() / BYTES_PER_GB < 30:
logger.info("Enabling all offloading for GPU with low device memory")
if self.dit_cpu_offload is None:
self.dit_cpu_offload = True
if self.text_encoder_cpu_offload is None:
self.text_encoder_cpu_offload = True
if self.image_encoder_cpu_offload is None:
self.image_encoder_cpu_offload = True
if self.vae_cpu_offload is None:
self.vae_cpu_offload = True
if current_platform.get_device_total_memory() / BYTES_PER_GB < 30:
logger.info("Enabling all offloading for GPU with low device memory")
offload_attrs = [
"dit_cpu_offload",
"text_encoder_cpu_offload",
"image_encoder_cpu_offload",
"vae_cpu_offload",
]
for attr in offload_attrs:
if getattr(self, attr) is None:
setattr(self, attr, True)

elif self.pipeline_config.task_type.is_image_gen():
logger.info(
"Disabling some offloading (except dit, text_encoder) for image generation model"
)
Expand Down Expand Up @@ -1086,7 +1098,7 @@ def _validate_offload(self):
)
self.use_fsdp_inference = False

if self.dit_cpu_offload:
if self.dit_cpu_offload is None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This change from `if self.dit_cpu_offload:` to `if self.dit_cpu_offload is None:` appears to introduce a bug. When dit_layerwise_offload is enabled, dit_cpu_offload must be disabled to prevent conflicts. With this change, however, if _adjust_offload sets dit_cpu_offload=True on a low-memory GPU, the condition evaluates to false and dit_cpu_offload incorrectly remains True, leading to a configuration conflict.

The previous logic correctly handled this by disabling dit_cpu_offload whenever it was enabled. Please revert this change to ensure the conflict is always resolved.

Suggested change
if self.dit_cpu_offload is None:
if self.dit_cpu_offload:

logger.warning(
"dit_layerwise_offload is enabled, automatically disabling dit_cpu_offload."
)
Expand Down
Loading