
Commit 837f796

fix bug1

Signed-off-by: David Chen <530634352@qq.com>

1 parent: f03a26a

2 files changed (+10, -3 lines)


vllm_omni/diffusion/models/ltx2/pipeline_ltx2.py

Lines changed: 9 additions & 2 deletions

@@ -8,6 +8,7 @@
 import json
 import os
 from collections.abc import Iterable
+from contextlib import nullcontext
 from typing import Any

 import numpy as np
@@ -615,8 +616,14 @@ def interrupt(self):
     def _is_cfg_parallel_enabled(self, do_true_cfg: bool) -> bool:
         return do_true_cfg and get_classifier_free_guidance_world_size() > 1

+    def _transformer_cache_context(self, context_name: str):
+        cache_context = getattr(self.transformer, "cache_context", None)
+        if callable(cache_context):
+            return cache_context(context_name)
+        return nullcontext()
+
     def _predict_noise_av(self, **kwargs) -> tuple[torch.Tensor, torch.Tensor]:
-        with self.transformer.cache_context("cond_uncond"):
+        with self._transformer_cache_context("cond_uncond"):
             noise_pred_video, noise_pred_audio = self.transformer(**kwargs)
         return noise_pred_video, noise_pred_audio

@@ -1069,7 +1076,7 @@ def forward(

         timestep = t.expand(latent_model_input.shape[0])

-        with self.transformer.cache_context("cond_uncond"):
+        with self._transformer_cache_context("cond_uncond"):
             noise_pred_video, noise_pred_audio = self.transformer(
                 hidden_states=latent_model_input,
                 audio_hidden_states=audio_latent_model_input,
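
The change in pipeline_ltx2.py carries the actual fix: the pipelines used to call self.transformer.cache_context("cond_uncond") unconditionally, which fails with an AttributeError when the loaded transformer does not expose a cache_context manager (presumably the "bug1" referred to in the commit message). The new _transformer_cache_context helper probes for the attribute and falls back to contextlib.nullcontext(), a no-op context manager, so every call site stays a plain with statement. Below is a minimal, self-contained sketch of the same pattern; CachingTransformer and PlainTransformer are hypothetical stand-ins for illustration only, not vllm_omni classes.

# Minimal sketch of the fallback pattern added in this commit.
# CachingTransformer and PlainTransformer are hypothetical stand-ins.
from contextlib import contextmanager, nullcontext


class CachingTransformer:
    """Stand-in for a transformer that supports named cache contexts."""

    @contextmanager
    def cache_context(self, name: str):
        # A real implementation would scope its feature cache to `name`.
        print(f"entering cache context {name!r}")
        try:
            yield
        finally:
            print(f"leaving cache context {name!r}")


class PlainTransformer:
    """Stand-in for a transformer with no cache_context attribute."""


def transformer_cache_context(transformer, context_name: str):
    # Mirrors _transformer_cache_context from the diff: use the model's own
    # cache_context if present and callable, otherwise a no-op context.
    cache_context = getattr(transformer, "cache_context", None)
    if callable(cache_context):
        return cache_context(context_name)
    return nullcontext()


if __name__ == "__main__":
    for transformer in (CachingTransformer(), PlainTransformer()):
        with transformer_cache_context(transformer, "cond_uncond"):
            pass  # the denoising forward pass would run here

Returning a context manager in both branches keeps the call sites uniform, which is why the second file below only needs its with statement swapped.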

vllm_omni/diffusion/models/ltx2/pipeline_ltx2_image2video.py

Lines changed: 1 addition & 1 deletion

@@ -603,7 +603,7 @@ def forward(
         timestep = t.expand(latent_model_input.shape[0])
         video_timestep = timestep.unsqueeze(-1) * (1 - conditioning_mask)

-        with self.transformer.cache_context("cond_uncond"):
+        with self._transformer_cache_context("cond_uncond"):
             noise_pred_video, noise_pred_audio = self.transformer(
                 hidden_states=latent_model_input,
                 audio_hidden_states=audio_latent_model_input,
