tests/e2e/offline_inference — 1 file changed, +3 −11 lines
@@ -8,6 +8,5 @@
 - VAE patch parallelism (vae_patch_parallel_size=2) vs baseline on TP=2.

 Note: CUDA-only (>=2 GPUs). We use `enforce_eager=False` (default) to enable
-`torch.compile` on supported GPUs. On pre-Ampere GPUs (e.g., V100), we force
-eager mode because `torch.compile` does not support bfloat16 compilation there.
+`torch.compile`.
 """
1413
1514import os
@@ -79,13 +78,6 @@ def _extract_single_image(outputs) -> Image.Image:
     return images[0]


-def _should_force_eager_for_compile() -> bool:
-    # The diffusion pipeline defaults to bfloat16 weights. Torch inductor does
-    # not support bfloat16 compilation on pre-Ampere GPUs.
-    major, _minor = torch.cuda.get_device_capability()
-    return major < 8
-
-
 def _run_zimage_generate(
     *,
     tp_size: int,
@@ -169,7 +161,7 @@ def test_zimage_tensor_parallel_tp2(tmp_path: Path):
     if not torch.cuda.is_available() or torch.cuda.device_count() < 2:
         pytest.skip("Z-Image TP=2 requires >= 2 CUDA devices.")

-    enforce_eager = _should_force_eager_for_compile()
+    enforce_eager = False

     height = 512
     width = 512
@@ -231,7 +223,7 @@ def test_zimage_vae_patch_parallel_tp2(tmp_path: Path):
     if not torch.cuda.is_available() or torch.cuda.device_count() < 2:
         pytest.skip("Z-Image VAE patch parallel TP=2 requires >= 2 CUDA devices.")

-    enforce_eager = _should_force_eager_for_compile()
+    enforce_eager = False

     # Use a larger image to ensure there are multiple VAE tiles.
     height = 1152
You can’t perform that action at this time.
0 commit comments