Commit a24e0c6

Merge branch 'main' into docker-build-workflow
2 parents: fdedad1 + abd922b

110 files changed: +818 -5033 lines changed


.github/workflows/push_tests.yml

+1 -1

@@ -64,7 +64,7 @@ jobs:
       max-parallel: 1
       matrix:
         module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
     container:
       image: diffusers/diffusers-pytorch-cuda
       options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0

examples/community/clip_guided_images_mixing_stable_diffusion.py

+2 -12

@@ -12,12 +12,12 @@
 from diffusers import (
     AutoencoderKL,
     DDIMScheduler,
-    DiffusionPipeline,
     DPMSolverMultistepScheduler,
     LMSDiscreteScheduler,
     PNDMScheduler,
     UNet2DConditionModel,
 )
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.utils import PIL_INTERPOLATION
 from diffusers.utils.torch_utils import randn_tensor

@@ -77,7 +77,7 @@ def set_requires_grad(model, value):
         param.requires_grad = value


-class CLIPGuidedImagesMixingStableDiffusion(DiffusionPipeline):
+class CLIPGuidedImagesMixingStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
     def __init__(
         self,
         vae: AutoencoderKL,

@@ -113,16 +113,6 @@ def __init__(
         set_requires_grad(self.text_encoder, False)
         set_requires_grad(self.clip_model, False)

-    def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
-        if slice_size == "auto":
-            # half the attention head size is usually a good trade-off between
-            # speed and memory
-            slice_size = self.unet.config.attention_head_dim // 2
-        self.unet.set_attention_slice(slice_size)
-
-    def disable_attention_slicing(self):
-        self.enable_attention_slicing(None)
-
     def freeze_vae(self):
         set_requires_grad(self.vae, False)
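
The same refactor repeats across the community pipelines in this commit: the per-file copies of enable_attention_slicing / disable_attention_slicing (and, further down, the VAE slicing/tiling and FreeU helpers) are deleted, and each class additionally inherits from StableDiffusionMixin, imported from diffusers.pipelines.pipeline_utils, so the helpers resolve through the shared base classes. A minimal, self-contained sketch of the pattern — the class names mirror the diff, but the method bodies below are illustrative stand-ins, not the diffusers implementations:

# Stand-in bases: in diffusers these are DiffusionPipeline and StableDiffusionMixin
# from diffusers.pipelines.pipeline_utils; the bodies here are placeholders.
class DiffusionPipeline:
    def enable_attention_slicing(self, slice_size="auto"):
        # the shared implementation now lives once, on a base class
        print(f"attention slicing enabled (slice_size={slice_size!r})")

    def disable_attention_slicing(self):
        self.enable_attention_slicing(None)


class StableDiffusionMixin:
    def enable_vae_slicing(self):
        print("vae slicing enabled")

    def enable_freeu(self, s1, s2, b1, b2):
        print(f"freeu enabled: s1={s1}, s2={s2}, b1={b1}, b2={b2}")


# After the refactor a community pipeline only declares its bases; it no longer
# re-defines the memory helpers in its own file.
class CLIPGuidedImagesMixingStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
    pass


pipe = CLIPGuidedImagesMixingStableDiffusion()
pipe.enable_attention_slicing()  # resolved on DiffusionPipeline
pipe.enable_vae_slicing()        # resolved on StableDiffusionMixin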

examples/community/clip_guided_stable_diffusion.py

+2 -12

@@ -10,12 +10,12 @@
 from diffusers import (
     AutoencoderKL,
     DDIMScheduler,
-    DiffusionPipeline,
     DPMSolverMultistepScheduler,
     LMSDiscreteScheduler,
     PNDMScheduler,
     UNet2DConditionModel,
 )
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput


@@ -51,7 +51,7 @@ def set_requires_grad(model, value):
         param.requires_grad = value


-class CLIPGuidedStableDiffusion(DiffusionPipeline):
+class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
     """CLIP guided stable diffusion based on the amazing repo by @crowsonkb and @Jack000
     - https://github.com/Jack000/glid-3-xl
     - https://github.dev/crowsonkb/k-diffusion

@@ -89,16 +89,6 @@ def __init__(
         set_requires_grad(self.text_encoder, False)
         set_requires_grad(self.clip_model, False)

-    def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
-        if slice_size == "auto":
-            # half the attention head size is usually a good trade-off between
-            # speed and memory
-            slice_size = self.unet.config.attention_head_dim // 2
-        self.unet.set_attention_slice(slice_size)
-
-    def disable_attention_slicing(self):
-        self.enable_attention_slicing(None)
-
     def freeze_vae(self):
         set_requires_grad(self.vae, False)

examples/community/clip_guided_stable_diffusion_img2img.py

+2 -12

@@ -12,12 +12,12 @@
 from diffusers import (
     AutoencoderKL,
     DDIMScheduler,
-    DiffusionPipeline,
     DPMSolverMultistepScheduler,
     LMSDiscreteScheduler,
     PNDMScheduler,
     UNet2DConditionModel,
 )
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.utils import PIL_INTERPOLATION, deprecate
 from diffusers.utils.torch_utils import randn_tensor

@@ -125,7 +125,7 @@ def set_requires_grad(model, value):
         param.requires_grad = value


-class CLIPGuidedStableDiffusion(DiffusionPipeline):
+class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
     """CLIP guided stable diffusion based on the amazing repo by @crowsonkb and @Jack000
     - https://github.com/Jack000/glid-3-xl
     - https://github.dev/crowsonkb/k-diffusion

@@ -163,16 +163,6 @@ def __init__(
         set_requires_grad(self.text_encoder, False)
         set_requires_grad(self.clip_model, False)

-    def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
-        if slice_size == "auto":
-            # half the attention head size is usually a good trade-off between
-            # speed and memory
-            slice_size = self.unet.config.attention_head_dim // 2
-        self.unet.set_attention_slice(slice_size)
-
-    def disable_attention_slicing(self):
-        self.enable_attention_slicing(None)
-
     def freeze_vae(self):
         set_requires_grad(self.vae, False)

examples/community/composable_stable_diffusion.py

+3 -58

@@ -22,6 +22,7 @@
 from diffusers import DiffusionPipeline
 from diffusers.configuration_utils import FrozenDict
 from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from diffusers.schedulers import (

@@ -32,13 +33,13 @@
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from diffusers.utils import deprecate, is_accelerate_available, logging
+from diffusers.utils import deprecate, logging


 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


-class ComposableStableDiffusionPipeline(DiffusionPipeline):
+class ComposableStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion.

@@ -164,62 +165,6 @@ def __init__(
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

-    def enable_vae_slicing(self):
-        r"""
-        Enable sliced VAE decoding.
-
-        When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
-        steps. This is useful to save some memory and allow larger batch sizes.
-        """
-        self.vae.enable_slicing()
-
-    def disable_vae_slicing(self):
-        r"""
-        Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
-        computing decoding in one step.
-        """
-        self.vae.disable_slicing()
-
-    def enable_sequential_cpu_offload(self, gpu_id=0):
-        r"""
-        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
-        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
-        `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
-        """
-        if is_accelerate_available():
-            from accelerate import cpu_offload
-        else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
-            if cpu_offloaded_model is not None:
-                cpu_offload(cpu_offloaded_model, device)
-
-        if self.safety_checker is not None:
-            # TODO(Patrick) - there is currently a bug with cpu offload of nn.Parameter in accelerate
-            # fix by only offloading self.safety_checker for now
-            cpu_offload(self.safety_checker.vision_model, device)
-
-    @property
-    def _execution_device(self):
-        r"""
-        Returns the device on which the pipeline's models will be executed. After calling
-        `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
-        hooks.
-        """
-        if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"):
-            return self.device
-        for module in self.unet.modules():
-            if (
-                hasattr(module, "_hf_hook")
-                and hasattr(module._hf_hook, "execution_device")
-                and module._hf_hook.execution_device is not None
-            ):
-                return torch.device(module._hf_hook.execution_device)
-        return self.device
-
     def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
         r"""
         Encodes the prompt into text encoder hidden states.
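
The removal above is larger than in the CLIP-guided pipelines: besides the VAE slicing helpers, this pipeline drops its own enable_sequential_cpu_offload and _execution_device, which re-implemented accelerate's cpu_offload and hook inspection, and relies on the versions the shared DiffusionPipeline already provides. A hedged usage sketch — the checkpoint id and prompt are placeholders, and it assumes the inherited offload helper keeps the call signature of the removed method:

import torch
from diffusers import DiffusionPipeline

# Placeholder checkpoint; any Stable Diffusion v1.x checkpoint should do.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="composable_stable_diffusion",
    torch_dtype=torch.float16,
)

# Previously defined in this file on top of accelerate's cpu_offload; after the
# refactor this call is expected to hit the shared DiffusionPipeline implementation.
pipe.enable_sequential_cpu_offload()

# "|" separates the sub-prompts this community pipeline composes.
image = pipe("a camel | a desert at sunset", guidance_scale=7.5).images[0]
image.save("composed.png")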

examples/community/gluegen.py

+2 -56

@@ -10,6 +10,7 @@
 from diffusers.loaders import LoraLoaderMixin
 from diffusers.models import AutoencoderKL, UNet2DConditionModel
 from diffusers.models.lora import adjust_lora_scale_text_encoder
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from diffusers.schedulers import KarrasDiffusionSchedulers

@@ -193,7 +194,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps


-class GlueGenStableDiffusionPipeline(DiffusionPipeline, LoraLoaderMixin):
+class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, LoraLoaderMixin):
     def __init__(
         self,
         vae: AutoencoderKL,

@@ -241,35 +242,6 @@ def load_language_adapter(
         )
         self.language_adapter.load_state_dict(torch.load(model_path))

-    def enable_vae_slicing(self):
-        r"""
-        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
-        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
-        """
-        self.vae.enable_slicing()
-
-    def disable_vae_slicing(self):
-        r"""
-        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
-        computing decoding in one step.
-        """
-        self.vae.disable_slicing()
-
-    def enable_vae_tiling(self):
-        r"""
-        Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
-        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
-        processing larger images.
-        """
-        self.vae.enable_tiling()
-
-    def disable_vae_tiling(self):
-        r"""
-        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
-        computing decoding in one step.
-        """
-        self.vae.disable_tiling()
-
     def _adapt_language(self, prompt_embeds: torch.FloatTensor):
         prompt_embeds = prompt_embeds / 3
         prompt_embeds = self.language_adapter(prompt_embeds) * (self.tensor_norm / 2)

@@ -544,32 +516,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
         latents = latents * self.scheduler.init_noise_sigma
         return latents

-    def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
-        r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
-        The suffixes after the scaling factors represent the stages where they are being applied.
-
-        Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
-        that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
-        Args:
-            s1 (`float`):
-                Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
-                mitigate "oversmoothing effect" in the enhanced denoising process.
-            s2 (`float`):
-                Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
-                mitigate "oversmoothing effect" in the enhanced denoising process.
-            b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
-            b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
-        """
-        if not hasattr(self, "unet"):
-            raise ValueError("The pipeline must have `unet` for using FreeU.")
-        self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
-    def disable_freeu(self):
-        """Disables the FreeU mechanism if enabled."""
-        self.unet.disable_freeu()
-
     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
     def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
         """

examples/community/imagic_stable_diffusion.py

+2 -26

@@ -19,6 +19,7 @@

 from diffusers import DiffusionPipeline
 from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler

@@ -56,7 +57,7 @@ def preprocess(image):
     return 2.0 * image - 1.0


-class ImagicStableDiffusionPipeline(DiffusionPipeline):
+class ImagicStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
     r"""
     Pipeline for imagic image editing.
     See paper here: https://arxiv.org/pdf/2210.09276.pdf

@@ -105,31 +106,6 @@ def __init__(
             feature_extractor=feature_extractor,
         )

-    def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
-        r"""
-        Enable sliced attention computation.
-        When this option is enabled, the attention module will split the input tensor in slices, to compute attention
-        in several steps. This is useful to save some memory in exchange for a small speed decrease.
-        Args:
-            slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
-                When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
-                `attention_head_dim` must be a multiple of `slice_size`.
-        """
-        if slice_size == "auto":
-            # half the attention head size is usually a good trade-off between
-            # speed and memory
-            slice_size = self.unet.config.attention_head_dim // 2
-        self.unet.set_attention_slice(slice_size)
-
-    def disable_attention_slicing(self):
-        r"""
-        Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
-        back to computing attention in one step.
-        """
-        # set slice_size = `None` to disable `attention slicing`
-        self.enable_attention_slicing(None)
-
     def train(
         self,
         prompt: Union[str, List[str]],

examples/community/img2img_inpainting.py

-27

@@ -129,33 +129,6 @@ def __init__(
             feature_extractor=feature_extractor,
         )

-    def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
-        r"""
-        Enable sliced attention computation.
-
-        When this option is enabled, the attention module will split the input tensor in slices, to compute attention
-        in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
-        Args:
-            slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
-                When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
-                `attention_head_dim` must be a multiple of `slice_size`.
-        """
-        if slice_size == "auto":
-            # half the attention head size is usually a good trade-off between
-            # speed and memory
-            slice_size = self.unet.config.attention_head_dim // 2
-        self.unet.set_attention_slice(slice_size)
-
-    def disable_attention_slicing(self):
-        r"""
-        Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
-        back to computing attention in one step.
-        """
-        # set slice_size = `None` to disable `attention slicing`
-        self.enable_attention_slicing(None)
-
     @torch.no_grad()
     def __call__(
         self,
