Commit e01a919: Add Qwen-Image (#2348)
Parent: 840e497

File tree: 12 files changed, +1236 −1 lines

README.md

Lines changed: 2 additions & 0 deletions
@@ -303,11 +303,13 @@ The following model architectures, tasks and device distributions have been validated
 | Stable Diffusion 3 | :heavy_check_mark: | :heavy_check_mark: | <ul><li>[text-to-image generation](/examples/stable-diffusion#stable-diffusion-3-and-35-sd3)</li></ul> |
 | LDM3D | | <ul><li>Single card</li></ul> | <ul><li>[text-to-image generation](/examples/stable-diffusion#text-to-image-generation)</li></ul> |
 | FLUX.1 | <ul><li>LoRA</li></ul> | <ul><li>Single card</li></ul> | <ul><li>[text-to-image generation](/examples/stable-diffusion#flux1)</li><li>[image-to-image generation](/examples/stable-diffusion#flux1-image-to-image)</li></ul> |
+| Qwen Image | | <ul><li>Single card</li></ul> | <ul><li>[text-to-image generation](/examples/stable-diffusion#qwen-image)</li></ul> |
 | Text to Video | | <ul><li>Single card</li></ul> | <ul><li>[text-to-video generation](/examples/stable-diffusion#text-to-video-generation)</li></ul> |
 | Image to Video | | <ul><li>Single card</li></ul> | <ul><li>[image-to-video generation](/examples/stable-diffusion#image-to-video-generation)</li></ul> |
 | i2vgen-xl | | <ul><li>Single card</li></ul> | <ul><li>[image-to-video generation](/examples/stable-diffusion#I2vgen-xl)</li></ul> |
 | Wan | | :heavy_check_mark: | <ul><li>[text-to-video generation](/examples/stable-diffusion#text-to-video-with-wan-22)</li><li>[image-to-video generation](/examples/stable-diffusion#image-to-video-with-wan-22)</li></ul> |
 
+
 ### PyTorch Image Models/TIMM:
 
 | Architecture | Training | Inference | Tasks |

docs/source/index.mdx

Lines changed: 3 additions & 0 deletions
@@ -120,6 +120,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all been validated
 
 - Diffusers
 
+
 | Architecture | Training. | Inference | Tasks |
 |----------------------------|:----------------------:|:-----------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | Stable Diffusion ||| <ul><li>[text-to-image generation](/examples/stable-diffusion)</li></ul> |
@@ -128,11 +129,13 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all been validated
 | Stable Diffusion 3 ||| <ul><li>[text-to-image generation](/examples/stable-diffusion#stable-diffusion-3-and-35-sd3)</li></ul> |
 | LDM3D | | <ul><li>Single card</li></ul> | <ul><li>[text-to-image generation](/examples/stable-diffusion)</li></ul> |
 | FLUX.1 | <ul><li>LoRA</li></ul> | <ul><li>Single card</li></ul> | <ul><li>[text-to-image generation](/examples/stable-diffusion)</li></ul> |
+| Qwen Image | | <ul><li>Single card</li></ul> | <ul><li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li></ul> |
 | Text to Video | | <ul><li>Single card</li></ul> | <ul><li>[text-to-video generation](/examples/stable-diffusion#text-to-video-generation)</li></ul> |
 | Image to Video | | <ul><li>Single card</li></ul> | <ul><li>[image-to-video generation](/examples/stable-diffusion#image-to-video-generation)</li></ul> |
 | i2vgen-xl | | <ul><li>Single card</li></ul> | <ul><li>[image-to-video generation](/examples/stable-diffusion#I2vgen-xl)</li></ul> |
 | Wan | || <ul><li>[text-to-video generation](/examples/stable-diffusion#text-to-video-with-wan-22)</li><li>[image-to-video generation](/examples/stable-diffusion#image-to-video-with-wan-22)</li></ul> |
 
+
 - PyTorch Image Models/TIMM:
 
 | Architecture | Training | Inference | Tasks |

examples/stable-diffusion/README.md

Lines changed: 25 additions & 0 deletions
@@ -178,6 +178,31 @@ FLUX in quantization mode by setting runtime variable `QUANT_CONFIG=quantization
 
 To run with FLUX.1-schnell model, a distilled version of FLUX.1 (which is not gated), use `--model_name_or_path black-forest-labs/FLUX.1-schnell`.
 
+### Qwen-Image
+
+Qwen-Image was introduced by Alibaba Cloud [here](https://www.alibabacloud.com/blog/introducing-qwen-image-novel-model-in-image-generation-and-editing_602447).
+
+Here is how to run the Qwen-Image model:
+
+```bash
+PT_HPU_LAZY_MODE=1 python text_to_image_generation.py \
+    --model_name_or_path Qwen/Qwen-Image \
+    --prompts "A cat holding a sign that says hello world" \
+    --negative_prompts " " \
+    --num_images_per_prompt 10 \
+    --batch_size 1 \
+    --num_inference_steps 10 \
+    --image_save_dir /tmp/qwen-image \
+    --scheduler flow_match_euler_discrete \
+    --use_habana \
+    --use_hpu_graphs \
+    --gaudi_config Habana/stable-diffusion \
+    --sdp_on_bf16 \
+    --bf16
+```
+
+> [!NOTE]
+> If you do not pass `--negative_prompts`, an empty string is used as the default negative prompt.
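The note above corresponds to a small fallback the example script applies before calling the pipeline. A minimal sketch of that behavior (`resolve_negative_prompt` is a hypothetical helper name; in the real script the logic lives inline in `text_to_image_generation.py`):

```python
def resolve_negative_prompt(negative_prompts):
    # Mirrors the fallback added in this commit: the Qwen-Image pipeline is
    # always called with a negative prompt, so a blank one stands in when the
    # user did not supply --negative_prompts.
    if negative_prompts is None:
        return " "
    return negative_prompts

print(repr(resolve_negative_prompt(None)))      # ' '
print(repr(resolve_negative_prompt("blurry")))  # 'blurry'
```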
 ## ControlNet
 
 ControlNet was introduced in [Adding Conditional Control to Text-to-Image Diffusion Models](https://huggingface.co/papers/2302.05543)

examples/stable-diffusion/text_to_image_generation.py

Lines changed: 20 additions & 0 deletions
@@ -325,9 +325,11 @@ def main():
     sdxl_models = ["stable-diffusion-xl", "sdxl"]
     sd3_models = ["stable-diffusion-3", "sd3"]
     flux_models = ["FLUX.1", "flux"]
+    qwen_models = ["Qwen-Image", "qwen"]
     sdxl = True if any(model in args.model_name_or_path for model in sdxl_models) else False
     sd3 = True if any(model in args.model_name_or_path for model in sd3_models) else False
     flux = True if any(model in args.model_name_or_path for model in flux_models) else False
+    qwen = True if any(model in args.model_name_or_path for model in qwen_models) else False
     controlnet = True if args.control_image is not None else False
     inpainting = True if (args.base_image is not None) and (args.mask_image is not None) else False
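The hunk above extends the script's substring-based model-family detection to Qwen-Image. A minimal standalone sketch of that pattern (`is_qwen` is a hypothetical helper name; the script computes the flag inline):

```python
# Family markers checked against --model_name_or_path, as in the commit.
qwen_models = ["Qwen-Image", "qwen"]

def is_qwen(model_name_or_path: str) -> bool:
    # Substring match, same shape as the existing sdxl/sd3/flux checks.
    return any(marker in model_name_or_path for marker in qwen_models)

print(is_qwen("Qwen/Qwen-Image"))                   # True
print(is_qwen("black-forest-labs/FLUX.1-schnell"))  # False
```

Note the match is case-sensitive, so a path containing only `QWEN` in a different casing would not be detected by this sketch.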

@@ -549,6 +551,24 @@ def main():
             **kwargs,
         )
 
+    elif qwen:
+        # QwenImage pipelines
+        if controlnet:
+            raise ValueError("QwenImage+ControlNet pipeline is not currently supported")
+        elif inpainting:
+            raise ValueError("QwenImage Inpainting pipeline is not currently supported")
+        else:
+            if negative_prompts is None:
+                logger.warning("Adding an empty string as the negative prompt, since none was specified.")
+                kwargs_call["negative_prompt"] = " "
+
+            from optimum.habana.diffusers import GaudiQwenImagePipeline
+
+            pipeline = GaudiQwenImagePipeline.from_pretrained(
+                args.model_name_or_path,
+                **kwargs,
+            )
+
     else:
         # SD pipelines (SD1.x, SD2.x)
         if controlnet:

optimum/habana/diffusers/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
 from .pipelines.flux.pipeline_flux_img2img import GaudiFluxImg2ImgPipeline
 from .pipelines.i2vgen_xl.pipeline_i2vgen_xl import GaudiI2VGenXLPipeline
 from .pipelines.pipeline_utils import GaudiDiffusionPipeline
+from .pipelines.qwenimage.pipeline_qwenimage import GaudiQwenImagePipeline
 from .pipelines.stable_diffusion.pipeline_stable_diffusion import GaudiStableDiffusionPipeline
 from .pipelines.stable_diffusion.pipeline_stable_diffusion_depth2img import GaudiStableDiffusionDepth2ImgPipeline
 from .pipelines.stable_diffusion.pipeline_stable_diffusion_image_variation import (
Lines changed: 5 additions & 0 deletions (new file)
@@ -0,0 +1,5 @@
+from .transformer_qwenimage import (
+    GaudiQwenDoubleStreamAttnProcessor2_0,
+    GaudiQwenEmbedRope,
+    GaudiQwenTimestepProjEmbeddings,
+)
