# Source: DiffSynth-Studio/examples/hidream_o1_image/model_inference/HiDream-O1-Image.py at main · daqiege/DiffSynth-Studio · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import torch
from diffsynth.pipelines.hidream_o1_image import HiDreamO1ImagePipeline
from diffsynth.core.loader.config import ModelConfig
from PIL import Image
import os
from modelscope import dataset_snapshot_download


# Build the HiDream-O1-Image pipeline in bfloat16 on the CUDA device.
# Model files are matched with the "model-*.safetensors" pattern; the processor
# config points at the root ("./") of the same model repository.
hidream_weights = ModelConfig(model_id="HiDream-ai/HiDream-O1-Image", origin_file_pattern="model-*.safetensors")
hidream_processor = ModelConfig(model_id="HiDream-ai/HiDream-O1-Image", origin_file_pattern="./")
pipe = HiDreamO1ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[hidream_weights],
    processor_config=hidream_processor,
)

# Text-to-Image
# Generate a 2048x2048 image from the text prompt alone (50 steps, cfg_scale 4.0,
# seed 42) and write it to "image.jpg". The resulting `image` object is reused
# as the edit source by the Image-to-Image section that follows.
prompt = "medium shot, eye-level, front view. A woman is seated in an ornate bedroom, illuminated by candlelight, with a calm and composed expression. The subject is a young woman with fair skin, light brown hair styled in an updo with loose tendrils framing her face, and blue eyes. She wears a cream-colored satin robe with delicate floral embroidery and lace trim along the neckline. Her ears are adorned with pearl drop earrings. She is seated on a bed with a dark, intricately carved wooden headboard. To her left, a wooden nightstand holds three lit white candles and a candelabra with multiple lit candles in the background. The bed is covered with patterned pillows and a dark, textured blanket. The walls are paneled with dark wood and feature a large, ornate tapestry with muted earth tones. The lighting creates soft highlights on her face and robe, with warm shadows cast across the room."
image = pipe(
    prompt=prompt,
    negative_prompt=" ",
    cfg_scale=4.0,
    height=2048,
    width=2048,
    seed=42,
    num_inference_steps=50,
)
image.save("image.jpg")

# Image-to-Image
# Edit the image returned by the preceding pipe() call according to a short
# instruction prompt, then write the result to "image_edit.jpg".
source_images = [image]  # captured before `image` is rebound to the edited result
image = pipe(
    prompt="change her clothes to blue",
    negative_prompt=" ",
    cfg_scale=4.0,
    height=2048,
    width=2048,
    seed=43,
    num_inference_steps=50,
    edit_image=source_images,
)
image.save("image_edit.jpg")

# Download the example assets used by the multi-reference sections below.
# Only files matching "hidream_o1_image/HiDream-O1-Image/*" are requested, and
# they are placed under "data/diffsynth_example_dataset".
example_dataset_request = {
    "dataset_id": "DiffSynth-Studio/diffsynth_example_dataset",
    "local_dir": "data/diffsynth_example_dataset",
    "allow_file_pattern": "hidream_o1_image/HiDream-O1-Image/*",
}
dataset_snapshot_download(**example_dataset_request)
# Multi-Reference Subject-Driven Personalization
# Every ".jpg" file in the IP directory is loaded as an RGB reference image and
# passed to the pipeline through `edit_image`; the result is saved to "ip.jpg".
ref_image_dir = "./data/diffsynth_example_dataset/hidream_o1_image/HiDream-O1-Image/IP/"
# os.listdir() returns entries in arbitrary order, so the file names are sorted
# to give the pipeline the same reference order on every machine (otherwise the
# fixed seed would not be enough to make this example reproducible).
ref_image_names = sorted(f for f in os.listdir(ref_image_dir) if f.endswith(".jpg"))
ref_images = [Image.open(os.path.join(ref_image_dir, f)).convert("RGB") for f in ref_image_names]
prompt = "A young boy with blonde hair stands on steps wearing light blue jeans, a white t-shirt with logo, and blue and white sneakers. He wears a brown cord necklace with beads, a black wristwatch with digital display, and carries a yellow fanny pack with white zipper. In his hand is a red boxing glove with white top, a teal plastic toy car, and a plastic toy figure of Captain America. He wears a straw hat with cream band. Natural light illuminates the scene."
image = pipe(
    prompt=prompt,
    negative_prompt=" ",
    cfg_scale=4.0,
    height=2048,
    width=2048,
    seed=43,
    num_inference_steps=50,
    edit_image=ref_images,
)
image.save("ip.jpg")

# Multi-Reference Subject-Driven Personalization with Skeleton
# Every ".jpg" file in the IP_skeleton directory is loaded as an RGB reference
# image and passed to the pipeline through `edit_image`; the result is saved to
# "ip_skeleton.jpg".
ref_image_dir = "./data/diffsynth_example_dataset/hidream_o1_image/HiDream-O1-Image/IP_skeleton/"
# os.listdir() returns entries in arbitrary order, so the file names are sorted
# to give the pipeline the same reference order on every machine (otherwise the
# fixed seed would not be enough to make this example reproducible).
ref_image_names = sorted(f for f in os.listdir(ref_image_dir) if f.endswith(".jpg"))
ref_images = [Image.open(os.path.join(ref_image_dir, f)).convert("RGB") for f in ref_image_names]
prompt = "Create a realistic try-on image of the person wearing the provided clothing."
image = pipe(
    prompt=prompt,
    negative_prompt=" ",
    cfg_scale=4.0,
    height=2048,
    width=2048,
    seed=43,
    num_inference_steps=50,
    edit_image=ref_images,
)
image.save("ip_skeleton.jpg")

# Multi-Reference Subject-Driven Personalization with Layout
# Each bounding box is given in relative coordinates as [x1, x2, y1, y2].
layout_bboxes = [[0.21, 0.44, 0.49, 0.74], [0.58, 0.80, 0.09, 0.34]]
ref_image_dir = "./data/diffsynth_example_dataset/hidream_o1_image/HiDream-O1-Image/IP_layout/"
# os.listdir() returns entries in arbitrary order, so the file names are sorted
# to give the pipeline the same reference order on every machine.
# NOTE(review): the boxes presumably pair with the reference images by position;
# confirm that sorted file-name order is the intended box order for this dataset.
ref_image_names = sorted(f for f in os.listdir(ref_image_dir) if f.endswith(".jpg"))
ref_images = [Image.open(os.path.join(ref_image_dir, f)).convert("RGB") for f in ref_image_names]
prompt = "City council members pose with relaxed smiles on a sunlit terrace, warm approachable mood, golden hour, cinematic soft glow."
image = pipe(
    prompt=prompt,
    negative_prompt=" ",
    cfg_scale=4.0,
    height=2048,
    width=2048,
    seed=43,
    num_inference_steps=50,
    edit_image=ref_images,
    layout_bboxes=layout_bboxes,
)
image.save("ip_layout.jpg")