metaquery/app.py at main · facebookresearch/metaquery · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import gradio as gr
import numpy as np
import torch

from pipeline_metaquery import MetaQueryPipeline
from trainer_utils import find_newest_checkpoint
import random
import argparse

MIN_SEED = 0
MAX_SEED = np.iinfo(np.int32).max
MAX_INPUT_IMAGES = 4
DEFAULT_INPUT_IMAGES = 1
MAX_IMAGES_PER_PROMPT = 4
DEFAULT_IMAGES_PER_PROMPT = 1

# Add preset negative prompts at the top with other constants
PRESET_NEGATIVE_PROMPTS = {
    "None": "",
    "Basic": "low resolution, low quality, blurry",
    "Detailed": "bad anatomy, signature, watermark, username, error, missing limbs, error",
    "Artistic": "photographic, realistic, photo-realistic, sharp focus, 3d render, oversaturated",
}


def randomize_seed_fn(seed, randomize_seed):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    return seed


def variable_images(k):
    k = int(k)
    return [gr.update(visible=True)] * k + [gr.update(visible=False)] * (
        MAX_INPUT_IMAGES - k
    )


def process_interleaved_vision_language(
    prompt,
    negative_prompt,
    seed,
    guidance_scale,
    image_guidance_scale,
    num_inference_steps,
    num_images_per_prompt,
    *input_images,
):
    # Use the MetaQuery pipeline to generate images
    valid_images = [img for img in input_images if img is not None]

    images = pipeline(
        image=[valid_images] if len(valid_images) > 0 else None,
        caption=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        image_guidance_scale=image_guidance_scale,
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=num_images_per_prompt,
        generator=torch.Generator().manual_seed(seed),
        enable_progress_bar=True,
    ).images
    return images


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--checkpoint_path", type=str, required=True, help="Path to the checkpoint"
    )
    args = parser.parse_args()

    pipeline = MetaQueryPipeline.from_pretrained(
        find_newest_checkpoint(args.checkpoint_path),
        ignore_mismatched_sizes=True,
        _gradient_checkpointing=False,
        torch_dtype=torch.bfloat16,
    )
    pipeline = pipeline.to(device="cuda", dtype=torch.bfloat16)

    with gr.Blocks(fill_width=True) as demo:
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(
                    label="Prompt",
                    max_lines=1,
                    placeholder="Prompt",
                )
                negative_prompt_preset = gr.Dropdown(
                    choices=list(PRESET_NEGATIVE_PROMPTS.keys()),
                    value="None",
                    label="Negative Prompt Preset",
                )
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    max_lines=1,
                    value=PRESET_NEGATIVE_PROMPTS["None"],
                )

                # Add function to update negative prompt textbox when preset is selected
                def update_negative_prompt(preset_name):
                    return PRESET_NEGATIVE_PROMPTS[preset_name]

                negative_prompt_preset.change(
                    fn=update_negative_prompt,
                    inputs=[negative_prompt_preset],
                    outputs=[negative_prompt],
                )

                seed = gr.Slider(
                    label="Seed", minimum=MIN_SEED, maximum=MAX_SEED, step=1, value=0
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                guidance_scale = gr.Slider(
                    1, 30, step=0.5, value=4.5, label="Guidance Scale"
                )
                image_guidance_scale = gr.Slider(
                    1, 30, step=0.5, value=1.5, label="Image Guidance Scale"
                )
                with gr.Accordion("Advanced options", open=False):
                    num_inference_steps = gr.Slider(
                        1, 100, step=1, value=30, label="Number of Inference Steps"
                    )
                    num_images_per_prompt = gr.Slider(
                        1,
                        MAX_IMAGES_PER_PROMPT,
                        value=DEFAULT_IMAGES_PER_PROMPT,
                        step=1,
                        label="Number of Images",
                    )
                generate_btn = gr.Button("Generate Images")
                num_input_images = gr.Slider(
                    1,
                    MAX_INPUT_IMAGES,
                    value=DEFAULT_INPUT_IMAGES,
                    step=1,
                    label="Number of input images:",
                )
                input_images = [
                    gr.Image(
                        label=f"img{i}",
                        type="pil",
                        visible=True if i < DEFAULT_INPUT_IMAGES else False,
                    )
                    for i in range(MAX_INPUT_IMAGES)
                ]
                num_input_images.change(variable_images, num_input_images, input_images)
            with gr.Column():
                output_gallery = gr.Gallery(columns=2, label="Generated Images")

        prompt.submit(
            fn=randomize_seed_fn,
            inputs=[seed, randomize_seed],
            outputs=seed,
            queue=False,
            api_name=False,
        ).then(
            fn=process_interleaved_vision_language,
            inputs=[
                prompt,
                negative_prompt,
                seed,
                guidance_scale,
                image_guidance_scale,
                num_inference_steps,
                num_images_per_prompt,
                *input_images,
            ],
            queue=False,
            outputs=output_gallery,
        )

        generate_btn.click(
            fn=randomize_seed_fn,
            inputs=[seed, randomize_seed],
            outputs=seed,
            queue=False,
            api_name=False,
        ).then(
            fn=process_interleaved_vision_language,
            inputs=[
                prompt,
                negative_prompt,
                seed,
                guidance_scale,
                image_guidance_scale,
                num_inference_steps,
                num_images_per_prompt,
                *input_images,
            ],
            queue=False,
            outputs=output_gallery,
        )

        demo.launch(share=True)