Commit ebade48

add tests
1 parent dac8ea7 commit ebade48

File tree

2 files changed: +156 -3 lines changed

onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py

Lines changed: 8 additions & 3 deletions
@@ -297,6 +297,7 @@ def _optimize_sd_pipeline(
                 f"--force_fp32_ops shall be in the format of module:operator like unet:Attention, got {fp32_operator}"
             )
 
+    op_counters = {}
     for name, model_type in model_type_mapping.items():
         onnx_model_path = source_dir / name / "model.onnx"
         if not os.path.exists(onnx_model_path):
@@ -391,11 +392,13 @@ def _optimize_sd_pipeline(
         m = model_type_class_mapping[model_type](model)
 
         m.get_operator_statistics()
-        m.get_fused_operator_statistics()
+        op_counters[name] = m.get_fused_operator_statistics()
         m.save_model_to_file(str(optimized_model_path), use_external_data_format=use_external_data_format)
         logger.info("%s is optimized", name)
         logger.info("*" * 20)
 
+    return op_counters
+
 
 def _copy_extra_directory(source_dir: Path, target_dir: Path, model_list: List[str]):
     """Copy extra directory that does not have onnx model
@@ -463,7 +466,7 @@ def optimize_stable_diffusion_pipeline(
 
     _copy_extra_directory(source_dir, target_dir, model_list)
 
-    _optimize_sd_pipeline(
+    return _optimize_sd_pipeline(
         source_dir,
         target_dir,
         pipeline_type,
@@ -571,7 +574,9 @@ def main(argv: Optional[List[str]] = None):
     args = parse_arguments(argv)
 
     logger.info("Arguments: %s", str(args))
-    optimize_stable_diffusion_pipeline(
+
+    # Return op counters for testing purpose.
+    return optimize_stable_diffusion_pipeline(
         args.input, args.output, args.overwrite, args.use_external_data_format, args.float16, args.inspect, args
     )

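With these changes, _optimize_sd_pipeline, optimize_stable_diffusion_pipeline, and main all return the per-model fused-operator counters, which is what the new tests assert against. A minimal sketch of how a caller might consume the returned dictionary; the import alias, directory names, and example counts in the comments are illustrative assumptions, not part of this commit:

# Illustrative sketch only: assumes the script's directory is on sys.path.
from optimize_pipeline import main as optimize_stable_diffusion

# main(argv) optimizes an exported pipeline and now returns fused-operator statistics.
op_counters = optimize_stable_diffusion(
    ["--input", "exported_onnx", "--output", "optimized_onnx", "--overwrite"]
)

# op_counters maps each submodel name to {fused_op_name: count}, e.g.
# {"unet": {"Attention": 32, ...}, "vae_decoder": {"GroupNorm": 29, ...}}.
for model_name, counters in op_counters.items():
    print(model_name, counters)
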
onnxruntime/test/python/transformers/test_optimizer_stable_diffusion.py

Lines changed: 148 additions & 0 deletions
@@ -29,6 +29,8 @@
 TINY_MODELS = {
     "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch",
     "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl",
+    "stable-diffusion-3": "optimum-internal-testing/tiny-random-stable-diffusion-3",
+    "flux": "optimum-internal-testing/tiny-random-flux",
 }
 
 
@@ -267,5 +269,151 @@ def test_optimize_sdxl_fp16(self):
         self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0]))
 
 
+class TestSD3FluxOptimization(unittest.TestCase):
+    def optimize_sd3_or_flux(
+        self, model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16, atol
+    ):
+        from optimum.onnxruntime import ORTPipelineForText2Image
+
+        if os.path.exists(export_onnx_dir):
+            shutil.rmtree(export_onnx_dir, ignore_errors=True)
+
+        baseline = ORTPipelineForText2Image.from_pretrained(model_name, export=True, provider="CUDAExecutionProvider")
+        if not os.path.exists(export_onnx_dir):
+            baseline.save_pretrained(export_onnx_dir)
+
+        argv = [
+            "--input",
+            export_onnx_dir,
+            "--output",
+            optimized_onnx_dir,
+            "--overwrite",
+            "--disable_group_norm",
+            "--disable_bias_splitgelu",
+        ]
+
+        if is_float16:
+            argv.append("--float16")
+
+        op_counters = optimize_stable_diffusion(argv)
+
+        for name in expected_op_counters:
+            self.assertTrue(name in op_counters)
+            for op, count in expected_op_counters[name].items():
+                self.assertTrue(op in op_counters[name])
+                self.assertEqual(op_counters[name][op], count)
+
+        treatment = ORTPipelineForText2Image.from_pretrained(optimized_onnx_dir, provider="CUDAExecutionProvider")
+        batch_size, num_images_per_prompt, height, width = 1, 1, 64, 64
+        inputs = {
+            "prompt": ["starry night by van gogh"] * batch_size,
+            "num_inference_steps": 3,
+            "num_images_per_prompt": num_images_per_prompt,
+            "height": height,
+            "width": width,
+            "output_type": "np",
+        }
+
+        seed = 123
+        np.random.seed(seed)
+        import torch
+
+        baseline_outputs = baseline(**inputs, generator=torch.Generator(device="cuda").manual_seed(seed))
+
+        np.random.seed(seed)
+        treatment_outputs = treatment(**inputs, generator=torch.Generator(device="cuda").manual_seed(seed))
+
+        self.assertTrue(np.allclose(baseline_outputs.images[0], treatment_outputs.images[0], atol=atol))
+
+    @pytest.mark.slow
+    def test_sd3(self):
+        """This tests optimization of stable diffusion 3 pipeline"""
+        model_name = TINY_MODELS["stable-diffusion-3"]
+
+        expected_op_counters = {
+            "transformer": {
+                "FastGelu": 3,
+                "MultiHeadAttention": 2,
+                "LayerNormalization": 8,
+                "SimplifiedLayerNormalization": 0,
+            },
+            "vae_encoder": {"Attention": 0, "GroupNorm": 0, "SkipGroupNorm": 0, "NhwcConv": 17},
+            "vae_decoder": {"Attention": 0, "GroupNorm": 0, "SkipGroupNorm": 0, "NhwcConv": 25},
+            "text_encoder": {
+                "Attention": 2,
+                "Gelu": 0,
+                "LayerNormalization": 1,
+                "QuickGelu": 2,
+                "SkipLayerNormalization": 4,
+            },
+            "text_encoder_2": {
+                "Attention": 2,
+                "Gelu": 0,
+                "LayerNormalization": 1,
+                "QuickGelu": 0,
+                "SkipLayerNormalization": 4,
+            },
+            "text_encoder_3": {
+                "Attention": 2,
+                "MultiHeadAttention": 0,
+                "Gelu": 0,
+                "FastGelu": 2,
+                "BiasGelu": 0,
+                "GemmFastGelu": 0,
+                "LayerNormalization": 0,
+                "SimplifiedLayerNormalization": 2,
+                "SkipLayerNormalization": 0,
+                "SkipSimplifiedLayerNormalization": 3,
+            },
+        }
+
+        export_onnx_dir = "tiny-random-stable-diffusion-3"
+        optimized_onnx_dir = "tiny-random-stable-diffusion-3-optimized-fp32"
+        self.optimize_sd3_or_flux(
+            model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16=False, atol=5e-3
+        )
+
+        optimized_onnx_dir = "tiny-random-stable-diffusion-3-optimized-fp16"
+        self.optimize_sd3_or_flux(
+            model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16=True, atol=5e-2
+        )
+
+    @pytest.mark.slow
+    def test_flux(self):
+        """This tests optimization of flux pipeline"""
+        model_name = TINY_MODELS["flux"]
+
+        expected_op_counters = {
+            "transformer": {
+                "FastGelu": 3,
+                "MultiHeadAttention": 2,
+                "LayerNormalization": 6,
+                "SimplifiedLayerNormalization": 6,
+            },
+            "vae_encoder": {"Attention": 0, "GroupNorm": 0, "SkipGroupNorm": 0, "NhwcConv": 8},
+            "vae_decoder": {"Attention": 0, "GroupNorm": 0, "SkipGroupNorm": 0, "NhwcConv": 10},
+            "text_encoder": {
+                "Attention": 5,
+                "Gelu": 0,
+                "LayerNormalization": 1,
+                "QuickGelu": 0,
+                "SkipLayerNormalization": 10,
+            },
+            # The tiny flux uses clip, but FLUX.1-dev uses t5, so we skip op count verification for text_encoder_2.
+            "text_encoder_2": {},
+        }
+
+        export_onnx_dir = "tiny-random-flux"
+        optimized_onnx_dir = "tiny-random-flux-optimized-fp32"
+        self.optimize_sd3_or_flux(
+            model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16=False, atol=1e-3
+        )
+
+        optimized_onnx_dir = "tiny-random-flux-optimized-fp16"
+        self.optimize_sd3_or_flux(
+            model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16=True, atol=5e-2
+        )
+
+
 if __name__ == "__main__":
     unittest.main()

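The new tests are marked with pytest.mark.slow and run optimum's ORTPipelineForText2Image on CUDAExecutionProvider, so they need a GPU build of onnxruntime plus optimum, diffusers, transformers, and a CUDA-enabled torch. A sketch of one way to run only the new class directly with unittest; the module import below assumes the test directory is on sys.path, and CI marker selection may differ:

# Assumed standalone run of the new test class.
import unittest

from test_optimizer_stable_diffusion import TestSD3FluxOptimization

suite = unittest.TestLoader().loadTestsFromTestCase(TestSD3FluxOptimization)
unittest.TextTestRunner(verbosity=2).run(suite)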