Commit c8abea4: "fix sdl"

1 parent 1a6163d commit c8abea4
File tree: 9 files changed, +78 -63 lines changed

tests/python_tests/samples/conftest.py
Lines changed: 2 additions & 1 deletion

@@ -159,7 +159,7 @@
     },
     "tiny-random-ltx-video": {
         "name": "optimum-intel-internal-testing/tiny-random-ltx-video",
-        "convert_args": ["--trust-remote-code"]
+        "convert_args": ["--trust-remote-code"],
    },
 }

@@ -424,6 +424,7 @@ def generate_test_content(request):
         logger.info(f"Removing test content: {file_path}")
         os.remove(file_path)

+
 @pytest.fixture(scope="session")
 def generate_llm_bench_input_generation_jsonl(request):
     """Generate a JSONL file for image generation prompts."""

tests/python_tests/samples/test_tools_llm_benchmark.py
Lines changed: 4 additions & 5 deletions

@@ -337,7 +337,9 @@ def test_python_tool_llm_benchmark_video_prompts(self, download_test_content, co
     @pytest.mark.samples
     @pytest.mark.parametrize("convert_model, sample_args", [
         pytest.param("tiny-random-ltx-video",
-                     ["-d", "cpu", "-n", "1", "--optimum", "--num_steps", "5", "--num_frames", "9", "--frame_rate", 23, "width", 256, "height", 256]),
+                     ["-d", "cpu", "-n", "1", "--optimum", "--num_steps", "5", "--num_frames", "9", "--frame_rate", "23", "width", "256", "height", "256"]),
+        pytest.param("tiny-random-ltx-video",
+                     ["-d", "cpu", "-n", "1", "--num_steps", "4", "--static_reshape", "width", "256", "height", "256", "--optimum"]),
         # pytest.param("tiny-random-ltx-video",
         #              ["-d", "cpu", "-n", "1", "--genai", "--num_steps", "5", "--num_frames", "9", "width", 256, "height", 256]),
     ], indirect=["convert_model"])

@@ -359,10 +361,7 @@ def test_python_tool_llm_benchmark_video_gen(self, convert_model, sample_args):
     @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "--num_steps", "4", "--task", "text-to-video", "--optimum"]])
     @pytest.mark.parametrize("convert_model", ["tiny-random-ltx-video"], indirect=True)
     @pytest.mark.parametrize("generate_llm_bench_input_generation_jsonl", [("video_generation.jsonl", video_generation_json)], indirect=True)
-    def test_python_tool_llm_benchmark_video_gen_json(self, convert_model, download_test_content, generate_llm_bench_input_generation_jsonl, sample_args):
-        # to use the relative media and mask_image paths
-        os.chdir(os.path.dirname(download_test_content))
-
+    def test_python_tool_llm_benchmark_video_gen_json(self, convert_model, generate_llm_bench_input_generation_jsonl, sample_args):
         # Run Python benchmark
         benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py'
         benchmark_py_command = [
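The first parametrize fix quotes the bare integers (23, 256): an argv list handed to a subprocess must contain only strings, so an int anywhere in sample_args fails before the benchmark even starts. A minimal standalone illustration of that failure mode (the inline script is just a stand-in for benchmark.py):

    import subprocess
    import sys

    # Works: every element of the argv list is a string.
    subprocess.run([sys.executable, "-c", "import sys; print(sys.argv[1:])", "--frame_rate", "23"])

    # Fails before launching anything:
    # subprocess.run([sys.executable, "-c", "...", "--frame_rate", 23])
    # TypeError: expected str, bytes or os.PathLike object, not int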

tools/llm_bench/benchmark.py
Lines changed: 1 addition & 1 deletion

@@ -199,7 +199,7 @@ def get_argprser():
                         help="Reshape image generation pipeline to specific width & height at pipeline creation time. Applicable for Image Generation.")
     parser.add_argument("--frame_rate", type=float, required=False, help="Frame rate for video generation and saving. Applicable only for Video Generation.")
     parser.add_argument("--num_frames", type=int, required=False, help="Generated guidance scale. Applicable only for Video Generation.")
-    parser.add_argument("--negative_prompt", type=int, required=False, help="Negative prompts for Video Generation.")
+    parser.add_argument("--negative_prompt", type=str, required=False, help="Negative prompts for Video Generation.")
     parser.add_argument('-mi', '--mask_image', default=None,
                         help='Mask image for Inpainting pipelines. Can be directory or path to single image. Applicable for Image Generation.')
     parser.add_argument('-t', '--task', default=None,
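The type fix matters because argparse applies type to the raw string before storing it: with type=int, any real negative prompt would be rejected at parse time. A minimal standalone check:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--negative_prompt", type=str, required=False,
                        help="Negative prompts for Video Generation.")
    args = parser.parse_args(["--negative_prompt", "blurry, low quality"])
    assert args.negative_prompt == "blurry, low quality"
    # With type=int the same invocation exits with:
    # error: argument --negative_prompt: invalid int value: 'blurry, low quality'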

tools/llm_bench/llm_bench_utils/config_class.py
Lines changed: 31 additions & 30 deletions

@@ -12,7 +12,7 @@
     SpeechT5HifiGan,
     AutoModelForSequenceClassification
 )
-from diffusers.pipelines import DiffusionPipeline, LDMSuperResolutionPipeline
+from diffusers.pipelines import DiffusionPipeline, LDMSuperResolutionPipeline, LTXPipeline
 from optimum.intel.openvino import (
     OVModelForCausalLM,
     OVModelForSeq2SeqLM,

@@ -23,15 +23,16 @@
     OVPipelineForImage2Image,
     OVModelForFeatureExtraction,
     OVModelForTextToSpeechSeq2Seq,
-    OVModelForSequenceClassification
+    OVModelForSequenceClassification,
+    OVLTXPipeline,
 )
 from llm_bench_utils.ov_model_classes import OVMPTModel, OVLDMSuperResolutionPipeline, OVChatGLMModel
 from dataclasses import dataclass, field


 @dataclass
 class UseCase:
-    task = ''
+    task = ""
     model_types: list[str] = field(default_factory=list)
     ov_cls: type | None = None
     pt_cls: type | None = AutoModel

@@ -45,17 +46,17 @@ class UseCaseImageGen(UseCase):
     pt_cls: type | None = DiffusionPipeline

 TASK = {
-    "text2img": {"name": 'text-to-image', "ov_cls": OVDiffusionPipeline},
-    "img2img": {"name": 'image-to-image', "ov_cls": OVPipelineForImage2Image},
-    "inpainting": {"name": 'inpainting', "ov_cls": OVPipelineForInpainting}
+    "text2img": {"name": "text-to-image", "ov_cls": OVDiffusionPipeline},
+    "img2img": {"name": "image-to-image", "ov_cls": OVPipelineForImage2Image},
+    "inpainting": {"name": "inpainting", "ov_cls": OVPipelineForInpainting}
 }


 @dataclass
 class UseCaseVideoGen(UseCase):
     task = "video_gen"
-    ov_cls: type | None = OVDiffusionPipeline
-    pt_cls: type | None = DiffusionPipeline
+    ov_cls: type | None = OVLTXPipeline
+    pt_cls: type | None = LTXPipeline


 @dataclass

@@ -81,35 +82,35 @@ class UseCaseTextGen(UseCase):

 @dataclass
 class UseCaseCodeGen(UseCase):
-    task = 'code_gen'
+    task = "code_gen"
     ov_cls: type | None = OVModelForCausalLM
     pt_cls: type | None = AutoModelForCausalLM


 @dataclass
 class UseCaseImageCls(UseCase):
-    task = 'image_cls'
+    task = "image_cls"
     ov_cls: type | None = OVModelForCausalLM
     pt_cls: type | None = AutoModelForCausalLM


 @dataclass
 class UseCaseLDMSuperResolution(UseCase):
-    task = 'ldm_super_resolution'
+    task = "ldm_super_resolution"
     ov_cls: type | None = OVLDMSuperResolutionPipeline
     pt_cls: type | None = LDMSuperResolutionPipeline


 @dataclass
 class UseCaseTextEmbeddings(UseCase):
-    task = 'text_embed'
+    task = "text_embed"
     ov_cls: type | None = OVModelForFeatureExtraction
     pt_cls: type | None = AutoModel


 @dataclass
 class UseCaseTextReranker(UseCase):
-    task = 'text_rerank'
+    task = "text_rerank"
     ov_cls: type | None = OVModelForSequenceClassification
     pt_cls: type | None = AutoModelForSequenceClassification


@@ -125,36 +126,36 @@ def is_qwen_causallm_arch(config):

 @dataclass
 class UseCaseTextToSpeech(UseCase):
-    task = 'text_to_speech'
+    task = "text_to_speech"
     ov_cls: type | None = OVModelForTextToSpeechSeq2Seq
     pt_cls: type | None = SpeechT5ForTextToSpeech
     tokenizer_cls: type = SpeechT5Processor
     vocoder_cls: type = SpeechT5HifiGan


 USE_CASES = {
-    'image_gen': [UseCaseImageGen(['stable-diffusion-', 'ssd-', 'tiny-sd', 'small-sd', 'lcm-', 'sdxl', 'dreamlike', "flux"])],
-    'video_gen': [UseCaseVideoGen('ltx')],
+    "image_gen": [UseCaseImageGen(["stable-diffusion-", "ssd-", "tiny-sd", "small-sd", "lcm-", "sdxl", "dreamlike", "flux"])],
+    "video_gen": [UseCaseVideoGen(["ltx"])],
     "visual_text_gen": [UseCaseVLM(["llava", "llava-next", "qwen2-vl", "llava-qwen2", "internvl-chat", "minicpmv", "phi3-v",
                                     "minicpm-v", "minicpmo", "maira2", "qwen2-5-vl", "smolvlm"])],
-    'speech_to_text': [UseCaseSpeech2Text(['whisper'])],
-    'image_cls': [UseCaseImageCls(['vit'])],
-    'code_gen': [UseCaseCodeGen(["codegen", "codegen2", "stable-code"]),
-                 UseCaseCodeGen(['replit'], ov_cls=OVMPTModel),
-                 UseCaseCodeGen(['codet5'], ov_cls=OVModelForSeq2SeqLM)],
-    'text_gen': [UseCaseTextGen(['arcee', "decoder", "falcon", "glm", "aquila", "gpt", "gpt-", "gpt2", "open-llama", "openchat", "neural-chat", "llama",
+    "speech_to_text": [UseCaseSpeech2Text(["whisper"])],
+    "image_cls": [UseCaseImageCls(["vit"])],
+    "code_gen": [UseCaseCodeGen(["codegen", "codegen2", "stable-code"]),
+                 UseCaseCodeGen(["replit"], ov_cls=OVMPTModel),
+                 UseCaseCodeGen(["codet5"], ov_cls=OVModelForSeq2SeqLM)],
+    "text_gen": [UseCaseTextGen(["arcee", "decoder", "falcon", "glm", "aquila", "gpt", "gpt-", "gpt2", "open-llama", "openchat", "neural-chat", "llama",
                                  "tiny-llama", "tinyllama", "opt", "opt-", "pythia", "pythia-", "stablelm", "stablelm-", "stable-zephyr-", "rocket-",
                                  "vicuna", "dolly", "bloom", "red-pajama", "xgen", "longchat", "jais", "orca-mini", "baichuan", "qwen", "zephyr",
                                  "mistral", "mixtral", "phi", "phi2-", "minicpm", "gemma", "deci", "phi3", "internlm", "olmo", "starcoder", "instruct-gpt",
                                  "granite", "granitemoe", "gptj", "yi-"]),
-                 UseCaseTextGen(['t5'], ov_cls=OVModelForSeq2SeqLM, pt_cls=T5ForConditionalGeneration),
-                 UseCaseTextGen(['mpt'], OVMPTModel),
-                 UseCaseTextGen(['blenderbot'], ov_cls=OVModelForSeq2SeqLM, pt_cls=BlenderbotForConditionalGeneration),
-                 UseCaseTextGen(['chatglm'], ov_cls=OVChatGLMModel, pt_cls=AutoModel)],
-    'ldm_super_resolution': [UseCaseLDMSuperResolution(['ldm-super-resolution'])],
-    'text_embed': [UseCaseTextEmbeddings(["qwen3", "bge", "bert", "albert", "roberta", "xlm-roberta"])],
-    'text_rerank': [UseCaseTextReranker(["qwen3", "bge", "bert", "albert", "roberta", "xlm-roberta"])],
-    'text_to_speech': [UseCaseTextToSpeech(['speecht5'])],
+                 UseCaseTextGen(["t5"], ov_cls=OVModelForSeq2SeqLM, pt_cls=T5ForConditionalGeneration),
+                 UseCaseTextGen(["mpt"], OVMPTModel),
+                 UseCaseTextGen(["blenderbot"], ov_cls=OVModelForSeq2SeqLM, pt_cls=BlenderbotForConditionalGeneration),
+                 UseCaseTextGen(["chatglm"], ov_cls=OVChatGLMModel, pt_cls=AutoModel)],
+    "ldm_super_resolution": [UseCaseLDMSuperResolution(["ldm-super-resolution"])],
+    "text_embed": [UseCaseTextEmbeddings(["qwen3", "bge", "bert", "albert", "roberta", "xlm-roberta"])],
+    "text_rerank": [UseCaseTextReranker(["qwen3", "bge", "bert", "albert", "roberta", "xlm-roberta"])],
+    "text_to_speech": [UseCaseTextToSpeech(["speecht5"])],
 }

 PA_ATTENTION_BACKEND = "PA"

tools/llm_bench/llm_bench_utils/model_utils.py
Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@ def get_param_from_file(args, input_key):
         elif args['use_case'].task == 'image_gen':
             data_list.append('sailing ship in storm by Leonardo da Vinci')
         elif args['use_case'].task == 'video_gen':
-            data_dict["prompt"] = 'cat plays with ball on the christmas tree'
+            data_list["prompt"] = 'cat plays with ball on the christmas tree'
         else:
             raise RuntimeError(f'== {input_key} and prompt file is empty ==')
     elif args[input_key] is not None and args['prompt_file'] is not None:
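A standalone note on the two container types appearing in this hunk (plain-Python behavior, not specific to this repo): data_list is built with .append() in the branches above, while string-keyed assignment is only defined for mappings.

    data_list = []
    data_list.append("sailing ship in storm by Leonardo da Vinci")  # list: positional append
    data_dict = {}
    data_dict["prompt"] = "cat plays with ball on the christmas tree"  # dict: keyed assignment
    # data_list["prompt"] = "..." would raise:
    # TypeError: list indices must be integers or slices, not str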

tools/llm_bench/llm_bench_utils/ov_utils.py
Lines changed: 3 additions & 3 deletions

@@ -1271,11 +1271,11 @@ def create_video_gen_model(model_path, device, memory_data_collector, **kwargs):
     start = time.perf_counter()
     if kwargs.get("static_reshape", False):
         ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config, compile=False)
-        num_images_per_prompt = kwargs.get("batch_size", 1)
         height = kwargs.get("height", 512)
         width = kwargs.get("width", 512)
-        log.info(f"Video Pipeline reshape(batch_size=1, height={height}, width={width}, num_images_per_prompt={num_images_per_prompt})")
-        ov_model.reshape(batch_size=1, height=height, width=width, num_images_per_prompt=num_images_per_prompt)
+        num_frames = kwargs.get("num_frames", 25)
+        log.info(f"Video Pipeline reshape(batch_size=1, height={height}, width={width}, num_frames={num_frames})")
+        ov_model.reshape(batch_size=1, height=height, width=width, num_frames=num_frames)
         ov_model.compile()
     else:
         ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config)
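A usage sketch of the static-reshape path above; the model path and shape values are placeholders, while the from_pretrained/reshape/compile calls mirror the hunk:

    from optimum.intel.openvino import OVLTXPipeline

    # Defer compilation so the dynamic dimensions can be fixed first.
    pipe = OVLTXPipeline.from_pretrained("./tiny-random-ltx-video-ov", compile=False)
    # Video pipelines are reshaped over num_frames; the removed code reused the
    # image pipelines' num_images_per_prompt, which does not apply here.
    pipe.reshape(batch_size=1, height=256, width=256, num_frames=9)
    pipe.compile()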

tools/llm_bench/task/image_generation.py
Lines changed: 1 addition & 1 deletion

@@ -235,7 +235,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter

     if framework == "ov" and not use_genai:
         stable_diffusion_hook.new_text_encoder(pipe)
-        stable_diffusion_hook.new_unet(pipe)
+        stable_diffusion_hook.new_main_model(pipe)
         stable_diffusion_hook.new_vae_decoder(pipe)

     log.info(f'Benchmarking iter nums(exclude warm-up): {num_iters}, prompt nums: {len(image_list)}, prompt idx: {prompt_idx_list}')
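The rename from new_unet to new_main_model reflects that not every diffusion pipeline denoises with a UNet; DiT-style pipelines such as Flux and LTX expose a transformer instead. A hypothetical, standalone illustration of the generalization (not the repo's hook implementation):

    def get_main_model(pipe):
        # SD-family pipelines carry pipe.unet; DiT-family ones carry pipe.transformer.
        for attr in ("unet", "transformer"):
            model = getattr(pipe, attr, None)
            if model is not None:
                return model
        raise AttributeError("pipeline exposes neither 'unet' nor 'transformer'")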

tools/llm_bench/task/pipeline_utils.py
Lines changed: 2 additions & 1 deletion

@@ -228,7 +228,8 @@ def collect_prompts_step(args, get_prompt_fn):
     return text_list, prompt_idx_list


-def launch(pipeline: CommonPipeline, iter_num: int, prompt_idx: int, iter_timestamp: dict, input_item: str|dict, proc_id: int, bench_hook: object | None) -> dict:
+def launch(pipeline: CommonPipeline, iter_num: int, prompt_idx: int, iter_timestamp: dict, input_item: str | dict,
+           proc_id: int, bench_hook: object | None) -> dict:
     iter_timestamp[iter_num][prompt_idx]["start"] = datetime.datetime.now().isoformat()
     iter_data, _ = pipeline.run(input_item, iter_num, prompt_idx, proc_id, bench_hook)
     iter_timestamp[iter_num][prompt_idx]["end"] = datetime.datetime.now().isoformat()

tools/llm_bench/task/video_generation.py
Lines changed: 33 additions & 20 deletions

@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2023-2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
-import copy
-# import hashlib
 import logging as log

 from typing import Any

@@ -20,7 +18,7 @@
 from llm_bench_utils.prompt_utils import get_video_gen_prompt
 from task.pipeline_utils import CommonPipeline, execution_time_in_sec, collect_prompts_step, iteration_step

-FW_UTILS = {'pt': llm_bench_utils.pt_utils, 'ov': llm_bench_utils.ov_utils}
+FW_UTILS = {"pt": llm_bench_utils.pt_utils, "ov": llm_bench_utils.ov_utils}

 DEFAULT_NUM_FRAMES = 25
 DEFAULT_INFERENCE_STEPS = 25

@@ -29,28 +27,41 @@
 DEFAULT_FRAME_RATE = 25


-def collect_input_args(input_param: dict, width: int = None, height: int = None, num_steps: int = None, num_frames: int = None, frame_rate: int = None):
+def collect_input_args(
+    input_param: dict,
+    width: int = None,
+    height: int = None,
+    num_steps: int = None,
+    num_frames: int = None,
+    frame_rate: int = None,
+):
     input_args = {}
+    input_args["width"] = input_param.get("width", width or DEFAULT_IMAGE_WIDTH)
+    input_args["height"] = input_param.get("height", height or DEFAULT_IMAGE_HEIGHT)
+    input_args["num_inference_steps"] = input_param.get("num_steps", num_steps or DEFAULT_INFERENCE_STEPS)
+    input_args["num_frames"] = input_param.get("num_frames", num_frames or DEFAULT_NUM_FRAMES)
+    input_args["frame_rate"] = input_param.get("frame_rate", frame_rate or DEFAULT_FRAME_RATE)

-    input_args["width"] = input_param.get('width', width or DEFAULT_IMAGE_WIDTH)
-    input_args["height"] = input_param.get('height', height or DEFAULT_IMAGE_HEIGHT)
-    input_args["num_inference_steps"] = input_param.get('num_steps', num_steps or DEFAULT_INFERENCE_STEPS)
-    input_args["num_frames"] = input_param.get('num_frames', num_frames or DEFAULT_NUM_FRAMES)
-    input_args["frame_rate"] = input_param.get('frame_rate', frame_rate or DEFAULT_FRAME_RATE)
-
-    guidance_scale = input_param.get('guidance_scale')
+    guidance_scale = input_param.get("guidance_scale")
     if guidance_scale is not None:
         input_args["guidance_scale"] = guidance_scale
-    guidance_rescale = input_param.get('guidance_scale')
+    guidance_rescale = input_param.get("guidance_scale")
     if guidance_rescale is not None:
         input_args["guidance_rescale"] = guidance_rescale

     return input_args


 class TextToVideoOptimum(CommonPipeline):
-    def __init__(self, model: object, tokenizer: object | None, args: dict, model_path: Path,
-                 mem_consumption_meter: MemMonitorWrapper, time_collection_hook: StableDiffusionHook):
+    def __init__(
+        self,
+        model: object,
+        tokenizer: object | None,
+        args: dict,
+        model_path: Path,
+        mem_consumption_meter: MemMonitorWrapper,
+        time_collection_hook: StableDiffusionHook,
+    ):
         super().__init__(model, tokenizer, args, model_path, mem_consumption_meter)
         self.genai = False

@@ -69,12 +80,14 @@ def generate(self, input_data: Any, **kwargs):

     def print_batch_size_info(self, iter_num: int, input_args: dict):
         out_str = "[warm-up]" if iter_num == 0 else "[{}]".format(iter_num)
-        out_str = f"Input params: Batch_size={self.batch_size}, " \
-                  f"steps={self.num_steps}, width={input_args['width']}, " \
-                  f"height={input_args['height']}, frame number={input_args['num_frames']}"
-        if input_args.get('guidance_scale'):
+        out_str = (
+            f"Input params: Batch_size={self.batch_size}, "
+            f"steps={self.num_steps}, width={input_args['width']}, "
+            f"height={input_args['height']}, frame number={input_args['num_frames']}"
+        )
+        if input_args.get("guidance_scale"):
             out_str += f", guidance_scale={input_args['guidance_scale']}"
-        if input_args.get('guidance_rescale'):
+        if input_args.get("guidance_rescale"):
             out_str += f", guidance_rescale={input_args['guidance_rescale']}"
         log.info(out_str)

@@ -196,5 +209,5 @@ def run_video_generation_benchmark(model_path, framework, device, args, num_iter

     iter_data_list, iter_timestamp = iteration_step(image_gen_pipeline, num_iters, text_list, prompt_idx_list, bench_hook=None, subsequent=args['subsequent'])

-    metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
+    metrics_print.print_average(iter_data_list, prompt_idx_list, args["batch_size"], False)
     return iter_data_list, pretrain_time, iter_timestamp
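The reordered collect_input_args keeps a three-level precedence for each generation parameter: a value in the per-prompt input dict wins, then the CLI argument, then the module default. A worked example of the num_frames chain, using the same expression shape as the function:

    DEFAULT_NUM_FRAMES = 25

    # Prompt-file entry present, CLI flag absent:
    input_param, cli_num_frames = {"num_frames": 9}, None
    assert input_param.get("num_frames", cli_num_frames or DEFAULT_NUM_FRAMES) == 9

    # No prompt entry, CLI flag present:
    input_param, cli_num_frames = {}, 16
    assert input_param.get("num_frames", cli_num_frames or DEFAULT_NUM_FRAMES) == 16

    # Neither: the module default applies.
    input_param, cli_num_frames = {}, None
    assert input_param.get("num_frames", cli_num_frames or DEFAULT_NUM_FRAMES) == 25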
