kcz/add_mp4_disabling_into_frames

krzyczar · krzyczar · commit 1b26b1735352 · 2025-10-08T16:01:36.000+02:00
diff --git a/tools/llm_bench/llm_bench_utils/prompt_utils.py b/tools/llm_bench/llm_bench_utils/prompt_utils.py
@@ -2,10 +2,12 @@
 # Copyright (C) 2023-2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+
+import os
+import cv2
 from .model_utils import get_param_from_file
 from .parse_json_data import parse_text_json_data
 
-
 def get_text_prompt(args):
     text_list = []
     output_data_list, is_json_data = get_param_from_file(args, 'prompt')
@@ -17,3 +19,43 @@ def get_text_prompt(args):
     else:
         text_list.append(output_data_list[0])
     return text_list
+
+
+def split_video_into_frames(video_path):
+    """Decode a video file into a list of frames, in decode order.
+
+    Args:
+        video_path: ``str`` or ``os.PathLike`` path of the video file.
+
+    Returns:
+        list: one RGB image array per decoded frame.
+        NOTE(review): callers that need another image type (e.g. PIL) must
+        convert the frames themselves - confirm what the pipeline expects.
+
+    Raises:
+        FileNotFoundError: if ``video_path`` does not exist.
+        ValueError: if the extension is unsupported or the file cannot be opened.
+    """
+    supported_files = {".mp4"}
+    video_file = os.fspath(video_path)
+    if not os.path.exists(video_file):
+        raise FileNotFoundError(f"no input video file: {video_file}")
+    if os.path.splitext(video_file)[1].lower() not in supported_files:
+        raise ValueError(f"no supported video file: {video_file}")
+
+    # Pass a plain string: not every OpenCV build accepts a pathlib.Path here.
+    cap = cv2.VideoCapture(video_file)
+    try:
+        if not cap.isOpened():
+            raise ValueError(f"cannot open video file: {video_file}")
+        frames = []
+        while True:
+            ok, frame = cap.read()
+            if not ok:
+                break
+            # OpenCV decodes to BGR channel order; return RGB instead.
+            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        return frames
+    finally:
+        # Always free the decoder handle, including on the error paths.
+        cap.release()
diff --git a/tools/llm_bench/requirements.txt b/tools/llm_bench/requirements.txt
@@ -18,3 +18,4 @@ librosa # For Whisper
 matplotlib
 jinja2>=3.1.0
 scipy
+opencv-python
diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py
@@ -17,9 +17,9 @@
 import llm_bench_utils.output_file
 import llm_bench_utils.gen_output_data as gen_output_data
 import llm_bench_utils.parse_json_data as parse_json_data
+import llm_bench_utils.prompt_utils as pu
 from pathlib import Path
 
-
 FW_UTILS = {'pt': llm_bench_utils.pt_utils, 'ov': llm_bench_utils.ov_utils}
 
 DEFAULT_OUTPUT_TOKEN_SIZE = 512
@@ -44,6 +44,12 @@ def run_visual_language_generation_optimum(
                     images.append(load_image(str(file)))
             else:
                 images.append(load_image(input_data["media"]))
+
+        if input_data.get("video", None):
+            entry = Path(input_data["video"])
+            ordered_frames = pu.split_video_into_frames(entry)
+            images.extend(ordered_frames)
+
         prompts.append(input_data["prompt"])
     prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
     log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}')