diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 28791c3c83..7d267f21b0 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -53,6 +53,10 @@ # Tests /tests/ @sgonorov +/tests/python_tests/test_image_generation_multi_call.py @likholat + +# Python bindings +/src/python/py_image_generation_pipelines.cpp @likholat # GenAI Tools /tools/ @sbalandi diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 5e370e3683..87e7fe2e6c 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -586,7 +586,7 @@ jobs: run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} timeout: 360 - name: 'LLM & VLM' - cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/' + cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }} timeout: 180 - name: 'Video Generation' diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 5483120fa7..4ee27240ce 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -447,7 +447,7 @@ jobs: # timeout: 240 # Only supported on X64 or ARM with SVE support # - name: 'LLM & VLM' - # cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py' + # cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py' # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }} # timeout: 180 - name: 'GGUF Reader tests' diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 5bb52b7876..f457b98e32 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -509,8 +509,8 @@ jobs: run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} timeout: 360 - name: 'LLM & VLM' - cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py ./tests/python_tests/test_image_generation.py ./tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }} timeout: 180 - name: 'Video Generation' cmd: 'python -m pytest -v ./tests/python_tests/test_video_generation.py --override-ini cache_dir=/mount/caches/pytest/' diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index ebcff15e88..1da458df7a 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -674,7 +674,7 @@ jobs: run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} timeout: 360 - name: 'LLM & VLM' - cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/' + cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }} timeout: 180 - name: 'Video Generation' diff --git a/src/cpp/src/rag/text_embedding_pipeline.cpp b/src/cpp/src/rag/text_embedding_pipeline.cpp index 2d64419375..e791c9010f 100644 --- a/src/cpp/src/rag/text_embedding_pipeline.cpp +++ b/src/cpp/src/rag/text_embedding_pipeline.cpp @@ -4,6 +4,8 @@ #include "openvino/genai/rag/text_embedding_pipeline.hpp" #include +#include + #include #include "json_utils.hpp" @@ -92,7 +94,7 @@ class TextEmbeddingPipeline::TextEmbeddingPipelineImpl { ov::Core core = utils::singleton_core(); - auto model = core.read_model(models_path / "openvino_model.xml", {}, properties); + auto model = core.read_model(models_path / "openvino_model.xml", {}, std::as_const(properties)); bool is_seq_len_fixed = true; if (m_config.max_length) { diff --git a/src/cpp/src/tokenizer/tokenizer_impl.cpp b/src/cpp/src/tokenizer/tokenizer_impl.cpp index 01894d5a3c..df2f591c6f 100644 --- a/src/cpp/src/tokenizer/tokenizer_impl.cpp +++ b/src/cpp/src/tokenizer/tokenizer_impl.cpp @@ -2,6 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 #include "tokenizer/tokenizer_impl.hpp" + +#include + #include "add_second_input_pass.hpp" #include "sampling/structured_output/structured_output_controller.hpp" #include "openvino/genai/version.hpp" @@ -332,11 +335,11 @@ void Tokenizer::TokenizerImpl::setup_tokenizer(const std::filesystem::path& mode return; } if (std::filesystem::exists(models_path / "openvino_tokenizer.xml")) { - ov_tokenizer = core.read_model(models_path / "openvino_tokenizer.xml", {}, filtered_properties); + ov_tokenizer = core.read_model(models_path / "openvino_tokenizer.xml", {}, std::as_const(filtered_properties)); } if (std::filesystem::exists(models_path / "openvino_detokenizer.xml")) { - ov_detokenizer = core.read_model(models_path / "openvino_detokenizer.xml", {}, filtered_properties); + ov_detokenizer = core.read_model(models_path / "openvino_detokenizer.xml", {}, std::as_const(filtered_properties)); } read_config(models_path); diff --git a/src/cpp/src/whisper/models/statefull_decoder.cpp b/src/cpp/src/whisper/models/statefull_decoder.cpp index 03be4be56b..bb8b21daa0 100644 --- a/src/cpp/src/whisper/models/statefull_decoder.cpp +++ b/src/cpp/src/whisper/models/statefull_decoder.cpp @@ -3,6 +3,8 @@ #include "statefull_decoder.hpp" +#include + #include "openvino/op/softmax.hpp" #include "openvino/pass/manager.hpp" #include "utils.hpp" @@ -28,7 +30,7 @@ WhisperStatefullDecoder::WhisperStatefullDecoder(const std::filesystem::path& mo : m_decompose_cross_attention_spda_ops(decompose_cross_attention_spda) { ov::Core core = utils::singleton_core(); - auto model = core.read_model(models_path / "openvino_decoder_model.xml", {}, properties); + auto model = core.read_model(models_path / "openvino_decoder_model.xml", {}, std::as_const(properties)); if (m_decompose_cross_attention_spda_ops) { ov::genai::decompose_scaled_dot_product_attention_for_whisper(model); diff --git a/src/cpp/src/whisper/pipeline.cpp b/src/cpp/src/whisper/pipeline.cpp index c9a2625cd1..bccf5523ee 100644 --- a/src/cpp/src/whisper/pipeline.cpp +++ b/src/cpp/src/whisper/pipeline.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include @@ -82,7 +84,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi ov::Core core = utils::singleton_core(); ov::CompiledModel compiled_model; if (device == "NPU") { - auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, properties_copy); + auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, std::as_const(properties_copy)); // NB: only batch_size == 1 is supported now for NPU reshape_to_static_encoder(encoder_model, 1, m_feature_extractor.feature_size); compiled_model = core.compile_model(encoder_model, "NPU", properties_copy); diff --git a/src/cpp/src/whisper/pipeline_static.cpp b/src/cpp/src/whisper/pipeline_static.cpp index e9cdec971f..0ee473f3ed 100644 --- a/src/cpp/src/whisper/pipeline_static.cpp +++ b/src/cpp/src/whisper/pipeline_static.cpp @@ -5,6 +5,7 @@ #include #include +#include #include "openvino/runtime/intel_npu/properties.hpp" #include "utils.hpp" @@ -1038,7 +1039,7 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys ov::Core core = utils::singleton_core(); - auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, properties_copy); + auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, std::as_const(properties_copy)); reshape_to_static_encoder(encoder_model, m_feature_extractor.feature_size); auto last_hidden_state_shape = get_encoder_hidden_state_shape(encoder_model); @@ -1046,10 +1047,10 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys std::shared_ptr decoder_with_past_model; if (std::filesystem::exists(models_path / "openvino_decoder_with_past_model.xml") ) { - decoder_model = core.read_model(models_path / "openvino_decoder_model.xml", {}, properties_copy); - decoder_with_past_model = core.read_model(models_path / "openvino_decoder_with_past_model.xml", {}, properties_copy); + decoder_model = core.read_model(models_path / "openvino_decoder_model.xml", {}, std::as_const(properties_copy)); + decoder_with_past_model = core.read_model(models_path / "openvino_decoder_with_past_model.xml", {}, std::as_const(properties_copy)); } else { - auto model = core.read_model(models_path / "openvino_decoder_model.xml", {}, properties_copy); + auto model = core.read_model(models_path / "openvino_decoder_model.xml", {}, std::as_const(properties_copy)); ov::pass::StatefulToStateless().run_on_model(model); decoder_model = prepare_decoder_model(model); diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index 2d55a19afa..a0efe1a819 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -170,7 +170,6 @@ class TorchGenerator : public ov::genai::CppStdGenerator { if (e.matches(PyExc_ModuleNotFoundError)) { throw std::runtime_error("The 'torch' package is not installed. Please, call 'pip install torch' or use 'rng_seed' parameter."); } else { - // Re-throw other exceptions throw; } } @@ -179,6 +178,22 @@ class TorchGenerator : public ov::genai::CppStdGenerator { create_torch_generator(seed); } + ~TorchGenerator() override { + if (Py_IsInitialized()) { + try { + py::gil_scoped_acquire acquire; + m_torch_generator = py::object(); + m_float32 = py::object(); + m_torch = py::module_(); + return; + } catch (...) { + } + } + m_torch_generator.release(); + m_float32.release(); + m_torch.release(); + } + float next() override { py::gil_scoped_acquire acquire; return m_torch.attr("randn")(1, "generator"_a=m_torch_generator, "dtype"_a=m_float32).attr("item")().cast(); @@ -260,16 +275,6 @@ class TorchGenerator : public ov::genai::CppStdGenerator { } }; -bool params_have_torch_generator(ov::AnyMap params) { - std::shared_ptr generator = nullptr; - ov::genai::utils::read_anymap_param(params, "generator", generator); - if (std::dynamic_pointer_cast<::TorchGenerator>(generator)) { - return true; - } - return false; -} - - } // namespace void init_clip_text_model(py::module_& m); diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp index 9de6223d98..974cb50117 100644 --- a/src/python/py_utils.cpp +++ b/src/python/py_utils.cpp @@ -23,6 +23,37 @@ #include "openvino/genai/rag/text_embedding_pipeline.hpp" namespace py = pybind11; + +namespace { + +class GilSafeGeneratorWrapper : public ov::genai::Generator { + std::shared_ptr m_impl; + py::object m_py_ref; + +public: + GilSafeGeneratorWrapper(std::shared_ptr&& impl, py::object&& py_ref) + : m_impl(std::move(impl)), m_py_ref(std::move(py_ref)) {} + + ~GilSafeGeneratorWrapper() override { + if (Py_IsInitialized()) { + try { + py::gil_scoped_acquire acquire; + m_impl.reset(); + m_py_ref = py::object(); + return; + } catch (...) { + } + } + m_py_ref.release(); + } + + float next() override { return m_impl->next(); } + ov::Tensor randn_tensor(const ov::Shape& shape) override { return m_impl->randn_tensor(shape); } + void seed(size_t new_seed) override { m_impl->seed(new_seed); } +}; + +} // namespace + namespace ov::genai::pybind::utils { py::str handle_utf8(const std::string& text) { @@ -374,7 +405,10 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) { } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { - return py::cast>(py_obj); + auto impl = py::cast>(py_obj); + std::shared_ptr wrapper = + std::make_shared(std::move(impl), py::reinterpret_borrow(py_obj)); + return wrapper; } else if (py::isinstance(py_obj) && property_name == "callback") { auto py_callback = py::cast(py_obj); auto shared_callback = std::shared_ptr( diff --git a/tests/python_tests/conftest.py b/tests/python_tests/conftest.py index 52496a552c..f6cf3f224a 100644 --- a/tests/python_tests/conftest.py +++ b/tests/python_tests/conftest.py @@ -2,12 +2,17 @@ import gc import pytest import shutil +import subprocess # nosec B404 import logging +from pathlib import Path + from utils.constants import ( get_ov_cache_dir, get_ov_cache_downloaded_models_dir, get_ov_cache_converted_models_dir, ) +from utils.atomic_download import AtomicDownloadManager +from utils.network import retry_request # Configure logging logging.basicConfig(level=logging.INFO) @@ -64,6 +69,47 @@ def pytest_configure(config: pytest.Config): pytest.selected_model_ids = config.getoption("--model_ids", default=None) +IMAGE_GEN_MODELS = { + "tiny-random-latent-consistency": "echarlaix/tiny-random-latent-consistency", + "tiny-random-flux": "optimum-intel-internal-testing/tiny-random-flux", +} + +DEFAULT_IMAGE_GEN_MODEL_ID = "tiny-random-latent-consistency" + + +@pytest.fixture(scope="module") +def image_generation_model(request): + model_id = getattr(request, "param", DEFAULT_IMAGE_GEN_MODEL_ID) + model_name = IMAGE_GEN_MODELS[model_id] + models_dir = get_ov_cache_converted_models_dir() + model_path = Path(models_dir) / model_id / model_name + + manager = AtomicDownloadManager(model_path) + + def convert_model(temp_path: Path) -> None: + command = [ + "optimum-cli", + "export", + "openvino", + "--model", + model_name, + "--trust-remote-code", + "--weight-format", + "fp16", + str(temp_path), + ] + logger.info(f"Conversion command: {' '.join(command)}") + retry_request(lambda: subprocess.run(command, check=True, text=True, capture_output=True)) + + try: + manager.execute(convert_model) + except subprocess.CalledProcessError as error: + logger.exception(f"optimum-cli returned {error.returncode}. Output:\n{error.output}") + raise + + return str(model_path) + + @pytest.fixture(scope="module", autouse=True) def run_gc_after_test(): """ diff --git a/tests/python_tests/test_image_generation.py b/tests/python_tests/test_image_generation.py index c017e21215..0a323f4008 100644 --- a/tests/python_tests/test_image_generation.py +++ b/tests/python_tests/test_image_generation.py @@ -2,62 +2,14 @@ # SPDX-License-Identifier: Apache-2.0 import pytest -import subprocess # nosec B404 -import logging -from pathlib import Path import numpy as np import openvino as ov import openvino_genai as ov_genai -from utils.constants import get_ov_cache_converted_models_dir, NPUW_CPU_PROPERTIES -from utils.atomic_download import AtomicDownloadManager -from utils.network import retry_request +from utils.constants import NPUW_CPU_PROPERTIES from utils.ov_genai_pipelines import should_skip_npuw_tests -logger = logging.getLogger(__name__) - -MODEL_ID = "tiny-random-latent-consistency" -MODEL_NAME = "echarlaix/tiny-random-latent-consistency" - FLUX_MODEL_ID = "tiny-random-flux" -FLUX_MODEL_NAME = "optimum-intel-internal-testing/tiny-random-flux" - -MODELS = { - MODEL_ID: MODEL_NAME, - FLUX_MODEL_ID: FLUX_MODEL_NAME, -} - - -@pytest.fixture(scope="module") -def image_generation_model(request): - model_id = getattr(request, "param", MODEL_ID) - model_name = MODELS[model_id] - models_dir = get_ov_cache_converted_models_dir() - model_path = Path(models_dir) / model_id / model_name - - manager = AtomicDownloadManager(model_path) - - def convert_model(temp_path: Path) -> None: - command = [ - "optimum-cli", - "export", - "openvino", - "--model", - model_name, - "--trust-remote-code", - "--weight-format", "fp16", - str(temp_path) - ] - logger.info(f"Conversion command: {' '.join(command)}") - retry_request(lambda: subprocess.run(command, check=True, text=True, capture_output=True)) - - try: - manager.execute(convert_model) - except subprocess.CalledProcessError as error: - logger.exception(f"optimum-cli returned {error.returncode}. Output:\n{error.output}") - raise - - return str(model_path) def get_random_image(height: int = 64, width: int = 64) -> ov.Tensor: diff --git a/tests/python_tests/test_image_generation_multi_call.py b/tests/python_tests/test_image_generation_multi_call.py new file mode 100644 index 0000000000..eab8e2ecd1 --- /dev/null +++ b/tests/python_tests/test_image_generation_multi_call.py @@ -0,0 +1,80 @@ +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest +import numpy as np +import openvino as ov +import openvino_genai as ov_genai + + +def get_random_image(height: int = 64, width: int = 64) -> ov.Tensor: + image_data = np.random.randint(0, 255, (1, height, width, 3), dtype=np.uint8) + return ov.Tensor(image_data) + + +def get_mask_image(height: int = 64, width: int = 64) -> ov.Tensor: + mask_data = np.zeros((1, height, width, 3), dtype=np.uint8) + mask_data[:, height // 4 : 3 * height // 4, width // 4 : 3 * width // 4, :] = 255 + return ov.Tensor(mask_data) + + +def create_generator(generator_type): + if generator_type == "cpp_std": + return ov_genai.CppStdGenerator(42) + if generator_type == "torch": + pytest.importorskip("torch") + return ov_genai.TorchGenerator(42) + return None + + +GENERATE_KWARGS = dict(width=64, height=64, num_inference_steps=2) +NUM_CALLS = 3 + + +class TestText2ImageMultipleGenerations: + @pytest.mark.parametrize("generator_type", [None, "cpp_std", "torch"]) + def test_multiple_generate(self, image_generation_model, generator_type): + pipe = ov_genai.Text2ImagePipeline(image_generation_model, "CPU") + for i in range(NUM_CALLS): + gen = create_generator(generator_type) + kwargs = {**GENERATE_KWARGS, **({"generator": gen} if gen else {})} + image = pipe.generate(f"prompt {i}", **kwargs) + assert image is not None + + @pytest.mark.parametrize("generator_type", [None, "torch"]) + def test_multiple_generate_with_callback(self, image_generation_model, generator_type): + pipe = ov_genai.Text2ImagePipeline(image_generation_model, "CPU") + for i in range(NUM_CALLS): + gen = create_generator(generator_type) + steps = [] + + def callback(step, num_steps, latent): + steps.append(step) + return False + + kwargs = {**GENERATE_KWARGS, "callback": callback, **({"generator": gen} if gen else {})} + image = pipe.generate(f"prompt {i}", **kwargs) + assert image is not None + assert len(steps) > 0 + + +class TestImage2ImageMultipleGenerations: + @pytest.mark.parametrize("generator_type", [None, "cpp_std", "torch"]) + def test_multiple_generate(self, image_generation_model, generator_type): + pipe = ov_genai.Image2ImagePipeline(image_generation_model, "CPU") + for i in range(NUM_CALLS): + gen = create_generator(generator_type) + kwargs = {**GENERATE_KWARGS, "strength": 0.8, **({"generator": gen} if gen else {})} + image = pipe.generate(f"prompt {i}", get_random_image(), **kwargs) + assert image is not None + + +class TestInpaintingMultipleGenerations: + @pytest.mark.parametrize("generator_type", [None, "cpp_std", "torch"]) + def test_multiple_generate(self, image_generation_model, generator_type): + pipe = ov_genai.InpaintingPipeline(image_generation_model, "CPU") + for i in range(NUM_CALLS): + gen = create_generator(generator_type) + kwargs = {**GENERATE_KWARGS, "strength": 0.8, **({"generator": gen} if gen else {})} + image = pipe.generate(f"prompt {i}", get_random_image(), get_mask_image(), **kwargs) + assert image is not None