Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@

# Tests
/tests/ @sgonorov
/tests/python_tests/test_image_generation_multi_call.py @likholat

# Python bindings
/src/python/py_image_generation_pipelines.cpp @likholat

# GenAI Tools
/tools/ @sbalandi
2 changes: 1 addition & 1 deletion .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ jobs:
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
timeout: 360
- name: 'LLM & VLM'
cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/'
cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
timeout: 180
- name: 'Video Generation'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ jobs:
# timeout: 240
# Only supported on X64 or ARM with SVE support
# - name: 'LLM & VLM'
# cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py'
# cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py'
# run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
# timeout: 180
- name: 'GGUF Reader tests'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/manylinux_2_28.yml
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,8 @@ jobs:
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
timeout: 360
- name: 'LLM & VLM'
cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py ./tests/python_tests/test_image_generation.py ./tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
timeout: 180
- name: 'Video Generation'
cmd: 'python -m pytest -v ./tests/python_tests/test_video_generation.py --override-ini cache_dir=/mount/caches/pytest/'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ jobs:
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
timeout: 360
- name: 'LLM & VLM'
cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/'
cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
timeout: 180
- name: 'Video Generation'
Expand Down
4 changes: 3 additions & 1 deletion src/cpp/src/rag/text_embedding_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "openvino/genai/rag/text_embedding_pipeline.hpp"

#include <fstream>
#include <utility>

#include <nlohmann/json.hpp>

#include "json_utils.hpp"
Expand Down Expand Up @@ -92,7 +94,7 @@ class TextEmbeddingPipeline::TextEmbeddingPipelineImpl {

ov::Core core = utils::singleton_core();

auto model = core.read_model(models_path / "openvino_model.xml", {}, properties);
auto model = core.read_model(models_path / "openvino_model.xml", {}, std::as_const(properties));

bool is_seq_len_fixed = true;
if (m_config.max_length) {
Expand Down
7 changes: 5 additions & 2 deletions src/cpp/src/tokenizer/tokenizer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// SPDX-License-Identifier: Apache-2.0

#include "tokenizer/tokenizer_impl.hpp"

#include <utility>

#include "add_second_input_pass.hpp"
#include "sampling/structured_output/structured_output_controller.hpp"
#include "openvino/genai/version.hpp"
Expand Down Expand Up @@ -332,11 +335,11 @@ void Tokenizer::TokenizerImpl::setup_tokenizer(const std::filesystem::path& mode
return;
}
if (std::filesystem::exists(models_path / "openvino_tokenizer.xml")) {
ov_tokenizer = core.read_model(models_path / "openvino_tokenizer.xml", {}, filtered_properties);
ov_tokenizer = core.read_model(models_path / "openvino_tokenizer.xml", {}, std::as_const(filtered_properties));
}

if (std::filesystem::exists(models_path / "openvino_detokenizer.xml")) {
ov_detokenizer = core.read_model(models_path / "openvino_detokenizer.xml", {}, filtered_properties);
ov_detokenizer = core.read_model(models_path / "openvino_detokenizer.xml", {}, std::as_const(filtered_properties));
}

read_config(models_path);
Expand Down
4 changes: 3 additions & 1 deletion src/cpp/src/whisper/models/statefull_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#include "statefull_decoder.hpp"

#include <utility>

#include "openvino/op/softmax.hpp"
#include "openvino/pass/manager.hpp"
#include "utils.hpp"
Expand All @@ -28,7 +30,7 @@ WhisperStatefullDecoder::WhisperStatefullDecoder(const std::filesystem::path& mo
: m_decompose_cross_attention_spda_ops(decompose_cross_attention_spda) {
ov::Core core = utils::singleton_core();

auto model = core.read_model(models_path / "openvino_decoder_model.xml", {}, properties);
auto model = core.read_model(models_path / "openvino_decoder_model.xml", {}, std::as_const(properties));

if (m_decompose_cross_attention_spda_ops) {
ov::genai::decompose_scaled_dot_product_attention_for_whisper(model);
Expand Down
4 changes: 3 additions & 1 deletion src/cpp/src/whisper/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#include <algorithm>
#include <filesystem>
#include <utility>

#include <openvino/openvino.hpp>
#include <variant>

Expand Down Expand Up @@ -82,7 +84,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi
ov::Core core = utils::singleton_core();
ov::CompiledModel compiled_model;
if (device == "NPU") {
auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, properties_copy);
auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, std::as_const(properties_copy));
// NB: only batch_size == 1 is supported now for NPU
reshape_to_static_encoder(encoder_model, 1, m_feature_extractor.feature_size);
compiled_model = core.compile_model(encoder_model, "NPU", properties_copy);
Expand Down
9 changes: 5 additions & 4 deletions src/cpp/src/whisper/pipeline_static.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include <chrono>
#include <regex>
#include <utility>

#include "openvino/runtime/intel_npu/properties.hpp"
#include "utils.hpp"
Expand Down Expand Up @@ -1038,18 +1039,18 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys

ov::Core core = utils::singleton_core();

auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, properties_copy);
auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, std::as_const(properties_copy));
reshape_to_static_encoder(encoder_model, m_feature_extractor.feature_size);
auto last_hidden_state_shape = get_encoder_hidden_state_shape(encoder_model);

std::shared_ptr<ov::Model> decoder_model;
std::shared_ptr<ov::Model> decoder_with_past_model;

if (std::filesystem::exists(models_path / "openvino_decoder_with_past_model.xml") ) {
decoder_model = core.read_model(models_path / "openvino_decoder_model.xml", {}, properties_copy);
decoder_with_past_model = core.read_model(models_path / "openvino_decoder_with_past_model.xml", {}, properties_copy);
decoder_model = core.read_model(models_path / "openvino_decoder_model.xml", {}, std::as_const(properties_copy));
decoder_with_past_model = core.read_model(models_path / "openvino_decoder_with_past_model.xml", {}, std::as_const(properties_copy));
} else {
auto model = core.read_model(models_path / "openvino_decoder_model.xml", {}, properties_copy);
auto model = core.read_model(models_path / "openvino_decoder_model.xml", {}, std::as_const(properties_copy));
ov::pass::StatefulToStateless().run_on_model(model);

decoder_model = prepare_decoder_model(model);
Expand Down
27 changes: 16 additions & 11 deletions src/python/py_image_generation_pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ class TorchGenerator : public ov::genai::CppStdGenerator {
if (e.matches(PyExc_ModuleNotFoundError)) {
throw std::runtime_error("The 'torch' package is not installed. Please, call 'pip install torch' or use 'rng_seed' parameter.");
} else {
// Re-throw other exceptions
throw;
}
}
Expand All @@ -179,6 +178,22 @@ class TorchGenerator : public ov::genai::CppStdGenerator {
create_torch_generator(seed);
}

~TorchGenerator() override {
    // py::object members must be released while holding the GIL; only
    // attempt that if the Python interpreter is still alive.
    if (Py_IsInitialized()) {
        try {
            py::gil_scoped_acquire acquire;
            // Rebind to empty handles so the Python refcounts are
            // decremented under the GIL.
            m_torch_generator = py::object();
            m_float32 = py::object();
            m_torch = py::module_();
            return;
        } catch (...) {
            // Best effort only — a destructor must never throw.
            // Fall through to the leak path below.
        }
    }
    // Interpreter already finalized (or GIL acquisition failed):
    // deliberately leak the references instead of decref'ing against a
    // dead interpreter, which would crash.
    m_torch_generator.release();
    m_float32.release();
    m_torch.release();
}

float next() override {
py::gil_scoped_acquire acquire;
return m_torch.attr("randn")(1, "generator"_a=m_torch_generator, "dtype"_a=m_float32).attr("item")().cast<float>();
Expand Down Expand Up @@ -260,16 +275,6 @@ class TorchGenerator : public ov::genai::CppStdGenerator {
}
};

bool params_have_torch_generator(ov::AnyMap params) {
std::shared_ptr<ov::genai::Generator> generator = nullptr;
ov::genai::utils::read_anymap_param(params, "generator", generator);
if (std::dynamic_pointer_cast<::TorchGenerator>(generator)) {
return true;
}
return false;
}


} // namespace

void init_clip_text_model(py::module_& m);
Expand Down
36 changes: 35 additions & 1 deletion src/python/py_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,37 @@
#include "openvino/genai/rag/text_embedding_pipeline.hpp"

namespace py = pybind11;

namespace {

// Wraps a Python-implemented ov::genai::Generator so that destruction of the
// underlying Python object always happens with the GIL held. Without this,
// dropping the last shared_ptr from a C++ thread would touch Python state
// without the GIL and crash the interpreter.
class GilSafeGeneratorWrapper : public ov::genai::Generator {
    std::shared_ptr<ov::genai::Generator> m_impl;  // underlying generator implementation
    py::object m_py_ref;  // keeps the Python object alive for the wrapper's lifetime

public:
    GilSafeGeneratorWrapper(std::shared_ptr<ov::genai::Generator>&& impl, py::object&& py_ref)
        : m_impl(std::move(impl)), m_py_ref(std::move(py_ref)) {}

    ~GilSafeGeneratorWrapper() override {
        // Only touch Python state if the interpreter is still running.
        if (Py_IsInitialized()) {
            try {
                py::gil_scoped_acquire acquire;
                m_impl.reset();
                m_py_ref = py::object();
                return;
            } catch (...) {
                // Destructors must not throw; fall through to the leak path.
            }
        }
        // Interpreter finalized (or GIL acquisition failed): intentionally
        // leak the reference rather than decref a dead interpreter's object.
        m_py_ref.release();
    }

    // Forward the Generator interface to the wrapped implementation.
    float next() override { return m_impl->next(); }
    ov::Tensor randn_tensor(const ov::Shape& shape) override { return m_impl->randn_tensor(shape); }
    void seed(size_t new_seed) override { m_impl->seed(new_seed); }
};

} // namespace

namespace ov::genai::pybind::utils {

py::str handle_utf8(const std::string& text) {
Expand Down Expand Up @@ -374,7 +405,10 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) {
} else if (py::isinstance<ov::genai::StopCriteria>(py_obj)) {
return py::cast<ov::genai::StopCriteria>(py_obj);
} else if (py::isinstance<ov::genai::Generator>(py_obj)) {
return py::cast<std::shared_ptr<ov::genai::Generator>>(py_obj);
auto impl = py::cast<std::shared_ptr<ov::genai::Generator>>(py_obj);
std::shared_ptr<ov::genai::Generator> wrapper =
std::make_shared<GilSafeGeneratorWrapper>(std::move(impl), py::reinterpret_borrow<py::object>(py_obj));
return wrapper;
} else if (py::isinstance<py::function>(py_obj) && property_name == "callback") {
auto py_callback = py::cast<py::function>(py_obj);
auto shared_callback = std::shared_ptr<py::function>(
Expand Down
46 changes: 46 additions & 0 deletions tests/python_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,17 @@
import gc
import pytest
import shutil
import subprocess # nosec B404
import logging
from pathlib import Path

from utils.constants import (
get_ov_cache_dir,
get_ov_cache_downloaded_models_dir,
get_ov_cache_converted_models_dir,
)
from utils.atomic_download import AtomicDownloadManager
from utils.network import retry_request

# Configure logging
logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -64,6 +69,47 @@ def pytest_configure(config: pytest.Config):
pytest.selected_model_ids = config.getoption("--model_ids", default=None)


# Maps a short test model id to its Hugging Face repository name; used by the
# image_generation_model fixture below to locate/convert test models.
IMAGE_GEN_MODELS = {
    "tiny-random-latent-consistency": "echarlaix/tiny-random-latent-consistency",
    "tiny-random-flux": "optimum-intel-internal-testing/tiny-random-flux",
}

# Model id used when a test does not parametrize the fixture explicitly.
DEFAULT_IMAGE_GEN_MODEL_ID = "tiny-random-latent-consistency"


@pytest.fixture(scope="module")
def image_generation_model(request):
    """Export the requested image-generation model to OpenVINO IR (once per
    module, atomically) and return the path of the converted model as a str.

    The model id comes from indirect parametrization (``request.param``) and
    falls back to ``DEFAULT_IMAGE_GEN_MODEL_ID``. Conversion is delegated to
    ``optimum-cli`` and retried on transient network failures.
    """
    selected_id = getattr(request, "param", DEFAULT_IMAGE_GEN_MODEL_ID)
    hf_name = IMAGE_GEN_MODELS[selected_id]
    target_path = Path(get_ov_cache_converted_models_dir()) / selected_id / hf_name

    downloader = AtomicDownloadManager(target_path)

    def _export(tmp_dir: Path) -> None:
        # Convert into a temp dir; the manager atomically moves it into place.
        cmd = [
            "optimum-cli",
            "export",
            "openvino",
            "--model",
            hf_name,
            "--trust-remote-code",
            "--weight-format",
            "fp16",
            str(tmp_dir),
        ]
        logger.info(f"Conversion command: {' '.join(cmd)}")
        retry_request(lambda: subprocess.run(cmd, check=True, text=True, capture_output=True))

    try:
        downloader.execute(_export)
    except subprocess.CalledProcessError as error:
        # Surface the CLI output before re-raising so CI logs show the cause.
        logger.exception(f"optimum-cli returned {error.returncode}. Output:\n{error.output}")
        raise

    return str(target_path)


@pytest.fixture(scope="module", autouse=True)
def run_gc_after_test():
"""
Expand Down
50 changes: 1 addition & 49 deletions tests/python_tests/test_image_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,62 +2,14 @@
# SPDX-License-Identifier: Apache-2.0

import pytest
import subprocess # nosec B404
import logging
from pathlib import Path
import numpy as np
import openvino as ov
import openvino_genai as ov_genai

from utils.constants import get_ov_cache_converted_models_dir, NPUW_CPU_PROPERTIES
from utils.atomic_download import AtomicDownloadManager
from utils.network import retry_request
from utils.constants import NPUW_CPU_PROPERTIES
from utils.ov_genai_pipelines import should_skip_npuw_tests

logger = logging.getLogger(__name__)

MODEL_ID = "tiny-random-latent-consistency"
MODEL_NAME = "echarlaix/tiny-random-latent-consistency"

FLUX_MODEL_ID = "tiny-random-flux"
FLUX_MODEL_NAME = "optimum-intel-internal-testing/tiny-random-flux"

MODELS = {
MODEL_ID: MODEL_NAME,
FLUX_MODEL_ID: FLUX_MODEL_NAME,
}


@pytest.fixture(scope="module")
def image_generation_model(request):
model_id = getattr(request, "param", MODEL_ID)
model_name = MODELS[model_id]
models_dir = get_ov_cache_converted_models_dir()
model_path = Path(models_dir) / model_id / model_name

manager = AtomicDownloadManager(model_path)

def convert_model(temp_path: Path) -> None:
command = [
"optimum-cli",
"export",
"openvino",
"--model",
model_name,
"--trust-remote-code",
"--weight-format", "fp16",
str(temp_path)
]
logger.info(f"Conversion command: {' '.join(command)}")
retry_request(lambda: subprocess.run(command, check=True, text=True, capture_output=True))

try:
manager.execute(convert_model)
except subprocess.CalledProcessError as error:
logger.exception(f"optimum-cli returned {error.returncode}. Output:\n{error.output}")
raise

return str(model_path)


def get_random_image(height: int = 64, width: int = 64) -> ov.Tensor:
Expand Down
Loading
Loading