52 commits
3c7aefd
lora for vlm
likholat Feb 18, 2026
afe925c
hf vlm with lora
likholat Feb 20, 2026
5d1a42a
python sample
likholat Feb 26, 2026
de0fe24
Merge remote-tracking branch 'origin/master' into lora_for_vlm
likholat Feb 26, 2026
5c9abc0
enable Lora for VLM CB wwb case
likholat Feb 26, 2026
bfea1cc
Merge remote-tracking branch 'origin/master' into lora_for_vlm
likholat Mar 6, 2026
61e3b3a
lora vlm sdpa acc fix
likholat Mar 6, 2026
b8f802b
Update src/cpp/src/lora/adapter.cpp
likholat Mar 6, 2026
83ad53d
Update samples/cpp/visual_language_chat/visual_language_lora.cpp
likholat Mar 6, 2026
eccee7d
add test
likholat Mar 9, 2026
f32818b
Merge remote-tracking branch 'origin/master' into lora_for_vlm
likholat Mar 9, 2026
a01b4fb
codestyle fixes
likholat Mar 9, 2026
8bd2d5a
codestyle fixes
likholat Mar 9, 2026
fb6a2b8
wwb with and without adapter fix
likholat Mar 9, 2026
f218a13
add includes
likholat Mar 9, 2026
521bff3
fix ci test
likholat Mar 9, 2026
6669197
get_tensor_name_prefix().value_or
likholat Mar 9, 2026
e03ee2b
reset_language_state func
likholat Mar 9, 2026
24426d4
copyright for python sample
likholat Mar 9, 2026
f893dae
codestyle fix
likholat Mar 9, 2026
6597e0c
Merge remote-tracking branch 'origin/master' into lora_for_vlm
likholat Mar 9, 2026
9c9fa74
mv lora test
likholat Mar 9, 2026
e210c08
review fixes
likholat Mar 10, 2026
a98e1f4
samples fix
likholat Mar 10, 2026
0bf8454
review fixes: samples update
likholat Mar 10, 2026
192d48d
docs update
likholat Mar 10, 2026
d8759f7
align python sample output with cpp sample
likholat Mar 10, 2026
a709568
docs fix
likholat Mar 10, 2026
79bd448
python sample CACHE_DIR fix
likholat Mar 10, 2026
0b76255
docs fix
likholat Mar 10, 2026
b4a1544
fix sample
likholat Mar 10, 2026
f171114
normalize_lora_adapters_and_alphas
likholat Mar 10, 2026
13a1a20
codestyle fixes
likholat Mar 10, 2026
00b2c76
review fixes
likholat Mar 10, 2026
fffa862
rm device from samples
likholat Mar 11, 2026
ba60ad1
review fixes
likholat Mar 11, 2026
2f09df7
func for create adapter config
likholat Mar 11, 2026
8553f83
lora wwb test
likholat Mar 11, 2026
4a80bcc
fix ci fails
likholat Mar 11, 2026
4f72979
add lora tests
likholat Mar 11, 2026
422ed76
fix lora
likholat Mar 12, 2026
c54fd31
fix wwb test
likholat Mar 12, 2026
970c833
enable tests
likholat Mar 12, 2026
db5cc27
codestyle fix
likholat Mar 12, 2026
5a25225
Merge remote-tracking branch 'origin/master' into lora_for_vlm
likholat Mar 12, 2026
bce9726
Update samples/cpp/visual_language_chat/visual_language_lora.cpp
likholat Mar 12, 2026
ba7c2b2
mv _download_hf_files_to_cache
likholat Mar 13, 2026
944956d
update requirements
likholat Mar 13, 2026
508eda2
random vlm + random lora for wwb test
likholat Mar 13, 2026
13b51c1
codestyle fix
likholat Mar 13, 2026
5e1f187
fix cache dir for test
likholat Mar 13, 2026
812b3c7
specify peft version
likholat Mar 13, 2026
15 changes: 15 additions & 0 deletions samples/cpp/visual_language_chat/CMakeLists.txt
@@ -34,6 +34,21 @@ install(TARGETS visual_language_chat
        COMPONENT samples_bin
        EXCLUDE_FROM_ALL)

# create lora sample executable

add_executable(visual_language_lora visual_language_lora.cpp load_image.cpp)
target_include_directories(visual_language_lora PRIVATE "${CMAKE_BINARY_DIR}")
target_link_libraries(visual_language_lora PRIVATE openvino::genai)

set_target_properties(visual_language_lora PROPERTIES
    # Ensure out of box LC_RPATH on macOS with SIP
    INSTALL_RPATH_USE_LINK_PATH ON)

install(TARGETS visual_language_lora
        RUNTIME DESTINATION samples_bin/
        COMPONENT samples_bin
        EXCLUDE_FROM_ALL)

# create encrypted model sample executable

add_executable(encrypted_model_vlm encrypted_model_vlm.cpp load_image.cpp)
32 changes: 32 additions & 0 deletions samples/cpp/visual_language_chat/README.md
@@ -7,6 +7,7 @@ The following are sample files:
- [`visual_language_chat.cpp`](./visual_language_chat.cpp) demonstrates basic usage of the VLM pipeline which supports accelerated inference using prompt lookup decoding.
- [`video_to_text_chat.cpp`](./video_to_text_chat.cpp) demonstrates video to text usage of the VLM pipeline.
- [`benchmark_vlm.cpp`](./benchmark_vlm.cpp) shows how to benchmark a VLM in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text and calculating various performance metrics.
- [`visual_language_lora.cpp`](./visual_language_lora.cpp) demonstrates how to apply one or more LoRA adapters to a VLM at runtime.


## Download and convert the model and tokenizers
@@ -32,6 +33,37 @@ Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is

Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#visual-language-models-vlms) for more details.

## Run image-to-text sample with LoRA adapters:

This sample runs generation twice for the same prompt and image: first with the LoRA adapter(s) applied, then without any adapters (base model).

Export `Qwen/Qwen2.5-VL-7B-Instruct` to OpenVINO as [described above for MiniCPM-V](#download-and-convert-the-model-and-tokenizers), then download LoRA `Mouad2004/qwen2.5-vl-lora-diagrams`:

```sh
wget -O adapter_model.safetensors \
https://huggingface.co/Mouad2004/qwen2.5-vl-lora-diagrams/resolve/main/adapter_model.safetensors
```

This OpenVINO overview diagram can be used as a convenient image input:

```sh
wget -O openvino-overview-diagram.jpg \
https://docs.openvino.ai/2026/_images/openvino-overview-diagram.jpg
```

```sh
visual_language_lora ./Qwen2.5-VL-7B-Instruct ./openvino-overview-diagram.jpg "What is shown in this diagram?" ./adapter_model.safetensors 4.0
```

> You can run with multiple LoRA adapters by providing multiple `<LORA_SAFETENSORS> <ALPHA>` pairs.

> [!NOTE]
> ### LoRA `alpha` interpretation in OpenVINO GenAI
> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference.
>
> In this context, the `alpha` value already includes:
> - normalization by LoRA rank (`alpha / rank`)
> - any user-defined scaling factor (`weight`)
>
> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training.
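
To make this concrete, here is a minimal sketch of how a training-time LoRA configuration maps onto the single `<ALPHA>` argument this sample expects. The `lora_alpha`, `rank`, and `weight` values are illustrative assumptions, not values read from the adapter above:

```python
# Illustrative training-time values (normally found in the adapter's
# adapter_config.json); these are assumptions, not the real values for
# Mouad2004/qwen2.5-vl-lora-diagrams.
lora_alpha = 16.0  # raw "lora_alpha" used during training
rank = 8           # LoRA rank "r" used during training
weight = 2.0       # optional user-defined scaling of the adapter's effect

# The single effective scaling factor to pass as <ALPHA>:
effective_alpha = (lora_alpha / rank) * weight
print(effective_alpha)  # 4.0
```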

## Run video-to-text chat sample:

69 changes: 69 additions & 0 deletions samples/cpp/visual_language_chat/visual_language_lora.cpp
@@ -0,0 +1,69 @@
// Copyright (C) 2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "load_image.hpp"
#include <openvino/core/except.hpp>
#include <openvino/genai/visual_language/pipeline.hpp>
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

ov::genai::StreamingStatus print_subword(std::string&& subword) {
    std::cout << subword << std::flush;
    return ov::genai::StreamingStatus::RUNNING;
}

int main(int argc, char* argv[]) try {
    // At least one LoRA adapter must be provided.
    OPENVINO_ASSERT(argc >= 6 && ((argc - 4) % 2) == 0,
                    "Usage: ", argv[0],
                    " <MODEL_DIR> <IMAGE_FILE OR DIR_WITH_IMAGES> <PROMPT> <LORA_SAFETENSORS> <ALPHA> [<LORA_SAFETENSORS> <ALPHA> ...]");

    std::vector<ov::Tensor> rgbs = utils::load_images(argv[2]);

    const std::string device = "CPU";  // GPU can be used as well
    ov::AnyMap pipeline_properties;

    const std::string prompt = argv[3];

    // LoRA args are parsed as pairs: <LORA_SAFETENSORS> <ALPHA>
    ov::genai::AdapterConfig adapter_config;
    for (int idx = 4; idx + 1 < argc; idx += 2) {
        ov::genai::Adapter adapter(argv[idx]);
        float alpha = std::stof(argv[idx + 1]);
        adapter_config.add(adapter, alpha);
    }
    pipeline_properties.insert({ov::genai::adapters(adapter_config)});

    ov::genai::VLMPipeline pipe(argv[1], device, pipeline_properties);

    ov::genai::GenerationConfig generation_config;
    generation_config.max_new_tokens = 100;

    std::cout << "Generating answer with LoRA adapters applied:\n";
    pipe.generate(prompt,
                  ov::genai::images(rgbs),
                  ov::genai::generation_config(generation_config),
                  ov::genai::streamer(print_subword));

    std::cout << "\n----------\nGenerating answer without LoRA adapters applied:\n";
    pipe.generate(prompt,
                  ov::genai::images(rgbs),
                  ov::genai::generation_config(generation_config),
                  ov::genai::adapters(),
                  ov::genai::streamer(print_subword));
    std::cout << "\n----------\n";

} catch (const std::exception& error) {
    try {
        std::cerr << error.what() << '\n';
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
} catch (...) {
    try {
        std::cerr << "Non-exception object thrown\n";
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
}
33 changes: 33 additions & 0 deletions samples/python/visual_language_chat/README.md
@@ -6,6 +6,7 @@ The following are sample files:
- [`visual_language_chat.py`](./visual_language_chat.py) demonstrates basic usage of the VLM pipeline which supports accelerated inference using prompt lookup decoding.
- [`video_to_text_chat.py`](./video_to_text_chat.py) demonstrates video to text usage of the VLM pipeline.
- [`benchmark_vlm.py`](./benchmark_vlm.py) shows how to benchmark a VLM in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text and calculating various performance metrics.
- [`visual_language_lora.py`](./visual_language_lora.py) demonstrates how to apply one or more LoRA adapters to a VLM at runtime.
- [`milebench_eval_vlm.py`](./milebench_eval_vlm.py) provides MileBench validation for VLMs, enabling evaluation of image–text reasoning and visual QA tasks across multiple subsets designed to assess the MultImodal Long-contExt capabilities of MLLMs.

## Download and convert the model and tokenizers
@@ -50,6 +51,38 @@ Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pi

See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.

## Run image-to-text sample with LoRA adapters:

This sample runs generation twice for the same prompt and image: first with LoRA adapter(s) applied, then without any adapters (base model).

Export `Qwen/Qwen2.5-VL-7B-Instruct` to OpenVINO as [described above for MiniCPM-V](#download-and-convert-the-model-and-tokenizers), then download LoRA `Mouad2004/qwen2.5-vl-lora-diagrams`:

```sh
wget -O adapter_model.safetensors \
https://huggingface.co/Mouad2004/qwen2.5-vl-lora-diagrams/resolve/main/adapter_model.safetensors
```

This OpenVINO overview diagram can be used as a convenient image input:

```sh
wget -O openvino-overview-diagram.jpg \
https://docs.openvino.ai/2026/_images/openvino-overview-diagram.jpg
```

```sh
python visual_language_lora.py ./Qwen2.5-VL-7B-Instruct ./openvino-overview-diagram.jpg "What is shown in this diagram?" ./adapter_model.safetensors 4.0
```

> You can run with multiple LoRA adapters by providing multiple `<LORA_SAFETENSORS> <ALPHA>` pairs.

> [!NOTE]
> ### LoRA `alpha` interpretation in OpenVINO GenAI
> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference.
>
> In this context, the `alpha` value already includes:
> - normalization by LoRA rank (`alpha / rank`)
> - any user-defined scaling factor (`weight`)
>
> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training.
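
As a sanity check, the mapping from training-time LoRA parameters to the single `<ALPHA>` value this sample expects can be sketched as follows. The `lora_alpha`, `rank`, and `weight` values are illustrative assumptions, not values read from the adapter above:

```python
# Illustrative training-time values (normally found in the adapter's
# adapter_config.json); these are assumptions, not the real values for
# Mouad2004/qwen2.5-vl-lora-diagrams.
lora_alpha = 16.0  # raw "lora_alpha" used during training
rank = 8           # LoRA rank "r" used during training
weight = 2.0       # optional user-defined scaling of the adapter's effect

# The single effective scaling factor to pass as <ALPHA>:
effective_alpha = (lora_alpha / rank) * weight
print(effective_alpha)  # 4.0
```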

## Run video-to-text chat sample:

A model that supports video input is required to run this sample, for example `llava-hf/LLaVA-NeXT-Video-7B-hf`.
128 changes: 128 additions & 0 deletions samples/python/visual_language_chat/visual_language_lora.py
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
# Copyright (C) 2026 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import argparse
import numpy as np
import openvino_genai as ov_genai

from pathlib import Path
from PIL import Image
from openvino import Tensor


def streamer(subword: str) -> None:
    """Print a generated sub-word as soon as it is produced.

    Args:
        subword: sub-word of the generated text.

    Returns:
        None. Returning no value does not stop generation: "return None" is
        treated the same as "return ov_genai.StreamingStatus.RUNNING".
    """
    print(subword, end="", flush=True)


def read_image(path: str) -> Tensor:
    """Read an image file into a tensor.

    Args:
        path: The path to the image.

    Returns:
        The ov.Tensor containing the image.
    """
    pic = Image.open(path).convert("RGB")
    image_data = np.array(pic)
    return Tensor(image_data)


def read_images(path: str) -> list[Tensor]:
    entry = Path(path)
    if entry.is_dir():
        return [read_image(str(file)) for file in sorted(entry.iterdir())]
    return [read_image(path)]


def parse_lora_pairs(raw: list[str]) -> list[tuple[str, float]]:
    if len(raw) < 2:
        raise argparse.ArgumentTypeError(
            "At least one LoRA adapter pair is required: <LORA_SAFETENSORS> <ALPHA> [<LORA_SAFETENSORS> <ALPHA> ...]"
        )
    if len(raw) % 2 != 0:
        raise argparse.ArgumentTypeError("LoRA args must come in pairs: <LORA_SAFETENSORS> <ALPHA> ...")

    pairs = []
    for i in range(0, len(raw), 2):
        path = raw[i]
        try:
            alpha = float(raw[i + 1])
        except ValueError as e:
            raise argparse.ArgumentTypeError(f"Invalid alpha '{raw[i + 1]}' for LoRA '{path}'") from e
        pairs.append((path, alpha))
    return pairs


def main() -> int:
    p = argparse.ArgumentParser(
        description="OpenVINO GenAI VLM sample: run with and without LoRA adapters.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    p.add_argument("model_dir", help="Path to model directory")
    p.add_argument("images_path", help="Image file OR directory with images")
    p.add_argument("prompt", help="Prompt/question to ask")
    p.add_argument(
        "lora_pairs",
        nargs="+",
        metavar="LORA_ALPHA",
        help="Pairs: <LORA_SAFETENSORS> <ALPHA> ...",
    )

    args = p.parse_args()
    prompt = args.prompt
    loras = parse_lora_pairs(args.lora_pairs)

    rgbs = read_images(args.images_path)

    device = "CPU"  # GPU can be used as well

    pipe_kwargs = {}

    # Configure LoRA adapters with weights (alphas)
    if loras:
        adapter_config = ov_genai.AdapterConfig()
        for lora_path, alpha in loras:
            adapter_config.add(ov_genai.Adapter(lora_path), alpha)
        pipe_kwargs["adapters"] = adapter_config

    pipe = ov_genai.VLMPipeline(args.model_dir, device, **pipe_kwargs)

    gen_cfg = ov_genai.GenerationConfig()
    gen_cfg.max_new_tokens = 100

    print("Generating answer with LoRA adapters applied:")
    pipe.generate(
        prompt,
        images=rgbs,
        generation_config=gen_cfg,
        streamer=streamer,
    )

    print("\n----------\nGenerating answer without LoRA adapters applied:")
    pipe.generate(
        prompt,
        images=rgbs,
        generation_config=gen_cfg,
        adapters=ov_genai.AdapterConfig(),
        streamer=streamer,
    )

    print("\n----------")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
2 changes: 1 addition & 1 deletion site/docs/guides/lora-adapters.mdx
@@ -13,7 +13,7 @@ LoRA adapters enable customization of model outputs for specific tasks, styles,
For more details about LoRA, see [Low-Rank Adaptation (LoRA)](/docs/concepts/lora).
:::

OpenVINO GenAI provides built-in support for LoRA adapters in [text generation](/docs/use-cases/text-generation/) and [image generation](/docs/use-cases/image-generation/) pipelines.
OpenVINO GenAI provides built-in support for LoRA adapters in [text generation](/docs/use-cases/text-generation/), [image generation](/docs/use-cases/image-generation/), and [image processing (VLM)](/docs/use-cases/image-processing) pipelines.
This capability allows you to dynamically switch between or combine multiple adapters without recompiling the model.

:::info
5 changes: 3 additions & 2 deletions site/docs/supported-models/index.mdx
@@ -51,8 +51,9 @@ Models should belong to the same family and have the same tokenizers.

## Visual Language Models (VLMs)

:::info LoRA Support
VLM pipeline does **not** support LoRA adapters.
:::tip LoRA Support
VLM pipeline supports LoRA adapters applied to the language-model (LLM) part.
LoRA adapters targeting the vision encoder or other multimodal components are not supported.
:::

<VLMModelsTable />
@@ -106,6 +106,13 @@ Similar to [text generation](/docs/use-cases/text-generation/#use-different-gene
</LanguageTabs>
</BasicGenerationConfiguration>

### Working with LoRA Adapters

For Visual Language Models (VLMs), LoRA adapters can be applied to the language-model (LLM) part to customize the generated text.
Adapters that target the vision encoder or other multimodal components are not supported.

Refer to the [LoRA Adapters](/docs/guides/lora-adapters) guide for more details on working with LoRA adapters.

<ChatScenario />

<Streaming />
3 changes: 3 additions & 0 deletions src/cpp/src/lora/adapter.cpp
@@ -1038,6 +1038,9 @@ class SafetensorsAdapterImpl : public AdapterImpl {
public:

    SafetensorsAdapterImpl(const std::filesystem::path& path) {
        OPENVINO_ASSERT(std::filesystem::exists(path), "LoRA adapter path does not exist: ", path.string());
        OPENVINO_ASSERT(path.extension().string() == ".safetensors", "Expected .safetensors file, got: ", path.string());

        auto safetensor_content = read_safetensors(path);
        constant_tensors = group_lora_constant_tensors(safetensor_content, default_lora_constant_patterns());
        for (const auto& constant_tensor : constant_tensors) {