diff --git a/samples/cpp/visual_language_chat/CMakeLists.txt b/samples/cpp/visual_language_chat/CMakeLists.txt
index 49b9969e6e..83d4ed4a4d 100644
--- a/samples/cpp/visual_language_chat/CMakeLists.txt
+++ b/samples/cpp/visual_language_chat/CMakeLists.txt
@@ -34,6 +34,21 @@ install(TARGETS visual_language_chat
         COMPONENT samples_bin
         EXCLUDE_FROM_ALL)
 
+# create lora sample executable
+
+add_executable(visual_language_lora visual_language_lora.cpp load_image.cpp)
+target_include_directories(visual_language_lora PRIVATE "${CMAKE_BINARY_DIR}")
+target_link_libraries(visual_language_lora PRIVATE openvino::genai)
+
+set_target_properties(visual_language_lora PROPERTIES
+    # Ensure out of box LC_RPATH on macOS with SIP
+    INSTALL_RPATH_USE_LINK_PATH ON)
+
+install(TARGETS visual_language_lora
+        RUNTIME DESTINATION samples_bin/
+        COMPONENT samples_bin
+        EXCLUDE_FROM_ALL)
+
 # create encrypted model sample executable
 
 add_executable(encrypted_model_vlm encrypted_model_vlm.cpp load_image.cpp)
diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md
index 87eccdeaef..e3c106c114 100644
--- a/samples/cpp/visual_language_chat/README.md
+++ b/samples/cpp/visual_language_chat/README.md
@@ -7,6 +7,7 @@ The following are sample files:
  - [`visual_language_chat.cpp`](./visual_language_chat.cpp) demonstrates basic usage of the VLM pipeline which supports accelerated inference using prompt lookup decoding.
  - [`video_to_text_chat.cpp`](./video_to_text_chat.cpp) demonstrates video to text usage of the VLM pipeline.
  - [`benchmark_vlm.cpp`](./benchmark_vlm.cpp) shows how to benchmark a VLM in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text and calculating various performance metrics.
+ - [`visual_language_lora.cpp`](./visual_language_lora.cpp) demonstrates how to apply one or more LoRA adapters to a VLM at runtime.
 
 ## Download and convert the model and tokenizers
 
@@ -32,6 +33,37 @@ Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is
 
 Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#visual-language-models-vlms) for more details.
 
+## Run image-to-text sample with LoRA adapters:
+
+This sample runs generation twice for the same prompt and image: first with the LoRA adapter applied, then without any adapters (base model).
+
+Export `Qwen/Qwen2.5-VL-7B-Instruct` to OpenVINO as [described above for MiniCPM-V](#download-and-convert-the-model-and-tokenizers), then download the LoRA adapter `Mouad2004/qwen2.5-vl-lora-diagrams`:
+
+```sh
+wget -O adapter_model.safetensors \
+  https://huggingface.co/Mouad2004/qwen2.5-vl-lora-diagrams/resolve/main/adapter_model.safetensors
+```
+
+This OpenVINO overview diagram can be used as a convenient image input:
+
+```sh
+wget -O openvino-overview-diagram.jpg \
+  https://docs.openvino.ai/2026/_images/openvino-overview-diagram.jpg
+```
+
+`visual_language_lora ./Qwen2.5-VL-7B-Instruct ./openvino-overview-diagram.jpg "What is shown in this diagram?" ./adapter_model.safetensors 4.0`
+
+> You can run with multiple LoRA adapters by providing multiple `<adapter_path> <alpha>` pairs.
+
+> [!NOTE]
+> ### LoRA `alpha` interpretation in OpenVINO GenAI
+> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference.
+>
+> In this context, the `alpha` value already includes:
+> - normalization by LoRA rank (`alpha / rank`)
+> - any user-defined scaling factor (`weight`)
+>
+> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training.
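+>
+> As an illustrative example (hypothetical numbers, not taken from the adapter above): for an adapter trained with `alpha = 16` and `rank = 8` that you want to apply with a user weight of `2.0`, the value to pass to the sample is `2.0 * 16 / 8 = 4.0`.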
 
 ## Run video-to-text chat sample:
 
diff --git a/samples/cpp/visual_language_chat/visual_language_lora.cpp b/samples/cpp/visual_language_chat/visual_language_lora.cpp
new file mode 100644
index 0000000000..aa15cb4360
--- /dev/null
+++ b/samples/cpp/visual_language_chat/visual_language_lora.cpp
@@ -0,0 +1,69 @@
+// Copyright (C) 2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "load_image.hpp"
+#include <cstdlib>
+#include <filesystem>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <openvino/genai/generation_config.hpp>
+#include <openvino/genai/lora_adapter.hpp>
+#include <openvino/genai/visual_language/pipeline.hpp>
+
+ov::genai::StreamingStatus print_subword(std::string&& subword) {
+    std::cout << subword << std::flush;
+    return ov::genai::StreamingStatus::RUNNING;
+}
+int main(int argc, char* argv[]) try {
+    // At least one LoRA adapter must be provided.
+    OPENVINO_ASSERT(argc >= 6 && ((argc - 4) % 2) == 0,
+                    "Usage: ", argv[0],
+                    " <MODEL_DIR> <IMAGE_FILE_OR_DIR> <PROMPT> <ADAPTER_SAFETENSORS_FILE> <ALPHA> [<ADAPTER_SAFETENSORS_FILE> <ALPHA> ...]");
+
+    std::vector<ov::Tensor> rgbs = utils::load_images(argv[2]);
+
+    const std::string device = "CPU";  // GPU can be used as well
+    ov::AnyMap pipeline_properties;
+
+    const std::string prompt = argv[3];
+
+    // LoRA args parsed as pairs: <adapter_path> <alpha>
+    ov::genai::AdapterConfig adapter_config;
+    for (int idx = 4; idx + 1 < argc; idx += 2) {
+        ov::genai::Adapter adapter(argv[idx]);
+        float alpha = std::stof(argv[idx + 1]);
+        adapter_config.add(adapter, alpha);
+    }
+    pipeline_properties.insert({ov::genai::adapters(adapter_config)});
+
+    ov::genai::VLMPipeline pipe(argv[1], device, pipeline_properties);
+
+    ov::genai::GenerationConfig generation_config;
+    generation_config.max_new_tokens = 100;
+
+    std::cout << "Generating answer with LoRA adapters applied:\n";
+    pipe.generate(prompt,
+                  ov::genai::images(rgbs),
+                  ov::genai::generation_config(generation_config),
+                  ov::genai::streamer(print_subword));
+
+    std::cout << "\n----------\nGenerating answer without LoRA adapters applied:\n";
+    pipe.generate(prompt,
+                  ov::genai::images(rgbs),
+                  ov::genai::generation_config(generation_config),
+                  ov::genai::adapters(),
+                  ov::genai::streamer(print_subword));
+    std::cout << "\n----------\n";
+
+} catch (const std::exception& error) {
+    try {
+        std::cerr << error.what() << '\n';
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+} catch (...) {
+    try {
+        std::cerr << "Non-exception object thrown\n";
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+}
diff --git a/samples/python/visual_language_chat/README.md b/samples/python/visual_language_chat/README.md
index aa7146d9bf..b23f6b4958 100644
--- a/samples/python/visual_language_chat/README.md
+++ b/samples/python/visual_language_chat/README.md
@@ -6,6 +6,7 @@ The following are sample files:
  - [`visual_language_chat.py`](./visual_language_chat.py) demonstrates basic usage of the VLM pipeline which supports accelerated inference using prompt lookup decoding.
  - [`video_to_text_chat.py`](./video_to_text_chat.py) demonstrates video to text usage of the VLM pipeline.
  - [`benchmark_vlm.py`](./benchmark_vlm.py) shows how to benchmark a VLM in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text and calculating various performance metrics.
+ - [`visual_language_lora.py`](./visual_language_lora.py) demonstrates how to apply one or more LoRA adapters to a VLM at runtime.
 - [`milebench_eval_vlm.py`](./milebench_eval_vlm.py) provides MileBench validation for VLMs, enabling evaluation of image–text reasoning and visual QA tasks across multiple subsets designed to assess the MultImodal Long-contExt capabilities of MLLMs.
 
 ## Download and convert the model and tokenizers
 
@@ -50,6 +51,38 @@ Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pi
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
 
+## Run image-to-text sample with LoRA adapters:
+
+This sample runs generation twice for the same prompt and image: first with the LoRA adapter(s) applied, then without any adapters (base model).
+
+Export `Qwen/Qwen2.5-VL-7B-Instruct` to OpenVINO as [described above for MiniCPM-V](#download-and-convert-the-model-and-tokenizers), then download the LoRA adapter `Mouad2004/qwen2.5-vl-lora-diagrams`:
+
+```sh
+wget -O adapter_model.safetensors \
+  https://huggingface.co/Mouad2004/qwen2.5-vl-lora-diagrams/resolve/main/adapter_model.safetensors
+```
+
+This OpenVINO overview diagram can be used as a convenient image input:
+
+```sh
+wget -O openvino-overview-diagram.jpg \
+  https://docs.openvino.ai/2026/_images/openvino-overview-diagram.jpg
+```
+
+`python visual_language_lora.py ./Qwen2.5-VL-7B-Instruct ./openvino-overview-diagram.jpg "What is shown in this diagram?" ./adapter_model.safetensors 4.0`
+
+> You can run with multiple LoRA adapters by providing multiple `<adapter_path> <alpha>` pairs.
+
+> [!NOTE]
+> ### LoRA `alpha` interpretation in OpenVINO GenAI
+> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference.
+>
+> In this context, the `alpha` value already includes:
+> - normalization by LoRA rank (`alpha / rank`)
+> - any user-defined scaling factor (`weight`)
+>
+> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training.
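+>
+> As an illustrative example (hypothetical numbers, not taken from the adapter above): for an adapter trained with `alpha = 16` and `rank = 8` that you want to apply with a user weight of `2.0`, the value to pass to the sample is `2.0 * 16 / 8 = 4.0`.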
+
 ## Run video-to-text chat sample:
 
 A model that supports video input is required to run this sample, for example `llava-hf/LLaVA-NeXT-Video-7B-hf`.
diff --git a/samples/python/visual_language_chat/visual_language_lora.py b/samples/python/visual_language_chat/visual_language_lora.py
new file mode 100644
index 0000000000..1678d35ad6
--- /dev/null
+++ b/samples/python/visual_language_chat/visual_language_lora.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import argparse
+import numpy as np
+import openvino_genai as ov_genai
+
+from pathlib import Path
+from PIL import Image
+from openvino import Tensor
+
+
+def streamer(subword: str) -> bool:
+    """
+
+    Args:
+        subword: sub-word of the generated text.
+
+    Returns: A flag indicating whether generation should be stopped.
+
+    """
+    print(subword, end="", flush=True)
+
+    # No value is returned as in this example we don't want to stop the generation in this method.
+    # "return None" will be treated the same as "return openvino_genai.StreamingStatus.RUNNING".
+
+
+def read_image(path: str) -> Tensor:
+    """
+
+    Args:
+        path: The path to the image.
+
+    Returns: the ov.Tensor containing the image.
+ + """ + pic = Image.open(path).convert("RGB") + image_data = np.array(pic) + return Tensor(image_data) + + +def read_images(path: str) -> list[Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + + +def parse_lora_pairs(raw): + if len(raw) < 2: + raise argparse.ArgumentTypeError( + "At least one LoRA adapter pair is required: [ ...]" + ) + if len(raw) % 2 != 0: + raise argparse.ArgumentTypeError("LoRA args must come in pairs: ...") + + pairs = [] + for i in range(0, len(raw), 2): + path = raw[i] + try: + alpha = float(raw[i + 1]) + except ValueError as e: + raise argparse.ArgumentTypeError(f"Invalid alpha '{raw[i + 1]}' for LoRA '{path}'") from e + pairs.append((path, alpha)) + return pairs + + +def main() -> int: + p = argparse.ArgumentParser( + description="OpenVINO GenAI VLM sample: run with and without LoRA adapters.", + formatter_class=argparse.RawTextHelpFormatter, + ) + p.add_argument("model_dir", help="Path to model directory") + p.add_argument("images_path", help="Image file OR directory with images") + p.add_argument("prompt", help="Prompt/question to ask") + p.add_argument( + "lora_pairs", + nargs="+", + metavar="LORA_ALPHA", + help="Pairs: ...", + ) + + args = p.parse_args() + prompt = args.prompt + loras = parse_lora_pairs(args.lora_pairs) + + rgbs = read_images(args.images_path) + + device = "CPU" # GPU can be used as well + + pipe_kwargs = {} + + # Configure LoRA adapters with weights (alphas) + if loras: + adapter_config = ov_genai.AdapterConfig() + for lora_path, alpha in loras: + adapter_config.add(ov_genai.Adapter(lora_path), alpha) + pipe_kwargs["adapters"] = adapter_config + + pipe = ov_genai.VLMPipeline(args.model_dir, device, **pipe_kwargs) + + gen_cfg = ov_genai.GenerationConfig() + gen_cfg.max_new_tokens = 100 + + print("Generating answer with LoRA adapters applied:") + pipe.generate( + prompt, + images=rgbs, + generation_config=gen_cfg, + streamer=streamer, + ) + + print("\n----------\nGenerating answer without LoRA adapters applied:") + pipe.generate( + prompt, + images=rgbs, + generation_config=gen_cfg, + adapters=ov_genai.AdapterConfig(), + streamer=streamer, + ) + + print("\n----------") + return 0 + + +if __name__ == "__main__": + main() diff --git a/site/docs/guides/lora-adapters.mdx b/site/docs/guides/lora-adapters.mdx index 7caf169cba..56d9a86344 100644 --- a/site/docs/guides/lora-adapters.mdx +++ b/site/docs/guides/lora-adapters.mdx @@ -13,7 +13,7 @@ LoRA adapters enable customization of model outputs for specific tasks, styles, For more details about LoRA, see [Low-Rank Adaptation (LoRA)](/docs/concepts/lora). ::: -OpenVINO GenAI provides built-in support for LoRA adapters in [text generation](/docs/use-cases/text-generation/) and [image generation](/docs/use-cases/image-generation/) pipelines. +OpenVINO GenAI provides built-in support for LoRA adapters in [text generation](/docs/use-cases/text-generation/), [image generation](/docs/use-cases/image-generation/), and [image processing (VLM)](/docs/use-cases/image-processing) pipelines. This capability allows you to dynamically switch between or combine multiple adapters without recompiling the model. :::info diff --git a/site/docs/supported-models/index.mdx b/site/docs/supported-models/index.mdx index 23ba731c97..529f40d141 100644 --- a/site/docs/supported-models/index.mdx +++ b/site/docs/supported-models/index.mdx @@ -51,8 +51,9 @@ Models should belong to the same family and have the same tokenizers. 
## Visual Language Models (VLMs) -:::info LoRA Support -VLM pipeline does **not** support LoRA adapters. +:::tip LoRA Support +VLM pipeline supports LoRA adapters applied to the language-model (LLM) part. +LoRA adapters targeting the vision encoder or other multimodal components are not supported. ::: diff --git a/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx index b953df27bf..531fc22399 100644 --- a/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx +++ b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx @@ -106,6 +106,13 @@ Similar to [text generation](/docs/use-cases/text-generation/#use-different-gene +### Working with LoRA Adapters + +For Visual Language Models (VLMs), LoRA adapters can customize the generated text by applying adapters to the language-model (LLM) part. +LoRA adapters that target the vision encoder or other multimodal components are not supported. + +Refer to the [LoRA Adapters](/docs/guides/lora-adapters) guide for more details on working with LoRA adapters. + diff --git a/src/cpp/src/lora/adapter.cpp b/src/cpp/src/lora/adapter.cpp index 2e8adcc349..a41794939f 100644 --- a/src/cpp/src/lora/adapter.cpp +++ b/src/cpp/src/lora/adapter.cpp @@ -1038,6 +1038,9 @@ class SafetensorsAdapterImpl : public AdapterImpl { public: SafetensorsAdapterImpl(const std::filesystem::path& path) { + OPENVINO_ASSERT(std::filesystem::exists(path), "LoRA adapter path does not exist: ", path.string()); + OPENVINO_ASSERT(path.extension().string() == ".safetensors", "Expected .safetensors file, got: ", path.string()); + auto safetensor_content = read_safetensors(path); constant_tensors = group_lora_constant_tensors(safetensor_content, default_lora_constant_patterns()); for (const auto& constant_tensor : constant_tensors) { diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 76da8f5f69..a4f58a0377 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -23,6 +23,7 @@ #include "sampling/sampler.hpp" #include "utils.hpp" #include "lm_encoding.hpp" +#include "lora/helper.hpp" using namespace ov::genai; @@ -61,6 +62,8 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ // position_ids[N, conversation length], beam_idx[N]. // Output shape: logits[N, conversation length, vocab_size]. ov::InferRequest m_language; + // LoRA adapter controller + std::optional m_adapter_controller; // True if chat mode is activated to save conversation // history between generate() calls. bool m_is_chat_conversation = false; @@ -93,7 +96,8 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ } { m_is_npu = device.find("NPU") != std::string::npos; - auto properties_copy = properties; + auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters); + auto& properties_copy = filtered_properties.fork(); auto language_model_path = models_dir / "openvino_language_model.xml"; auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); auto kv_pos = ov::genai::utils::get_kv_axes_pos(language_model); @@ -111,6 +115,13 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ ? 
properties_copy : utils::pop_or_default(device_properties, device, {}); + if (m_generation_config.adapters) { + m_generation_config.adapters->set_tensor_name_prefix( + m_generation_config.adapters->get_tensor_name_prefix().value_or("base_model.model.") + ); + m_adapter_controller = AdapterController(language_model, *m_generation_config.adapters, device); + } + ov::CompiledModel compiled_language_model; auto embedder_device = device; if (m_is_npu) { @@ -167,18 +178,34 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ OPENVINO_ASSERT(!m_is_npu, "VLMPipeline initialization from string isn't supported for NPU device"); - m_inputs_embedder = std::make_shared(models_map, tokenizer, config_dir_path, device, properties); + auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters); + auto& properties_copy = filtered_properties.fork(); + + m_inputs_embedder = std::make_shared(models_map, tokenizer, config_dir_path, device, properties_copy); m_tokenizer = m_inputs_embedder->get_tokenizer(); m_embedding = m_inputs_embedder->get_embedding_model(); auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); + auto language_model = utils::singleton_core().read_model(m_language_pair.first, m_language_pair.second); + auto kv_pos = ov::genai::utils::get_kv_axes_pos(language_model); + + if (m_generation_config.adapters) { + m_generation_config.adapters->set_tensor_name_prefix( + m_generation_config.adapters->get_tensor_name_prefix().value_or("base_model.model.") + ); + m_adapter_controller = AdapterController(language_model, *m_generation_config.adapters, device); + } + m_language = utils::singleton_core().compile_model( - m_language_pair.first, m_language_pair.second, device, properties + m_language_pair.first, m_language_pair.second, device, properties_copy ).create_infer_request(); m_language.get_tensor("attention_mask").set_shape({1, 0}); + utils::KVCacheState& kv_cache_state = m_inputs_embedder->get_kv_cache_state(); + kv_cache_state.seq_length_axis = kv_pos.seq_len; + // If eos_token_id was not provided, take value if (m_generation_config.eos_token_id == -1) { m_generation_config.set_eos_token_id(m_tokenizer.get_eos_token_id()); @@ -211,7 +238,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto& raw_counters = perf_metrics.raw_metrics; if (!m_is_chat_conversation) { - m_language.reset_state(); + reset_language_state(); m_language.get_tensor("attention_mask").set_shape({1, 0}); } @@ -380,7 +407,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ bool use_full_history = processed_chat_data.needs_kv_cache_reset || m_use_full_chat_history; if (use_full_history) { - m_language.reset_state(); + reset_language_state(); m_language.get_tensor("attention_mask").set_shape({1, 0}); m_inputs_embedder->start_chat(""); } @@ -481,7 +508,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ m_image_id = 0; m_video_id = 0; // Resetting state may be slow. 
- m_language.reset_state(); + reset_language_state(); m_language.get_tensor("attention_mask").set_shape({0, 0}); // clear all chat history m_inputs_embedder->finish_chat(); @@ -518,6 +545,19 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ } private: + void reset_language_state() { + if (m_adapter_controller) { + // Preserve adapter-owned state variables + for (auto& state : m_language.query_state()) { + if (!m_adapter_controller->has_state_name(state.get_name())) { + state.reset(); + } + } + } else { + m_language.reset_state(); + } + } + void setup_generation_config(GenerationConfig& generation_config) { // If stop_token_ids were not provided, take value from default m_generation_config if (generation_config.stop_token_ids.empty()) @@ -586,13 +626,17 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ if (m_is_chat_conversation) { if (m_use_full_chat_history) { kv_cache_state.reset_state(); - m_language.reset_state(); + reset_language_state(); m_language.get_tensor("attention_mask").set_shape({1, 0}); } else { - utils::trim_kv_cache(m_language, kv_cache_state, std::nullopt); + utils::trim_kv_cache(m_language, kv_cache_state, m_adapter_controller); } } + if (m_adapter_controller) { + m_adapter_controller->apply(m_language, generation_config.adapters); + } + std::vector requests; size_t request_id = 0; size_t block_size = 1; // not used diff --git a/src/cpp/src/visual_language/pipeline_base.hpp b/src/cpp/src/visual_language/pipeline_base.hpp index aaeaf0795d..cdd31a7a43 100644 --- a/src/cpp/src/visual_language/pipeline_base.hpp +++ b/src/cpp/src/visual_language/pipeline_base.hpp @@ -129,4 +129,4 @@ class ov::genai::VLMPipeline::VLMPipelineBase { return m_load_time_ms; } }; -} +} // namespace ov::genai diff --git a/tests/python_tests/samples/conftest.py b/tests/python_tests/samples/conftest.py index 61403fe6d2..ae7fc08b71 100644 --- a/tests/python_tests/samples/conftest.py +++ b/tests/python_tests/samples/conftest.py @@ -189,7 +189,8 @@ "cat": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11", "3283_1447_000.tar.gz": "https://huggingface.co/datasets/facebook/multilingual_librispeech/resolve/main/data/mls_polish/train/audio/3283_1447_000.tar.gz", "cmu_us_awb_arctic-wav-arctic_a0001.bin": "https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_awb_arctic-wav-arctic_a0001.bin", - "video0.mp4": "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4" + "video0.mp4": "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4", + "qwen2b_lora_100_adapter_model.safetensors": "https://huggingface.co/saim1212/qwen2b-lora-100/resolve/main/adapter_model.safetensors", } SAMPLES_PY_DIR = Path( @@ -368,44 +369,56 @@ def download_test_content(request): test_data = request.config.cache.get("TEST_DATA", None) - file_name = request.param - file_url = TEST_FILES[file_name] - file_path = os.path.join(test_data, file_name) - - if not os.path.exists(file_path): - logger.info(f"Downloading test content from {file_url} to {file_path}...") - os.makedirs(os.path.dirname(file_path), exist_ok=True) - response = requests.get(file_url, stream=True) - response.raise_for_status() - with open(file_path, 'wb') as f: - for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) - logger.info(f"Downloaded test content to {file_path}") - else: - logger.info(f"Test content 
already exists at {file_path}") - - # If the file is a tarball, extract it - extracted_dir = None - if file_name.endswith(".tar.gz"): - extracted_dir = os.path.join(test_data, os.path.splitext(file_name)[0]) - if not os.path.exists(extracted_dir): - os.makedirs(extracted_dir, exist_ok=True) - shutil.unpack_archive(file_path, extracted_dir) - logger.info(f"Extracted tarball to {extracted_dir}") + def download_one(file_name: str): + file_url = TEST_FILES[file_name] + file_path = os.path.join(test_data, file_name) + + if not os.path.exists(file_path): + logger.info(f"Downloading test content from {file_url} to {file_path}...") + os.makedirs(os.path.dirname(file_path), exist_ok=True) + response = requests.get(file_url, stream=True) + response.raise_for_status() + with open(file_path, "wb") as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + logger.info(f"Downloaded test content to {file_path}") else: - logger.info(f"Extracted folder already exists at {extracted_dir}") - yield extracted_dir - else: - yield file_path + logger.info(f"Test content already exists at {file_path}") + + extracted_dir = None + if file_name.endswith(".tar.gz"): + extracted_dir = os.path.join(test_data, os.path.splitext(file_name)[0]) + if not os.path.exists(extracted_dir): + os.makedirs(extracted_dir, exist_ok=True) + shutil.unpack_archive(file_path, extracted_dir) + logger.info(f"Extracted tarball to {extracted_dir}") + else: + logger.info(f"Extracted folder already exists at {extracted_dir}") + return extracted_dir, file_path, extracted_dir + + return file_path, file_path, extracted_dir + + request_param = request.param + is_multi = isinstance(request_param, (list, tuple)) + file_names = request_param if is_multi else [request_param] + + downloaded_paths = [] + cleanup_items = [] + for file_name in file_names: + downloaded_path, file_path, extracted_dir = download_one(file_name) + downloaded_paths.append(downloaded_path) + cleanup_items.append((file_path, extracted_dir)) + + yield downloaded_paths if is_multi else downloaded_paths[0] - # Cleanup the test content after tests if os.environ.get("CLEANUP_CACHE", "false").lower() == "true": - if extracted_dir and os.path.exists(extracted_dir): - logger.info(f"Removing extracted folder: {extracted_dir}") - shutil.rmtree(extracted_dir) - if os.path.exists(file_path): - logger.info(f"Removing test content: {file_path}") - os.remove(file_path) + for file_path, extracted_dir in cleanup_items: + if extracted_dir and os.path.exists(extracted_dir): + logger.info(f"Removing extracted folder: {extracted_dir}") + shutil.rmtree(extracted_dir) + if os.path.exists(file_path): + logger.info(f"Removing test content: {file_path}") + os.remove(file_path) @pytest.fixture(scope="session") diff --git a/tests/python_tests/samples/test_lora.py b/tests/python_tests/samples/test_lora.py index 7a63a13327..3b7c19985f 100644 --- a/tests/python_tests/samples/test_lora.py +++ b/tests/python_tests/samples/test_lora.py @@ -5,7 +5,7 @@ import pytest import sys -from conftest import SAMPLES_PY_DIR +from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR from test_utils import run_sample class TestLora: @@ -18,3 +18,126 @@ def test_python_sample_lora(self, convert_model, download_test_content, sample_a py_script = SAMPLES_PY_DIR / "text_generation/lora_greedy_causal_lm.py" py_command = [sys.executable, py_script, convert_model, download_test_content, sample_args] run_sample(py_command) + + @pytest.mark.vlm + @pytest.mark.samples + @pytest.mark.parametrize( + "convert_model, 
download_test_content, prompt, alpha", + [ + pytest.param( + "Qwen2-VL-2B-Instruct", + ("qwen2b_lora_100_adapter_model.safetensors", "monalisa.jpg"), + "Who drew this painting?", + "2.0", + ), + ], + indirect=["convert_model", "download_test_content"], + ) + def test_sample_visual_language_lora(self, convert_model, download_test_content, prompt, alpha): + adapter_path, image_path = download_test_content + assert os.path.exists(image_path), f"Missing test image: {image_path}" + + # Test CPP sample + cpp_sample = SAMPLES_CPP_DIR / "visual_language_lora" + cpp_command = [cpp_sample, convert_model, image_path, prompt, adapter_path, alpha] + cpp_result = run_sample(cpp_command) + + # Test Python sample + py_script = SAMPLES_PY_DIR / "visual_language_chat/visual_language_lora.py" + py_command = [sys.executable, py_script, convert_model, image_path, prompt, adapter_path, alpha] + py_result = run_sample(py_command) + + # Compare results + assert py_result.stdout == cpp_result.stdout, f"Results should match" + + @pytest.mark.vlm + @pytest.mark.samples + @pytest.mark.parametrize( + "convert_model, download_test_content, prompt, alpha", + [ + pytest.param( + "Qwen2-VL-2B-Instruct", + ("qwen2b_lora_100_adapter_model.safetensors", "monalisa.jpg"), + "Who drew this painting?", + "2.0", + ), + ], + indirect=["convert_model", "download_test_content"], + ) + def test_sample_visual_language_lora_multi_alpha_zero_matches_single( + self, convert_model, download_test_content, prompt, alpha + ): + adapter_path, image_path = download_test_content + assert os.path.exists(image_path), f"Missing test image: {image_path}" + + # Baseline: single adapter + cpp_sample = SAMPLES_CPP_DIR / "visual_language_lora" + cpp_single_command = [cpp_sample, convert_model, image_path, prompt, adapter_path, alpha] + cpp_single_result = run_sample(cpp_single_command) + + py_script = SAMPLES_PY_DIR / "visual_language_chat/visual_language_lora.py" + py_single_command = [sys.executable, py_script, convert_model, image_path, prompt, adapter_path, alpha] + py_single_result = run_sample(py_single_command) + assert py_single_result.stdout == cpp_single_result.stdout, "Single-LoRA C++/Python results should match" + + # Multi-LoRA: add the same adapter with alpha=0.0) + cpp_multi_command = [cpp_sample, convert_model, image_path, prompt, adapter_path, alpha, adapter_path, "0.0"] + cpp_multi_result = run_sample(cpp_multi_command) + + py_multi_command = [ + sys.executable, + py_script, + convert_model, + image_path, + prompt, + adapter_path, + alpha, + adapter_path, + "0.0", + ] + py_multi_result = run_sample(py_multi_command) + + assert py_multi_result.stdout == cpp_multi_result.stdout, "Multi-LoRA C++/Python results should match" + assert cpp_multi_result.stdout == cpp_single_result.stdout, ( + "Multi-LoRA (with alpha=0) should match single-LoRA output" + ) + assert py_multi_result.stdout == py_single_result.stdout, ( + "Multi-LoRA (with alpha=0) should match single-LoRA output" + ) + + @pytest.mark.vlm + @pytest.mark.samples + @pytest.mark.parametrize( + "convert_model, download_test_content, prompt", + [ + pytest.param( + "Qwen2-VL-2B-Instruct", + ("qwen2b_lora_100_adapter_model.safetensors", "monalisa.jpg"), + "Who drew this painting?", + ), + ], + indirect=["convert_model", "download_test_content"], + ) + def test_sample_visual_language_lora_multi_both_nonzero(self, convert_model, download_test_content, prompt): + adapter_path, image_path = download_test_content + assert os.path.exists(image_path), f"Missing test image: {image_path}" + + 
cpp_sample = SAMPLES_CPP_DIR / "visual_language_lora" + cpp_command = [cpp_sample, convert_model, image_path, prompt, adapter_path, "1.0", adapter_path, "1.0"] + cpp_result = run_sample(cpp_command) + + py_script = SAMPLES_PY_DIR / "visual_language_chat/visual_language_lora.py" + py_command = [ + sys.executable, + py_script, + convert_model, + image_path, + prompt, + adapter_path, + "1.0", + adapter_path, + "1.0", + ] + py_result = run_sample(py_command) + + assert py_result.stdout == cpp_result.stdout, "Multi-LoRA C++/Python results should match" diff --git a/tools/who_what_benchmark/requirements.txt b/tools/who_what_benchmark/requirements.txt index d9cd2a27c2..b7c9a15fcc 100644 --- a/tools/who_what_benchmark/requirements.txt +++ b/tools/who_what_benchmark/requirements.txt @@ -25,3 +25,5 @@ vocos==0.1.0 vector-quantize-pytorch==1.27.20 torchaudio>=2.1.0,<=2.10.0 torchvision>=0.16,<=0.25.0 +# For LoRA adapters +peft==0.18.1 diff --git a/tools/who_what_benchmark/tests/conftest.py b/tools/who_what_benchmark/tests/conftest.py index 9f20784cf2..0b04f7550d 100644 --- a/tools/who_what_benchmark/tests/conftest.py +++ b/tools/who_what_benchmark/tests/conftest.py @@ -50,6 +50,10 @@ "name": "optimum-intel-internal-testing/tiny-random-llava", "convert_args": ["--trust-remote-code", "--task", "image-text-to-text"], }, + "Qwen2-VL-2B-Instruct": { + "name": "Qwen/Qwen2-VL-2B-Instruct", + "convert_args": ["--trust-remote-code", "--task", "image-text-to-text"], + }, "tiny-random-stable-diffusion-xl": {"name": "echarlaix/tiny-random-stable-diffusion-xl", "convert_args": []}, "stable-diffusion-3-tiny-random": {"name": "yujiepan/stable-diffusion-3-tiny-random", "convert_args": []}, "tiny-random-flux": {"name": "optimum-intel-internal-testing/tiny-random-flux", "convert_args": []}, diff --git a/tools/who_what_benchmark/tests/ov_utils.py b/tools/who_what_benchmark/tests/ov_utils.py index 49e6c2f390..8040152ca0 100644 --- a/tools/who_what_benchmark/tests/ov_utils.py +++ b/tools/who_what_benchmark/tests/ov_utils.py @@ -6,6 +6,7 @@ import uuid import shutil import logging +import subprocess # nosec B404 from pathlib import Path from typing import Callable @@ -133,3 +134,67 @@ def _cleanup_temp(self) -> None: shutil.rmtree(self.temp_path) except Exception: logger.exception("Could not clean up temp directory") + + +def download_hf_files_to_cache(repo_id: str, cache_dir: Path, filenames: list[str]) -> Path: + """Download a set of files from a Hugging Face repo into a local cache directory. + + This helper is designed for tests that share a cache across CI jobs. If the + destination directory already exists, it verifies that all required files are + present and only downloads missing ones. + + Args: + repo_id: Hugging Face repo id (e.g. "org/model"). + cache_dir: Destination directory. + filenames: List of repo file paths to download. + + Returns: + Path to the destination directory containing the downloaded files. + """ + + dest_dir = Path(cache_dir) + + def download_to_local_dir(local_dir: Path) -> None: + for filename in filenames: + command = [ + "huggingface-cli", + "download", + repo_id, + filename, + "--local-dir", + str(local_dir), + ] + + def _run_download() -> None: + subprocess.run(command, check=True, text=True, capture_output=True) + + retry_request(_run_download) + + # If destination exists (e.g. shared CI cache), make sure all required files are present. 
+ if dest_dir.exists(): + dest_dir.mkdir(parents=True, exist_ok=True) + missing = [name for name in filenames if not (dest_dir / name).exists()] + if missing: + temp_dir = dest_dir.parent / f".tmp_{dest_dir.name}_{uuid.uuid4().hex[:8]}" + temp_dir.mkdir(parents=True, exist_ok=True) + try: + download_to_local_dir(temp_dir) + for filename in missing: + src = temp_dir / filename + if not src.exists(): + raise AssertionError(f"Download failed: {src}") + dst = dest_dir / filename + dst.parent.mkdir(parents=True, exist_ok=True) + src.replace(dst) + finally: + shutil.rmtree(temp_dir, ignore_errors=True) + else: + manager = AtomicDownloadManager(dest_dir) + manager.execute(download_to_local_dir) + + for filename in filenames: + downloaded = dest_dir / filename + if not downloaded.exists(): + raise AssertionError(f"Download failed: {downloaded}") + + return dest_dir diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index 4efa9b5d30..9801e991ee 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -87,6 +87,93 @@ def run_test(model_id, model_type, optimum_threshold, genai_threshold, tmp_path) ]) +def run_test_with_lora( + model_id: str, + model_type: str, + lora_repo_id: str, + lora_cache_subdir: str, + hf_alpha: float, + genai_alpha: float, + tmp_path, + *, + genai_threshold: float, +): + if sys.platform == "darwin": + pytest.xfail("Ticket 173169") + if sys.platform == "win32": + pytest.xfail("Ticket 178790") + + gt_file = tmp_path / "gt.csv" + model_path = convert_model(model_id) + + from ov_utils import get_ov_cache_dir + from ov_utils import download_hf_files_to_cache + + lora_filenames = ["adapter_model.safetensors", "adapter_config.json"] + lora_cache_dir = get_ov_cache_dir() / "test_data" / lora_cache_subdir + lora_adapter_dir = download_hf_files_to_cache(lora_repo_id, lora_cache_dir, lora_filenames) + lora_adapter_file = lora_adapter_dir / "adapter_model.safetensors" + assert lora_adapter_file.exists(), f"LoRA adapter wasn't downloaded: {lora_adapter_file}" + + # 1) Generate GT using HF + LoRA. 
+ run_wwb( + [ + "--base-model", + model_id, + "--num-samples", + "1", + "--gt-data", + gt_file, + "--device", + "CPU", + "--model-type", + model_type, + "--hf", + "--adapters", + str(lora_adapter_dir), + "--alphas", + str(hf_alpha), + "--max_new_tokens", + "32", + ] + ) + assert gt_file.exists(), f"GT wasn't generated: {gt_file}" + + # 2) Target: GenAI + LoRA + outputs_genai = tmp_path / "genai_lora" + out_genai = run_wwb( + [ + "--target-model", + model_path, + "--num-samples", + "1", + "--gt-data", + gt_file, + "--device", + "CPU", + "--model-type", + model_type, + "--genai", + "--max_new_tokens", + "32", + "--output", + outputs_genai, + "--adapters", + str(lora_adapter_file), + "--alphas", + str(genai_alpha), + ] + ) + + assert (outputs_genai / "target.csv").exists() + assert (outputs_genai / "metrics_per_question.csv").exists() + assert (outputs_genai / "metrics.csv").exists() + assert "Metrics for model" in out_genai + similarity_genai = get_similarity(out_genai) + + assert similarity_genai >= genai_threshold + + @pytest.mark.parametrize( ("model_id", "model_type"), [ @@ -117,3 +204,44 @@ def test_vlm_nanollava(model_id, model_type, optimum_threshold, genai_threshold, ) def test_vlm_video(model_id, model_type, tmp_path): run_test(model_id, model_type, 0.8, 0.8, tmp_path) + + +@pytest.mark.parametrize( + ( + "model_id", + "model_type", + "lora_repo_id", + "hf_alpha", + "genai_alpha", + "genai_threshold", + ), + [ + ( + "optimum-intel-internal-testing/tiny-random-qwen2vl", + "visual-text", + "likholat/tiny-random-qwen2vl-lora", + 1.0, + 2.0, + 0.99, + ), + ], +) +def test_vlm_lora( + model_id, + model_type, + lora_repo_id, + hf_alpha, + genai_alpha, + genai_threshold, + tmp_path, +): + run_test_with_lora( + model_id=model_id, + model_type=model_type, + lora_repo_id=lora_repo_id, + lora_cache_subdir="wwb_tiny_random_qwen2vl_lora", + hf_alpha=hf_alpha, + genai_alpha=genai_alpha, + tmp_path=tmp_path, + genai_threshold=genai_threshold, + ) diff --git a/tools/who_what_benchmark/whowhatbench/model_loaders.py b/tools/who_what_benchmark/whowhatbench/model_loaders.py index de5014e2fb..da2bbace74 100644 --- a/tools/who_what_benchmark/whowhatbench/model_loaders.py +++ b/tools/who_what_benchmark/whowhatbench/model_loaders.py @@ -19,18 +19,34 @@ is_qwen3, ) from .utils import ( + apply_peft_adapters, mock_torch_cuda_is_available, mock_AwqQuantizer_validate_environment, disable_diffusers_model_progress_bar, + get_json_config, + normalize_lora_adapters_and_alphas, ) import os -from whowhatbench.utils import get_json_config - logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +def _create_genai_adapter_config(adapters=None, alphas=None, *, none_if_empty=False): + import openvino_genai + + adapter_config = openvino_genai.AdapterConfig() + if adapters is None: + return None if none_if_empty else adapter_config + + adapters, alphas = normalize_lora_adapters_and_alphas(adapters, alphas) + for adapter, alpha in zip(adapters, alphas): + ov_adapter = openvino_genai.Adapter(adapter) + adapter_config.add(ov_adapter, alpha) + + return adapter_config + + class GenAIModelWrapper: """ A helper class to store additional attributes for GenAI models @@ -116,11 +132,10 @@ def load_text_genai_pipeline(model_dir, device="CPU", ov_config=None, **kwargs): if kwargs.get('gguf_file'): pipeline_path = os.path.join(model_dir, kwargs['gguf_file']) - adapter_config = openvino_genai.AdapterConfig() - if kwargs.get("adapters") is not None: - for adapter, alpha in zip(kwargs['adapters'], 
kwargs['alphas']): - ov_adapter = openvino_genai.Adapter(adapter) - adapter_config.add(ov_adapter, alpha) + adapter_config = _create_genai_adapter_config( + adapters=kwargs.get("adapters"), + alphas=kwargs.get("alphas", None), + ) draft_model_path = kwargs.get("draft_model", '') if draft_model_path: @@ -193,23 +208,7 @@ def load_text_hf_pipeline(model_id, device, **kwargs): ) if kwargs.get("adapters") is not None: - adapters = kwargs["adapters"] - alphas = kwargs.get("alphas", None) - - from peft import PeftModel - adapter_names = ["adapter_0"] - model = PeftModel.from_pretrained(model, adapters[0], adapter_name=adapter_names[0]) - - for idx, adapter in enumerate(adapters[1:], start=1): - model.load_adapter(adapter, adapter_name=f"adapter_{idx}") - adapter_names.append(f"adapter_{idx}") - - print('alphas', alphas) - - assert len(alphas) == len(adapter_names), "`alphas` must be the same length as `adapters`" - model.add_weighted_adapter(adapter_names, alphas, "merged_lora") - - model.set_adapter("merged_lora") + model = apply_peft_adapters(model, kwargs["adapters"], kwargs.get("alphas", None)) model.eval() return model @@ -268,11 +267,10 @@ def load_text2image_genai_pipeline(model_dir, device="CPU", ov_config=None, **kw "Failed to import openvino_genai package. Please install it.") exit(-1) - adapter_config = openvino_genai.AdapterConfig() - if "adapters" in kwargs and kwargs["adapters"] is not None: - for adapter, alpha in zip(kwargs['adapters'], kwargs['alphas']): - ov_adapter = openvino_genai.Adapter(adapter) - adapter_config.add(ov_adapter, alpha) + adapter_config = _create_genai_adapter_config( + adapters=kwargs.get("adapters"), + alphas=kwargs.get("alphas", None), + ) return GenAIModelWrapper( openvino_genai.Text2ImagePipeline(model_dir, device=device, adapters=adapter_config, **ov_config), @@ -294,16 +292,20 @@ def load_text2image_model( model = DiffusionPipeline.from_pretrained(model_id) except Exception: model = DiffusionPipeline.from_pretrained(model_id, trust_remote_code=True) - if 'adapters' in kwargs and kwargs['adapters'] is not None: - for idx, adapter in enumerate(kwargs['adapters']): + if kwargs.get("adapters") is not None: + adapters = kwargs["adapters"] + alphas = kwargs.get("alphas", None) + adapters, alphas = normalize_lora_adapters_and_alphas(adapters, alphas) + + for idx, adapter in enumerate(adapters): model.load_lora_weights(adapter, adapter_name=f"adapter_{idx}") - model.set_adapters([f"adapter_{idx}" for idx in range(len(kwargs['adapters']))], adapter_weights=kwargs['alphas']) + model.set_adapters([f"adapter_{idx}" for idx in range(len(adapters))], adapter_weights=alphas) else: logger.info("Using Optimum API") from optimum.intel import OVPipelineForText2Image TEXT2IMAGEPipeline = OVPipelineForText2Image - if 'adapters' in kwargs and kwargs['adapters'] is not None: + if "adapters" in kwargs and kwargs["adapters"] is not None: raise ValueError("Adapters are not supported for OVPipelineForText2Image.") model_kwargs = {"ov_config": ov_config, "safety_checker": None} @@ -333,13 +335,29 @@ def load_visual_text_genai_pipeline(model_dir, device="CPU", ov_config=None, **k is_continuous_batching = kwargs.get("cb_config", None) is not None + adapter_config = _create_genai_adapter_config( + adapters=kwargs.get("adapters"), + alphas=kwargs.get("alphas", None), + none_if_empty=True, + ) + + pipeline_kwargs = { + "device": device, + **ov_config, + } + + if adapter_config is not None: + pipeline_kwargs["adapters"] = adapter_config + if is_continuous_batching: logger.info("Using 
OpenVINO GenAI Continuous Batching API") scheduler_config = get_scheduler_config_genai(kwargs["cb_config"]) - pipeline = openvino_genai.VLMPipeline(model_dir, device=device, scheduler_config=scheduler_config, ATTENTION_BACKEND="PA", **ov_config) + pipeline_kwargs["scheduler_config"] = scheduler_config + pipeline_kwargs["ATTENTION_BACKEND"] = "PA" + pipeline = openvino_genai.VLMPipeline(model_dir, **pipeline_kwargs) else: logger.info("Using OpenVINO GenAI VLMPipeline API") - pipeline = openvino_genai.VLMPipeline(model_dir, device=device, **ov_config) + pipeline = openvino_genai.VLMPipeline(model_dir, **pipeline_kwargs) return GenAIModelWrapper( pipeline, @@ -399,6 +417,7 @@ def load_visual_text_model( **from_pretrained_kwargs, ) + # phi4mm modality-specific LoRA adapters (handled internally by the pipeline/model) if config.model_type == "phi4mm": use_lora = False if hasattr(config, "vision_lora") and config.vision_lora is not None: @@ -413,6 +432,10 @@ def load_visual_text_model( if hasattr(model.model, "_require_grads_hook"): model.model.disable_input_require_grads() + # Common LoRA support via PEFT + if kwargs.get("adapters") is not None: + model = apply_peft_adapters(model, kwargs["adapters"], kwargs.get("alphas", None)) + model.eval() try: model.get_vision_tower().load_model() @@ -429,6 +452,9 @@ def load_visual_text_model( else: logger.info("Using Optimum API") from optimum.intel.openvino import OVModelForVisualCausalLM + + if "adapters" in kwargs and kwargs["adapters"] is not None: + raise ValueError("Adapters are not supported for OVModelForVisualCausalLM.") try: model = OVModelForVisualCausalLM.from_pretrained( model_id, device=device, ov_config=ov_config diff --git a/tools/who_what_benchmark/whowhatbench/utils.py b/tools/who_what_benchmark/whowhatbench/utils.py index c8f2500f6b..ea52c76964 100644 --- a/tools/who_what_benchmark/whowhatbench/utils.py +++ b/tools/who_what_benchmark/whowhatbench/utils.py @@ -146,6 +146,51 @@ def get_json_config(config): return json_config +def normalize_lora_adapters_and_alphas(adapters, alphas): + if adapters is None: + return None, None + + if isinstance(adapters, (str, Path, os.PathLike)): + adapters = [adapters] + elif not isinstance(adapters, (list, tuple)): + raise ValueError("`adapters` must be a non-empty list/tuple, or a single adapter path") + + if len(adapters) == 0: + raise ValueError("`adapters` must be a non-empty list/tuple") + + if alphas is None: + raise ValueError("`alphas` must be provided and match the number of adapters") + + if isinstance(alphas, (int, float)): + alphas = [alphas] + elif not isinstance(alphas, (list, tuple)): + raise ValueError("`alphas` must be a list/tuple with one value per adapter, or a single float") + + if len(alphas) != len(adapters): + raise ValueError("`alphas` must be the same length as `adapters`") + + return list(adapters), list(alphas) + + +def apply_peft_adapters(model, adapters, alphas, merged_adapter_name="merged_lora"): + adapters, alphas = normalize_lora_adapters_and_alphas(adapters, alphas) + + from peft import PeftModel + + adapter_names = ["adapter_0"] + model = PeftModel.from_pretrained(model, adapters[0], adapter_name=adapter_names[0]) + + for idx, adapter in enumerate(adapters[1:], start=1): + adapter_name = f"adapter_{idx}" + model.load_adapter(adapter, adapter_name=adapter_name) + adapter_names.append(adapter_name) + + model.add_weighted_adapter(adapter_names, alphas, merged_adapter_name) + model.set_adapter(merged_adapter_name) + + return model + + # preapre default dataset for 
visualtext(VLM) evalutor def preprocess_fn(example): return {