Commit 9b3fc10 (2 parents: 9eb626e + 1d39d0f)

Merge remote-tracking branch 'origin/master_tr_module_genai' into xp/merge_master

Conflicts:
- src/cpp/CMakeLists.txt
- src/cpp/src/gguf_utils/rtn_quantize.hpp
- src/cpp/src/modeling/models/qwen3_vl/processing_qwen3_vl.cpp
- src/cpp/src/modeling/models/qwen3_vl/processing_qwen3_vl.hpp
- src/cpp/src/modeling/ops/ops.cpp
- src/cpp/src/modeling/weights/quantization_selector.cpp
- src/cpp/src/modeling/weights/quantization_selector.hpp
- src/cpp/src/visual_language/qwen2vl/classes.hpp
- src/cpp/src/visual_language/qwen3_vl/classes.cpp
- src/cpp/src/visual_language/qwen3_vl/classes.hpp
- src/cpp/src/visual_language/vlm_config.cpp
- src/cpp/src/visual_language/vlm_config.hpp

149 files changed: +25474 additions, -418 deletions


.github/workflows/linux.yml
Lines changed: 7 additions & 1 deletion

```diff
@@ -620,7 +620,7 @@ jobs:
           timeout: 90
         - name: 'WWB tests (nanollava)'
           cmd: |
-            python -m pip install transformers==4.48.0
+            python -m pip install transformers==4.48.0 diffusers==0.35.2
             python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava
           run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
           timeout: 90
@@ -630,6 +630,12 @@ jobs:
             python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
           run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
           timeout: 60
+        - name: 'VLM (qwen3-vl)'
+          cmd: |
+            python -m pip install transformers==4.57.0 git+https://github.com/huggingface/optimum-intel.git@0566b76f094d4c3084e06d29a248b39a1bff3fa4
+            python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "qwen3-vl"
+          run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
+          timeout: 60
       defaults:
         run:
           shell: bash
```

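The added 'VLM (qwen3-vl)' step can be reproduced outside CI with essentially the same commands; a sketch, assuming a checkout of the repository root. The `--override-ini cache_dir=/mount/caches/pytest/` option is specific to the CI runner's mount and is dropped here:

```shell
# Pin the transformers release and the optimum-intel commit used by the CI step.
python -m pip install transformers==4.57.0 \
    "git+https://github.com/huggingface/optimum-intel.git@0566b76f094d4c3084e06d29a248b39a1bff3fa4"

# Run only the qwen3-vl cases of the VLM pipeline tests.
python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py -k "qwen3-vl"
```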
.github/workflows/mac.yml
Lines changed: 1 addition & 1 deletion

```diff
@@ -473,7 +473,7 @@ jobs:
           timeout: 120
         - name: 'WWB tests (nanollava)'
           cmd: |
-            python -m pip install transformers==4.48.0
+            python -m pip install transformers==4.48.0 diffusers==0.35.2
             python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava
           run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
           timeout: 90
```

.github/workflows/manylinux_2_28.yml
Lines changed: 7 additions & 1 deletion

```diff
@@ -543,7 +543,7 @@ jobs:
           timeout: 90
         - name: 'WWB tests (nanollava)'
           cmd: |
-            python -m pip install transformers==4.48.0
+            python -m pip install transformers==4.48.0 diffusers==0.35.2
             python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava
           run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
           timeout: 90
@@ -553,6 +553,12 @@ jobs:
             python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
           run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
           timeout: 60
+        - name: 'VLM (qwen3-vl)'
+          cmd: |
+            python -m pip install transformers==4.57.0 git+https://github.com/huggingface/optimum-intel.git@0566b76f094d4c3084e06d29a248b39a1bff3fa4
+            python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "qwen3-vl"
+          run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
+          timeout: 60
       defaults:
         run:
           shell: bash
```

.github/workflows/windows.yml
Lines changed: 7 additions & 1 deletion

```diff
@@ -708,7 +708,7 @@ jobs:
           timeout: 90
         - name: 'WWB tests (nanollava)'
           cmd: |
-            python -m pip install transformers==4.48.0
+            python -m pip install transformers==4.48.0 diffusers==0.35.2
             python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava
           run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
           timeout: 90
@@ -718,6 +718,12 @@ jobs:
             python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
           run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
           timeout: 60
+        - name: 'VLM (qwen3-vl)'
+          cmd: |
+            python -m pip install transformers==4.57.0 git+https://github.com/huggingface/optimum-intel.git@0566b76f094d4c3084e06d29a248b39a1bff3fa4
+            python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "qwen3-vl"
+          run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
+          timeout: 60
       defaults:
         run:
           shell: pwsh
```

README.md
Lines changed: 3 additions & 3 deletions

```diff
@@ -73,7 +73,7 @@ Library efficiently supports LoRA adapters for Text and Image generation scenari
 - Select active adapters for every generation
 - Mix multiple adapters with coefficients via alpha blending

-All scenarios are run on top of OpenVINO Runtime that supports inference on CPU, GPU and NPU. See [here](https://docs.openvino.ai/2025/about-openvino/release-notes-openvino/system-requirements.html) for platform support matrix.
+All scenarios are run on top of OpenVINO Runtime that supports inference on CPU, GPU and NPU. See [here](https://docs.openvino.ai/2026/about-openvino/release-notes-openvino/system-requirements.html) for platform support matrix.

 <a id="optimization-methods"></a>

@@ -87,12 +87,12 @@ OpenVINO™ GenAI library provides a transparent way to use state-of-the-art gen
 Additionally, OpenVINO™ GenAI library implements a continuous batching approach to use OpenVINO within LLM serving. The continuous batching library could be used in LLM serving frameworks and supports the following features:
 - Prefix caching that caches fragments of previous generation requests and corresponding KVCache entries internally and uses them in case of repeated query.

-Continuous batching functionality is used within OpenVINO Model Server (OVMS) to serve LLMs, see [here](https://docs.openvino.ai/2025/openvino-workflow/model-server/ovms_what_is_openvino_model_server.html) for more details.
+Continuous batching functionality is used within OpenVINO Model Server (OVMS) to serve LLMs, see [here](https://docs.openvino.ai/2026/model-server/ovms_what_is_openvino_model_server.html) for more details.


 ## Additional Resources

-- [OpenVINO Generative AI workflow](https://docs.openvino.ai/2025/openvino-workflow-generative.html)
+- [OpenVINO Generative AI workflow](https://docs.openvino.ai/2026/openvino-workflow-generative.html)
 - [Optimum Intel and OpenVINO](https://huggingface.co/docs/optimum/intel/openvino/export)
 - [OpenVINO Notebooks with GenAI](https://openvinotoolkit.github.io/openvino_notebooks/?libraries=OpenVINO+GenAI)
```
cmake/features.cmake
Lines changed: 1 addition & 0 deletions

```diff
@@ -9,6 +9,7 @@ option(ENABLE_SAMPLES "Enable samples build" ON)
 option(ENABLE_TESTS "Enable tests build" ON)
 option(ENABLE_TOOLS "Enable tools build" ON)
 option(ENABLE_GGUF "Enable support for GGUF format" ON)
+option(ENABLE_SAFETENSORS "Enable support for Safetensors format" ON)
 option(ENABLE_XGRAMMAR "Enable support for structured output generation with xgrammar backend" ON)
 option(ENABLE_DYNAMIC_WEIGHT_MANAGEMENT "Enable offloading model weights (load/release)" OFF)
 option(ENABLE_OPENVINO_NEW_ARCH "Enable OpenVINO new architecture for QWen3.5 etc models support" OFF)
```

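Like its neighbouring flags, the new ENABLE_SAFETENSORS option is an ordinary CMake cache variable set at configure time; a minimal sketch (the build directory name and the OFF value are illustrative — the option defaults to ON):

```shell
# Configure with Safetensors support explicitly disabled; every other
# feature flag keeps its default from cmake/features.cmake.
cmake -S . -B build -DENABLE_SAFETENSORS=OFF
cmake --build build --parallel
```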
samples/cpp/image_generation/README.md
Lines changed: 1 addition & 1 deletion

```diff
@@ -42,7 +42,7 @@ optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task sta

 ## Run text to image

-Follow [Get Started with Samples](https://docs.openvino.ai/2025/get-started/learn-openvino/openvino-samples/get-started-demos.html) to run the sample.
+Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to run the sample.

 `stable_diffusion ./dreamlike_anime_1_0_ov/FP16 'cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting'`
```

samples/cpp/rag/README.md
Lines changed: 1 addition & 1 deletion

```diff
@@ -27,7 +27,7 @@ optimum-cli export openvino --task text-classification --model cross-encoder/ms-

 ## Run

-Follow [Get Started with Samples](https://docs.openvino.ai/2025/get-started/learn-openvino/openvino-samples/get-started-demos.html) to run the sample.
+Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to run the sample.

 ### 1. Text Embedding Sample (`text_embeddings.cpp`)
 - **Description:**
```

samples/cpp/speech_generation/README.md
Lines changed: 1 addition & 1 deletion

```diff
@@ -38,7 +38,7 @@ python create_speaker_embedding.py

 ## Run Text-to-speech sample

-Follow [Get Started with Samples](https://docs.openvino.ai/2025/get-started/learn-openvino/openvino-samples/get-started-demos.html)
+Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html)
 to run the sample.

 `text-to-speech speecht5_tts "Hello OpenVINO GenAI" speaker_embedding.bin`
```

samples/cpp/text_generation/README.md
Lines changed: 2 additions & 2 deletions

```diff
@@ -32,7 +32,7 @@ and architectures, we still recommend converting the model to the IR format usin

 ## Sample Descriptions
 ### Common information
-Follow [Get Started with Samples](https://docs.openvino.ai/2025/get-started/learn-openvino/openvino-samples/get-started-demos.html) to get common information about OpenVINO samples.
+Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to get common information about OpenVINO samples.
 Follow [build instruction](../../../src/docs/BUILD.md) to build GenAI samples

 GPUs usually provide better performance compared to CPUs. Modify the source code to change the device for inference to the GPU.
@@ -64,7 +64,7 @@ The following template can be used as a default, but it may not work properly wi
 #### NPU support

 NPU device is supported with some limitations. See [NPU inference of
-LLMs](https://docs.openvino.ai/2025/openvino-workflow-generative/inference-with-genai/inference-with-genai-on-npu.html) documentation. In particular:
+LLMs](https://docs.openvino.ai/2026/openvino-workflow-generative/inference-with-genai/inference-with-genai-on-npu.html) documentation. In particular:

 - Models must be exported with symmetric INT4 quantization (`optimum-cli export openvino --weight-format int4 --sym --model <model> <output_folder>`).
   For models with more than 4B parameters, channel wise quantization should be used (`--group-size -1`).
```

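The two NPU export requirements quoted in the diff above combine into a single optimum-cli invocation; a sketch using a hypothetical model ID and output folder (substitute your own):

```shell
# Symmetric INT4 weights (--sym) as required for NPU; --group-size -1
# selects channel-wise quantization, recommended for models over 4B parameters.
# "Qwen/Qwen2-7B" and "qwen2_7b_int4_npu" are illustrative placeholders.
optimum-cli export openvino --model Qwen/Qwen2-7B \
    --weight-format int4 --sym --group-size -1 qwen2_7b_int4_npu
```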