
The NPU output length is set to 2048 by default. #966


Open · wants to merge 5 commits into master
20 changes: 10 additions & 10 deletions modules/ollama_openvino/README.md
@@ -584,17 +584,17 @@ Getting started with large language models and using the [GenAI](https://github.
We provide two ways to download the Ollama executable: from Google Drive or from Baidu Drive.
## Google Drive
### Windows
[Download exe](https://drive.google.com/file/d/1Sep1IdGn7mJaE8PCXKYxp_aj1ljiPvpN/view?usp=sharing) + [Download OpenVINO GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250320/openvino_genai_windows_2025.2.0.0.dev20250320_x86_64.zip)
[Download exe](https://drive.google.com/file/d/1Xo3ohbfC852KtJy_4xtn_YrYaH4Y_507/view?usp=sharing) + [Download OpenVINO GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_windows_2025.2.0.0.dev20250513_x86_64.zip)

### Linux (Ubuntu 22.04)
[Download](https://drive.google.com/file/d/1DdBoEGp_eoyJPbpMGVbEivihYSKrCMGt/view?usp=sharing) + [Download OpenVINO GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250320/openvino_genai_ubuntu22_2025.2.0.0.dev20250320_x86_64.tar.gz)
[Download](https://drive.google.com/file/d/1_P7CQqFUqeyx4q5y5bQ-xQsb10T9gzJD/view?usp=sharing) + [Download OpenVINO GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_ubuntu22_2025.2.0.0.dev20250513_x86_64.tar.gz)

## Baidu Drive
### Windows
[Download exe](https://pan.baidu.com/s/1TCH7rYSPr8jQDHLvCeXdLw?pwd=6bk9) + [Download OpenVINO GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250320/openvino_genai_windows_2025.2.0.0.dev20250320_x86_64.zip)
[Download exe](https://pan.baidu.com/s/1uIUjji7Mxf594CJy1vbrVw?pwd=36mq) + [Download OpenVINO GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_windows_2025.2.0.0.dev20250513_x86_64.zip)

### Linux (Ubuntu 22.04)
[Download](https://pan.baidu.com/s/1UVO0ZK4DFTjTwfarQ8LUIw?pwd=pxkd) + [Download OpenVINO GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250320/openvino_genai_ubuntu22_2025.2.0.0.dev20250320_x86_64.tar.gz)
[Download](https://pan.baidu.com/s/1OCq3aKJBiCrtjLKa7kXbMw?pwd=exhz) + [Download OpenVINO GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_ubuntu22_2025.2.0.0.dev20250513_x86_64.tar.gz)

## Docker
### Linux
@@ -725,7 +725,7 @@ Let's take [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://hf-mirror.com/deeps

4. Unzip the OpenVINO GenAI package and set up the environment
```shell
cd openvino_genai_windows_2025.2.0.0.dev20250320_x86_64
cd openvino_genai_windows_2025.2.0.0.dev20250513_x86_64
setupvars.bat
```

@@ -802,9 +802,9 @@ Then build and run Ollama from the root directory of the repository:

3. Initialize the GenAI environment

Download GenAI runtime from [GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250320/openvino_genai_windows_2025.2.0.0.dev20250320_x86_64.zip), then extract it to a directory openvino_genai_windows_2025.2.0.0.dev20250320_x86_64.
Download GenAI runtime from [GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_windows_2025.2.0.0.dev20250513_x86_64.zip), then extract it to a directory openvino_genai_windows_2025.2.0.0.dev20250513_x86_64.
```shell
cd openvino_genai_windows_2025.2.0.0.dev20250320_x86_64
cd openvino_genai_windows_2025.2.0.0.dev20250513_x86_64
setupvars.bat
```

@@ -819,7 +819,7 @@ Then build and run Ollama from the root directory of the repository:
```shell
go build -o ollama.exe
```

6. If you don't want to recompile Ollama, you can use the precompiled executable [ollama](https://drive.google.com/file/d/1iizO9iLhSJGFUu6BgY3EwOchrCyzImUN/view?usp=drive_link) directly; initialize the GenAI environment as in `step 3`, then run Ollama.
6. If you don't want to recompile Ollama, you can use the precompiled executable directly; initialize the GenAI environment as in `step 3`, then run Ollama.

If you encounter an error when executing ollama.exe, it is recommended that you recompile from source.
@@ -840,9 +840,9 @@ Then build and run Ollama from the root directory of the repository:

3. Initialize the GenAI environment

Download GenAI runtime from [GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250320/openvino_genai_ubuntu22_2025.2.0.0.dev20250320_x86_64.tar.gz), then extract it to a directory openvino_genai_ubuntu22_2025.2.0.0.dev20250320_x86_64.
Download GenAI runtime from [GenAI](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_ubuntu22_2025.2.0.0.dev20250513_x86_64.tar.gz), then extract it to a directory openvino_genai_ubuntu22_2025.2.0.0.dev20250513_x86_64.
```shell
cd openvino_genai_ubuntu22_2025.2.0.0.dev20250320_x86_64
cd openvino_genai_ubuntu22_2025.2.0.0.dev20250513_x86_64
source setupvars.sh
```

20 changes: 19 additions & 1 deletion modules/ollama_openvino/genai/genai.go
@@ -22,6 +22,19 @@ package genai
typedef int (*callback_function)(const char*, void*);

extern int goCallbackBridge(char* input, void* ptr);

static ov_status_e ov_genai_llm_pipeline_create_npu_output_2048(const char* models_path,
                                                                 const char* device,
                                                                 ov_genai_llm_pipeline** pipe) {
    return ov_genai_llm_pipeline_create(models_path, "NPU", 4, pipe, "MAX_PROMPT_LEN", "2048", "MIN_RESPONSE_LEN", "256");
}

**Reviewer:** This should not be hardcoded to 2048. It should be updated to take variables `max_prompt_len` and `min_response_len` so that a larger range of sizes can be supported.

**Author:** Hi @gblong1, to my knowledge the default output length for the NPU is 1024, and it is not dynamically adjustable. This can be referenced in the following link: genai npu default output len. The default was set to 2048 because, during testing, some responses were being truncated, which is not the intended behavior. This adjustment keeps responses complete and aligned with expectations. Thank you.

**Reviewer:** I agree that it should be made larger. However, it should not be hardcoded: it should be configurable, so that it doesn't have to be re-hardcoded to a bigger number later, and so that models that need even more than 2048 (like DeepSeek) will also work well.

**Author:** Yes, I agree that a dynamically adjustable output length would be ideal. However, the NPU behaves differently from the CPU/GPU. While the output length for the CPU/GPU can be set at generation time, the NPU requires the output length to be specified when the model is loaded; once loaded, it cannot be changed without reloading the model. At this stage, making the NPU's output length dynamically adjustable would require reloading the model and involve significant changes, so setting it to 2048 is a practical solution that should cover most common use cases.

static ov_status_e ov_genai_llm_pipeline_create_cgo(const char* models_path,
                                                     const char* device,
                                                     ov_genai_llm_pipeline** pipe) {
    return ov_genai_llm_pipeline_create(models_path, device, 0, pipe);
}

*/
import "C"

@@ -111,7 +124,12 @@ func CreatePipeline(modelsPath string, device string) *C.ov_genai_llm_pipeline {
	defer C.free(unsafe.Pointer(cModelsPath))
	defer C.free(unsafe.Pointer(cDevice))

	// C.ov_genai_llm_pipeline_create(cModelsPath, cDevice, &pipeline)
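	// The NPU takes a dedicated creation path because MAX_PROMPT_LEN and
	// MIN_RESPONSE_LEN must be fixed when the pipeline is loaded
	// (see the review discussion above).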
if device == "NPU" {
C.ov_genai_llm_pipeline_create_npu_output_2048(cModelsPath, cDevice, &pipeline)
} else {
C.ov_genai_llm_pipeline_create_cgo(cModelsPath, cDevice, &pipeline)
}
return pipeline
}
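For illustration, here is a minimal sketch of the configurable approach the reviewer suggests, reading the limits from the environment instead of hardcoding them. This is a hypothetical illustration, not part of this PR: the environment-variable names, the helper names, and the include line are assumptions, and the real module wires the OpenVINO GenAI C API through its own cgo preamble.

```go
// Hypothetical sketch only: env-var names, helper names, and the include
// below are assumptions, not part of this PR.
package genai

/*
#include <stdlib.h>
// Assumed include; the real module pulls in the OpenVINO GenAI C API
// through its existing cgo preamble.
#include "llm_pipeline_c.h"

// The variadic property list is assembled on the C side, since Go cannot
// call variadic C functions directly.
static ov_status_e create_npu_pipeline_with_limits(const char* models_path,
                                                   const char* max_prompt_len,
                                                   const char* min_response_len,
                                                   ov_genai_llm_pipeline** pipe) {
    return ov_genai_llm_pipeline_create(models_path, "NPU", 4, pipe,
                                        "MAX_PROMPT_LEN", max_prompt_len,
                                        "MIN_RESPONSE_LEN", min_response_len);
}
*/
import "C"

import (
	"os"
	"unsafe"
)

// envOr returns the value of an environment variable, or def if it is unset.
func envOr(key, def string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return def
}

// CreateNPUPipelineWithLimits loads an NPU pipeline whose prompt/response
// limits come from the environment, falling back to this PR's defaults.
func CreateNPUPipelineWithLimits(modelsPath string) *C.ov_genai_llm_pipeline {
	cPath := C.CString(modelsPath)
	cMaxPrompt := C.CString(envOr("OLLAMA_NPU_MAX_PROMPT_LEN", "2048"))
	cMinResp := C.CString(envOr("OLLAMA_NPU_MIN_RESPONSE_LEN", "256"))
	defer C.free(unsafe.Pointer(cPath))
	defer C.free(unsafe.Pointer(cMaxPrompt))
	defer C.free(unsafe.Pointer(cMinResp))

	var pipeline *C.ov_genai_llm_pipeline
	C.create_npu_pipeline_with_limits(cPath, cMaxPrompt, cMinResp, &pipeline)
	return pipeline
}
```

A pipeline loaded this way would still have fixed limits for its lifetime; per-request changes would require reloading the model, as the author notes above.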

31 changes: 31 additions & 0 deletions modules/ollama_openvino/llm/genai/genaiserver.go
@@ -108,6 +108,36 @@ func SelectDevice(device string, supportedDevices []string) string {
	return device
}

func addIndexToDuplicates(input []string) []string {
	// output := make([]string, 0, len(input))
	var output []string
	counters := make(map[string]int)    // Records the occurrence count of each value
	duplicates := make(map[string]bool) // Marks which values are duplicates

	// First pass: count the occurrences of each value and mark duplicates
	for _, item := range input {
		counters[item]++
		if counters[item] > 1 {
			duplicates[item] = true
		}
	}

	// Second pass: add an index to duplicate values (indices are assigned in
	// descending order: the first duplicate occurrence gets the highest index)
	for _, item := range input {
		if duplicates[item] { // If it's a duplicate
			output = append(output, fmt.Sprintf("%s:%d", item, counters[item]-1))
			counters[item]-- // Update the counter
		} else { // If it's not a duplicate
			output = append(output, item)
		}
	}
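	// If any device is named "GPU", also append a plain "GPU" entry so the
	// bare name remains selectable alongside the indexed ones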
	if ContainsInSlice(input, "GPU") {
		output = append(output, "GPU")
	}

	return output
}
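To illustrate the behavior (a worked example, not part of the diff): duplicates are indexed in descending order, and a plain `GPU` entry is appended whenever `GPU` appears in the input.

```go
// Assumes addIndexToDuplicates and ContainsInSlice from this file are in
// scope; fmt is already imported here.
devices := []string{"CPU", "GPU", "GPU", "NPU"}
fmt.Println(addIndexToDuplicates(devices))
// Output: [CPU GPU:1 GPU:0 NPU GPU]
```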

// NewGenaiServer will run a server
func NewGenaiServer(gpus discover.GpuInfoList, model string, modelname string, inferdevice string, f *ggml.GGML, adapters, projectors []string, opts api.Options, numParallel int) (GenaiServer, error) {
	systemInfo := discover.GetSystemInfo()
@@ -121,6 +151,7 @@ func NewGenaiServer(gpus discover.GpuInfoList, model string, modelname string, i
	for i := 0; i < len(genai_device); i++ {
		genai_device_list = append(genai_device_list, genai_device[i]["device_name"])
	}
	genai_device_list = addIndexToDuplicates(genai_device_list)
	inferdevice = SelectDevice(inferdevice, genai_device_list)

	params := []string{