Skip to content

Commit 5099a65

Browse files
[JS API] Add text_generation/benchmark_genai.js sample (openvinotoolkit#2826)
## Description Add new sample for js api similar to python [one](https://github.com/openvinotoolkit/openvino.genai/blob/87e37a9f006a4d9720d0a1d1f6c0210bdabeca34/samples/python/text_generation/benchmark_genai.py). Add [yargs](https://www.npmjs.com/package/yargs) to manage the sample arguments. Expose `PerfMetrics.add()` as JavaScript doesn't support operator overloading to return custom objects like Python does. Run sample in tests/python_tests/samples/test_benchmark_genai.py <!--- Jira ticket number (e.g., 123). Delete if there's no ticket. Don't include full link or project name. --> Ticket: [CVS-172877](https://jira.devtools.intel.com/browse/CVS-172877) --------- Co-authored-by: Kirill Suvorov <kirill_suvorov@mail.ru>
1 parent a7df37c commit 5099a65

13 files changed

Lines changed: 493 additions & 48 deletions

File tree

samples/js/package-lock.json

Lines changed: 257 additions & 41 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

samples/js/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"license": "Apache-2.0",
55
"type": "module",
66
"devDependencies": {
7-
"openvino-genai-node": "^2025.4.0"
7+
"openvino-genai-node": "^2025.4.0",
8+
"yargs": "^18.0.0"
89
},
910
"engines": {
1011
"node": ">=21.0.0"

samples/js/text_generation/README.md

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,16 @@ and architectures, we still recommend converting the model to the IR format usin
2929
## Sample Descriptions
3030
### Common information
3131

32-
Compile GenAI JavaScript bindings archive first using the instructions in [../../../src/js/README.md](../../../src/js/README.md#build-bindings).
32+
When you use the [openvino.genai](https://github.com/openvinotoolkit/openvino.genai) **release branch**, install dependencies before running samples.
33+
In the current directory, run:
34+
```bash
35+
npm install
36+
```
37+
38+
If you use the master branch, you may need to follow
39+
[this instruction](../../../src/js/README.md#build-bindings)
40+
to build the latest version of `openvino-genai-node` from source first, then install dependencies.
3341

34-
Run `npm install` and the examples will be ready to run.
3542

3643
Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
3744

@@ -92,6 +99,26 @@ Recommended models: Qwen/Qwen2.5-3B-Instruct, Qwen/Qwen2.5-7B-Instruct
9299
node react_sample.js model_dir
93100
```
94101

102+
### 6. LLMs benchmarking sample (`benchmark_genai`)
103+
- **Description:**
104+
This sample script demonstrates how to benchmark LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics.
105+
106+
For more information on how performance metrics are calculated, please follow the [performance-metrics tutorial](../../../src/README.md#performance-metrics).
107+
- **Main Feature:** Benchmark model via GenAI
108+
- **Run Command:**
109+
```bash
110+
node benchmark_genai.js [-m MODEL] [-p PROMPT] [--nw NUM_WARMUP] [-n NUM_ITER] [--mt MAX_NEW_TOKENS] [-d DEVICE]
111+
```
112+
113+
#### Options
114+
- `-m`, `--model`: Path to model and tokenizers base directory. [string] [required]
115+
- `-m`, `--model`: Path to model and tokenizers base directory. [string] [required]
- `-p`, `--prompt`: The prompt to generate text. If neither `-p` nor `--pf` is specified, the default prompt `The Sky is blue because` is used. [string]
116+
- `--prompt_file`, `--pf`: Read prompt from file. [string]
117+
- `--num_warmup`, `--nw`: Number of warmup iterations. [number] [default: 1]
118+
- `-n`, `--num_iter`: Number of iterations. [number] [default: 2]
119+
- `--max_new_tokens`, `--mt`: Maximal number of new tokens. [number] [default: 20]
120+
- `-d`, `--device`: Device to run the model on. [string] [default: "CPU"]
121+
95122
### Troubleshooting
96123

97124
#### Unicode characters encoding error on Windows
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

import { LLMPipeline } from "openvino-genai-node";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";
import { readFileSync } from "fs";

// A bare `main();` call leaves the returned promise floating: any rejection
// (bad model path, generation failure) becomes an unhandled promise
// rejection with a confusing stack. Catch it, report, and exit non-zero.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});

/**
 * Benchmarks an LLM via OpenVINO GenAI: parses CLI options, runs warm-up
 * iterations, then `num_iter` timed generations, accumulating PerfMetrics
 * across iterations and printing summary statistics (mean ± std).
 */
async function main() {
  const argv = yargs(hideBin(process.argv))
    .option("model", {
      alias: "m",
      type: "string",
      demandOption: true,
      describe: "Path to model and tokenizers base directory.",
    })
    .option("prompt", {
      alias: "p",
      type: "string",
      describe:
        "The prompt to generate text. If without `-p` and `--pf`, the default prompt is `The Sky is blue because`.",
    })
    .option("prompt_file", {
      alias: "pf",
      type: "string",
      describe: "Read prompt from file.",
    })
    .option("num_warmup", {
      alias: "nw",
      type: "number",
      default: 1,
      describe: "Number of warmup iterations.",
    })
    .option("num_iter", {
      alias: "n",
      type: "number",
      default: 2,
      describe: "Number of iterations.",
    })
    .option("max_new_tokens", {
      alias: "mt",
      type: "number",
      default: 20,
      describe: "Maximal number of new tokens.",
    })
    .option("device", {
      alias: "d",
      type: "string",
      default: "CPU",
      describe: "Device.",
    })
    .parse();

  // --prompt and --prompt_file are mutually exclusive.
  if (argv.prompt !== undefined && argv.prompt_file !== undefined) {
    console.error(`Cannot specify both --prompt and --prompt_file options simultaneously!`);
    process.exit(1);
  }
  // The pipeline expects an array of prompts; this sample benchmarks one.
  let prompt;
  if (argv.prompt_file !== undefined) {
    prompt = [readFileSync(argv.prompt_file, "utf-8")];
  } else {
    prompt = argv.prompt === undefined ? ["The Sky is blue because"] : [argv.prompt];
  }
  if (prompt.length === 0 || prompt[0].trim() === "") {
    throw new Error("Prompt is empty!");
  }

  const modelsPath = argv.model;
  const { device } = argv;
  const numWarmup = argv.num_warmup;
  const numIter = argv.num_iter;

  const config = {
    max_new_tokens: argv.max_new_tokens,
    // Benchmark the raw prompt, not a chat-templated variant of it.
    apply_chat_template: false,
    return_decoded_results: true,
  };

  let pipe;
  if (device === "NPU") {
    // NPU path takes no scheduler configuration.
    pipe = await LLMPipeline(modelsPath, device);
  } else {
    // Prefix caching is disabled so repeated runs of the same prompt are
    // not accelerated by cached prefixes, which would skew the timings.
    const schedulerConfig = {
      enable_prefix_caching: false,
      max_num_batched_tokens: Number.MAX_SAFE_INTEGER,
    };
    pipe = await LLMPipeline(modelsPath, device, { schedulerConfig });
  }

  // Warm-up iterations are excluded from the reported metrics.
  for (let i = 0; i < numWarmup; i++) {
    await pipe.generate(prompt, config);
  }

  // First timed iteration seeds the accumulator; PerfMetrics.add() merges
  // each later iteration (JS has no `+` operator overloading, unlike the
  // Python sample this mirrors). `perfMetrics` is never reassigned — add()
  // mutates it in place — so it is a const.
  let res = await pipe.generate(prompt, config);
  const { perfMetrics } = res;
  for (let i = 0; i < numIter - 1; i++) {
    res = await pipe.generate(prompt, config);
    perfMetrics.add(res.perfMetrics);
  }

  console.log(`Output token size: ${perfMetrics.getNumGeneratedTokens()}`);
  console.log(`Load time: ${perfMetrics.getLoadTime()} ms`);
  console.log(`Generate time: ${perfMetrics.getGenerateDuration().mean} ± ${perfMetrics.getGenerateDuration().std} ms`);
  console.log(`Tokenization time: ${perfMetrics.getTokenizationDuration().mean} ± ${perfMetrics.getTokenizationDuration().std} ms`);
  console.log(`Detokenization time: ${perfMetrics.getDetokenizationDuration().mean} ± ${perfMetrics.getDetokenizationDuration().std} ms`);
  console.log(`TTFT: ${perfMetrics.getTTFT().mean} ± ${perfMetrics.getTTFT().std} ms`);
  console.log(`TPOT: ${perfMetrics.getTPOT().mean} ± ${perfMetrics.getTPOT().std} ms`);
  console.log(`Throughput : ${perfMetrics.getThroughput().mean} ± ${perfMetrics.getThroughput().std} tokens/s`);
}

samples/python/text_generation/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,9 @@ LLMPipeline and Tokenizer objects can be initialized directly from the memory bu
185185

186186
### 9. LLMs benchmarking sample (`benchmark_genai`)
187187
- **Description:**
188-
This sample script demonstrates how to benchmark an LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics.
188+
This sample script demonstrates how to benchmark LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics.
189189

190-
For more information how performance metrics are calculated please follow [performance-metrics tutorial](../../../src/README.md#performance-metrics).
190+
For more information on how performance metrics are calculated, please follow the [performance-metrics tutorial](../../../src/README.md#performance-metrics).
191191
- **Main Feature:** Benchmark model via GenAI
192192
- **Run Command:**
193193
```bash

samples/python/text_generation/benchmark_genai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def main():
2121
args = parser.parse_args()
2222

2323
if args.prompt is not None and args.prompt_file is not None:
24-
raise RuntimeError("Prompt and prompt file should not exist together!")
24+
raise RuntimeError("Cannot specify both --prompt and --prompt_file options simultaneously!")
2525
else:
2626
if args.prompt_file is not None:
2727
with open(args.prompt_file, "r", encoding="utf-8") as f:

src/js/include/helper.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@ ov::genai::ChatHistory js_to_cpp<ov::genai::ChatHistory>(const Napi::Env& env, c
4747
template <>
4848
ov::genai::SchedulerConfig js_to_cpp<ov::genai::SchedulerConfig>(const Napi::Env& env, const Napi::Value& value);
4949

50+
/**
51+
* @brief Unwraps a C++ object from a JavaScript wrapper.
52+
* @tparam TargetType The C++ class type to extract.
53+
* @return Reference to the unwrapped C++ object.
54+
*/
55+
template <typename TargetType>
56+
TargetType& unwrap(const Napi::Env& env, const Napi::Value& value);
57+
58+
template <>
59+
ov::genai::PerfMetrics& unwrap<ov::genai::PerfMetrics>(const Napi::Env& env, const Napi::Value& value);
60+
5061
/**
5162
* @brief Template function to convert C++ data types into Javascript data types
5263
* @tparam TargetType Destinated Javascript data type.

src/js/include/perf_metrics.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ class PerfMetricsWrapper : public Napi::ObjectWrap<PerfMetricsWrapper> {
2828
Napi::Value get_grammar_compile_time(const Napi::CallbackInfo& info);
2929

3030
Napi::Value get_raw_metrics(const Napi::CallbackInfo& info);
31+
Napi::Value add(const Napi::CallbackInfo& info);
32+
ov::genai::PerfMetrics& get_value();
3133

3234
private:
3335
ov::genai::PerfMetrics _metrics;

src/js/lib/pipelines/llmPipeline.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ export interface PerfMetrics {
113113
getGrammarCompileTime(): SummaryStats;
114114
/** A structure of RawPerfMetrics type that holds raw metrics. */
115115
rawMetrics: RawMetrics;
116+
117+
/** Adds the metrics from another PerfMetrics object to this one.
118+
* @returns The current PerfMetrics instance.
119+
*/
120+
add(other: PerfMetrics): this;
116121
}
117122

118123
export class DecodedResults {

src/js/src/helper.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "include/helper.hpp"
22

3+
#include "include/addon.hpp"
4+
#include "include/perf_metrics.hpp"
5+
36
namespace {
47
constexpr const char* JS_SCHEDULER_CONFIG_KEY = "schedulerConfig";
58
constexpr const char* CPP_SCHEDULER_CONFIG_KEY = "scheduler_config";
@@ -173,6 +176,19 @@ ov::genai::SchedulerConfig js_to_cpp<ov::genai::SchedulerConfig>(const Napi::Env
173176
return config;
174177
}
175178

179+
template <>
180+
ov::genai::PerfMetrics& unwrap<ov::genai::PerfMetrics>(const Napi::Env& env, const Napi::Value& value) {
181+
const auto obj = value.As<Napi::Object>();
182+
const auto& prototype = env.GetInstanceData<AddonData>()->perf_metrics;
183+
184+
OPENVINO_ASSERT(prototype, "Invalid pointer to prototype.");
185+
OPENVINO_ASSERT(obj.InstanceOf(prototype.Value().As<Napi::Function>()),
186+
"Passed argument is not of type PerfMetrics");
187+
188+
const auto js_metrics = Napi::ObjectWrap<PerfMetricsWrapper>::Unwrap(obj);
189+
return js_metrics->get_value();
190+
}
191+
176192
template <>
177193
Napi::Value cpp_to_js<ov::genai::EmbeddingResult, Napi::Value>(const Napi::Env& env,
178194
const ov::genai::EmbeddingResult embedding_result) {

0 commit comments

Comments
 (0)