Skip to content

Commit 5099a65

Browse files
[JS API] Add text_generation/benchmark_genai.js sample (openvinotoolkit#2826)
## Description Add new sample for js api similar to python [one](https://github.com/openvinotoolkit/openvino.genai/blob/87e37a9f006a4d9720d0a1d1f6c0210bdabeca34/samples/python/text_generation/benchmark_genai.py). Add [yargs](https://www.npmjs.com/package/yargs) to manage the sample arguments. Expose `PerfMetrics.add()` as JavaScript doesn't support operator overloading to return custom objects like Python does. Run sample in tests/python_tests/samples/test_benchmark_genai.py <!--- Jira ticket number (e.g., 123). Delete if there's no ticket. Don't include full link or project name. --> Ticket: [CVS-172877](https://jira.devtools.intel.com/browse/CVS-172877) --------- Co-authored-by: Kirill Suvorov <kirill_suvorov@mail.ru>
1 parent a7df37c commit 5099a65

13 files changed

Lines changed: 493 additions & 48 deletions

File tree

samples/js/package-lock.json

Lines changed: 257 additions & 41 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

samples/js/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"license": "Apache-2.0",
55
"type": "module",
66
"devDependencies": {
7-
"openvino-genai-node": "^2025.4.0"
7+
"openvino-genai-node": "^2025.4.0",
8+
"yargs": "^18.0.0"
89
},
910
"engines": {
1011
"node": ">=21.0.0"

samples/js/text_generation/README.md

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,16 @@ and architectures, we still recommend converting the model to the IR format usin
2929
## Sample Descriptions
3030
### Common information
3131

32-
Compile GenAI JavaScript bindings archive first using the instructions in [../../../src/js/README.md](../../../src/js/README.md#build-bindings).
32+
When you use the [openvino.genai](https://github.com/openvinotoolkit/openvino.genai) **release branch**, install dependencies before running samples.
33+
In the current directory, run:
34+
```bash
35+
npm install
36+
```
37+
38+
If you use the master branch, you may need to follow
39+
[this instruction](../../../src/js/README.md#build-bindings)
40+
to build the latest version of `openvino-genai-node` from source first, then install dependencies.
3341

34-
Run `npm install` and the examples will be ready to run.
3542

3643
Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
3744

@@ -92,6 +99,26 @@ Recommended models: Qwen/Qwen2.5-3B-Instruct, Qwen/Qwen2.5-7B-Instruct
9299
node react_sample.js model_dir
93100
```
94101

102+
### 6. LLMs benchmarking sample (`benchmark_genai`)
103+
- **Description:**
104+
This sample script demonstrates how to benchmark LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics.
105+
106+
For more information on how performance metrics are calculated, please follow the [performance-metrics tutorial](../../../src/README.md#performance-metrics).
107+
- **Main Feature:** Benchmark model via GenAI
108+
- **Run Command:**
109+
```bash
110+
node benchmark_genai.js [-m MODEL] [-p PROMPT] [--nw NUM_WARMUP] [-n NUM_ITER] [--mt MAX_NEW_TOKENS] [-d DEVICE]
111+
```
112+
113+
#### Options
114+
- `-m`, `--model`: Path to model and tokenizers base directory. [string] [required]
115+
- `-m`, `--model`: Path to model and tokenizers base directory. [string] [required]
- `-p`, `--prompt`: The prompt to generate text. If neither `-p` nor `--pf` is specified, the default prompt `The Sky is blue because` is used. [string]
116+
- `--prompt_file`, `--pf`: Read prompt from file. [string]
117+
- `--num_warmup`, `--nw`: Number of warmup iterations. [number] [default: 1]
118+
- `-n`, `--num_iter`: Number of iterations. [number] [default: 2]
119+
- `--max_new_tokens`, `--mt`: Maximal number of new tokens. [number] [default: 20]
120+
- `-d`, `--device`: Device to run the model on. [string] [default: "CPU"]
121+
95122
### Troubleshooting
96123

97124
#### Unicode characters encoding error on Windows
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

import { LLMPipeline } from "openvino-genai-node";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";
import { readFileSync } from "fs";

// A bare `main();` call leaves the returned promise floating: any rejection
// (bad model path, generation failure) becomes an unhandled promise
// rejection with a confusing stack. Catch it, report, and exit non-zero.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});

/**
 * Benchmarks an LLM via OpenVINO GenAI: parses CLI options, runs warm-up
 * iterations, then `num_iter` timed generations, accumulating PerfMetrics
 * across iterations and printing summary statistics (mean ± std).
 */
async function main() {
  const argv = yargs(hideBin(process.argv))
    .option("model", {
      alias: "m",
      type: "string",
      demandOption: true,
      describe: "Path to model and tokenizers base directory.",
    })
    .option("prompt", {
      alias: "p",
      type: "string",
      describe:
        "The prompt to generate text. If without `-p` and `--pf`, the default prompt is `The Sky is blue because`.",
    })
    .option("prompt_file", {
      alias: "pf",
      type: "string",
      describe: "Read prompt from file.",
    })
    .option("num_warmup", {
      alias: "nw",
      type: "number",
      default: 1,
      describe: "Number of warmup iterations.",
    })
    .option("num_iter", {
      alias: "n",
      type: "number",
      default: 2,
      describe: "Number of iterations.",
    })
    .option("max_new_tokens", {
      alias: "mt",
      type: "number",
      default: 20,
      describe: "Maximal number of new tokens.",
    })
    .option("device", {
      alias: "d",
      type: "string",
      default: "CPU",
      describe: "Device.",
    })
    .parse();

  // --prompt and --prompt_file are mutually exclusive.
  if (argv.prompt !== undefined && argv.prompt_file !== undefined) {
    console.error(`Cannot specify both --prompt and --prompt_file options simultaneously!`);
    process.exit(1);
  }
  // The pipeline expects an array of prompts; this sample benchmarks one.
  let prompt;
  if (argv.prompt_file !== undefined) {
    prompt = [readFileSync(argv.prompt_file, "utf-8")];
  } else {
    prompt = argv.prompt === undefined ? ["The Sky is blue because"] : [argv.prompt];
  }
  if (prompt.length === 0 || prompt[0].trim() === "") {
    throw new Error("Prompt is empty!");
  }

  const modelsPath = argv.model;
  const { device } = argv;
  const numWarmup = argv.num_warmup;
  const numIter = argv.num_iter;

  const config = {
    max_new_tokens: argv.max_new_tokens,
    // Benchmark the raw prompt, not a chat-templated variant of it.
    apply_chat_template: false,
    return_decoded_results: true,
  };

  let pipe;
  if (device === "NPU") {
    // NPU path takes no scheduler configuration.
    pipe = await LLMPipeline(modelsPath, device);
  } else {
    // Prefix caching is disabled so repeated runs of the same prompt are
    // not accelerated by cached prefixes, which would skew the timings.
    const schedulerConfig = {
      enable_prefix_caching: false,
      max_num_batched_tokens: Number.MAX_SAFE_INTEGER,
    };
    pipe = await LLMPipeline(modelsPath, device, { schedulerConfig });
  }

  // Warm-up iterations are excluded from the reported metrics.
  for (let i = 0; i < numWarmup; i++) {
    await pipe.generate(prompt, config);
  }

  // First timed iteration seeds the accumulator; PerfMetrics.add() merges
  // each later iteration (JS has no `+` operator overloading, unlike the
  // Python sample this mirrors). `perfMetrics` is never reassigned — add()
  // mutates it in place — so it is a const.
  let res = await pipe.generate(prompt, config);
  const { perfMetrics } = res;
  for (let i = 0; i < numIter - 1; i++) {
    res = await pipe.generate(prompt, config);
    perfMetrics.add(res.perfMetrics);
  }

  console.log(`Output token size: ${perfMetrics.getNumGeneratedTokens()}`);
  console.log(`Load time: ${perfMetrics.getLoadTime()} ms`);
  console.log(`Generate time: ${perfMetrics.getGenerateDuration().mean} ± ${perfMetrics.getGenerateDuration().std} ms`);
  console.log(`Tokenization time: ${perfMetrics.getTokenizationDuration().mean} ± ${perfMetrics.getTokenizationDuration().std} ms`);
  console.log(`Detokenization time: ${perfMetrics.getDetokenizationDuration().mean} ± ${perfMetrics.getDetokenizationDuration().std} ms`);
  console.log(`TTFT: ${perfMetrics.getTTFT().mean} ± ${perfMetrics.getTTFT().std} ms`);
  console.log(`TPOT: ${perfMetrics.getTPOT().mean} ± ${perfMetrics.getTPOT().std} ms`);
  console.log(`Throughput : ${perfMetrics.getThroughput().mean} ± ${perfMetrics.getThroughput().std} tokens/s`);
}

samples/python/text_generation/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,9 @@ LLMPipeline and Tokenizer objects can be initialized directly from the memory bu
185185

186186
### 9. LLMs benchmarking sample (`benchmark_genai`)
187187
- **Description:**
188-
This sample script demonstrates how to benchmark an LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics.
188+
This sample script demonstrates how to benchmark LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics.
189189

190-
For more information how performance metrics are calculated please follow [performance-metrics tutorial](../../../src/README.md#performance-metrics).
190+
For more information on how performance metrics are calculated, please follow the [performance-metrics tutorial](../../../src/README.md#performance-metrics).
191191
- **Main Feature:** Benchmark model via GenAI
192192
- **Run Command:**
193193
```bash

samples/python/text_generation/benchmark_genai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def main():
2121
args = parser.parse_args()
2222

2323
if args.prompt is not None and args.prompt_file is not None:
24-
raise RuntimeError("Prompt and prompt file should not exist together!")
24+
raise RuntimeError("Cannot specify both --prompt and --prompt_file options simultaneously!")
2525
else:
2626
if args.prompt_file is not None:
2727
with open(args.prompt_file, "r", encoding="utf-8") as f:

src/js/include/helper.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@ ov::genai::ChatHistory js_to_cpp<ov::genai::ChatHistory>(const Napi::Env& env, c
4747
template <>
4848
ov::genai::SchedulerConfig js_to_cpp<ov::genai::SchedulerConfig>(const Napi::Env& env, const Napi::Value& value);
4949

50+
/**
51+
* @brief Unwraps a C++ object from a JavaScript wrapper.
52+
* @tparam TargetType The C++ class type to extract.
53+
* @return Reference to the unwrapped C++ object.
54+
*/
55+
template <typename TargetType>
56+
TargetType& unwrap(const Napi::Env& env, const Napi::Value& value);
57+
58+
template <>
59+
ov::genai::PerfMetrics& unwrap<ov::genai::PerfMetrics>(const Napi::Env& env, const Napi::Value& value);
60+
5061
/**
5162
* @brief Template function to convert C++ data types into Javascript data types
5263
* @tparam TargetType Destinated Javascript data type.

src/js/include/perf_metrics.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ class PerfMetricsWrapper : public Napi::ObjectWrap<PerfMetricsWrapper> {
2828
Napi::Value get_grammar_compile_time(const Napi::CallbackInfo& info);
2929

3030
Napi::Value get_raw_metrics(const Napi::CallbackInfo& info);
31+
Napi::Value add(const Napi::CallbackInfo& info);
32+
ov::genai::PerfMetrics& get_value();
3133

3234
private:
3335
ov::genai::PerfMetrics _metrics;

src/js/lib/pipelines/llmPipeline.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ export interface PerfMetrics {
113113
getGrammarCompileTime(): SummaryStats;
114114
/** A structure of RawPerfMetrics type that holds raw metrics. */
115115
rawMetrics: RawMetrics;
116+
117+
/** Adds the metrics from another PerfMetrics object to this one.
118+
* @returns The current PerfMetrics instance.
119+
*/
120+
add(other: PerfMetrics): this;
116121
}
117122

118123
export class DecodedResults {

src/js/src/helper.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "include/helper.hpp"
22

3+
#include "include/addon.hpp"
4+
#include "include/perf_metrics.hpp"
5+
36
namespace {
47
constexpr const char* JS_SCHEDULER_CONFIG_KEY = "schedulerConfig";
58
constexpr const char* CPP_SCHEDULER_CONFIG_KEY = "scheduler_config";
@@ -173,6 +176,19 @@ ov::genai::SchedulerConfig js_to_cpp<ov::genai::SchedulerConfig>(const Napi::Env
173176
return config;
174177
}
175178

179+
template <>
180+
ov::genai::PerfMetrics& unwrap<ov::genai::PerfMetrics>(const Napi::Env& env, const Napi::Value& value) {
181+
const auto obj = value.As<Napi::Object>();
182+
const auto& prototype = env.GetInstanceData<AddonData>()->perf_metrics;
183+
184+
OPENVINO_ASSERT(prototype, "Invalid pointer to prototype.");
185+
OPENVINO_ASSERT(obj.InstanceOf(prototype.Value().As<Napi::Function>()),
186+
"Passed argument is not of type PerfMetrics");
187+
188+
const auto js_metrics = Napi::ObjectWrap<PerfMetricsWrapper>::Unwrap(obj);
189+
return js_metrics->get_value();
190+
}
191+
176192
template <>
177193
Napi::Value cpp_to_js<ov::genai::EmbeddingResult, Napi::Value>(const Napi::Env& env,
178194
const ov::genai::EmbeddingResult embedding_result) {

0 commit comments

Comments
 (0)