@@ -9,12 +9,265 @@ For chat applications, OpenVINO GenAI provides special optimizations to maintain
99
1010Refer to the [How It Works](/docs/concepts/how-it-works) page for more information about KV-cache.
1111
12- :::tip
13- Use ` start_chat() ` and ` finish_chat() ` to properly manage the chat session's KV-cache. This improves performance by reusing context between messages .
12+ :::info
13+ Chat mode is supported for both ` LLMPipeline ` and ` VLMPipeline ` .
1414:::
1515
16+ ## ` ChatHistory `
17+
18+ [ ` ChatHistory ` ] ( https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.ChatHistory.html ) stores conversation messages and optional metadata for chat templates.
19+ Messages are stored as JSON-like objects, so it supports various nested message structures with any field names your model or chat template requires (not just simple ` "role" ` and ` "content" ` fields).
20+
21+ A simple chat example (with grouped beam search decoding):
22+
23+ <LanguageTabs >
24+ <TabItemPython >
25+ ``` python showLineNumbers
26+ import openvino_genai as ov_genai
27+
28+ pipe = ov_genai.LLMPipeline(model_path, ' CPU' )
29+
30+ config = {' max_new_tokens' : 100 , ' num_beam_groups' : 3 , ' num_beams' : 15 , ' diversity_penalty' : 1.5 }
31+ pipe.set_generation_config(config)
32+
33+ # highlight-next-line
34+ chat_history = ov_genai.ChatHistory()
35+
36+ while True :
37+ try :
38+ prompt = input (' question:\n ' )
39+ except EOFError :
40+ break
41+
42+ # highlight-next-line
43+ chat_history.append({" role" : " user" , " content" : prompt})
44+ # highlight-next-line
45+ decoded_results = pipe.generate(chat_history)
46+ # Add assistant's response to chat history
47+ # highlight-next-line
48+ chat_history.append({" role" : " assistant" , " content" : decoded_results.texts[0 ]})
49+
50+ print (' answer:\n ' )
51+ print (decoded_results.texts[0 ])
52+ print (' \n ----------\n ' )
53+ ```
54+ </TabItemPython >
55+ <TabItemCpp >
56+ ``` cpp showLineNumbers
57+ #include " openvino/genai/llm_pipeline.hpp"
58+ #include < iostream>
59+
60+ int main (int argc, char* argv[ ] ) {
61+ std ::string prompt ;
62+
63+ std ::string model_path = argv [1 ];
64+ ov ::genai ::LLMPipeline pipe (model_path , " CPU" );
65+
66+ ov ::genai ::GenerationConfig config ;
67+ config .max_new_tokens = 100 ;
68+ config .num_beam_groups = 3 ;
69+ config .num_beams = 15 ;
70+ config .diversity_penalty = 1.5f ;
71+
72+ // highlight-next-line
73+ ov ::genai ::ChatHistory chat_history ;
74+
75+ std ::cout << " question:\n " ;
76+ while (std ::getline (std ::cin , prompt )) {
77+ // highlight-next-line
78+ chat_history.push_back({{" role" , " user" }, {" content" , prompt }});
79+ // highlight-next-line
80+ auto decoded_results = pipe .generate (chat_history , config );
81+ // Add assistant's response to chat history
82+ // highlight-next-line
83+ chat_history.push_back({{" role" , " assistant" }, {" content" , decoded_results.texts[0 ]}});
84+
85+ std::cout << " answer:\n " ;
86+ std ::cout << decoded_results .texts [0 ] << std ::endl ;
87+ std ::cout << " \n ----------\n "
88+ " question:\n " ;
89+ }
90+ }
91+ ```
92+ </TabItemCpp>
93+ <TabItemJS>
94+ ```js showLineNumbers
95+ import { LLMPipeline, ChatHistory } from " openvino-genai-node" ;
96+ import readline from ' readline' ;
97+
98+ const pipe = await LLMPipeline(model_path, ' CPU' );
99+
100+ const config = {
101+ max_new_tokens: 100 ,
102+ num_beam_groups: 3 ,
103+ num_beams: 15 ,
104+ diversity_penalty: 1.5
105+ };
106+
107+ // highlight-next-line
108+ const chatHistory = new ChatHistory ();
109+
110+ const rl = readline .createInterface ({
111+ input: process .stdin ,
112+ output: process .stdout ,
113+ });
114+
115+ console .log (' question:' );
116+ rl .on (' line' , async (prompt ) => {
117+ // highlight-next-line
118+ chatHistory .push ({ role: ' user' , content: prompt });
119+ // highlight-next-line
120+ const decodedResults = await pipe .generate (chatHistory , config );
121+ // Add assistant's response to chat history
122+ // highlight-next-line
123+ chatHistory .push ({ role: ' assistant' , content: decodedResults .toString () });
124+
125+ console .log (' answer:' );
126+ console .log (decodedResults .toString ());
127+ console .log (' \n ----------\n question:' );
128+ });
129+
130+ rl .on (' close' , async () => {
131+ process .exit (0 );
132+ });
133+ ```
134+ </TabItemJS>
135+ </LanguageTabs>
136+
16137:::info
17- Chat mode is supported for both ` LLMPipeline ` and ` VLMPipeline ` .
138+ `ChatHistory` messages are not updated automatically when using `pipe.generate()`.
139+ You need to manually append user prompts and model responses to the ` ChatHistory ` instance as shown in the examples above.
140+ :::
141+
142+ ### System Prompt
143+
144+ Add a system message at the beginning to set the assistant's behavior:
145+
146+ <LanguageTabs>
147+ <TabItemPython>
148+ ```python showLineNumbers
149+ import openvino_genai as ov_genai
150+
151+ chat_history = ov_genai .ChatHistory ()
152+ chat_history .append ({" role" : " system" , " content" : " You are a helpful assistant." })
153+
154+ # Or using constructor
155+ chat_history = ov_genai .ChatHistory ([
156+ {" role" : " system" , " content" : " You are a helpful assistant." }
157+ ])
158+ ```
159+ </TabItemPython>
160+ <TabItemCpp>
161+ ```cpp showLineNumbers
162+ #include " openvino/genai/chat_history.hpp"
163+
164+ ov ::genai ::ChatHistory chat_history ;
165+ chat_history .push_back ({{" role" , " system" }, {" content" , " You are a helpful assistant." }});
166+
167+ // Or using constructor
168+ ov ::genai ::ChatHistory chat_history ({
169+ {{" role" , " system" }, {" content" , " You are a helpful assistant." }}
170+ });
171+ ```
172+ </TabItemCpp>
173+ <TabItemJS>
174+ ```js showLineNumbers
175+ import { ChatHistory } from " openvino-genai-node" ;
176+
177+ const chatHistory = new ChatHistory ();
178+ chatHistory .push ({ role: ' system' , content: ' You are a helpful assistant.' });
179+
180+ // Or using constructor
181+ const chatHistory = new ChatHistory ([
182+ { role: ' system' , content: ' You are a helpful assistant.' }
183+ ]);
184+ ```
185+ </TabItemJS>
186+ </LanguageTabs>
187+
188+ ### Chat History Metadata
189+
190+ Additionally, ` ChatHistory ` manages optional metadata for consistent chat template application:
191+ - Tools definitions for function calling and agentic scenarios
192+ - Custom chat template variables (e.g. ` enable_thinking ` for models with extended reasoning like Qwen3)
193+
194+ <LanguageTabs>
195+ <TabItemPython>
196+ ```python showLineNumbers
197+ import openvino_genai as ov_genai
198+ import json
199+
200+ chat_history = ov_genai .ChatHistory ()
201+ chat_history .append ({" role" : " system" , " content" : system_prompt })
202+
203+ # Load tools from JSON string
204+ tools : list [dict ] = json .loads (" ..." )
205+
206+ # Set tools definitions
207+ # highlight-next-line
208+ chat_history .set_tools (tools )
209+ # Set custom chat template variables
210+ # highlight-next-line
211+ chat_history .set_extra_context ({ " enable_thinking" : True })
212+
213+ chat_history .append ({" role" : " user" , " content" : user_prompt })
214+ decoded_results = pipe .generate (chat_history , config )
215+ # Add assistant's response to chat history
216+ chat_history .append ({" role" : " assistant" , " content" : decoded_results .texts [0 ]})
217+ ```
218+ </TabItemPython>
219+ <TabItemCpp>
220+ ```cpp showLineNumbers
221+ #include " openvino/genai/chat_history.hpp"
222+
223+ ov ::genai ::ChatHistory chat_history ;
224+ chat_history .push_back ({{" role" , " system" }, {" content" , system_prompt }});
225+
226+ // Load tools from JSON string
227+ ov ::genai ::JsonContainer tools = ov ::genai ::JsonContainer ::from_json_string (" ..." );
228+
229+ // Set tools definitions
230+ // highlight-next-line
231+ chat_history .set_tools (tools );
232+ // Set custom chat template variables
233+ // highlight-next-line
234+ chat_history .set_extra_context ({{" enable_thinking" , true }});
235+
236+ chat_history .push_back ({{" role" , " user" }, {" content" , user_prompt }});
237+ auto decoded_results = pipe .generate (chat_history , config );
238+ // Add assistant's response to chat history
239+ chat_history .push_back ({{" role" , " assistant" }, {" content" , decoded_results .texts [0 ]}});
240+ ```
241+ </TabItemCpp>
242+ <TabItemJS>
243+ ```js showLineNumbers
244+ import { ChatHistory } from " openvino-genai-node" ;
245+
246+ const chatHistory = new ChatHistory ();
247+ chatHistory .push ({ role: ' system' , content: systemPrompt });
248+
249+ // Load tools from JSON string
250+ const tools = JSON .parse (" ..." );
251+
252+ // Set tools definitions
253+ // highlight-next-line
254+ chatHistory .setTools (tools );
255+ // Set custom chat template variables
256+ // highlight-next-line
257+ chatHistory .setExtraContext ({ enable_thinking: true });
258+
259+ chatHistory .push ({ role: ' user' , content: userPrompt });
260+ const decodedResults = await pipe .generate (chatHistory , config );
261+ // Add assistant's response to chat history
262+ chatHistory .push ({ role: ' assistant' , content: decodedResults .toString () });
263+ ```
264+ </TabItemJS>
265+ </LanguageTabs>
266+
267+ ## `start_chat()` / `finish_chat()` API
268+
269+ :::warning Deprecation Notice
270+ `start_chat()` / `finish_chat()` API is deprecated and will be removed in the next major release. It is recommended to use `ChatHistory` for managing chat conversations.
18271:::
19272
20273A simple chat example (with grouped beam search decoding):
@@ -23,6 +276,7 @@ A simple chat example (with grouped beam search decoding):
23276 <TabItemPython>
24277 ` ` ` python showLineNumbers
25278 import openvino_genai as ov_genai
279+
26280 pipe = ov_genai .LLMPipeline (model_path , ' CPU' )
27281
28282 config = {' max_new_tokens' : 100 , ' num_beam_groups' : 3 , ' num_beams' : 15 , ' diversity_penalty' : 1.5 }
@@ -82,16 +336,16 @@ A simple chat example (with grouped beam search decoding):
82336
83337 const pipe = await LLMPipeline (model_path , ' CPU' );
84338
85- const config = {
86- max_new_tokens : 100 ,
87- num_beam_groups : 3 ,
88- num_beams : 15 ,
89- diversity_penalty : 1.5
339+ const config = {
340+ max_new_tokens: 100 ,
341+ num_beam_groups: 3 ,
342+ num_beams: 15 ,
343+ diversity_penalty: 1.5
90344 };
91345
92346 // highlight-next-line
93347 await pipe .startChat ();
94-
348+
95349 const rl = readline .createInterface ({
96350 input: process .stdin ,
97351 output: process .stdout ,
0 commit comments