Skip to content

Commit 1dabb8c

Browse files
[JS] Fix is_generating check failures (openvinotoolkit#3490)
<!-- Keep your pull requests (PRs) as atomic as possible. That increases the likelihood that an individual PR won't be stuck because of adjacent problems, merge conflicts, or code review. Your merged PR is going to appear in the automatically generated release notes on GitHub. So the clearer the title the better. --> ## Description Reset `is_generating` flag before calling `report_error()` in the error path of `generatePerformInferenceThread` to prevent a race condition where the JS event loop could schedule the next `generate()` call before the native thread clears the flag. <!-- Jira ticket number (e.g., 123). Delete if there's no ticket. --> CVS-182798 ## Checklist: - [x] This PR follows [GenAI Contributing guidelines](https://github.com/openvinotoolkit/openvino.genai?tab=contributing-ov-file#contributing). <!-- Always follow them. If there are deviations, explain what and why. --> - [ ] Tests have been updated or added to cover the new code. <!-- Specify exactly which tests were added or updated. If the change isn't maintenance related, update the tests at https://github.com/openvinotoolkit/openvino.genai/tree/master/tests or explain in the description why the tests don't need an update. --> - [x] This PR fully addresses the ticket. <!--- If not, explain clearly what is covered and what is not. If follow-up pull requests are needed, specify in the description. --> - [ ] I have made corresponding changes to the documentation. <!-- Run github.com/\<username>/openvino.genai/actions/workflows/deploy_gh_pages.yml on your fork with your branch as a parameter to deploy a test version with the updated content. Replace this comment with the link to the built docs. If the documentation is updated in a separate PR, clearly specify it. --> --------- Signed-off-by: Kirill Suvorov <kirill.suvorov@intel.com>
1 parent a384cce commit 1dabb8c

File tree

12 files changed

+43
-34
lines changed

12 files changed

+43
-34
lines changed

src/js/include/llm_pipeline/init_worker.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <atomic>
34
#include <napi.h>
45

56
#include "openvino/genai/llm_pipeline.hpp"
@@ -10,7 +11,7 @@ class InitWorker : public AsyncWorker {
1011
public:
1112
InitWorker(Function& callback,
1213
std::shared_ptr<ov::genai::LLMPipeline>& pipe,
13-
std::shared_ptr<bool> is_initializing,
14+
std::shared_ptr<std::atomic<bool>> is_initializing,
1415
const std::string model_path,
1516
std::string device,
1617
ov::AnyMap properties);
@@ -22,7 +23,7 @@ class InitWorker : public AsyncWorker {
2223

2324
private:
2425
std::shared_ptr<ov::genai::LLMPipeline>& pipe;
25-
std::shared_ptr<bool> is_initializing;
26+
std::shared_ptr<std::atomic<bool>> is_initializing;
2627
std::string model_path;
2728
std::string device;
2829
ov::AnyMap properties;

src/js/include/llm_pipeline/llm_pipeline_wrapper.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <atomic>
34
#include <napi.h>
45

56
#include <thread>
@@ -22,6 +23,6 @@ class LLMPipelineWrapper : public Napi::ObjectWrap<LLMPipelineWrapper> {
2223

2324
private:
2425
std::shared_ptr<ov::genai::LLMPipeline> pipe = nullptr;
25-
std::shared_ptr<bool> is_initializing = std::make_shared<bool>(false);
26-
std::shared_ptr<bool> is_generating = std::make_shared<bool>(false);
26+
std::shared_ptr<std::atomic<bool>> is_initializing = std::make_shared<std::atomic<bool>>(false);
27+
std::shared_ptr<std::atomic<bool>> is_generating = std::make_shared<std::atomic<bool>>(false);
2728
};

src/js/include/vlm_pipeline/init_worker.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#pragma once
55

6+
#include <atomic>
67
#include <napi.h>
78

89
#include "openvino/genai/visual_language/pipeline.hpp"
@@ -13,7 +14,7 @@ class VLMInitWorker : public AsyncWorker {
1314
public:
1415
VLMInitWorker(Function& callback,
1516
std::shared_ptr<ov::genai::VLMPipeline>& pipe,
16-
std::shared_ptr<bool> is_initializing,
17+
std::shared_ptr<std::atomic<bool>> is_initializing,
1718
const std::string model_path,
1819
std::string device,
1920
ov::AnyMap properties);
@@ -25,7 +26,7 @@ class VLMInitWorker : public AsyncWorker {
2526

2627
private:
2728
std::shared_ptr<ov::genai::VLMPipeline>& pipe;
28-
std::shared_ptr<bool> is_initializing;
29+
std::shared_ptr<std::atomic<bool>> is_initializing;
2930
std::string model_path;
3031
std::string device;
3132
ov::AnyMap properties;

src/js/include/vlm_pipeline/vlm_pipeline_wrapper.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#pragma once
55

6+
#include <atomic>
67
#include <napi.h>
78

89
#include <thread>
@@ -26,6 +27,6 @@ class VLMPipelineWrapper : public Napi::ObjectWrap<VLMPipelineWrapper> {
2627

2728
private:
2829
std::shared_ptr<ov::genai::VLMPipeline> pipe = nullptr;
29-
std::shared_ptr<bool> is_initializing = std::make_shared<bool>(false);
30-
std::shared_ptr<bool> is_generating = std::make_shared<bool>(false);
30+
std::shared_ptr<std::atomic<bool>> is_initializing = std::make_shared<std::atomic<bool>>(false);
31+
std::shared_ptr<std::atomic<bool>> is_generating = std::make_shared<std::atomic<bool>>(false);
3132
};

src/js/include/whisper_pipeline/init_worker.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#pragma once
55

6+
#include <atomic>
67
#include <napi.h>
78

89
#include "openvino/genai/whisper_pipeline.hpp"
@@ -11,7 +12,7 @@ class WhisperInitWorker : public Napi::AsyncWorker {
1112
public:
1213
WhisperInitWorker(Napi::Function& callback,
1314
std::shared_ptr<ov::genai::WhisperPipeline>& pipe,
14-
std::shared_ptr<bool> is_initializing,
15+
std::shared_ptr<std::atomic<bool>> is_initializing,
1516
std::string&& model_path,
1617
std::string&& device,
1718
ov::AnyMap&& properties);
@@ -22,7 +23,7 @@ class WhisperInitWorker : public Napi::AsyncWorker {
2223

2324
private:
2425
std::shared_ptr<ov::genai::WhisperPipeline>& pipe;
25-
std::shared_ptr<bool> is_initializing;
26+
std::shared_ptr<std::atomic<bool>> is_initializing;
2627
std::string model_path;
2728
std::string device;
2829
ov::AnyMap properties;

src/js/include/whisper_pipeline/pipeline_wrapper.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#pragma once
55

6+
#include <atomic>
67
#include <napi.h>
78

89
#include "openvino/genai/whisper_pipeline.hpp"
@@ -19,6 +20,6 @@ class WhisperPipelineWrapper : public Napi::ObjectWrap<WhisperPipelineWrapper> {
1920

2021
private:
2122
std::shared_ptr<ov::genai::WhisperPipeline> pipe = nullptr;
22-
std::shared_ptr<bool> is_initializing = std::make_shared<bool>(false);
23-
std::shared_ptr<bool> is_generating = std::make_shared<bool>(false);
23+
std::shared_ptr<std::atomic<bool>> is_initializing = std::make_shared<std::atomic<bool>>(false);
24+
std::shared_ptr<std::atomic<bool>> is_generating = std::make_shared<std::atomic<bool>>(false);
2425
};

src/js/src/llm_pipeline/init_worker.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
InitWorker::InitWorker(Function& callback,
44
std::shared_ptr<ov::genai::LLMPipeline>& pipe,
5-
std::shared_ptr<bool> is_initializing,
5+
std::shared_ptr<std::atomic<bool>> is_initializing,
66
const std::string model_path,
77
const std::string device,
88
const ov::AnyMap properties)

src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include "include/tokenizer.hpp"
1212

1313
struct TsfnContext {
14-
TsfnContext(GenerateInputs inputs, std::shared_ptr<bool> is_generating)
14+
TsfnContext(GenerateInputs inputs, std::shared_ptr<std::atomic<bool>> is_generating)
1515
: inputs(inputs),
1616
is_generating(is_generating) {};
1717
~TsfnContext() {};
@@ -21,7 +21,7 @@ struct TsfnContext {
2121
std::optional<Napi::ThreadSafeFunction> streamer_tsfn;
2222

2323
GenerateInputs inputs;
24-
std::shared_ptr<bool> is_generating;
24+
std::shared_ptr<std::atomic<bool>> is_generating;
2525
std::shared_ptr<ov::genai::LLMPipeline> pipe = nullptr;
2626
std::shared_ptr<ov::AnyMap> generation_config = nullptr;
2727
std::shared_ptr<ov::AnyMap> options = nullptr;
@@ -33,7 +33,7 @@ void performInferenceThread(TsfnContext* context) {
3333
try {
3434
jsCallback.Call(
3535
{Napi::Error::New(env, "performInferenceThread error. " + message).Value(), env.Null()});
36-
} catch (std::exception& err) {
36+
} catch (const std::exception& err) {
3737
std::cerr << "The callback failed when attempting to return an error from performInferenceThread. "
3838
"Details:\n"
3939
<< err.what() << std::endl;
@@ -73,7 +73,7 @@ void performInferenceThread(TsfnContext* context) {
7373
} else {
7474
resultPromise.set_value(ov::genai::StreamingStatus::RUNNING);
7575
}
76-
} catch (std::exception& err) {
76+
} catch (const std::exception& err) {
7777
streamer_exceptions.push_back(err.what());
7878
resultPromise.set_value(ov::genai::StreamingStatus::CANCEL);
7979
}
@@ -100,8 +100,11 @@ void performInferenceThread(TsfnContext* context) {
100100
}},
101101
context->inputs);
102102

103-
} catch (std::exception& e) {
103+
} catch (const std::exception& e) {
104+
*context->is_generating = false;
104105
report_error(e.what());
106+
finalize();
107+
return;
105108
}
106109
// should be called right after inference to release the flag asap
107110
*context->is_generating = false;
@@ -124,7 +127,7 @@ void performInferenceThread(TsfnContext* context) {
124127
env.Null(), // Error should be null in normal case
125128
to_decoded_result(env, result) // Return DecodedResults as the final result
126129
});
127-
} catch (std::exception& err) {
130+
} catch (const std::exception& err) {
128131
report_error("The final callback failed. Details:\n" + std::string(err.what()));
129132
}
130133
});
@@ -133,7 +136,7 @@ void performInferenceThread(TsfnContext* context) {
133136
report_error("The final BlockingCall failed with status " + status);
134137
}
135138
}
136-
} catch (std::exception& e) {
139+
} catch (const std::exception& e) {
137140
report_error(e.what());
138141
}
139142
finalize();

src/js/src/vlm_pipeline/init_worker.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
VLMInitWorker::VLMInitWorker(Function& callback,
77
std::shared_ptr<ov::genai::VLMPipeline>& pipe,
8-
std::shared_ptr<bool> is_initializing,
8+
std::shared_ptr<std::atomic<bool>> is_initializing,
99
const std::string model_path,
1010
const std::string device,
1111
const ov::AnyMap properties)

src/js/src/vlm_pipeline/vlm_pipeline_wrapper.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "include/vlm_pipeline/start_chat_worker.hpp"
1515

1616
struct VLMTsfnContext {
17-
VLMTsfnContext(VLMGenerateInputs inputs, std::shared_ptr<bool> is_generating)
17+
VLMTsfnContext(VLMGenerateInputs inputs, std::shared_ptr<std::atomic<bool>> is_generating)
1818
: inputs(std::move(inputs)),
1919
is_generating(is_generating) {};
2020
~VLMTsfnContext() {};
@@ -26,7 +26,7 @@ struct VLMTsfnContext {
2626
VLMGenerateInputs inputs;
2727
std::vector<ov::Tensor> images;
2828
std::vector<ov::Tensor> videos;
29-
std::shared_ptr<bool> is_generating;
29+
std::shared_ptr<std::atomic<bool>> is_generating;
3030
std::shared_ptr<ov::genai::VLMPipeline> pipe = nullptr;
3131
std::shared_ptr<ov::AnyMap> generation_config = nullptr;
3232
};
@@ -37,7 +37,7 @@ void vlmPerformInferenceThread(VLMTsfnContext* context) {
3737
try {
3838
jsCallback.Call(
3939
{Napi::Error::New(env, "vlmPerformInferenceThread error. " + message).Value(), env.Null()});
40-
} catch (std::exception& err) {
40+
} catch (const std::exception& err) {
4141
std::cerr << "The callback failed when attempting to return an error from vlmPerformInferenceThread. "
4242
"Details:\n"
4343
<< err.what() << std::endl;
@@ -76,7 +76,7 @@ void vlmPerformInferenceThread(VLMTsfnContext* context) {
7676
} else {
7777
resultPromise.set_value(ov::genai::StreamingStatus::RUNNING);
7878
}
79-
} catch (std::exception& err) {
79+
} catch (const std::exception& err) {
8080
streamer_exceptions.push_back(err.what());
8181
resultPromise.set_value(ov::genai::StreamingStatus::CANCEL);
8282
}
@@ -102,9 +102,9 @@ void vlmPerformInferenceThread(VLMTsfnContext* context) {
102102
}},
103103
context->inputs);
104104

105-
} catch (std::exception& e) {
106-
report_error(e.what());
105+
} catch (const std::exception& e) {
107106
*context->is_generating = false;
107+
report_error(e.what());
108108
finalize();
109109
return;
110110
}
@@ -127,7 +127,7 @@ void vlmPerformInferenceThread(VLMTsfnContext* context) {
127127
env.Null(),
128128
to_vlm_decoded_result(env, result),
129129
});
130-
} catch (std::exception& err) {
130+
} catch (const std::exception& err) {
131131
report_error("The final callback failed. Details:\n" + std::string(err.what()));
132132
}
133133
});
@@ -136,7 +136,7 @@ void vlmPerformInferenceThread(VLMTsfnContext* context) {
136136
report_error("The final BlockingCall failed with status " + status);
137137
}
138138
}
139-
} catch (std::exception& e) {
139+
} catch (const std::exception& e) {
140140
report_error(e.what());
141141
}
142142
finalize();
@@ -223,8 +223,8 @@ Napi::Value VLMPipelineWrapper::generate(const Napi::CallbackInfo& info) {
223223
}
224224
context->native_thread = std::thread(vlmPerformInferenceThread, context);
225225
} catch (const std::exception& ex) {
226-
Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
227226
*this->is_generating = false;
227+
Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
228228
}
229229
return env.Undefined();
230230
}

0 commit comments

Comments (0)