Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ else()
find_package(OpenCV CONFIG REQUIRED)
endif()

target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE DISABLE_ONNXRUNTIME_GPU)

if(USE_SYSTEM_ONNXRUNTIME)
if(OS_LINUX)
find_package(Onnxruntime 1.16.3 REQUIRED)
Expand Down Expand Up @@ -164,6 +162,31 @@ endif()
add_subdirectory(src/update-checker/CurlClient)
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE CurlClient OpenCV::opencv_core OpenCV::opencv_imgproc)

# Check for ONNX Runtime Execution Providers
include(CheckLibraryExists)

check_library_exists(
onnxruntime::onnxruntime
OrtSessionOptionsAppendExecutionProvider_CUDA
"" # LOCATION (link directory) — none needed; the imported target carries its own path
HAVE_ONNXRUNTIME_CUDA_EP
)
if(HAVE_ONNXRUNTIME_CUDA_EP)
message(STATUS "ONNX Runtime CUDA Execution Provider found")
target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE HAVE_ONNXRUNTIME_CUDA_EP)
endif()

check_library_exists(
onnxruntime::onnxruntime
OrtSessionOptionsAppendExecutionProvider_ROCM
"" # LOCATION (link directory) — none needed; the imported target carries its own path
HAVE_ONNXRUNTIME_ROCM_EP
)
if(HAVE_ONNXRUNTIME_ROCM_EP)
message(STATUS "ONNX Runtime ROCM Execution Provider found")
target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE HAVE_ONNXRUNTIME_ROCM_EP)
endif()

target_sources(
${CMAKE_PROJECT_NAME}
PRIVATE
Expand Down
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,12 @@ If you are looking for hands-on help or private consultation please select a [sp

### Technical Details

<!--
GPU support:

- Currently on Windows we support DirectML, which should reduce CPU usage by 95% and effectively use the systems accelerators (GPUs if available).
- On Mac we support CoreML for acceleration, which is available on M1 and M2 (not Intel, sorry).
- CUDA is supported in this plugin through TensorRT, however it is supported only on Linux.
- On Windows, we plan to support WinML acceleration.
- On Mac we support CoreML for acceleration, which is efficient on Apple Silicon.
- On Linux CUDA and ROCM are supported if this plugin is built from source. Ensure your ONNX Runtime installation has CUDA or ROCM support.
- The goal of this plugin is to be available for everyone on every system, even if they don't own a GPU.
-->

Number of CPU threads is controllable through the UI settings. A 2-thread setting works best.

Expand Down
2 changes: 1 addition & 1 deletion data/locale/ar-EG.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="لون الخلفية"
InferenceDevice="جهاز الإستدلال"
CPU="وحدة المعالجة المركزية"
GPUCUDA="الوحدة المركزية - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="نموذج التقسيم"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/bn-IN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="পটভূমি রঙ"
InferenceDevice="নিখরচনা ডিভাইস"
CPU="সিপিইউ"
GPUCUDA="জিপিইউ - কুড়া"
GPUTensorRT="জিপিইউ - টেনসরআরটি"
GPUROCM="GPU - ROCM"
CoreML="কোরএমএল"
SegmentationModel="সেগমেন্টেশন মডেল"
SINet="এসআইনেট"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="Background Color"
InferenceDevice="Inference device"
CPU="CPU"
GPUCUDA="GPU - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="Segmentation model"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/es-SP.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="Color de fondo"
InferenceDevice="Dispositivo de inferencia"
CPU="CPU"
GPUCUDA="GPU - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="Modelo de segmentación"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/fr-FR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="Couleur de l'arrière-plan"
InferenceDevice="Dispositif d'inférence"
CPU="CPU"
GPUCUDA="GPU - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="Modèle de segmentation"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/hi-IN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="बैकग्राउंड रंग"
InferenceDevice="संदर्भ डिवाइस"
CPU="सीपीयू"
GPUCUDA="जीपीयू - क्यूडा"
GPUTensorRT="जीपीयू - टेंसरआरटी"
GPUROCM="जीपीयू - ROCM"
CoreML="कोरएमएल"
SegmentationModel="सेगमेंटेशन मॉडल"
SINet="ऐसआईनेट"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/it-IT.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="Colore di sfondo"
InferenceDevice="Dispositivo di inferenza"
CPU="CPU"
GPUCUDA="GPU - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="Modello di segmentazione"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/ja-JP.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="背景色"
InferenceDevice="推論デバイス"
CPU="CPU"
GPUCUDA="GPU - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="セグメンテーションモデル"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/ko-KR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="배경 색상"
InferenceDevice="추론 장치"
CPU="CPU"
GPUCUDA="GPU - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="세분화 모델"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/pt-BR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="Cor de Fundo"
InferenceDevice="Dispositivo de Inferência"
CPU="CPU"
GPUCUDA="GPU - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="Modelo de segmentação"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/ru-RU.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="Цвет фона"
InferenceDevice="Устройство вывода"
CPU="ЦПУ"
GPUCUDA="ГПУ - CUDA"
GPUTensorRT="ГПУ - TensorRT"
GPUROCM="ГПУ - ROCM"
CoreML="CoreML"
SegmentationModel="Модель сегментации"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/ta-IN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="பின்னணி நிறம்"
InferenceDevice="பரிமாற்ற சாதனம்"
CPU="சிபியூ"
GPUCUDA="GPU-CUDA"
GPUTensorRT="GPU-TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="பிரிவு மாதிரி"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/tr-TR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="Arka Plan Rengi"
InferenceDevice="Tahmin Cihazı"
CPU="CPU"
GPUCUDA="GPU - CUDA"
GPUTensorRT="GPU - TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="Segmentasyon Modeli"
SINet="SINet"
Expand Down
2 changes: 1 addition & 1 deletion data/locale/zh-CN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BackgroundColor="背景颜色"
InferenceDevice="推断设备"
CPU="CPU"
GPUCUDA="GPU-CUDA"
GPUTensorRT="GPU-TensorRT"
GPUROCM="GPU - ROCM"
CoreML="CoreML"
SegmentationModel="分割模型"
SINet="SINet"
Expand Down
8 changes: 5 additions & 3 deletions src/background-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,11 @@ obs_properties_t *background_filter_properties(void *data)
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);

obs_property_list_add_string(p_use_gpu, obs_module_text("CPU"), USEGPU_CPU);
#if defined(__linux__) && defined(__x86_64__)
obs_property_list_add_string(p_use_gpu, obs_module_text("GPUTensorRT"), USEGPU_TENSORRT);
obs_property_list_add_string(p_use_gpu, obs_module_text("GPUCUDA"), USEGPU_CUDA);
#ifdef HAVE_ONNXRUNTIME_CUDA_EP
obs_property_list_add_string(p_use_gpu, obs_module_text("GPUCUDA"), USEGPU_CUDA);
#endif
#ifdef HAVE_ONNXRUNTIME_ROCM_EP
obs_property_list_add_string(p_use_gpu, obs_module_text("GPUROCM"), USEGPU_ROCM);
#endif
#endif
#if defined(__APPLE__)
obs_property_list_add_string(p_use_gpu, obs_module_text("CoreML"), USEGPU_COREML);
Expand Down
2 changes: 1 addition & 1 deletion src/consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const char *const MODEL_RMBG = "models/bria_rmbg_1_4_qint8.onnx";

const char *const USEGPU_CPU = "cpu";
const char *const USEGPU_CUDA = "cuda";
const char *const USEGPU_TENSORRT = "tensorrt";
const char *const USEGPU_ROCM = "rocm";
const char *const USEGPU_COREML = "coreml";

const char *const EFFECT_PATH = "effects/mask_alpha_filter.effect";
Expand Down
6 changes: 4 additions & 2 deletions src/enhance-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ obs_properties_t *enhance_filter_properties(void *data)
obs_property_t *p_use_gpu = obs_properties_add_list(props, "useGPU", obs_module_text("InferenceDevice"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
obs_property_list_add_string(p_use_gpu, obs_module_text("CPU"), USEGPU_CPU);
#ifdef __linux__
obs_property_list_add_string(p_use_gpu, obs_module_text("GPUTensorRT"), USEGPU_TENSORRT);
#ifdef HAVE_ONNXRUNTIME_CUDA_EP
obs_property_list_add_string(p_use_gpu, obs_module_text("GPUCUDA"), USEGPU_CUDA);
#endif
#ifdef HAVE_ONNXRUNTIME_ROCM_EP
obs_property_list_add_string(p_use_gpu, obs_module_text("GPUROCM"), USEGPU_ROCM);
#endif
#if defined(__APPLE__)
obs_property_list_add_string(p_use_gpu, obs_module_text("CoreML"), USEGPU_COREML);
#endif
Expand Down
48 changes: 7 additions & 41 deletions src/ort-utils/ort-session-utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,50 +55,16 @@ int createOrtSession(filter_data *tf)
bfree(modelFilepath_rawPtr);

try {
#if defined(__linux__) && defined(__x86_64__) && !defined(DISABLE_ONNXRUNTIME_GPU)
if (tf->useGPU == USEGPU_TENSORRT) {
const auto &api = Ort::GetApi();

// Folder in which TensorRT will place its cache
const char *tensorrt_cache_path = "~/.cache/obs-backgroundremoval/tensorrt";

// Initialize TensorRT provider options
OrtTensorRTProviderOptionsV2 *tensorrt_options;
Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options));

// Create cache folder if it does not exist
std::filesystem::path cache_folder(tensorrt_cache_path);
if (!std::filesystem::exists(cache_folder)) {
std::filesystem::create_directories(cache_folder);
}

// Define TensorRT provider options
std::vector<const char *> option_keys = {
"device_id",
"trt_engine_cache_enable",
"trt_engine_cache_path",
"trt_timing_cache_enable",
"trt_timing_cache_path",
};

std::vector<const char *> option_values = {
"0", // Device ID 0
"1", // Enable engine cache
tensorrt_cache_path, // Engine cache path
"1", // Enable timing cache
tensorrt_cache_path, // Timing cache path
};

// Update provider options
Ort::ThrowOnError(api.UpdateTensorRTProviderOptions(tensorrt_options, option_keys.data(),
option_values.data(), option_keys.size()));

// Append execution provider
sessionOptions.AppendExecutionProvider_TensorRT_V2(*tensorrt_options);
} else if (tf->useGPU == USEGPU_CUDA) {
#ifdef HAVE_ONNXRUNTIME_CUDA_EP
if (tf->useGPU == USEGPU_CUDA) {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0));
}
#endif
#ifdef HAVE_ONNXRUNTIME_ROCM_EP
if (tf->useGPU == USEGPU_ROCM) {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(sessionOptions, 0));
}
#endif
#if defined(__APPLE__)
if (tf->useGPU == USEGPU_COREML) {
uint32_t coreml_flags = 0;
Expand Down
Loading