diff --git a/CMakeLists.txt b/CMakeLists.txt index 39fb6ff3..52f4140a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,8 +85,6 @@ else() find_package(OpenCV CONFIG REQUIRED) endif() -target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE DISABLE_ONNXRUNTIME_GPU) - if(USE_SYSTEM_ONNXRUNTIME) if(OS_LINUX) find_package(Onnxruntime 1.16.3 REQUIRED) @@ -164,6 +162,31 @@ endif() add_subdirectory(src/update-checker/CurlClient) target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE CurlClient OpenCV::opencv_core OpenCV::opencv_imgproc) +# Check for ONNX Runtime Execution Providers +include(CheckLibraryExists) + +check_library_exists( + onnxruntime::onnxruntime + OrtSessionOptionsAppendExecutionProvider_CUDA + "" + HAVE_ONNXRUNTIME_CUDA_EP +) +if(HAVE_ONNXRUNTIME_CUDA_EP) + message(STATUS "ONNX Runtime CUDA Execution Provider found") + target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE HAVE_ONNXRUNTIME_CUDA_EP) +endif() + +check_library_exists( + onnxruntime::onnxruntime + OrtSessionOptionsAppendExecutionProvider_ROCM + "" + HAVE_ONNXRUNTIME_ROCM_EP +) +if(HAVE_ONNXRUNTIME_ROCM_EP) + message(STATUS "ONNX Runtime ROCM Execution Provider found") + target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE HAVE_ONNXRUNTIME_ROCM_EP) +endif() + target_sources( ${CMAKE_PROJECT_NAME} PRIVATE diff --git a/README.md b/README.md index ffc4b794..aafe2975 100644 --- a/README.md +++ b/README.md @@ -57,14 +57,12 @@ If you are looking for hands-on help or private consultation please select a [sp ### Technical Details - Number of CPU threads is controllable through the UI settings. A 2-thread setting works best. 
diff --git a/data/locale/ar-EG.ini b/data/locale/ar-EG.ini index a4009bec..e462efa4 100644 --- a/data/locale/ar-EG.ini +++ b/data/locale/ar-EG.ini @@ -8,7 +8,7 @@ BackgroundColor="لون الخلفية" InferenceDevice="جهاز الإستدلال" CPU="وحدة المعالجة المركزية" GPUCUDA="الوحدة المركزية - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="نموذج التقسيم" SINet="SINet" diff --git a/data/locale/bn-IN.ini b/data/locale/bn-IN.ini index db5ac448..932f79c2 100644 --- a/data/locale/bn-IN.ini +++ b/data/locale/bn-IN.ini @@ -8,7 +8,7 @@ BackgroundColor="পটভূমি রঙ" InferenceDevice="নিখরচনা ডিভাইস" CPU="সিপিইউ" GPUCUDA="জিপিইউ - কুড়া" -GPUTensorRT="জিপিইউ - টেনসরআরটি" +GPUROCM="GPU - ROCM" CoreML="কোরএমএল" SegmentationModel="সেগমেন্টেশন মডেল" SINet="এসআইনেট" diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index c2ae325d..8c9ca6b5 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -8,7 +8,7 @@ BackgroundColor="Background Color" InferenceDevice="Inference device" CPU="CPU" GPUCUDA="GPU - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="Segmentation model" SINet="SINet" diff --git a/data/locale/es-SP.ini b/data/locale/es-SP.ini index c1951b66..566c12c8 100644 --- a/data/locale/es-SP.ini +++ b/data/locale/es-SP.ini @@ -8,7 +8,7 @@ BackgroundColor="Color de fondo" InferenceDevice="Dispositivo de inferencia" CPU="CPU" GPUCUDA="GPU - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="Modelo de segmentación" SINet="SINet" diff --git a/data/locale/fr-FR.ini b/data/locale/fr-FR.ini index ea064e76..ade34566 100644 --- a/data/locale/fr-FR.ini +++ b/data/locale/fr-FR.ini @@ -8,7 +8,7 @@ BackgroundColor="Couleur de l'arrière-plan" InferenceDevice="Dispositif d'inférence" CPU="CPU" GPUCUDA="GPU - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="Modèle de segmentation" SINet="SINet" diff --git 
a/data/locale/hi-IN.ini b/data/locale/hi-IN.ini index ad8745a9..f8ae693f 100644 --- a/data/locale/hi-IN.ini +++ b/data/locale/hi-IN.ini @@ -8,7 +8,7 @@ BackgroundColor="बैकग्राउंड रंग" InferenceDevice="संदर्भ डिवाइस" CPU="सीपीयू" GPUCUDA="जीपीयू - क्यूडा" -GPUTensorRT="जीपीयू - टेंसरआरटी" +GPUROCM="जीपीयू - ROCM" CoreML="कोरएमएल" SegmentationModel="सेगमेंटेशन मॉडल" SINet="ऐसआईनेट" diff --git a/data/locale/it-IT.ini b/data/locale/it-IT.ini index 73b8101b..ec16fafa 100644 --- a/data/locale/it-IT.ini +++ b/data/locale/it-IT.ini @@ -8,7 +8,7 @@ BackgroundColor="Colore di sfondo" InferenceDevice="Dispositivo di inferenza" CPU="CPU" GPUCUDA="GPU - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="Modello di segmentazione". SINet="SINet" diff --git a/data/locale/ja-JP.ini b/data/locale/ja-JP.ini index 0041c0da..f912f34f 100644 --- a/data/locale/ja-JP.ini +++ b/data/locale/ja-JP.ini @@ -8,7 +8,7 @@ BackgroundColor="背景色" InferenceDevice="推論デバイス" CPU="CPU" GPUCUDA="GPU - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="セグメンテーションモデル" SINet="SINet" diff --git a/data/locale/ko-KR.ini b/data/locale/ko-KR.ini index 67175029..f8825fc4 100644 --- a/data/locale/ko-KR.ini +++ b/data/locale/ko-KR.ini @@ -8,7 +8,7 @@ BackgroundColor="배경 색상" InferenceDevice="추론 장치" CPU="CPU" GPUCUDA="GPU - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="세분화 모델" SINet="SINet" diff --git a/data/locale/pt-BR.ini b/data/locale/pt-BR.ini index 0f74ba7a..89005778 100644 --- a/data/locale/pt-BR.ini +++ b/data/locale/pt-BR.ini @@ -8,7 +8,7 @@ BackgroundColor="Cor de Fundo" InferenceDevice="Dispositivo de Inferência" CPU="CPU" GPUCUDA="GPU - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="Modelo de segmentação" SINet="SINet" diff --git a/data/locale/ru-RU.ini b/data/locale/ru-RU.ini index 67e94f47..4acdab2b 100644 --- 
a/data/locale/ru-RU.ini +++ b/data/locale/ru-RU.ini @@ -8,7 +8,7 @@ BackgroundColor="Цвет фона" InferenceDevice="Устройство вывода" CPU="ЦПУ" GPUCUDA="ГПУ - CUDA" -GPUTensorRT="ГПУ - TensorRT" +GPUROCM="ГПУ - ROCM" CoreML="CoreML" SegmentationModel="Модель сегментации" SINet="SINet" diff --git a/data/locale/ta-IN.ini b/data/locale/ta-IN.ini index 253257cd..832e8728 100644 --- a/data/locale/ta-IN.ini +++ b/data/locale/ta-IN.ini @@ -8,7 +8,7 @@ BackgroundColor="பின்னணி நிறம்" InferenceDevice="பரிமாற்ற சாதனம்" CPU="சிபியூ" GPUCUDA="GPU-CUDA" -GPUTensorRT="GPU-TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="பிரிவு மாதிரி" SINet="SINet" diff --git a/data/locale/tr-TR.ini b/data/locale/tr-TR.ini index d03cd715..23980257 100644 --- a/data/locale/tr-TR.ini +++ b/data/locale/tr-TR.ini @@ -8,7 +8,7 @@ BackgroundColor="Arka Plan Rengi" InferenceDevice="Tahmin Cihazı" CPU="CPU" GPUCUDA="GPU - CUDA" -GPUTensorRT="GPU - TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="Segmentasyon Modeli" SINet="SINet" diff --git a/data/locale/zh-CN.ini b/data/locale/zh-CN.ini index 56fae8c8..1bbdfe9f 100644 --- a/data/locale/zh-CN.ini +++ b/data/locale/zh-CN.ini @@ -8,7 +8,7 @@ BackgroundColor="背景颜色" InferenceDevice="推断设备" CPU="CPU" GPUCUDA="GPU-CUDA" -GPUTensorRT="GPU-TensorRT" +GPUROCM="GPU - ROCM" CoreML="CoreML" SegmentationModel="分割模型" SINet="SINet" diff --git a/src/background-filter.cpp b/src/background-filter.cpp index ccb938a3..baf1eb29 100644 --- a/src/background-filter.cpp +++ b/src/background-filter.cpp @@ -153,9 +153,11 @@ obs_properties_t *background_filter_properties(void *data) OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); obs_property_list_add_string(p_use_gpu, obs_module_text("CPU"), USEGPU_CPU); -#if defined(__linux__) && defined(__x86_64__) - obs_property_list_add_string(p_use_gpu, obs_module_text("GPUTensorRT"), USEGPU_TENSORRT); - obs_property_list_add_string(p_use_gpu, obs_module_text("GPUCUDA"), USEGPU_CUDA); +#ifdef 
HAVE_ONNXRUNTIME_CUDA_EP + obs_property_list_add_string(p_use_gpu, obs_module_text("GPUCUDA"), USEGPU_CUDA); +#endif +#ifdef HAVE_ONNXRUNTIME_ROCM_EP + obs_property_list_add_string(p_use_gpu, obs_module_text("GPUROCM"), USEGPU_ROCM); #endif #if defined(__APPLE__) obs_property_list_add_string(p_use_gpu, obs_module_text("CoreML"), USEGPU_COREML); diff --git a/src/consts.h b/src/consts.h index f1c71c45..e5d02e2a 100644 --- a/src/consts.h +++ b/src/consts.h @@ -15,7 +15,7 @@ const char *const MODEL_RMBG = "models/bria_rmbg_1_4_qint8.onnx"; const char *const USEGPU_CPU = "cpu"; const char *const USEGPU_CUDA = "cuda"; -const char *const USEGPU_TENSORRT = "tensorrt"; +const char *const USEGPU_ROCM = "rocm"; const char *const USEGPU_COREML = "coreml"; const char *const EFFECT_PATH = "effects/mask_alpha_filter.effect"; diff --git a/src/enhance-filter.cpp b/src/enhance-filter.cpp index c0802fe0..c9c937e6 100644 --- a/src/enhance-filter.cpp +++ b/src/enhance-filter.cpp @@ -53,10 +53,12 @@ obs_properties_t *enhance_filter_properties(void *data) obs_property_t *p_use_gpu = obs_properties_add_list(props, "useGPU", obs_module_text("InferenceDevice"), OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); obs_property_list_add_string(p_use_gpu, obs_module_text("CPU"), USEGPU_CPU); -#ifdef __linux__ - obs_property_list_add_string(p_use_gpu, obs_module_text("GPUTensorRT"), USEGPU_TENSORRT); +#ifdef HAVE_ONNXRUNTIME_CUDA_EP obs_property_list_add_string(p_use_gpu, obs_module_text("GPUCUDA"), USEGPU_CUDA); #endif +#ifdef HAVE_ONNXRUNTIME_ROCM_EP + obs_property_list_add_string(p_use_gpu, obs_module_text("GPUROCM"), USEGPU_ROCM); +#endif #if defined(__APPLE__) obs_property_list_add_string(p_use_gpu, obs_module_text("CoreML"), USEGPU_COREML); #endif diff --git a/src/ort-utils/ort-session-utils.cpp b/src/ort-utils/ort-session-utils.cpp index 01e24a54..49ea7032 100644 --- a/src/ort-utils/ort-session-utils.cpp +++ b/src/ort-utils/ort-session-utils.cpp @@ -55,50 +55,16 @@ int 
createOrtSession(filter_data *tf) bfree(modelFilepath_rawPtr); try { -#if defined(__linux__) && defined(__x86_64__) && !defined(DISABLE_ONNXRUNTIME_GPU) - if (tf->useGPU == USEGPU_TENSORRT) { - const auto &api = Ort::GetApi(); - - // Folder in which TensorRT will place its cache - const char *tensorrt_cache_path = "~/.cache/obs-backgroundremoval/tensorrt"; - - // Initialize TensorRT provider options - OrtTensorRTProviderOptionsV2 *tensorrt_options; - Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options)); - - // Create cache folder if it does not exist - std::filesystem::path cache_folder(tensorrt_cache_path); - if (!std::filesystem::exists(cache_folder)) { - std::filesystem::create_directories(cache_folder); - } - - // Define TensorRT provider options - std::vector option_keys = { - "device_id", - "trt_engine_cache_enable", - "trt_engine_cache_path", - "trt_timing_cache_enable", - "trt_timing_cache_path", - }; - - std::vector option_values = { - "0", // Device ID 0 - "1", // Enable engine cache - tensorrt_cache_path, // Engine cache path - "1", // Enable timing cache - tensorrt_cache_path, // Timing cache path - }; - - // Update provider options - Ort::ThrowOnError(api.UpdateTensorRTProviderOptions(tensorrt_options, option_keys.data(), - option_values.data(), option_keys.size())); - - // Append execution provider - sessionOptions.AppendExecutionProvider_TensorRT_V2(*tensorrt_options); - } else if (tf->useGPU == USEGPU_CUDA) { +#ifdef HAVE_ONNXRUNTIME_CUDA_EP + if (tf->useGPU == USEGPU_CUDA) { Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0)); } #endif +#ifdef HAVE_ONNXRUNTIME_ROCM_EP + if (tf->useGPU == USEGPU_ROCM) { + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(sessionOptions, 0)); + } +#endif #if defined(__APPLE__) if (tf->useGPU == USEGPU_COREML) { uint32_t coreml_flags = 0;