Merge pull request #224 from saeugetier/220-reduced-resolution-neural-network-for-preview

saeugetier · web-flow · commit 29a3ed0877b4 · 2025-11-13T21:54:10.000+01:00
220: Reduced-resolution (320x320) NCNN neural network for the preview
diff --git a/qml/SettingsMenuForm.ui.qml b/qml/SettingsMenuForm.ui.qml
@@ -242,8 +242,8 @@ Item {
                             id: comboBoxNeuralNetworkRuntime
                             textRole: "text"
                             valueRole: "value"
-                            model: [{text: "ONNX Runtime", value: "ONNX"}, {text: "NCNN Runtime", value: "NCNN"}]
-                            Layout.preferredWidth: 200
+                            model: [{text: "ONNX Runtime", value: "ONNX"}, {text: "NCNN Runtime", value: "NCNN"}, {text: "NCNN Runtime (faster preview)", value: "NCNN_LOW_RES"} ]
+                            Layout.preferredWidth: 250
                         }
                     }
                 }
diff --git a/src/replacebackgroundvideofilter.cpp b/src/replacebackgroundvideofilter.cpp
@@ -57,6 +57,10 @@ void ReplaceBackgroundVideoFilter::setNeuralNetworkRuntime(QString runtime)
     {
         newRuntime = NeuralNetworkRuntime::ONNX;
     }
+    else if (runtime.contains("NCNN_LOW_RES"))
+    {
+        newRuntime = NeuralNetworkRuntime::NCNN_LOW_RES;
+    }
     else if (runtime.contains("NCNN"))
     {
         newRuntime = NeuralNetworkRuntime::NCNN;
@@ -111,6 +115,10 @@ QString ReplaceBackgroundVideoFilter::getNeuralNetworkRuntime() const
     {
         return QString("NCNN");
     }
+    else if (mNeuralNetworkRuntime == NeuralNetworkRuntime::NCNN_LOW_RES)
+    {
+        return QString("NCNN_LOW_RES");
+    }
     else
     {
         return QString("Unknown");
@@ -317,6 +325,12 @@ void ReplaceBackgroundFilterRunable::changeNeuralNetworkRuntime(const NeuralNetw
         mYoloSegmentorPreview.reset(new YOLOv11SegDetectorNcnn("yolo11n-seg_ncnn_model", "coco.names", false));
         mYoloSegmentorHighRes.reset(new YOLOv11SegDetectorNcnn("yolo11x-seg_ncnn_model", "coco.names", false));
     }
+    else if (runtime == NeuralNetworkRuntime::NCNN_LOW_RES)
+    {
+        qDebug() << "[INFO] Change YOLOv11Segmentation runtime to NCNN_LOW_RES";
+        mYoloSegmentorPreview.reset(new YOLOv11SegDetectorNcnn("yolo11n-seg_ncnn_model_320", "coco.names", false, true));
+        mYoloSegmentorHighRes.reset(new YOLOv11SegDetectorNcnn("yolo11x-seg_ncnn_model", "coco.names", false));
+    }
 }
 
 void ReplaceBackgroundFilterRunable::prepareBackground(cv::Mat &bg, cv::Size size)
diff --git a/src/replacebackgroundvideofilter.h b/src/replacebackgroundvideofilter.h
@@ -17,7 +17,8 @@ class ReplaceBackgroundFilterRunable;
 enum class NeuralNetworkRuntime
 {
     ONNX,
-    NCNN
+    NCNN,
+    NCNN_LOW_RES
 };
 
 class ReplaceBackgroundVideoFilter : public QVideoFrameInput
diff --git a/src/yolo11segncnn.cpp b/src/yolo11segncnn.cpp
@@ -9,7 +9,7 @@
 
 YOLOv11SegDetectorNcnn::YOLOv11SegDetectorNcnn(const std::string &modelPath,
                                                const std::string &labelsPath,
-                                               bool useGPU) : Yolo11Segementation(labelsPath)
+                                               bool useGPU, bool use320x320input) : Yolo11Segementation(labelsPath)
 {
     QString ressourcePathGeneric = QStandardPaths::locate(QStandardPaths::GenericDataLocation, "models", QStandardPaths::LocateDirectory);
     QString ressourcePathApp = QStandardPaths::locate(QStandardPaths::AppDataLocation, "models", QStandardPaths::LocateDirectory);
@@ -48,7 +48,14 @@ YOLOv11SegDetectorNcnn::YOLOv11SegDetectorNcnn(const std::string &modelPath,
     numOutputNodes = net.output_names().size();
 
     isDynamicInputShape = false;          // Assume static input shape by default. NCNN models typically have fixed input shapes.
-    inputImageShape = cv::Size(640, 640); // Default shape. This is fixed for YOLOv11SegNCNN
+    
+    if(use320x320input) {
+        inputImageShape = cv::Size(320, 320);
+    }
+    else
+    {
+        inputImageShape = cv::Size(640, 640); // Default shape, used when the 320x320 input is not requested
+    }
 
     // Input
     if (numInputNodes != 1)
@@ -277,7 +284,7 @@ std::vector<Segmentation> YOLOv11SegDetectorNcnn::segment(const cv::Mat &image,
                      cv::Scalar(114, 114, 114), /*auto_=*/false,
                      /*scaleFill=*/false, /*scaleUp=*/true, /*stride=*/32);
 
-    ncnn::Mat in = ncnn::Mat::from_pixels_resize(letterboxImage.data, ncnn::Mat::PIXEL_BGR2RGB, letterboxImage.cols, letterboxImage.rows, 640, 640);
+    ncnn::Mat in = ncnn::Mat::from_pixels_resize(letterboxImage.data, ncnn::Mat::PIXEL_BGR2RGB, letterboxImage.cols, letterboxImage.rows, inputImageShape.width, inputImageShape.height);
 
     const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
     in.substract_mean_normalize(0, norm_vals);
diff --git a/src/yolo11segncnn.h b/src/yolo11segncnn.h
@@ -30,7 +30,7 @@ class YOLOv11SegDetectorNcnn : public Yolo11Segementation {
 public:
     YOLOv11SegDetectorNcnn(const std::string &modelPath,
                        const std::string &labelsPath,
-                       bool useGPU = false);
+                       bool useGPU = false, bool use320x320input = false);
 
     // Main API
     std::vector<Segmentation> segment(const cv::Mat &image,

Original file line number	Diff line number	Diff line change
`@@ -242,8 +242,8 @@ Item {`
`242`	`242`	`id: comboBoxNeuralNetworkRuntime`
`243`	`243`	`textRole: "text"`
`244`	`244`	`valueRole: "value"`
`245`		`- model: [{text: "ONNX Runtime", value: "ONNX"}, {text: "NCNN Runtime", value: "NCNN"}]`
`246`		`- Layout.preferredWidth: 200`
	`245`	`+ model: [{text: "ONNX Runtime", value: "ONNX"}, {text: "NCNN Runtime", value: "NCNN"}, {text: "NCNN Runtime (faster preview)", value: "NCNN_LOW_RES"} ]`
	`246`	`+ Layout.preferredWidth: 250`
`247`	`247`	`}`
`248`	`248`	`}`
`249`	`249`	`}`
Original file line number	Diff line number	Diff line change
`@@ -57,6 +57,10 @@ void ReplaceBackgroundVideoFilter::setNeuralNetworkRuntime(QString runtime)`
`57`	`57`	`{`
`58`	`58`	`newRuntime = NeuralNetworkRuntime::ONNX;`
`59`	`59`	`}`
	`60`	`+ else if (runtime.contains("NCNN_LOW_RES"))`
	`61`	`+ {`
	`62`	`+ newRuntime = NeuralNetworkRuntime::NCNN_LOW_RES;`
	`63`	`+ }`
`60`	`64`	`else if (runtime.contains("NCNN"))`
`61`	`65`	`{`
`62`	`66`	`newRuntime = NeuralNetworkRuntime::NCNN;`
`@@ -111,6 +115,10 @@ QString ReplaceBackgroundVideoFilter::getNeuralNetworkRuntime() const`
`111`	`115`	`{`
`112`	`116`	`return QString("NCNN");`
`113`	`117`	`}`
	`118`	`+ else if (mNeuralNetworkRuntime == NeuralNetworkRuntime::NCNN_LOW_RES)`
	`119`	`+ {`
	`120`	`+ return QString("NCNN_LOW_RES");`
	`121`	`+ }`
`114`	`122`	`else`
`115`	`123`	`{`
`116`	`124`	`return QString("Unknown");`
`@@ -317,6 +325,12 @@ void ReplaceBackgroundFilterRunable::changeNeuralNetworkRuntime(const NeuralNetw`
`317`	`325`	`mYoloSegmentorPreview.reset(new YOLOv11SegDetectorNcnn("yolo11n-seg_ncnn_model", "coco.names", false));`
`318`	`326`	`mYoloSegmentorHighRes.reset(new YOLOv11SegDetectorNcnn("yolo11x-seg_ncnn_model", "coco.names", false));`
`319`	`327`	`}`
	`328`	`+ else if (runtime == NeuralNetworkRuntime::NCNN_LOW_RES)`
	`329`	`+ {`
	`330`	`+ qDebug() << "[INFO] Change YOLOv11Segmentation runtime to NCNN_LOW_RES";`
	`331`	`+ mYoloSegmentorPreview.reset(new YOLOv11SegDetectorNcnn("yolo11n-seg_ncnn_model_320", "coco.names", false, true));`
	`332`	`+ mYoloSegmentorHighRes.reset(new YOLOv11SegDetectorNcnn("yolo11x-seg_ncnn_model", "coco.names", false));`
	`333`	`+ }`
`320`	`334`	`}`
`321`	`335`
`322`	`336`	`void ReplaceBackgroundFilterRunable::prepareBackground(cv::Mat &bg, cv::Size size)`
Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,8 @@ class ReplaceBackgroundFilterRunable;`
`17`	`17`	`enum class NeuralNetworkRuntime`
`18`	`18`	`{`
`19`	`19`	`ONNX,`
`20`		`- NCNN`
	`20`	`+ NCNN,`
	`21`	`+ NCNN_LOW_RES`
`21`	`22`	`};`
`22`	`23`
`23`	`24`	`class ReplaceBackgroundVideoFilter : public QVideoFrameInput`