Dual camera #1

Open · wants to merge 7 commits into main
17 changes: 11 additions & 6 deletions CMakeLists.txt
@@ -52,17 +52,22 @@ else()
 endif()

 # ONNX
-set(ONNXRUNTIME_ROOT_PATH /mnt/Ubuntu_01/onnxruntime-1.13.1)
+set(ONNXRUNTIME_ROOT_PATH /mnt/Ubuntu_01/onnxruntime-1.13.1/onnxruntime/)
+message(${ONNXRUNTIME_ROOT_PATH}/include/onnxruntime)
 set(ONNXRUNTIME_INCLUDE_DIRS
-        ${ONNXRUNTIME_ROOT_PATH}/onnxruntime/include/onnxruntime
-        ${ONNXRUNTIME_ROOT_PATH}/onnxruntime
-        ${ONNXRUNTIME_ROOT_PATH}/onnxruntime/include/onnxruntime/core/session
+        ${ONNXRUNTIME_ROOT_PATH}/include/onnxruntime
+        ${ONNXRUNTIME_ROOT_PATH}
+        ${ONNXRUNTIME_ROOT_PATH}/include/onnxruntime/core/session
         )

-set(ONNXRUNTIME_LIB ${ONNXRUNTIME_ROOT_PATH}/onnxruntime/build/Linux/Release/libonnxruntime.so)
+set(ONNXRUNTIME_LIB ${ONNXRUNTIME_ROOT_PATH}/build/Linux/Release/libonnxruntime.so)
+# /mnt/Ubuntu_01/onnxruntime-1.13.1/onnxruntime/build/Linux/Release/libonnxruntime.so

+IF(NOT MSVC)
+    SET(SPECIAL_OS_LIBS "pthread")
+ENDIF()

 target_include_directories(ZED_inference PRIVATE ${ONNXRUNTIME_INCLUDE_DIRS})
-target_link_libraries(ZED_inference PRIVATE ${ONNXRUNTIME_LIB} ${ZED_LIBRARIES} stdc++fs -lstdc++fs ${ZED_LIBS} ${OpenCV_LIBRARIES})
+target_link_libraries(ZED_inference PRIVATE ${ONNXRUNTIME_LIB} ${ZED_LIBRARIES} ${SPECIAL_OS_LIBS} stdc++fs -lstdc++fs ${ZED_LIBS} ${OpenCV_LIBRARIES})

 #TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${ZED_LIBS} ${OpenCV_LIBRARIES})
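As a quick way to verify that the hard-coded ONNX Runtime paths actually resolve, a minimal program like the following can be compiled against ${ONNXRUNTIME_INCLUDE_DIRS} and linked against ${ONNXRUNTIME_LIB}; this snippet is illustrative and not part of the PR:

    // check_ort.cpp — hypothetical path sanity check (not in this PR)
    #include <iostream>
    #include <onnxruntime_c_api.h>

    int main() {
        // Prints the linked runtime version, e.g. "1.13.1",
        // confirming that both the headers and the shared library were found.
        std::cout << OrtGetApiBase()->GetVersionString() << std::endl;
        return 0;
    }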
2 changes: 2 additions & 0 deletions main.cpp
@@ -5,8 +5,10 @@
 int main() {
     std::cout << "Starting ZED inference: \n \n" << std::endl;

+    // Create the ZedInference object; the inference session is initialized automatically.
     ZedInference zed_inf;

+    // Run cameras and inference
     zed_inf.run();

Binary file added model/saved_model_b2.onnx
Binary file not shown.
86 changes: 65 additions & 21 deletions object_detector.cpp
@@ -13,6 +13,11 @@ const std::vector<cv::Scalar> COLORS = {BLUE, YELLOW, ORANGE, BIGORANGE};


 ObjectDetector::ObjectDetector(const std::string &modelPath) {
+    /**
+     * Initializes an object detector that loads an ONNX model from modelPath
+     * and uses it to infer bounding boxes.
+     *
+     * @param modelPath path to the ONNX model.
+     */
     std::cout << "Initiating ObjectDetector: " << std::endl;
     // Create Environment:
     std::string instance_Name = "Object Detector";
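A minimal sketch of constructing the detector with the model file added in this PR; the relative path assumes the binary runs from the repository root, and the snippet is illustrative rather than code from this PR:

    #include "object_detector.h"

    int main() {
        // Hypothetical usage of the constructor above (illustrative only).
        ObjectDetector detector("model/saved_model_b2.onnx");
        return 0;
    }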
@@ -87,20 +92,43 @@ ObjectDetector::ObjectDetector(const std::string &modelPath) {

 }

-std::vector<std::vector<float>> ObjectDetector::inference(const cv::Mat &imageBGR) const {
+std::vector<std::vector<std::vector<float>>> ObjectDetector::inference(const std::vector<cv::Mat> &imagesBGR) const {
+    /**
+     * Infers bounding boxes on the given images.
+     * The input is a vector of n images, which are inferred as a single batch.
+     * If the batch size does not match the network input dimensions, an error is thrown.
+     *
+     * @param imagesBGR vector of all images to infer (cv::Mat).
+     * @return vector of detected bounding boxes with confidence above 0.09.
+     */
     // for time measuring
     const auto start = clock_time::now();
+    auto num_images = imagesBGR.size();
+
+    if (num_images > mDefaultInputDims[0]) {
+        throw std::domain_error("More camera images than the network can infer. "
+                                "Adjust the network input dimensions or lower the number of cameras.");
+    }

     // Calculate flat tensor input size:
     long inputTensorSize = 1;
     for (const auto &e: mInputDims) {
         inputTensorSize *= e;
     }
+    // todo: assert that inputTensorSize is divisible by num_images?
+    long input_image_size = inputTensorSize / num_images;

     // inputTensorValues is a flattened array in CHW format.
     // inputTensorValues must be reordered to HWC format.
-    std::vector<uint8_t> inputTensorValues(inputTensorSize);
-    createTensorFromImage(imageBGR, inputTensorValues);
+    // vector of input tensor values:
+    std::vector<uint8_t> inputTensorValues;
+    for (size_t i = 0; i < num_images; ++i) {
+        std::vector<uint8_t> input_image_values(input_image_size);
+        createTensorFromImage(imagesBGR[i], input_image_values);
+        inputTensorValues.insert(inputTensorValues.end(), input_image_values.begin(), input_image_values.end());
+    }

     //Assign memory
     std::vector<Ort::Value> inputTensors;
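For orientation, a minimal sketch of driving the new batched signature from a camera loop; the frame names and the two-camera setup are assumptions for illustration, not code from this PR:

    #include <opencv2/core.hpp>
    #include <vector>
    #include "object_detector.h"

    // Hypothetical call site (illustrative only):
    void detect_on_stereo_pair(const ObjectDetector &detector,
                               const cv::Mat &left_frame, const cv::Mat &right_frame) {
        std::vector<cv::Mat> frames = {left_frame, right_frame};  // one BGR frame per camera
        auto boxes = detector.inference(frames);                  // boxes[image][box] = {class_id, conf, 4 scaled coords}
    }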
@@ -154,12 +182,11 @@ std::vector<std::vector<float>> ObjectDetector::inference(const cv::Mat &imageBG
     const sec inference_time = clock_time::now() - start;
     // std::cout<< "The inference takes " << inference_time.count() << "s" << std::endl;

-    // debug: try to show image
     auto outputBoxes = this->calculateBoxes(outputTensors.back());

     const sec after = clock_time::now() - start;

-    // std::cout << "Image Precessing and inference taking a overall: " << after.count() << "s" << std::endl;
+    // std::cout << "Image processing and inference taking an overall: " << after.count() << "s" << std::endl;

     return outputBoxes;
 }
@@ -168,6 +195,11 @@ std::vector<std::vector<float>> ObjectDetector::inference(const cv::Mat &imageBG
 // Create a tensor from the input image
 void ObjectDetector::createTensorFromImage(
         const cv::Mat &img, std::vector<uint8_t> &inputTensorValues) const {
+    /**
+     * Creates an ONNX tensor for the session. Takes the cv::Mat as input and writes the result to inputTensorValues.
+     * @param img Reference to the cv::Mat image to be inferred.
+     * @param inputTensorValues Flat uint8 vector receiving all values of the input image.
+     */
     auto type = img.type();
     auto input_height = mInputDims.at(1);
     auto input_width = mInputDims.at(2);
@@ -215,7 +247,15 @@ void ObjectDetector::createTensorFromImage(
         preprocessedImage.data + (preprocessedImage.total() * preprocessedImage.channels()));
 }
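Since HWC/CHW handling is easy to get wrong, here is a minimal sketch of the HWC flattening path, assuming a uint8 BGR model input of 512x512 (as the uint8 tensor values above suggest); this is a sketch under those assumptions, not the PR's exact preprocessing:

    #include <opencv2/imgproc.hpp>
    #include <cstdint>
    #include <vector>

    // Resize to the model's spatial dims and flatten in HWC order.
    std::vector<std::uint8_t> flatten_hwc(const cv::Mat &img) {
        cv::Mat resized;
        cv::resize(img, resized, cv::Size(512, 512));  // result is continuous in memory
        return {resized.data, resized.data + resized.total() * resized.channels()};
    }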

-std::vector<std::vector<float>> ObjectDetector::calculateBoxes(const Ort::Value &outputTensor) const {
+std::vector<std::vector<std::vector<float>>> ObjectDetector::calculateBoxes(const Ort::Value &outputTensor) const {
+    /**
+     * Extracts the output box data from the flat output vector.
+     * Also scales the boxes back to the initial image size.
+     * Filters out every box with a confidence score <= 0.09.
+     *
+     * @param outputTensor flat output tensor from the ONNX session
+     * @return scaled output boxes; the outer vector indexes the image, the inner one each box
+     */
     // Calculate factors for later upscaling of the boxes (explicit casts)
     auto width_factor = (float) cameraInputDims[1] / (float) mInputDims.at(2);
     auto height_factor = (float) cameraInputDims[0] / (float) mInputDims.at(1);
@@ -224,24 +264,28 @@ std::vector<std::vector<float>> ObjectDetector::calculateBoxes(const Ort::Value
     // Get data from tensor:
     const auto data = outputTensor.GetTensorData<float>();

-    std::vector<std::vector<float>> outputBoxes;
-
-    // for every of the 100 boxes:
-    for (int row = 0; row < shape[1]; ++row) {
-        // init indexes for easy access of flattened array.
-        const auto confidence = *(data + (row * 7 + 5)); // confidence value is on the 5th place of the row
-        const auto class_id = *(data + (row * 7 + 6));
-
-        if (confidence >= 0.09) {
-            std::vector<float> box_data{class_id, confidence,
-                                        *(data + (row * 7 + 1)) * height_factor,
-                                        *(data + (row * 7 + 2)) * width_factor,
-                                        *(data + (row * 7 + 3)) * height_factor,
-                                        *(data + (row * 7 + 4)) * width_factor};
-            outputBoxes.push_back(box_data);
+    std::vector<std::vector<std::vector<float>>> outputBoxes(shape[0]); // one vector of boxes per image
+
+    // for every image
+    for (int img = 0; img < shape[0]; ++img) {
+        // for each of the 100 boxes:
+        for (int row = 0; row < shape[1]; ++row) {
+            // index into the flattened array.
+            const auto confidence = *(data + (img * shape[1] * 7) + (row * 7 + 5)); // confidence is the 5th entry of the row
+            const auto class_id = *(data + (img * shape[1] * 7) + (row * 7 + 6));
+
+            if (confidence >= 0.09) {
+                std::vector<float> box_data{class_id, confidence,
+                                            *(data + (img * shape[1] * 7) + (row * 7 + 1)) * height_factor,
+                                            *(data + (img * shape[1] * 7) + (row * 7 + 2)) * width_factor,
+                                            *(data + (img * shape[1] * 7) + (row * 7 + 3)) * height_factor,
+                                            *(data + (img * shape[1] * 7) + (row * 7 + 4)) * width_factor};
+                outputBoxes[img].push_back(box_data);
+            }
         }
     }

     return outputBoxes;
 }
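To make the flat indexing concrete, a small worked example assuming the default output shape of [batch=2, boxes=100, values=7] (the helper name is illustrative, not from this PR):

    // Flat offset of value v in box `row` of image `img`, assuming 100 boxes x 7 values per image.
    constexpr long flat_offset(long img, long row, long v) {
        return img * 100 * 7 + row * 7 + v;
    }
    // e.g. the confidence (v = 5) of box 3 in the second image (img = 1):
    static_assert(flat_offset(1, 3, 5) == 726, "1*700 + 3*7 + 5");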

8 changes: 4 additions & 4 deletions object_detector.h
@@ -24,7 +24,7 @@ using sec = std::chrono::duration<double>;
 class ObjectDetector {
 public:
     explicit ObjectDetector(const std::string& modelPath);
-    std::vector<std::vector<float>> inference(const cv::Mat &imageBGR) const;
+    std::vector<std::vector<std::vector<float>>> inference(const std::vector<cv::Mat> &imagesBGR) const;
     bool hwc = true; // whether input to the model is HWC or CHW
 private:
     // ORT Environment
@@ -37,7 +37,8 @@ class ObjectDetector {
     // Inputs
     char* mInputName;
     std::vector<int64_t> mInputDims; // b x h x w x c
-    static inline std::vector<int64_t> mDefaultInputDims = {1, 512, 512, 3};
+    // todo: adjust the shape
+    static inline std::vector<int64_t> mDefaultInputDims = {2, 512, 512, 3};
     // Outputs
     char* mOutputName;
     std::vector<int64_t> mOutputDims; // b x h x w x c
@@ -47,8 +48,7 @@ class ObjectDetector {

     void createTensorFromImage(const cv::Mat& img,
                                std::vector<uint8_t>& inputTensorValues) const;
-
-    std::vector<std::vector<float>> calculateBoxes(const Ort::Value &outputTensor) const;
+    std::vector<std::vector<std::vector<float>>> calculateBoxes(const Ort::Value &outputTensor) const;
 };

