Dual camera #1

Open · wants to merge 7 commits into main
17 changes: 11 additions & 6 deletions CMakeLists.txt
@@ -52,17 +52,22 @@ else()
 endif()

 # ONNX
-set(ONNXRUNTIME_ROOT_PATH /mnt/Ubuntu_01/onnxruntime-1.13.1)
+set(ONNXRUNTIME_ROOT_PATH /mnt/Ubuntu_01/onnxruntime-1.13.1/onnxruntime/)
+message(${ONNXRUNTIME_ROOT_PATH}/include/onnxruntime)
 set(ONNXRUNTIME_INCLUDE_DIRS
-        ${ONNXRUNTIME_ROOT_PATH}/onnxruntime/include/onnxruntime
-        ${ONNXRUNTIME_ROOT_PATH}/onnxruntime
-        ${ONNXRUNTIME_ROOT_PATH}/onnxruntime/include/onnxruntime/core/session
+        ${ONNXRUNTIME_ROOT_PATH}/include/onnxruntime
+        ${ONNXRUNTIME_ROOT_PATH}
+        ${ONNXRUNTIME_ROOT_PATH}/include/onnxruntime/core/session
         )

-set(ONNXRUNTIME_LIB ${ONNXRUNTIME_ROOT_PATH}/onnxruntime/build/Linux/Release/libonnxruntime.so)
+set(ONNXRUNTIME_LIB ${ONNXRUNTIME_ROOT_PATH}/build/Linux/Release/libonnxruntime.so)
+# /mnt/Ubuntu_01/onnxruntime-1.13.1/onnxruntime/build/Linux/Release/libonnxruntime.so

+IF(NOT MSVC)
+    SET(SPECIAL_OS_LIBS "pthread")
+ENDIF()

 target_include_directories(ZED_inference PRIVATE ${ONNXRUNTIME_INCLUDE_DIRS})
-target_link_libraries(ZED_inference PRIVATE ${ONNXRUNTIME_LIB} ${ZED_LIBRARIES} stdc++fs -lstdc++fs ${ZED_LIBS} ${OpenCV_LIBRARIES})
+target_link_libraries(ZED_inference PRIVATE ${ONNXRUNTIME_LIB} ${ZED_LIBRARIES} ${SPECIAL_OS_LIBS} stdc++fs -lstdc++fs ${ZED_LIBS} ${OpenCV_LIBRARIES})

 #TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${ZED_LIBS} ${OpenCV_LIBRARIES})
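As a quick way to verify that the hard-coded ONNX Runtime paths actually resolve, a minimal program like the following can be compiled against ${ONNXRUNTIME_INCLUDE_DIRS} and linked against ${ONNXRUNTIME_LIB}; this snippet is illustrative and not part of the PR:

    // check_ort.cpp — hypothetical path sanity check (not in this PR)
    #include <iostream>
    #include <onnxruntime_c_api.h>

    int main() {
        // Prints the linked runtime version, e.g. "1.13.1",
        // confirming that both the headers and the shared library were found.
        std::cout << OrtGetApiBase()->GetVersionString() << std::endl;
        return 0;
    }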
2 changes: 2 additions & 0 deletions main.cpp
@@ -5,8 +5,10 @@
 int main() {
     std::cout << "Starting ZED inference: \n \n" << std::endl;

+    // Create the ZedInference object; the inference session is initialized automatically.
     ZedInference zed_inf;

+    // Run cameras and inference
     zed_inf.run();

Binary file added model/saved_model_b2.onnx
Binary file not shown.
86 changes: 65 additions & 21 deletions object_detector.cpp
@@ -13,6 +13,11 @@ const std::vector<cv::Scalar> COLORS = {BLUE, YELLOW, ORANGE, BIGORANGE};


 ObjectDetector::ObjectDetector(const std::string &modelPath) {
+    /**
+     * Initializes an object detector that loads an ONNX model from modelPath
+     * and uses it to infer bounding boxes.
+     *
+     * @param modelPath path to the ONNX model.
+     */
     std::cout << "Initiating ObjectDetector: " << std::endl;
     // Create Environment:
     std::string instance_Name = "Object Detector";
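A minimal sketch of constructing the detector with the model file added in this PR; the relative path assumes the binary runs from the repository root, and the snippet is illustrative rather than code from this PR:

    #include "object_detector.h"

    int main() {
        // Hypothetical usage of the constructor above (illustrative only).
        ObjectDetector detector("model/saved_model_b2.onnx");
        return 0;
    }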
@@ -87,20 +92,43 @@ ObjectDetector::ObjectDetector(const std::string &modelPath) {

 }

-std::vector<std::vector<float>> ObjectDetector::inference(const cv::Mat &imageBGR) const {
+std::vector<std::vector<std::vector<float>>> ObjectDetector::inference(const std::vector<cv::Mat> &imagesBGR) const {
+    /**
+     * Infers bounding boxes on the given images.
+     * The input is a vector of n images, which are inferred as a single batch.
+     * If the batch size does not match the network input dimensions, an error is thrown.
+     *
+     * @param imagesBGR vector of all images to infer (cv::Mat).
+     * @return vector of detected bounding boxes with confidence above 0.09.
+     */
     // for time measuring
     const auto start = clock_time::now();
+    auto num_images = imagesBGR.size();
+
+    if (num_images > mDefaultInputDims[0]) {
+        throw std::domain_error("More camera images than the network can infer. "
+                                "Adjust the network input dimensions or lower the number of cameras.");
+    }

     // Calculate flat tensor input size:
     long inputTensorSize = 1;
     for (const auto &e: mInputDims) {
         inputTensorSize *= e;
     }
+    // todo: assert that inputTensorSize is divisible by num_images?
+    long input_image_size = inputTensorSize / num_images;

     // inputTensorValues is a flattened array in CHW format.
     // inputTensorValues must be reordered to HWC format.
-    std::vector<uint8_t> inputTensorValues(inputTensorSize);
-    createTensorFromImage(imageBGR, inputTensorValues);
+    // vector of input tensor values:
+    std::vector<uint8_t> inputTensorValues;
+    for (size_t i = 0; i < num_images; ++i) {
+        std::vector<uint8_t> input_image_values(input_image_size);
+        createTensorFromImage(imagesBGR[i], input_image_values);
+        inputTensorValues.insert(inputTensorValues.end(), input_image_values.begin(), input_image_values.end());
+    }

     //Assign memory
     std::vector<Ort::Value> inputTensors;
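For orientation, a minimal sketch of driving the new batched signature from a camera loop; the frame names and the two-camera setup are assumptions for illustration, not code from this PR:

    #include <opencv2/core.hpp>
    #include <vector>
    #include "object_detector.h"

    // Hypothetical call site (illustrative only):
    void detect_on_stereo_pair(const ObjectDetector &detector,
                               const cv::Mat &left_frame, const cv::Mat &right_frame) {
        std::vector<cv::Mat> frames = {left_frame, right_frame};  // one BGR frame per camera
        auto boxes = detector.inference(frames);                  // boxes[image][box] = {class_id, conf, 4 scaled coords}
    }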
@@ -154,12 +182,11 @@ std::vector<std::vector<float>> ObjectDetector::inference(const cv::Mat &imageBG
     const sec inference_time = clock_time::now() - start;
     // std::cout<< "The inference takes " << inference_time.count() << "s" << std::endl;

-    // debug: try to show image
     auto outputBoxes = this->calculateBoxes(outputTensors.back());

     const sec after = clock_time::now() - start;

-    // std::cout << "Image Precessing and inference taking a overall: " << after.count() << "s" << std::endl;
+    // std::cout << "Image processing and inference taking an overall: " << after.count() << "s" << std::endl;

     return outputBoxes;
 }
@@ -168,6 +195,11 @@ std::vector<std::vector<float>> ObjectDetector::inference(const cv::Mat &imageBG
 // Create a tensor from the input image
 void ObjectDetector::createTensorFromImage(
         const cv::Mat &img, std::vector<uint8_t> &inputTensorValues) const {
+    /**
+     * Creates an ONNX tensor for the session. Takes the cv::Mat as input and writes the result to inputTensorValues.
+     * @param img Reference to the cv::Mat image to be inferred.
+     * @param inputTensorValues Flat uint8 vector receiving all values of the input image.
+     */
     auto type = img.type();
     auto input_height = mInputDims.at(1);
     auto input_width = mInputDims.at(2);
@@ -215,7 +247,15 @@ void ObjectDetector::createTensorFromImage(
         preprocessedImage.data + (preprocessedImage.total() * preprocessedImage.channels()));
 }
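Since HWC/CHW handling is easy to get wrong, here is a minimal sketch of the HWC flattening path, assuming a uint8 BGR model input of 512x512 (as the uint8 tensor values above suggest); this is a sketch under those assumptions, not the PR's exact preprocessing:

    #include <opencv2/imgproc.hpp>
    #include <cstdint>
    #include <vector>

    // Resize to the model's spatial dims and flatten in HWC order.
    std::vector<std::uint8_t> flatten_hwc(const cv::Mat &img) {
        cv::Mat resized;
        cv::resize(img, resized, cv::Size(512, 512));  // result is continuous in memory
        return {resized.data, resized.data + resized.total() * resized.channels()};
    }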

-std::vector<std::vector<float>> ObjectDetector::calculateBoxes(const Ort::Value &outputTensor) const {
+std::vector<std::vector<std::vector<float>>> ObjectDetector::calculateBoxes(const Ort::Value &outputTensor) const {
+    /**
+     * Extracts the output box data from the flat output vector.
+     * Also scales the boxes back to the initial image size.
+     * Filters out every box with a confidence score <= 0.09.
+     *
+     * @param outputTensor flat output tensor from the ONNX session
+     * @return scaled output boxes; the outer vector indexes the image, the inner one each box
+     */
     // Calculate factors for later upscaling of the boxes (explicit casts)
     auto width_factor = (float) cameraInputDims[1] / (float) mInputDims.at(2);
     auto height_factor = (float) cameraInputDims[0] / (float) mInputDims.at(1);
@@ -224,24 +264,28 @@ std::vector<std::vector<float>> ObjectDetector::calculateBoxes(const Ort::Value
     // Get data from tensor:
     const auto data = outputTensor.GetTensorData<float>();

-    std::vector<std::vector<float>> outputBoxes;
-
-    // for every of the 100 boxes:
-    for (int row = 0; row < shape[1]; ++row) {
-        // init indexes for easy access of flattened array.
-        const auto confidence = *(data + (row * 7 + 5)); // confidence value is on the 5th place of the row
-        const auto class_id = *(data + (row * 7 + 6));
-
-        if (confidence >= 0.09) {
-            std::vector<float> box_data{class_id, confidence,
-                                        *(data + (row * 7 + 1)) * height_factor,
-                                        *(data + (row * 7 + 2)) * width_factor,
-                                        *(data + (row * 7 + 3)) * height_factor,
-                                        *(data + (row * 7 + 4)) * width_factor};
-            outputBoxes.push_back(box_data);
+    std::vector<std::vector<std::vector<float>>> outputBoxes(shape[0]); // one vector of boxes per image
+
+    // for every image
+    for (int img = 0; img < shape[0]; ++img) {
+        // for each of the 100 boxes:
+        for (int row = 0; row < shape[1]; ++row) {
+            // index into the flattened array.
+            const auto confidence = *(data + (img * shape[1] * 7) + (row * 7 + 5)); // confidence is the 5th entry of the row
+            const auto class_id = *(data + (img * shape[1] * 7) + (row * 7 + 6));
+
+            if (confidence >= 0.09) {
+                std::vector<float> box_data{class_id, confidence,
+                                            *(data + (img * shape[1] * 7) + (row * 7 + 1)) * height_factor,
+                                            *(data + (img * shape[1] * 7) + (row * 7 + 2)) * width_factor,
+                                            *(data + (img * shape[1] * 7) + (row * 7 + 3)) * height_factor,
+                                            *(data + (img * shape[1] * 7) + (row * 7 + 4)) * width_factor};
+                outputBoxes[img].push_back(box_data);
+            }
         }
     }

     return outputBoxes;
 }
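To make the flat indexing concrete, a small worked example assuming the default output shape of [batch=2, boxes=100, values=7] (the helper name is illustrative, not from this PR):

    // Flat offset of value v in box `row` of image `img`, assuming 100 boxes x 7 values per image.
    constexpr long flat_offset(long img, long row, long v) {
        return img * 100 * 7 + row * 7 + v;
    }
    // e.g. the confidence (v = 5) of box 3 in the second image (img = 1):
    static_assert(flat_offset(1, 3, 5) == 726, "1*700 + 3*7 + 5");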

8 changes: 4 additions & 4 deletions object_detector.h
@@ -24,7 +24,7 @@ using sec = std::chrono::duration<double>;
 class ObjectDetector {
 public:
     explicit ObjectDetector(const std::string& modelPath);
-    std::vector<std::vector<float>> inference(const cv::Mat &imageBGR) const;
+    std::vector<std::vector<std::vector<float>>> inference(const std::vector<cv::Mat> &imagesBGR) const;
     bool hwc = true; // whether input to the model is HWC or CHW
 private:
     // ORT Environment
@@ -37,7 +37,8 @@ class ObjectDetector {
     // Inputs
     char* mInputName;
     std::vector<int64_t> mInputDims; // b x h x w x c
-    static inline std::vector<int64_t> mDefaultInputDims = {1, 512, 512, 3};
+    // todo: adjust the shape
+    static inline std::vector<int64_t> mDefaultInputDims = {2, 512, 512, 3};
     // Outputs
     char* mOutputName;
     std::vector<int64_t> mOutputDims; // b x h x w x c
@@ -47,8 +48,7 @@ class ObjectDetector {

     void createTensorFromImage(const cv::Mat& img,
                                std::vector<uint8_t>& inputTensorValues) const;
-
-    std::vector<std::vector<float>> calculateBoxes(const Ort::Value &outputTensor) const;
+    std::vector<std::vector<std::vector<float>>> calculateBoxes(const Ort::Value &outputTensor) const;
 };

