Skip to content

Commit 963fd7f

Browse files
committed
removed multi output, clean up readme
1 parent 3ca3831 commit 963fd7f

File tree

8 files changed

+198
-518
lines changed

8 files changed

+198
-518
lines changed

deep_object_detection/README.md

Lines changed: 143 additions & 165 deletions
Large diffs are not rendered by default.

deep_object_detection/config/generic_model_params.yaml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ deep_object_detection_node:
77
Model:
88
num_classes: 80
99
bbox_format: "cxcywh"
10+
output_shape: [1, 8400, 84]
1011

1112
Preprocessing:
1213
input_width: 640
@@ -23,15 +24,11 @@ deep_object_detection_node:
2324
nms_iou_threshold: 0.45
2425
score_activation: "sigmoid"
2526
enable_nms: true
26-
use_multi_output: false
27-
output_boxes_idx: 0
28-
output_scores_idx: 1
29-
output_classes_idx: 2
3027
class_score_mode: "all_classes"
3128
class_score_start_idx: -1
3229
class_score_count: -1
30+
3331
layout:
34-
auto_detect: true
3532
batch_dim: 0
3633
detection_dim: 1
3734
feature_dim: 2
@@ -41,8 +38,8 @@ deep_object_detection_node:
4138
class_idx: 5
4239

4340
Backend:
44-
plugin: "onnxruntime_gpu" # Options: "onnxruntime_cpu" or "onnxruntime_gpu"
45-
execution_provider: "tensorrt" # For onnxruntime_gpu: "cuda" or "tensorrt"
41+
plugin: "onnxruntime_gpu"
42+
execution_provider: "tensorrt"
4643
device_id: 0
4744
trt_engine_cache_enable: true
4845
trt_engine_cache_path: "/tmp/deep_ros_ort_trt_cache"

deep_object_detection/include/deep_object_detection/detection_types.hpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,17 @@ struct ModelMetadata
9393
int num_classes; ///< Number of detection classes
9494
std::string class_names_file; ///< Path to class names file (one per line, optional)
9595
std::string bbox_format; ///< Bounding box format used by model ("cxcywh", "xyxy", "xywh")
96+
std::vector<size_t> output_shape; ///< Model output tensor shape (optional, for logging/validation)
9697
};
9798

9899
/**
99100
* @brief Output tensor layout configuration
100101
*
101-
* Describes the structure of model output tensors for manual layout specification.
102-
* Only used when auto_detect is false.
102+
* Describes the structure of model output tensors.
103+
* All layout parameters must be manually specified in the config file.
103104
*/
104105
struct OutputLayoutConfig
105106
{
106-
bool auto_detect; ///< True to auto-detect layout, false to use manual config
107107
int batch_dim; ///< Batch dimension index
108108
int detection_dim; ///< Detection dimension index
109109
int feature_dim; ///< Feature dimension index
@@ -122,10 +122,6 @@ struct PostprocessingConfig
122122
float nms_iou_threshold; ///< IoU threshold for Non-Maximum Suppression
123123
std::string score_activation; ///< Score activation function ("sigmoid", "softmax", "none")
124124
bool enable_nms; ///< Enable Non-Maximum Suppression
125-
bool use_multi_output; ///< True if model has separate outputs for boxes, scores, classes
126-
int output_boxes_idx; ///< Output index for bounding boxes (if use_multi_output)
127-
int output_scores_idx; ///< Output index for scores (if use_multi_output)
128-
int output_classes_idx; ///< Output index for class IDs (if use_multi_output)
129125
std::string class_score_mode; ///< How class scores are extracted ("all_classes", "single_confidence")
130126
int class_score_start_idx; ///< Start index for class scores (-1 = use all)
131127
int class_score_count; ///< Count of class scores (-1 = use all)

deep_object_detection/include/deep_object_detection/generic_postprocessor.hpp

Lines changed: 9 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* @brief Generic postprocessor for object detection model outputs
1818
*
1919
* This header defines the GenericPostprocessor class which:
20-
* - Automatically detects output tensor layouts
20+
* - Uses manually configured output tensor layouts (from YAML config)
2121
* - Applies score activation and thresholding
2222
* - Performs Non-Maximum Suppression (NMS)
2323
* - Transforms coordinates from preprocessed to original image space
@@ -43,7 +43,7 @@ namespace deep_object_detection
4343
* @brief Generic postprocessor for object detection models
4444
*
4545
* Handles postprocessing pipeline for various ONNX model output formats:
46-
* - Automatic layout detection (supports [batch, detections, features], [batch, features, detections], etc.)
46+
* - Manual layout configuration (supports [batch, detections, features], [batch, features, detections], etc.)
4747
* - Score activation (sigmoid, softmax, or none)
4848
* - Score thresholding
4949
* - Non-maximum suppression (NMS)
@@ -52,6 +52,7 @@ namespace deep_object_detection
5252
*
5353
* Supports single-output models only (boxes + scores + classes in one tensor).
5454
* Multi-output models (separate tensors for boxes, scores, classes) are not supported.
55+
* Layout must be manually configured in the YAML config file.
5556
*/
5657
class GenericPostprocessor
5758
{
@@ -76,7 +77,6 @@ class GenericPostprocessor
7677
size_t class_idx = 5; ///< Index for class ID in feature dimension
7778
bool has_separate_class_output = false; ///< True if class IDs are in separate output tensor
7879
size_t class_output_idx = 0; ///< Output index for separate class tensor (if applicable)
79-
bool auto_detect = true; ///< True if layout should be auto-detected
8080
};
8181

8282
/**
@@ -97,28 +97,16 @@ class GenericPostprocessor
9797
bool use_letterbox);
9898

9999
/**
100-
* @brief Automatically detect output tensor layout from shape
101-
* @param output_shape Model output tensor shape
102-
* @return Detected OutputLayout
103-
*
104-
* Analyzes tensor shape to determine layout:
105-
* - [batch, detections, features] -> standard layout
106-
* - [batch, features, detections] -> transposed layout
107-
* - [batch, queries, 4+classes] -> query-based (DETR-style)
108-
* - Other shapes -> heuristic-based detection
109-
*/
110-
static OutputLayout detectLayout(const std::vector<size_t> & output_shape);
111-
112-
/**
113-
* @brief Auto-configure output layout based on config and optional output shape
114-
* @param output_shape Model output shape (can be empty for deferred detection)
100+
* @brief Configure output layout from manual configuration
101+
* @param output_shape Model output shape (optional, for validation/logging)
115102
* @param layout_config Layout configuration from parameters
116103
* @return Configured OutputLayout
117104
*
118-
* Handles both manual and auto-detection modes. If auto_detect is true and output_shape
119-
* is available, automatically detects layout. Otherwise uses manual config or defers detection.
105+
* Creates OutputLayout from manual configuration parameters.
106+
* All layout parameters must be specified in the config file.
120107
*/
121-
static OutputLayout autoConfigure(const std::vector<size_t> & output_shape, const OutputLayoutConfig & layout_config);
108+
static OutputLayout configureLayout(
109+
const std::vector<size_t> & output_shape, const OutputLayoutConfig & layout_config);
122110

123111
/**
124112
* @brief Decode model output tensor to detections
@@ -133,18 +121,6 @@ class GenericPostprocessor
133121
std::vector<std::vector<SimpleDetection>> decode(
134122
const deep_ros::Tensor & output, const std::vector<ImageMeta> & metas) const;
135123

136-
/**
137-
* @brief Decode multi-output model to detections
138-
* @param outputs Vector of model output tensors (boxes, scores, classes separately)
139-
* @param metas Image metadata for coordinate transformation
140-
* @return Vector of detections per image in batch
141-
*
142-
* For models with separate outputs for boxes, scores, and classes.
143-
* Uses output_boxes_idx, output_scores_idx, output_classes_idx from config.
144-
*/
145-
std::vector<std::vector<SimpleDetection>> decodeMultiOutput(
146-
const std::vector<deep_ros::Tensor> & outputs, const std::vector<ImageMeta> & metas) const;
147-
148124
/**
149125
* @brief Fill ROS Detection2DArray message with detections
150126
* @param header ROS message header (timestamp and frame_id)

deep_object_detection/package.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<version>0.1.0</version>
55
<description>
66
Generic model-agnostic deep learning object detection node using ONNX Runtime with explicit provider selection (TensorRT/CUDA/CPU).
7-
Works with any ONNX-compatible object detection model - automatically detects and adapts to model output format.
7+
Works with any ONNX-compatible object detection model. Output tensor layout must be configured in the YAML config file.
88
Features dynamic batching, multi-camera support, configurable preprocessing/postprocessing, and fail-fast error handling.
99
</description>
1010

deep_object_detection/src/deep_object_detection_node.cpp

Lines changed: 25 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ void DeepObjectDetectionNode::declareParameters()
6161
this->declare_parameter<std::string>("class_names_path", "");
6262
this->declare_parameter<int>("Model.num_classes", 80);
6363
this->declare_parameter<std::string>("Model.bbox_format", "cxcywh");
64+
this->declare_parameter<std::vector<int64_t>>("Model.output_shape", std::vector<int64_t>());
6465

6566
this->declare_parameter<int>("Preprocessing.input_width", 640);
6667
this->declare_parameter<int>("Preprocessing.input_height", 640);
@@ -75,15 +76,10 @@ void DeepObjectDetectionNode::declareParameters()
7576
this->declare_parameter<double>("Postprocessing.nms_iou_threshold", 0.45);
7677
this->declare_parameter<std::string>("Postprocessing.score_activation", "sigmoid");
7778
this->declare_parameter<bool>("Postprocessing.enable_nms", true);
78-
this->declare_parameter<bool>("Postprocessing.use_multi_output", false);
79-
this->declare_parameter<int>("Postprocessing.output_boxes_idx", 0);
80-
this->declare_parameter<int>("Postprocessing.output_scores_idx", 1);
81-
this->declare_parameter<int>("Postprocessing.output_classes_idx", 2);
8279
this->declare_parameter<std::string>("Postprocessing.class_score_mode", "all_classes");
8380
this->declare_parameter<int>("Postprocessing.class_score_start_idx", -1);
8481
this->declare_parameter<int>("Postprocessing.class_score_count", -1);
8582

86-
this->declare_parameter<bool>("Postprocessing.layout.auto_detect", true);
8783
this->declare_parameter<int>("Postprocessing.layout.batch_dim", 0);
8884
this->declare_parameter<int>("Postprocessing.layout.detection_dim", 1);
8985
this->declare_parameter<int>("Postprocessing.layout.feature_dim", 2);
@@ -114,6 +110,11 @@ void DeepObjectDetectionNode::declareParameters()
114110
params_.model_metadata.num_classes = this->get_parameter("Model.num_classes").as_int();
115111
params_.model_metadata.class_names_file = this->get_parameter("class_names_path").as_string();
116112
params_.model_metadata.bbox_format = this->get_parameter("Model.bbox_format").as_string();
113+
auto output_shape_int = this->get_parameter("Model.output_shape").as_integer_array();
114+
params_.model_metadata.output_shape.clear();
115+
for (auto dim : output_shape_int) {
116+
params_.model_metadata.output_shape.push_back(static_cast<size_t>(dim));
117+
}
117118

118119
// Preprocessing parameters
119120
params_.preprocessing.input_width = this->get_parameter("Preprocessing.input_width").as_int();
@@ -136,15 +137,10 @@ void DeepObjectDetectionNode::declareParameters()
136137
static_cast<float>(this->get_parameter("Postprocessing.nms_iou_threshold").as_double());
137138
params_.postprocessing.score_activation = this->get_parameter("Postprocessing.score_activation").as_string();
138139
params_.postprocessing.enable_nms = this->get_parameter("Postprocessing.enable_nms").as_bool();
139-
params_.postprocessing.use_multi_output = this->get_parameter("Postprocessing.use_multi_output").as_bool();
140-
params_.postprocessing.output_boxes_idx = this->get_parameter("Postprocessing.output_boxes_idx").as_int();
141-
params_.postprocessing.output_scores_idx = this->get_parameter("Postprocessing.output_scores_idx").as_int();
142-
params_.postprocessing.output_classes_idx = this->get_parameter("Postprocessing.output_classes_idx").as_int();
143140
params_.postprocessing.class_score_mode = this->get_parameter("Postprocessing.class_score_mode").as_string();
144141
params_.postprocessing.class_score_start_idx = this->get_parameter("Postprocessing.class_score_start_idx").as_int();
145142
params_.postprocessing.class_score_count = this->get_parameter("Postprocessing.class_score_count").as_int();
146143

147-
params_.postprocessing.layout.auto_detect = this->get_parameter("Postprocessing.layout.auto_detect").as_bool();
148144
params_.postprocessing.layout.batch_dim = this->get_parameter("Postprocessing.layout.batch_dim").as_int();
149145
params_.postprocessing.layout.detection_dim = this->get_parameter("Postprocessing.layout.detection_dim").as_int();
150146
params_.postprocessing.layout.feature_dim = this->get_parameter("Postprocessing.layout.feature_dim").as_int();
@@ -182,42 +178,11 @@ deep_ros::CallbackReturn DeepObjectDetectionNode::on_configure_impl(const rclcpp
182178
loadClassNames();
183179
preprocessor_ = std::make_unique<ImagePreprocessor>(params_.preprocessing);
184180

185-
// Get allocator from base class
186-
auto allocator = get_current_allocator();
187-
if (!allocator) {
188-
RCLCPP_ERROR(this->get_logger(), "Plugin did not provide allocator");
189-
return deep_ros::CallbackReturn::FAILURE;
190-
}
191-
192-
// dynamically get the output shape by running a dummy inference
193-
std::vector<size_t> input_shape = {
194-
1,
195-
RGB_CHANNELS,
196-
static_cast<size_t>(params_.preprocessing.input_height),
197-
static_cast<size_t>(params_.preprocessing.input_width)};
198-
std::vector<size_t> output_shape;
199-
try {
200-
PackedInput dummy;
201-
dummy.shape = input_shape;
202-
size_t total_elements = 1;
203-
for (size_t dim : input_shape) {
204-
total_elements *= dim;
205-
}
206-
dummy.data.assign(total_elements, 0.0f);
207-
208-
deep_ros::Tensor input_tensor(dummy.shape, deep_ros::DataType::FLOAT32, allocator);
209-
const size_t bytes = dummy.data.size() * sizeof(float);
210-
allocator->copy_from_host(input_tensor.data(), dummy.data.data(), bytes);
211-
212-
auto output_tensor = run_inference(input_tensor);
213-
output_shape = output_tensor.shape();
214-
} catch (const std::exception & e) {
215-
RCLCPP_WARN(this->get_logger(), "Could not determine output shape: %s", e.what());
216-
output_shape.clear();
217-
}
181+
// Get output shape from config (optional, for logging/validation)
182+
const std::vector<size_t> & output_shape = params_.model_metadata.output_shape;
218183

219184
auto formatShape = [](const std::vector<size_t> & shape) {
220-
if (shape.empty()) return std::string("auto-detect");
185+
if (shape.empty()) return std::string("not specified");
221186
std::string result;
222187
for (size_t i = 0; i < shape.size(); ++i) {
223188
result += std::to_string(shape[i]);
@@ -227,30 +192,19 @@ deep_ros::CallbackReturn DeepObjectDetectionNode::on_configure_impl(const rclcpp
227192
};
228193

229194
if (!output_shape.empty()) {
230-
RCLCPP_INFO(this->get_logger(), "Detected model output shape: [%s]", formatShape(output_shape).c_str());
195+
RCLCPP_INFO(this->get_logger(), "Configured model output shape: [%s]", formatShape(output_shape).c_str());
231196
}
232197

233198
const bool use_letterbox = (params_.preprocessing.resize_method == "letterbox");
234199

235200
GenericPostprocessor::OutputLayout layout =
236-
GenericPostprocessor::autoConfigure(output_shape, params_.postprocessing.layout);
237-
if (layout.auto_detect && !output_shape.empty()) {
238-
RCLCPP_INFO(
239-
this->get_logger(),
240-
"Auto-detected layout: batch_dim=%zu, detection_dim=%zu, feature_dim=%zu",
241-
layout.batch_dim,
242-
layout.detection_dim,
243-
layout.feature_dim);
244-
} else if (!layout.auto_detect) {
245-
RCLCPP_INFO(
246-
this->get_logger(),
247-
"Using manual layout: batch_dim=%zu, detection_dim=%zu, feature_dim=%zu",
248-
layout.batch_dim,
249-
layout.detection_dim,
250-
layout.feature_dim);
251-
} else {
252-
RCLCPP_INFO(this->get_logger(), "Layout will be auto-detected from first inference");
253-
}
201+
GenericPostprocessor::configureLayout(output_shape, params_.postprocessing.layout);
202+
RCLCPP_INFO(
203+
this->get_logger(),
204+
"Using configured layout: batch_dim=%zu, detection_dim=%zu, feature_dim=%zu",
205+
layout.batch_dim,
206+
layout.detection_dim,
207+
layout.feature_dim);
254208

255209
postprocessor_ = std::make_unique<GenericPostprocessor>(
256210
params_.postprocessing,
@@ -516,24 +470,27 @@ void DeepObjectDetectionNode::processImages(
516470
auto start_time = std::chrono::steady_clock::now();
517471
std::vector<cv::Mat> processed;
518472
std::vector<ImageMeta> metas;
473+
std::vector<std_msgs::msg::Header> processed_headers;
519474
processed.reserve(images.size());
520475
metas.reserve(images.size());
476+
processed_headers.reserve(images.size());
521477

522478
// Preprocess all images
523-
for (const auto & img : images) {
524-
if (img.empty()) {
479+
for (size_t i = 0; i < images.size() && i < headers.size(); ++i) {
480+
if (images[i].empty()) {
525481
RCLCPP_WARN(this->get_logger(), "Received empty image, skipping");
526482
continue;
527483
}
528484

529485
ImageMeta meta;
530-
cv::Mat preprocessed = preprocessor_->preprocess(img, meta);
486+
cv::Mat preprocessed = preprocessor_->preprocess(images[i], meta);
531487
if (preprocessed.empty()) {
532488
RCLCPP_WARN(this->get_logger(), "Preprocessing returned empty image, skipping");
533489
continue;
534490
}
535491
processed.push_back(std::move(preprocessed));
536492
metas.push_back(meta);
493+
processed_headers.push_back(headers[i]);
537494
}
538495

539496
if (processed.empty()) {
@@ -553,14 +510,8 @@ void DeepObjectDetectionNode::processImages(
553510
allocator->copy_from_host(input_tensor.data(), packed_input.data.data(), bytes);
554511

555512
// Run inference
556-
std::vector<std::vector<SimpleDetection>> batch_detections;
557-
if (params_.postprocessing.use_multi_output) {
558-
auto output_tensor = run_inference(input_tensor);
559-
batch_detections = postprocessor_->decodeMultiOutput({output_tensor}, metas);
560-
} else {
561-
auto output_tensor = run_inference(input_tensor);
562-
batch_detections = postprocessor_->decode(output_tensor, metas);
563-
}
513+
auto output_tensor = run_inference(input_tensor);
514+
std::vector<std::vector<SimpleDetection>> batch_detections = postprocessor_->decode(output_tensor, metas);
564515

565516
auto end_time = std::chrono::steady_clock::now();
566517
auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count();
@@ -576,13 +527,6 @@ void DeepObjectDetectionNode::processImages(
576527
return sum + dets.size();
577528
}));
578529

579-
// Use headers that match the processed images (may be fewer if some were skipped)
580-
std::vector<std_msgs::msg::Header> processed_headers;
581-
processed_headers.reserve(processed.size());
582-
for (size_t i = 0; i < processed.size() && i < headers.size(); ++i) {
583-
processed_headers.push_back(headers[i]);
584-
}
585-
586530
publishDetections(batch_detections, processed_headers, metas);
587531
}
588532

0 commit comments

Comments
 (0)