Skip to content

Commit 86a7a7e

Browse files
authored
Add Sample Inference Node with onnxruntime backend plugin (#18)
* add sample inference node * tests pass, and pre-commit * tests
1 parent 2823d0b commit 86a7a7e

File tree

13 files changed

+1200
-25
lines changed

13 files changed

+1200
-25
lines changed

deep_conversions/include/deep_conversions/image_conversions.hpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ namespace deep_ros
2929
namespace ros_conversions
3030
{
3131

32+
/**
33+
* @brief Tensor layout format for image data
34+
*/
35+
enum class TensorLayout
36+
{
37+
HWC, ///< Height, Width, Channels (e.g., [batch, height, width, channels])
38+
CHW ///< Channels, Height, Width (e.g., [batch, channels, height, width])
39+
};
40+
3241
/**
3342
* @brief Image encoding information
3443
*/
@@ -51,20 +60,27 @@ ImageEncoding get_image_encoding_info(const std::string & encoding);
5160
* @brief Convert sensor_msgs::msg::Image to Tensor
5261
* @param image ROS Image message
5362
* @param allocator Memory allocator to use (uses CPU allocator if nullptr)
54-
* @return Tensor with shape [1, height, width, channels] or [1, height, width]
63+
* @param layout Tensor layout format (HWC or CHW)
64+
* @return Tensor with shape [1, height, width, channels] (HWC) or [1, channels, height, width] (CHW)
5565
* @throws std::runtime_error if image dimensions are invalid or data size mismatches
5666
*/
57-
Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator = nullptr);
67+
Tensor from_image(
68+
const sensor_msgs::msg::Image & image,
69+
std::shared_ptr<BackendMemoryAllocator> allocator = nullptr,
70+
TensorLayout layout = TensorLayout::HWC);
5871

5972
/**
6073
* @brief Convert vector of sensor_msgs::msg::Image to batched Tensor
6174
* @param images Vector of ROS Image messages
6275
* @param allocator Memory allocator to use (uses CPU allocator if nullptr)
63-
* @return Tensor with shape [batch_size, height, width, channels] or [batch_size, height, width]
76+
* @param layout Tensor layout format (HWC or CHW)
77+
* @return Tensor with shape [batch_size, height, width, channels] (HWC) or [batch_size, channels, height, width] (CHW)
6478
* @throws std::invalid_argument if batch is empty or images have mismatched dimensions/encodings
6579
*/
6680
Tensor from_image(
67-
const std::vector<sensor_msgs::msg::Image> & images, std::shared_ptr<BackendMemoryAllocator> allocator = nullptr);
81+
const std::vector<sensor_msgs::msg::Image> & images,
82+
std::shared_ptr<BackendMemoryAllocator> allocator = nullptr,
83+
TensorLayout layout = TensorLayout::HWC);
6884

6985
/**
7086
* @brief Convert Tensor to sensor_msgs::msg::Image (single image from batch)

deep_conversions/src/image_conversions.cpp

Lines changed: 79 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ ImageEncoding get_image_encoding_info(const std::string & encoding)
124124
throw std::runtime_error("Unsupported image encoding: " + encoding);
125125
}
126126

127-
Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator)
127+
Tensor from_image(
128+
const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator, TensorLayout layout)
128129
{
129130
if (image.height == 0 || image.width == 0) {
130131
throw std::runtime_error(
@@ -133,10 +134,17 @@ Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<Backend
133134

134135
auto encoding_info = get_image_encoding_info(image.encoding);
135136

136-
// Create tensor with proper shape - always include batch dimension (size 1 for single image)
137-
std::vector<size_t> shape = {1, image.height, image.width};
138-
if (encoding_info.channels > 1) {
139-
shape.push_back(encoding_info.channels);
137+
// Create tensor with proper shape based on layout
138+
std::vector<size_t> shape;
139+
if (layout == TensorLayout::CHW) {
140+
// CHW: [batch, channels, height, width]
141+
shape = {1, encoding_info.channels, image.height, image.width};
142+
} else {
143+
// HWC: [batch, height, width, channels]
144+
shape = {1, image.height, image.width};
145+
if (encoding_info.channels > 1) {
146+
shape.push_back(encoding_info.channels);
147+
}
140148
}
141149

142150
// Validate step size (bytes per row)
@@ -154,18 +162,39 @@ Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<Backend
154162
std::to_string(image.data.size()));
155163
}
156164

157-
// Direct copy
158165
Tensor tensor(shape, encoding_info.dtype, allocator);
159-
if (allocator) {
160-
allocator->copy_from_host(tensor.data(), image.data.data(), image.data.size());
166+
167+
if (layout == TensorLayout::HWC) {
168+
// Direct copy for HWC layout
169+
if (allocator) {
170+
allocator->copy_from_host(tensor.data(), image.data.data(), image.data.size());
171+
} else {
172+
std::memcpy(tensor.data(), image.data.data(), image.data.size());
173+
}
161174
} else {
162-
std::memcpy(tensor.data(), image.data.data(), image.data.size());
175+
// Transpose HWC to CHW for CHW layout
176+
const auto * src = image.data.data();
177+
auto * dst = static_cast<uint8_t *>(tensor.data());
178+
size_t pixel_bytes = encoding_info.bytes_per_channel;
179+
180+
for (size_t c = 0; c < encoding_info.channels; ++c) {
181+
for (size_t h = 0; h < image.height; ++h) {
182+
for (size_t w = 0; w < image.width; ++w) {
183+
size_t src_idx = ((h * image.width + w) * encoding_info.channels + c) * pixel_bytes;
184+
size_t dst_idx = ((c * image.height + h) * image.width + w) * pixel_bytes;
185+
std::memcpy(dst + dst_idx, src + src_idx, pixel_bytes);
186+
}
187+
}
188+
}
163189
}
190+
164191
return tensor;
165192
}
166193

167194
Tensor from_image(
168-
const std::vector<sensor_msgs::msg::Image> & images, std::shared_ptr<BackendMemoryAllocator> allocator)
195+
const std::vector<sensor_msgs::msg::Image> & images,
196+
std::shared_ptr<BackendMemoryAllocator> allocator,
197+
TensorLayout layout)
169198
{
170199
if (images.empty()) {
171200
throw std::invalid_argument("Image batch is empty");
@@ -174,10 +203,17 @@ Tensor from_image(
174203
// Get encoding info from first image
175204
auto encoding_info = get_image_encoding_info(images[0].encoding);
176205

177-
// Create batch shape: [batch_size, height, width, channels] or [batch_size, height, width]
178-
std::vector<size_t> shape = {images.size(), images[0].height, images[0].width};
179-
if (encoding_info.channels > 1) {
180-
shape.push_back(encoding_info.channels);
206+
// Create batch shape based on layout
207+
std::vector<size_t> shape;
208+
if (layout == TensorLayout::CHW) {
209+
// CHW: [batch_size, channels, height, width]
210+
shape = {images.size(), encoding_info.channels, images[0].height, images[0].width};
211+
} else {
212+
// HWC: [batch_size, height, width, channels]
213+
shape = {images.size(), images[0].height, images[0].width};
214+
if (encoding_info.channels > 1) {
215+
shape.push_back(encoding_info.channels);
216+
}
181217
}
182218

183219
// Validate all images have same dimensions and encoding
@@ -196,17 +232,39 @@ Tensor from_image(
196232
}
197233
}
198234

199-
// Direct copy
200235
Tensor tensor(shape, encoding_info.dtype, allocator);
201236
auto * dst = static_cast<uint8_t *>(tensor.data());
202-
203-
for (size_t i = 0; i < images.size(); ++i) {
204-
if (allocator) {
205-
allocator->copy_from_host(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
206-
} else {
207-
std::memcpy(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
237+
size_t height = images[0].height;
238+
size_t width = images[0].width;
239+
size_t pixel_bytes = encoding_info.bytes_per_channel;
240+
241+
if (layout == TensorLayout::HWC) {
242+
// Direct copy for HWC layout
243+
for (size_t i = 0; i < images.size(); ++i) {
244+
if (allocator) {
245+
allocator->copy_from_host(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
246+
} else {
247+
std::memcpy(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
248+
}
249+
}
250+
} else {
251+
// Transpose HWC to CHW for each image in batch
252+
for (size_t b = 0; b < images.size(); ++b) {
253+
const auto * src = images[b].data.data();
254+
size_t batch_offset = b * encoding_info.channels * height * width * pixel_bytes;
255+
256+
for (size_t c = 0; c < encoding_info.channels; ++c) {
257+
for (size_t h = 0; h < height; ++h) {
258+
for (size_t w = 0; w < width; ++w) {
259+
size_t src_idx = ((h * width + w) * encoding_info.channels + c) * pixel_bytes;
260+
size_t dst_idx = batch_offset + ((c * height + h) * width + w) * pixel_bytes;
261+
std::memcpy(dst + dst_idx, src + src_idx, pixel_bytes);
262+
}
263+
}
264+
}
208265
}
209266
}
267+
210268
return tensor;
211269
}
212270

deep_conversions/test/test_conversions.cpp

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,152 @@ TEST_CASE_METHOD(deep_ros::test::MockBackendFixture, "Error handling and edge ca
399399
}
400400
}
401401

402+
TEST_CASE_METHOD(
403+
deep_ros::test::MockBackendFixture, "TensorLayout HWC vs CHW conversion", "[conversions][image][layout]")
404+
{
405+
auto allocator = getAllocator();
406+
407+
SECTION("HWC layout (default)")
408+
{
409+
sensor_msgs::msg::Image ros_image;
410+
ros_image.height = 32;
411+
ros_image.width = 32;
412+
ros_image.encoding = "32FC3";
413+
ros_image.step = 32 * 3 * 4;
414+
ros_image.data.resize(32 * 32 * 3 * 4);
415+
416+
// Fill with known pattern
417+
float * float_data = reinterpret_cast<float *>(ros_image.data.data());
418+
for (size_t h = 0; h < 32; ++h) {
419+
for (size_t w = 0; w < 32; ++w) {
420+
for (size_t c = 0; c < 3; ++c) {
421+
float_data[(h * 32 + w) * 3 + c] = static_cast<float>(h * 1000 + w * 10 + c);
422+
}
423+
}
424+
}
425+
426+
auto tensor_hwc = from_image(ros_image, allocator, TensorLayout::HWC);
427+
428+
REQUIRE(tensor_hwc.shape().size() == 4);
429+
REQUIRE(tensor_hwc.shape()[0] == 1); // Batch
430+
REQUIRE(tensor_hwc.shape()[1] == 32); // Height
431+
REQUIRE(tensor_hwc.shape()[2] == 32); // Width
432+
REQUIRE(tensor_hwc.shape()[3] == 3); // Channels
433+
REQUIRE(tensor_hwc.dtype() == DataType::FLOAT32);
434+
435+
// Verify data is in HWC order
436+
const float * tensor_data = tensor_hwc.data_as<float>();
437+
// Check pixel at (0,0): should have channels [0, 1, 2] values
438+
REQUIRE(tensor_data[0] == Approx(0.0f)); // Channel 0
439+
REQUIRE(tensor_data[1] == Approx(1.0f)); // Channel 1
440+
REQUIRE(tensor_data[2] == Approx(2.0f)); // Channel 2
441+
}
442+
443+
SECTION("CHW layout for ONNX models")
444+
{
445+
sensor_msgs::msg::Image ros_image;
446+
ros_image.height = 32;
447+
ros_image.width = 32;
448+
ros_image.encoding = "32FC3";
449+
ros_image.step = 32 * 3 * 4;
450+
ros_image.data.resize(32 * 32 * 3 * 4);
451+
452+
// Fill with known pattern
453+
float * float_data = reinterpret_cast<float *>(ros_image.data.data());
454+
for (size_t h = 0; h < 32; ++h) {
455+
for (size_t w = 0; w < 32; ++w) {
456+
for (size_t c = 0; c < 3; ++c) {
457+
float_data[(h * 32 + w) * 3 + c] = static_cast<float>(h * 1000 + w * 10 + c);
458+
}
459+
}
460+
}
461+
462+
auto tensor_chw = from_image(ros_image, allocator, TensorLayout::CHW);
463+
464+
REQUIRE(tensor_chw.shape().size() == 4);
465+
REQUIRE(tensor_chw.shape()[0] == 1); // Batch
466+
REQUIRE(tensor_chw.shape()[1] == 3); // Channels
467+
REQUIRE(tensor_chw.shape()[2] == 32); // Height
468+
REQUIRE(tensor_chw.shape()[3] == 32); // Width
469+
REQUIRE(tensor_chw.dtype() == DataType::FLOAT32);
470+
471+
// Verify data is in CHW order
472+
const float * tensor_data = tensor_chw.data_as<float>();
473+
// Check that channel 0 comes first (all channel 0 values before channel 1)
474+
// At (h=0, w=0), channel 0 should be at index 0
475+
REQUIRE(tensor_data[0] == Approx(0.0f)); // h=0, w=0, c=0
476+
// At (h=0, w=1), channel 0 should be at index 1
477+
REQUIRE(tensor_data[1] == Approx(10.0f)); // h=0, w=1, c=0
478+
// At (h=1, w=0), channel 0 should be at index 32
479+
REQUIRE(tensor_data[32] == Approx(1000.0f)); // h=1, w=0, c=0
480+
481+
// Check that channel 1 starts after all channel 0 data
482+
size_t channel1_start = 32 * 32;
483+
REQUIRE(tensor_data[channel1_start] == Approx(1.0f)); // h=0, w=0, c=1
484+
}
485+
486+
SECTION("Batch conversion with CHW layout")
487+
{
488+
std::vector<sensor_msgs::msg::Image> images;
489+
for (int i = 0; i < 2; ++i) {
490+
sensor_msgs::msg::Image img;
491+
img.height = 8;
492+
img.width = 8;
493+
img.encoding = "32FC3";
494+
img.step = 8 * 3 * 4;
495+
img.data.resize(8 * 8 * 3 * 4);
496+
497+
float * data = reinterpret_cast<float *>(img.data.data());
498+
for (size_t j = 0; j < 8 * 8 * 3; ++j) {
499+
data[j] = static_cast<float>(i * 100 + j);
500+
}
501+
images.push_back(img);
502+
}
503+
504+
auto batch_chw = from_image(images, allocator, TensorLayout::CHW);
505+
506+
REQUIRE(batch_chw.shape().size() == 4);
507+
REQUIRE(batch_chw.shape()[0] == 2); // Batch
508+
REQUIRE(batch_chw.shape()[1] == 3); // Channels
509+
REQUIRE(batch_chw.shape()[2] == 8); // Height
510+
REQUIRE(batch_chw.shape()[3] == 8); // Width
511+
}
512+
513+
SECTION("uint8 RGB8 to CHW conversion")
514+
{
515+
sensor_msgs::msg::Image ros_image;
516+
ros_image.height = 4;
517+
ros_image.width = 4;
518+
ros_image.encoding = "rgb8";
519+
ros_image.step = 4 * 3;
520+
ros_image.data.resize(4 * 4 * 3);
521+
522+
// Fill with pattern: pixel(h,w) = [h*16+w*4+0, h*16+w*4+1, h*16+w*4+2]
523+
for (size_t h = 0; h < 4; ++h) {
524+
for (size_t w = 0; w < 4; ++w) {
525+
for (size_t c = 0; c < 3; ++c) {
526+
ros_image.data[(h * 4 + w) * 3 + c] = static_cast<uint8_t>(h * 16 + w * 4 + c);
527+
}
528+
}
529+
}
530+
531+
auto tensor_chw = from_image(ros_image, allocator, TensorLayout::CHW);
532+
533+
REQUIRE(tensor_chw.shape()[0] == 1);
534+
REQUIRE(tensor_chw.shape()[1] == 3);
535+
REQUIRE(tensor_chw.shape()[2] == 4);
536+
REQUIRE(tensor_chw.shape()[3] == 4);
537+
REQUIRE(tensor_chw.dtype() == DataType::UINT8);
538+
539+
const uint8_t * data = tensor_chw.data_as<uint8_t>();
540+
// Verify CHW layout: channel 0 of all pixels, then channel 1, then channel 2
541+
REQUIRE(data[0] == 0); // (0,0) channel 0
542+
REQUIRE(data[1] == 4); // (0,1) channel 0
543+
REQUIRE(data[16] == 1); // (0,0) channel 1, starts at 4*4
544+
REQUIRE(data[32] == 2); // (0,0) channel 2, starts at 4*4*2
545+
}
546+
}
547+
402548
TEST_CASE_METHOD(deep_ros::test::MockBackendFixture, "Performance and memory efficiency", "[conversions][performance]")
403549
{
404550
auto allocator = getAllocator();

0 commit comments

Comments
 (0)