Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \

# Install Claude Code CLI via npm
# hadolint ignore=DL3016
RUN npm install -g @anthropic-ai/claude-code
# RUN npm install -g @anthropic-ai/claude-code

# Initialize rosdep
RUN rosdep init || true
Expand Down
24 changes: 20 additions & 4 deletions deep_conversions/include/deep_conversions/image_conversions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ namespace deep_ros
namespace ros_conversions
{

/**
 * @brief Tensor layout format for image data
 *
 * Selects how pixel data is ordered in tensors produced by from_image().
 * HWC preserves the native sensor_msgs::msg::Image byte order (row-major,
 * channels interleaved), so conversion is a direct copy. CHW is the
 * channels-first order used by ONNX-style inference models and requires a
 * transpose during conversion.
 */
enum class TensorLayout
{
  HWC,  ///< Height, Width, Channels (e.g., [batch, height, width, channels])
  CHW   ///< Channels, Height, Width (e.g., [batch, channels, height, width])
};

/**
* @brief Image encoding information
*/
Expand All @@ -51,20 +60,27 @@ ImageEncoding get_image_encoding_info(const std::string & encoding);
* @brief Convert sensor_msgs::msg::Image to Tensor
* @param image ROS Image message
* @param allocator Memory allocator to use (uses CPU allocator if nullptr)
* @return Tensor with shape [1, height, width, channels] or [1, height, width]
* @param layout Tensor layout format (HWC or CHW)
* @return Tensor with shape [1, height, width, channels] (HWC) or [1, channels, height, width] (CHW)
* @throws std::runtime_error if image dimensions are invalid or data size mismatches
*/
Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator = nullptr);
Tensor from_image(
const sensor_msgs::msg::Image & image,
std::shared_ptr<BackendMemoryAllocator> allocator = nullptr,
TensorLayout layout = TensorLayout::HWC);

/**
* @brief Convert vector of sensor_msgs::msg::Image to batched Tensor
* @param images Vector of ROS Image messages
* @param allocator Memory allocator to use (uses CPU allocator if nullptr)
* @return Tensor with shape [batch_size, height, width, channels] or [batch_size, height, width]
* @param layout Tensor layout format (HWC or CHW)
* @return Tensor with shape [batch_size, height, width, channels] (HWC) or [batch_size, channels, height, width] (CHW)
* @throws std::invalid_argument if batch is empty or images have mismatched dimensions/encodings
*/
Tensor from_image(
const std::vector<sensor_msgs::msg::Image> & images, std::shared_ptr<BackendMemoryAllocator> allocator = nullptr);
const std::vector<sensor_msgs::msg::Image> & images,
std::shared_ptr<BackendMemoryAllocator> allocator = nullptr,
TensorLayout layout = TensorLayout::HWC);

/**
* @brief Convert Tensor to sensor_msgs::msg::Image (single image from batch)
Expand Down
100 changes: 79 additions & 21 deletions deep_conversions/src/image_conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ ImageEncoding get_image_encoding_info(const std::string & encoding)
throw std::runtime_error("Unsupported image encoding: " + encoding);
}

Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator)
Tensor from_image(
const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator, TensorLayout layout)
{
if (image.height == 0 || image.width == 0) {
throw std::runtime_error(
Expand All @@ -133,10 +134,17 @@ Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<Backend

auto encoding_info = get_image_encoding_info(image.encoding);

// Create tensor with proper shape - always include batch dimension (size 1 for single image)
std::vector<size_t> shape = {1, image.height, image.width};
if (encoding_info.channels > 1) {
shape.push_back(encoding_info.channels);
// Create tensor with proper shape based on layout
std::vector<size_t> shape;
if (layout == TensorLayout::CHW) {
// CHW: [batch, channels, height, width]
shape = {1, encoding_info.channels, image.height, image.width};
} else {
// HWC: [batch, height, width, channels]
shape = {1, image.height, image.width};
if (encoding_info.channels > 1) {
shape.push_back(encoding_info.channels);
}
}

// Validate step size (bytes per row)
Expand All @@ -154,18 +162,39 @@ Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<Backend
std::to_string(image.data.size()));
}

// Direct copy
Tensor tensor(shape, encoding_info.dtype, allocator);
if (allocator) {
allocator->copy_from_host(tensor.data(), image.data.data(), image.data.size());

if (layout == TensorLayout::HWC) {
// Direct copy for HWC layout
if (allocator) {
allocator->copy_from_host(tensor.data(), image.data.data(), image.data.size());
} else {
std::memcpy(tensor.data(), image.data.data(), image.data.size());
}
} else {
std::memcpy(tensor.data(), image.data.data(), image.data.size());
// Transpose HWC to CHW for CHW layout
const auto * src = image.data.data();
auto * dst = static_cast<uint8_t *>(tensor.data());
size_t pixel_bytes = encoding_info.bytes_per_channel;

for (size_t c = 0; c < encoding_info.channels; ++c) {
for (size_t h = 0; h < image.height; ++h) {
for (size_t w = 0; w < image.width; ++w) {
size_t src_idx = ((h * image.width + w) * encoding_info.channels + c) * pixel_bytes;
size_t dst_idx = ((c * image.height + h) * image.width + w) * pixel_bytes;
std::memcpy(dst + dst_idx, src + src_idx, pixel_bytes);
}
}
}
}

return tensor;
}

Tensor from_image(
const std::vector<sensor_msgs::msg::Image> & images, std::shared_ptr<BackendMemoryAllocator> allocator)
const std::vector<sensor_msgs::msg::Image> & images,
std::shared_ptr<BackendMemoryAllocator> allocator,
TensorLayout layout)
{
if (images.empty()) {
throw std::invalid_argument("Image batch is empty");
Expand All @@ -174,10 +203,17 @@ Tensor from_image(
// Get encoding info from first image
auto encoding_info = get_image_encoding_info(images[0].encoding);

// Create batch shape: [batch_size, height, width, channels] or [batch_size, height, width]
std::vector<size_t> shape = {images.size(), images[0].height, images[0].width};
if (encoding_info.channels > 1) {
shape.push_back(encoding_info.channels);
// Create batch shape based on layout
std::vector<size_t> shape;
if (layout == TensorLayout::CHW) {
// CHW: [batch_size, channels, height, width]
shape = {images.size(), encoding_info.channels, images[0].height, images[0].width};
} else {
// HWC: [batch_size, height, width, channels]
shape = {images.size(), images[0].height, images[0].width};
if (encoding_info.channels > 1) {
shape.push_back(encoding_info.channels);
}
}

// Validate all images have same dimensions and encoding
Expand All @@ -196,17 +232,39 @@ Tensor from_image(
}
}

// Direct copy
Tensor tensor(shape, encoding_info.dtype, allocator);
auto * dst = static_cast<uint8_t *>(tensor.data());

for (size_t i = 0; i < images.size(); ++i) {
if (allocator) {
allocator->copy_from_host(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
} else {
std::memcpy(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
size_t height = images[0].height;
size_t width = images[0].width;
size_t pixel_bytes = encoding_info.bytes_per_channel;

if (layout == TensorLayout::HWC) {
// Direct copy for HWC layout
for (size_t i = 0; i < images.size(); ++i) {
if (allocator) {
allocator->copy_from_host(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
} else {
std::memcpy(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
}
}
} else {
// Transpose HWC to CHW for each image in batch
for (size_t b = 0; b < images.size(); ++b) {
const auto * src = images[b].data.data();
size_t batch_offset = b * encoding_info.channels * height * width * pixel_bytes;

for (size_t c = 0; c < encoding_info.channels; ++c) {
for (size_t h = 0; h < height; ++h) {
for (size_t w = 0; w < width; ++w) {
size_t src_idx = ((h * width + w) * encoding_info.channels + c) * pixel_bytes;
size_t dst_idx = batch_offset + ((c * height + h) * width + w) * pixel_bytes;
std::memcpy(dst + dst_idx, src + src_idx, pixel_bytes);
}
}
}
}
}

return tensor;
}

Expand Down
146 changes: 146 additions & 0 deletions deep_conversions/test/test_conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,152 @@ TEST_CASE_METHOD(deep_ros::test::MockBackendFixture, "Error handling and edge ca
}
}

// Verifies that from_image() honours the TensorLayout argument: HWC keeps the
// ROS image's interleaved byte order, CHW transposes to channels-first.
// NOTE(review): the data checks read tensor memory directly via data_as<T>();
// this assumes the mock backend's allocator is host-visible — confirm.
TEST_CASE_METHOD(
  deep_ros::test::MockBackendFixture, "TensorLayout HWC vs CHW conversion", "[conversions][image][layout]")
{
  auto allocator = getAllocator();

  SECTION("HWC layout (default)")
  {
    sensor_msgs::msg::Image ros_image;
    ros_image.height = 32;
    ros_image.width = 32;
    ros_image.encoding = "32FC3";
    ros_image.step = 32 * 3 * 4;  // width * channels * sizeof(float)
    ros_image.data.resize(32 * 32 * 3 * 4);

    // Fill with known pattern: each value encodes its (h, w, c) position
    // as h*1000 + w*10 + c, so any misordering is detectable.
    float * float_data = reinterpret_cast<float *>(ros_image.data.data());
    for (size_t h = 0; h < 32; ++h) {
      for (size_t w = 0; w < 32; ++w) {
        for (size_t c = 0; c < 3; ++c) {
          float_data[(h * 32 + w) * 3 + c] = static_cast<float>(h * 1000 + w * 10 + c);
        }
      }
    }

    auto tensor_hwc = from_image(ros_image, allocator, TensorLayout::HWC);

    REQUIRE(tensor_hwc.shape().size() == 4);
    REQUIRE(tensor_hwc.shape()[0] == 1);   // Batch
    REQUIRE(tensor_hwc.shape()[1] == 32);  // Height
    REQUIRE(tensor_hwc.shape()[2] == 32);  // Width
    REQUIRE(tensor_hwc.shape()[3] == 3);   // Channels
    REQUIRE(tensor_hwc.dtype() == DataType::FLOAT32);

    // Verify data is in HWC order: the three channels of pixel (0,0) are
    // adjacent at the start of the buffer.
    const float * tensor_data = tensor_hwc.data_as<float>();
    // Check pixel at (0,0): should have channels [0, 1, 2] values
    REQUIRE(tensor_data[0] == Approx(0.0f));  // Channel 0
    REQUIRE(tensor_data[1] == Approx(1.0f));  // Channel 1
    REQUIRE(tensor_data[2] == Approx(2.0f));  // Channel 2
  }

  SECTION("CHW layout for ONNX models")
  {
    sensor_msgs::msg::Image ros_image;
    ros_image.height = 32;
    ros_image.width = 32;
    ros_image.encoding = "32FC3";
    ros_image.step = 32 * 3 * 4;  // width * channels * sizeof(float)
    ros_image.data.resize(32 * 32 * 3 * 4);

    // Fill with known pattern (same h*1000 + w*10 + c encoding as above)
    float * float_data = reinterpret_cast<float *>(ros_image.data.data());
    for (size_t h = 0; h < 32; ++h) {
      for (size_t w = 0; w < 32; ++w) {
        for (size_t c = 0; c < 3; ++c) {
          float_data[(h * 32 + w) * 3 + c] = static_cast<float>(h * 1000 + w * 10 + c);
        }
      }
    }

    auto tensor_chw = from_image(ros_image, allocator, TensorLayout::CHW);

    REQUIRE(tensor_chw.shape().size() == 4);
    REQUIRE(tensor_chw.shape()[0] == 1);   // Batch
    REQUIRE(tensor_chw.shape()[1] == 3);   // Channels
    REQUIRE(tensor_chw.shape()[2] == 32);  // Height
    REQUIRE(tensor_chw.shape()[3] == 32);  // Width
    REQUIRE(tensor_chw.dtype() == DataType::FLOAT32);

    // Verify data is in CHW order
    const float * tensor_data = tensor_chw.data_as<float>();
    // Check that channel 0 comes first (all channel 0 values before channel 1)
    // At (h=0, w=0), channel 0 should be at index 0
    REQUIRE(tensor_data[0] == Approx(0.0f));  // h=0, w=0, c=0
    // At (h=0, w=1), channel 0 should be at index 1
    REQUIRE(tensor_data[1] == Approx(10.0f));  // h=0, w=1, c=0
    // At (h=1, w=0), channel 0 should be at index 32
    REQUIRE(tensor_data[32] == Approx(1000.0f));  // h=1, w=0, c=0

    // Check that channel 1 starts after all channel 0 data (one full H*W plane)
    size_t channel1_start = 32 * 32;
    REQUIRE(tensor_data[channel1_start] == Approx(1.0f));  // h=0, w=0, c=1
  }

  SECTION("Batch conversion with CHW layout")
  {
    // Two identical-shape images; only the shape of the batched tensor is
    // asserted here — per-element CHW ordering is covered by the sections above.
    std::vector<sensor_msgs::msg::Image> images;
    for (int i = 0; i < 2; ++i) {
      sensor_msgs::msg::Image img;
      img.height = 8;
      img.width = 8;
      img.encoding = "32FC3";
      img.step = 8 * 3 * 4;
      img.data.resize(8 * 8 * 3 * 4);

      // Distinct per-image offset (i * 100) so batch slots are distinguishable
      float * data = reinterpret_cast<float *>(img.data.data());
      for (size_t j = 0; j < 8 * 8 * 3; ++j) {
        data[j] = static_cast<float>(i * 100 + j);
      }
      images.push_back(img);
    }

    auto batch_chw = from_image(images, allocator, TensorLayout::CHW);

    REQUIRE(batch_chw.shape().size() == 4);
    REQUIRE(batch_chw.shape()[0] == 2);  // Batch
    REQUIRE(batch_chw.shape()[1] == 3);  // Channels
    REQUIRE(batch_chw.shape()[2] == 8);  // Height
    REQUIRE(batch_chw.shape()[3] == 8);  // Width
  }

  SECTION("uint8 RGB8 to CHW conversion")
  {
    // Exercises the byte-wise transpose path with 1-byte channels (rgb8),
    // complementing the 4-byte float sections above.
    sensor_msgs::msg::Image ros_image;
    ros_image.height = 4;
    ros_image.width = 4;
    ros_image.encoding = "rgb8";
    ros_image.step = 4 * 3;  // width * channels * 1 byte
    ros_image.data.resize(4 * 4 * 3);

    // Fill with pattern: pixel(h,w) = [h*16+w*4+0, h*16+w*4+1, h*16+w*4+2]
    for (size_t h = 0; h < 4; ++h) {
      for (size_t w = 0; w < 4; ++w) {
        for (size_t c = 0; c < 3; ++c) {
          ros_image.data[(h * 4 + w) * 3 + c] = static_cast<uint8_t>(h * 16 + w * 4 + c);
        }
      }
    }

    auto tensor_chw = from_image(ros_image, allocator, TensorLayout::CHW);

    REQUIRE(tensor_chw.shape()[0] == 1);
    REQUIRE(tensor_chw.shape()[1] == 3);
    REQUIRE(tensor_chw.shape()[2] == 4);
    REQUIRE(tensor_chw.shape()[3] == 4);
    REQUIRE(tensor_chw.dtype() == DataType::UINT8);

    const uint8_t * data = tensor_chw.data_as<uint8_t>();
    // Verify CHW layout: channel 0 of all pixels, then channel 1, then channel 2
    REQUIRE(data[0] == 0);   // (0,0) channel 0
    REQUIRE(data[1] == 4);   // (0,1) channel 0
    REQUIRE(data[16] == 1);  // (0,0) channel 1, starts at 4*4
    REQUIRE(data[32] == 2);  // (0,0) channel 2, starts at 4*4*2
  }
}

TEST_CASE_METHOD(deep_ros::test::MockBackendFixture, "Performance and memory efficiency", "[conversions][performance]")
{
auto allocator = getAllocator();
Expand Down
Loading