Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \

# Install Claude Code CLI via npm
# hadolint ignore=DL3016
RUN npm install -g @anthropic-ai/claude-code
# RUN npm install -g @anthropic-ai/claude-code

# Initialize rosdep
RUN rosdep init || true
Expand Down
24 changes: 20 additions & 4 deletions deep_conversions/include/deep_conversions/image_conversions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ namespace deep_ros
namespace ros_conversions
{

/**
 * @brief Tensor layout format for image data
 *
 * Selects how pixel data is ordered in tensors produced by from_image().
 * HWC preserves the native sensor_msgs::msg::Image byte order (row-major,
 * channels interleaved), so conversion is a direct copy. CHW is the
 * channels-first order used by ONNX-style inference models and requires a
 * transpose during conversion.
 */
enum class TensorLayout
{
  HWC,  ///< Height, Width, Channels (e.g., [batch, height, width, channels])
  CHW   ///< Channels, Height, Width (e.g., [batch, channels, height, width])
};

/**
* @brief Image encoding information
*/
Expand All @@ -51,20 +60,27 @@ ImageEncoding get_image_encoding_info(const std::string & encoding);
* @brief Convert sensor_msgs::msg::Image to Tensor
* @param image ROS Image message
* @param allocator Memory allocator to use (uses CPU allocator if nullptr)
* @return Tensor with shape [1, height, width, channels] or [1, height, width]
* @param layout Tensor layout format (HWC or CHW)
* @return Tensor with shape [1, height, width, channels] (HWC) or [1, channels, height, width] (CHW)
* @throws std::runtime_error if image dimensions are invalid or data size mismatches
*/
Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator = nullptr);
Tensor from_image(
const sensor_msgs::msg::Image & image,
std::shared_ptr<BackendMemoryAllocator> allocator = nullptr,
TensorLayout layout = TensorLayout::HWC);

/**
* @brief Convert vector of sensor_msgs::msg::Image to batched Tensor
* @param images Vector of ROS Image messages
* @param allocator Memory allocator to use (uses CPU allocator if nullptr)
* @return Tensor with shape [batch_size, height, width, channels] or [batch_size, height, width]
* @param layout Tensor layout format (HWC or CHW)
* @return Tensor with shape [batch_size, height, width, channels] (HWC) or [batch_size, channels, height, width] (CHW)
* @throws std::invalid_argument if batch is empty or images have mismatched dimensions/encodings
*/
Tensor from_image(
const std::vector<sensor_msgs::msg::Image> & images, std::shared_ptr<BackendMemoryAllocator> allocator = nullptr);
const std::vector<sensor_msgs::msg::Image> & images,
std::shared_ptr<BackendMemoryAllocator> allocator = nullptr,
TensorLayout layout = TensorLayout::HWC);

/**
* @brief Convert Tensor to sensor_msgs::msg::Image (single image from batch)
Expand Down
100 changes: 79 additions & 21 deletions deep_conversions/src/image_conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ ImageEncoding get_image_encoding_info(const std::string & encoding)
throw std::runtime_error("Unsupported image encoding: " + encoding);
}

Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator)
Tensor from_image(
const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator, TensorLayout layout)
{
if (image.height == 0 || image.width == 0) {
throw std::runtime_error(
Expand All @@ -133,10 +134,17 @@ Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<Backend

auto encoding_info = get_image_encoding_info(image.encoding);

// Create tensor with proper shape - always include batch dimension (size 1 for single image)
std::vector<size_t> shape = {1, image.height, image.width};
if (encoding_info.channels > 1) {
shape.push_back(encoding_info.channels);
// Create tensor with proper shape based on layout
std::vector<size_t> shape;
if (layout == TensorLayout::CHW) {
// CHW: [batch, channels, height, width]
shape = {1, encoding_info.channels, image.height, image.width};
} else {
// HWC: [batch, height, width, channels]
shape = {1, image.height, image.width};
if (encoding_info.channels > 1) {
shape.push_back(encoding_info.channels);
}
}

// Validate step size (bytes per row)
Expand All @@ -154,18 +162,39 @@ Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<Backend
std::to_string(image.data.size()));
}

// Direct copy
Tensor tensor(shape, encoding_info.dtype, allocator);
if (allocator) {
allocator->copy_from_host(tensor.data(), image.data.data(), image.data.size());

if (layout == TensorLayout::HWC) {
// Direct copy for HWC layout
if (allocator) {
allocator->copy_from_host(tensor.data(), image.data.data(), image.data.size());
} else {
std::memcpy(tensor.data(), image.data.data(), image.data.size());
}
} else {
std::memcpy(tensor.data(), image.data.data(), image.data.size());
// Transpose HWC to CHW for CHW layout
const auto * src = image.data.data();
auto * dst = static_cast<uint8_t *>(tensor.data());
size_t pixel_bytes = encoding_info.bytes_per_channel;

for (size_t c = 0; c < encoding_info.channels; ++c) {
for (size_t h = 0; h < image.height; ++h) {
for (size_t w = 0; w < image.width; ++w) {
size_t src_idx = ((h * image.width + w) * encoding_info.channels + c) * pixel_bytes;
size_t dst_idx = ((c * image.height + h) * image.width + w) * pixel_bytes;
std::memcpy(dst + dst_idx, src + src_idx, pixel_bytes);
}
}
}
}

return tensor;
}

Tensor from_image(
const std::vector<sensor_msgs::msg::Image> & images, std::shared_ptr<BackendMemoryAllocator> allocator)
const std::vector<sensor_msgs::msg::Image> & images,
std::shared_ptr<BackendMemoryAllocator> allocator,
TensorLayout layout)
{
if (images.empty()) {
throw std::invalid_argument("Image batch is empty");
Expand All @@ -174,10 +203,17 @@ Tensor from_image(
// Get encoding info from first image
auto encoding_info = get_image_encoding_info(images[0].encoding);

// Create batch shape: [batch_size, height, width, channels] or [batch_size, height, width]
std::vector<size_t> shape = {images.size(), images[0].height, images[0].width};
if (encoding_info.channels > 1) {
shape.push_back(encoding_info.channels);
// Create batch shape based on layout
std::vector<size_t> shape;
if (layout == TensorLayout::CHW) {
// CHW: [batch_size, channels, height, width]
shape = {images.size(), encoding_info.channels, images[0].height, images[0].width};
} else {
// HWC: [batch_size, height, width, channels]
shape = {images.size(), images[0].height, images[0].width};
if (encoding_info.channels > 1) {
shape.push_back(encoding_info.channels);
}
}

// Validate all images have same dimensions and encoding
Expand All @@ -196,17 +232,39 @@ Tensor from_image(
}
}

// Direct copy
Tensor tensor(shape, encoding_info.dtype, allocator);
auto * dst = static_cast<uint8_t *>(tensor.data());

for (size_t i = 0; i < images.size(); ++i) {
if (allocator) {
allocator->copy_from_host(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
} else {
std::memcpy(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
size_t height = images[0].height;
size_t width = images[0].width;
size_t pixel_bytes = encoding_info.bytes_per_channel;

if (layout == TensorLayout::HWC) {
// Direct copy for HWC layout
for (size_t i = 0; i < images.size(); ++i) {
if (allocator) {
allocator->copy_from_host(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
} else {
std::memcpy(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
}
}
} else {
// Transpose HWC to CHW for each image in batch
for (size_t b = 0; b < images.size(); ++b) {
const auto * src = images[b].data.data();
size_t batch_offset = b * encoding_info.channels * height * width * pixel_bytes;

for (size_t c = 0; c < encoding_info.channels; ++c) {
for (size_t h = 0; h < height; ++h) {
for (size_t w = 0; w < width; ++w) {
size_t src_idx = ((h * width + w) * encoding_info.channels + c) * pixel_bytes;
size_t dst_idx = batch_offset + ((c * height + h) * width + w) * pixel_bytes;
std::memcpy(dst + dst_idx, src + src_idx, pixel_bytes);
}
}
}
}
}

return tensor;
}

Expand Down
146 changes: 146 additions & 0 deletions deep_conversions/test/test_conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,152 @@ TEST_CASE_METHOD(deep_ros::test::MockBackendFixture, "Error handling and edge ca
}
}

// Verifies that from_image() honours the TensorLayout argument: HWC keeps the
// ROS image's interleaved byte order, CHW transposes to channels-first.
// NOTE(review): the data checks read tensor memory directly via data_as<T>();
// this assumes the mock backend's allocator is host-visible — confirm.
TEST_CASE_METHOD(
  deep_ros::test::MockBackendFixture, "TensorLayout HWC vs CHW conversion", "[conversions][image][layout]")
{
  auto allocator = getAllocator();

  SECTION("HWC layout (default)")
  {
    sensor_msgs::msg::Image ros_image;
    ros_image.height = 32;
    ros_image.width = 32;
    ros_image.encoding = "32FC3";
    ros_image.step = 32 * 3 * 4;  // width * channels * sizeof(float)
    ros_image.data.resize(32 * 32 * 3 * 4);

    // Fill with known pattern: each value encodes its (h, w, c) position
    // as h*1000 + w*10 + c, so any misordering is detectable.
    float * float_data = reinterpret_cast<float *>(ros_image.data.data());
    for (size_t h = 0; h < 32; ++h) {
      for (size_t w = 0; w < 32; ++w) {
        for (size_t c = 0; c < 3; ++c) {
          float_data[(h * 32 + w) * 3 + c] = static_cast<float>(h * 1000 + w * 10 + c);
        }
      }
    }

    auto tensor_hwc = from_image(ros_image, allocator, TensorLayout::HWC);

    REQUIRE(tensor_hwc.shape().size() == 4);
    REQUIRE(tensor_hwc.shape()[0] == 1);   // Batch
    REQUIRE(tensor_hwc.shape()[1] == 32);  // Height
    REQUIRE(tensor_hwc.shape()[2] == 32);  // Width
    REQUIRE(tensor_hwc.shape()[3] == 3);   // Channels
    REQUIRE(tensor_hwc.dtype() == DataType::FLOAT32);

    // Verify data is in HWC order: the three channels of pixel (0,0) are
    // adjacent at the start of the buffer.
    const float * tensor_data = tensor_hwc.data_as<float>();
    // Check pixel at (0,0): should have channels [0, 1, 2] values
    REQUIRE(tensor_data[0] == Approx(0.0f));  // Channel 0
    REQUIRE(tensor_data[1] == Approx(1.0f));  // Channel 1
    REQUIRE(tensor_data[2] == Approx(2.0f));  // Channel 2
  }

  SECTION("CHW layout for ONNX models")
  {
    sensor_msgs::msg::Image ros_image;
    ros_image.height = 32;
    ros_image.width = 32;
    ros_image.encoding = "32FC3";
    ros_image.step = 32 * 3 * 4;  // width * channels * sizeof(float)
    ros_image.data.resize(32 * 32 * 3 * 4);

    // Fill with known pattern (same h*1000 + w*10 + c encoding as above)
    float * float_data = reinterpret_cast<float *>(ros_image.data.data());
    for (size_t h = 0; h < 32; ++h) {
      for (size_t w = 0; w < 32; ++w) {
        for (size_t c = 0; c < 3; ++c) {
          float_data[(h * 32 + w) * 3 + c] = static_cast<float>(h * 1000 + w * 10 + c);
        }
      }
    }

    auto tensor_chw = from_image(ros_image, allocator, TensorLayout::CHW);

    REQUIRE(tensor_chw.shape().size() == 4);
    REQUIRE(tensor_chw.shape()[0] == 1);   // Batch
    REQUIRE(tensor_chw.shape()[1] == 3);   // Channels
    REQUIRE(tensor_chw.shape()[2] == 32);  // Height
    REQUIRE(tensor_chw.shape()[3] == 32);  // Width
    REQUIRE(tensor_chw.dtype() == DataType::FLOAT32);

    // Verify data is in CHW order
    const float * tensor_data = tensor_chw.data_as<float>();
    // Check that channel 0 comes first (all channel 0 values before channel 1)
    // At (h=0, w=0), channel 0 should be at index 0
    REQUIRE(tensor_data[0] == Approx(0.0f));  // h=0, w=0, c=0
    // At (h=0, w=1), channel 0 should be at index 1
    REQUIRE(tensor_data[1] == Approx(10.0f));  // h=0, w=1, c=0
    // At (h=1, w=0), channel 0 should be at index 32
    REQUIRE(tensor_data[32] == Approx(1000.0f));  // h=1, w=0, c=0

    // Check that channel 1 starts after all channel 0 data (one full H*W plane)
    size_t channel1_start = 32 * 32;
    REQUIRE(tensor_data[channel1_start] == Approx(1.0f));  // h=0, w=0, c=1
  }

  SECTION("Batch conversion with CHW layout")
  {
    // Two identical-shape images; only the shape of the batched tensor is
    // asserted here — per-element CHW ordering is covered by the sections above.
    std::vector<sensor_msgs::msg::Image> images;
    for (int i = 0; i < 2; ++i) {
      sensor_msgs::msg::Image img;
      img.height = 8;
      img.width = 8;
      img.encoding = "32FC3";
      img.step = 8 * 3 * 4;
      img.data.resize(8 * 8 * 3 * 4);

      // Distinct per-image offset (i * 100) so batch slots are distinguishable
      float * data = reinterpret_cast<float *>(img.data.data());
      for (size_t j = 0; j < 8 * 8 * 3; ++j) {
        data[j] = static_cast<float>(i * 100 + j);
      }
      images.push_back(img);
    }

    auto batch_chw = from_image(images, allocator, TensorLayout::CHW);

    REQUIRE(batch_chw.shape().size() == 4);
    REQUIRE(batch_chw.shape()[0] == 2);  // Batch
    REQUIRE(batch_chw.shape()[1] == 3);  // Channels
    REQUIRE(batch_chw.shape()[2] == 8);  // Height
    REQUIRE(batch_chw.shape()[3] == 8);  // Width
  }

  SECTION("uint8 RGB8 to CHW conversion")
  {
    // Exercises the byte-wise transpose path with 1-byte channels (rgb8),
    // complementing the 4-byte float sections above.
    sensor_msgs::msg::Image ros_image;
    ros_image.height = 4;
    ros_image.width = 4;
    ros_image.encoding = "rgb8";
    ros_image.step = 4 * 3;  // width * channels * 1 byte
    ros_image.data.resize(4 * 4 * 3);

    // Fill with pattern: pixel(h,w) = [h*16+w*4+0, h*16+w*4+1, h*16+w*4+2]
    for (size_t h = 0; h < 4; ++h) {
      for (size_t w = 0; w < 4; ++w) {
        for (size_t c = 0; c < 3; ++c) {
          ros_image.data[(h * 4 + w) * 3 + c] = static_cast<uint8_t>(h * 16 + w * 4 + c);
        }
      }
    }

    auto tensor_chw = from_image(ros_image, allocator, TensorLayout::CHW);

    REQUIRE(tensor_chw.shape()[0] == 1);
    REQUIRE(tensor_chw.shape()[1] == 3);
    REQUIRE(tensor_chw.shape()[2] == 4);
    REQUIRE(tensor_chw.shape()[3] == 4);
    REQUIRE(tensor_chw.dtype() == DataType::UINT8);

    const uint8_t * data = tensor_chw.data_as<uint8_t>();
    // Verify CHW layout: channel 0 of all pixels, then channel 1, then channel 2
    REQUIRE(data[0] == 0);   // (0,0) channel 0
    REQUIRE(data[1] == 4);   // (0,1) channel 0
    REQUIRE(data[16] == 1);  // (0,0) channel 1, starts at 4*4
    REQUIRE(data[32] == 2);  // (0,0) channel 2, starts at 4*4*2
  }
}

TEST_CASE_METHOD(deep_ros::test::MockBackendFixture, "Performance and memory efficiency", "[conversions][performance]")
{
auto allocator = getAllocator();
Expand Down
Loading