@@ -124,7 +124,8 @@ ImageEncoding get_image_encoding_info(const std::string & encoding)
124124 throw std::runtime_error (" Unsupported image encoding: " + encoding);
125125}
126126
127- Tensor from_image (const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator)
127+ Tensor from_image (
128+ const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator, TensorLayout layout)
128129{
129130 if (image.height == 0 || image.width == 0 ) {
130131 throw std::runtime_error (
@@ -133,10 +134,17 @@ Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<Backend
133134
134135 auto encoding_info = get_image_encoding_info (image.encoding );
135136
136- // Create tensor with proper shape - always include batch dimension (size 1 for single image)
137- std::vector<size_t > shape = {1 , image.height , image.width };
138- if (encoding_info.channels > 1 ) {
139- shape.push_back (encoding_info.channels );
137+ // Create tensor with proper shape based on layout
138+ std::vector<size_t > shape;
139+ if (layout == TensorLayout::CHW) {
140+ // CHW: [batch, channels, height, width]
141+ shape = {1 , encoding_info.channels , image.height , image.width };
142+ } else {
143+ // HWC: [batch, height, width, channels]
144+ shape = {1 , image.height , image.width };
145+ if (encoding_info.channels > 1 ) {
146+ shape.push_back (encoding_info.channels );
147+ }
140148 }
141149
142150 // Validate step size (bytes per row)
@@ -154,18 +162,39 @@ Tensor from_image(const sensor_msgs::msg::Image & image, std::shared_ptr<Backend
154162 std::to_string (image.data .size ()));
155163 }
156164
157- // Direct copy
158165 Tensor tensor (shape, encoding_info.dtype , allocator);
159- if (allocator) {
160- allocator->copy_from_host (tensor.data (), image.data .data (), image.data .size ());
166+
167+ if (layout == TensorLayout::HWC) {
168+ // Direct copy for HWC layout
169+ if (allocator) {
170+ allocator->copy_from_host (tensor.data (), image.data .data (), image.data .size ());
171+ } else {
172+ std::memcpy (tensor.data (), image.data .data (), image.data .size ());
173+ }
161174 } else {
162- std::memcpy (tensor.data (), image.data .data (), image.data .size ());
175+ // Transpose HWC to CHW for CHW layout
176+ const auto * src = image.data .data ();
177+ auto * dst = static_cast <uint8_t *>(tensor.data ());
178+ size_t pixel_bytes = encoding_info.bytes_per_channel ;
179+
180+ for (size_t c = 0 ; c < encoding_info.channels ; ++c) {
181+ for (size_t h = 0 ; h < image.height ; ++h) {
182+ for (size_t w = 0 ; w < image.width ; ++w) {
183+ size_t src_idx = ((h * image.width + w) * encoding_info.channels + c) * pixel_bytes;
184+ size_t dst_idx = ((c * image.height + h) * image.width + w) * pixel_bytes;
185+ std::memcpy (dst + dst_idx, src + src_idx, pixel_bytes);
186+ }
187+ }
188+ }
163189 }
190+
164191 return tensor;
165192}
166193
167194Tensor from_image (
168- const std::vector<sensor_msgs::msg::Image> & images, std::shared_ptr<BackendMemoryAllocator> allocator)
195+ const std::vector<sensor_msgs::msg::Image> & images,
196+ std::shared_ptr<BackendMemoryAllocator> allocator,
197+ TensorLayout layout)
169198{
170199 if (images.empty ()) {
171200 throw std::invalid_argument (" Image batch is empty" );
@@ -174,10 +203,17 @@ Tensor from_image(
174203 // Get encoding info from first image
175204 auto encoding_info = get_image_encoding_info (images[0 ].encoding );
176205
177- // Create batch shape: [batch_size, height, width, channels] or [batch_size, height, width]
178- std::vector<size_t > shape = {images.size (), images[0 ].height , images[0 ].width };
179- if (encoding_info.channels > 1 ) {
180- shape.push_back (encoding_info.channels );
206+ // Create batch shape based on layout
207+ std::vector<size_t > shape;
208+ if (layout == TensorLayout::CHW) {
209+ // CHW: [batch_size, channels, height, width]
210+ shape = {images.size (), encoding_info.channels , images[0 ].height , images[0 ].width };
211+ } else {
212+ // HWC: [batch_size, height, width, channels]
213+ shape = {images.size (), images[0 ].height , images[0 ].width };
214+ if (encoding_info.channels > 1 ) {
215+ shape.push_back (encoding_info.channels );
216+ }
181217 }
182218
183219 // Validate all images have same dimensions and encoding
@@ -196,17 +232,39 @@ Tensor from_image(
196232 }
197233 }
198234
199- // Direct copy
200235 Tensor tensor (shape, encoding_info.dtype , allocator);
201236 auto * dst = static_cast <uint8_t *>(tensor.data ());
202-
203- for (size_t i = 0 ; i < images.size (); ++i) {
204- if (allocator) {
205- allocator->copy_from_host (dst + i * images[i].data .size (), images[i].data .data (), images[i].data .size ());
206- } else {
207- std::memcpy (dst + i * images[i].data .size (), images[i].data .data (), images[i].data .size ());
237+ size_t height = images[0 ].height ;
238+ size_t width = images[0 ].width ;
239+ size_t pixel_bytes = encoding_info.bytes_per_channel ;
240+
241+ if (layout == TensorLayout::HWC) {
242+ // Direct copy for HWC layout
243+ for (size_t i = 0 ; i < images.size (); ++i) {
244+ if (allocator) {
245+ allocator->copy_from_host (dst + i * images[i].data .size (), images[i].data .data (), images[i].data .size ());
246+ } else {
247+ std::memcpy (dst + i * images[i].data .size (), images[i].data .data (), images[i].data .size ());
248+ }
249+ }
250+ } else {
251+ // Transpose HWC to CHW for each image in batch
252+ for (size_t b = 0 ; b < images.size (); ++b) {
253+ const auto * src = images[b].data .data ();
254+ size_t batch_offset = b * encoding_info.channels * height * width * pixel_bytes;
255+
256+ for (size_t c = 0 ; c < encoding_info.channels ; ++c) {
257+ for (size_t h = 0 ; h < height; ++h) {
258+ for (size_t w = 0 ; w < width; ++w) {
259+ size_t src_idx = ((h * width + w) * encoding_info.channels + c) * pixel_bytes;
260+ size_t dst_idx = batch_offset + ((c * height + h) * width + w) * pixel_bytes;
261+ std::memcpy (dst + dst_idx, src + src_idx, pixel_bytes);
262+ }
263+ }
264+ }
208265 }
209266 }
267+
210268 return tensor;
211269}
212270
0 commit comments