Commit 3073c5f

copy with permutation

1 parent 86a7a7e commit 3073c5f

8 files changed: +247 -81 lines changed

deep_conversions/include/deep_conversions/image_conversions.hpp

Lines changed: 6 additions & 5 deletions
@@ -59,27 +59,28 @@ ImageEncoding get_image_encoding_info(const std::string & encoding);
 /**
  * @brief Convert sensor_msgs::msg::Image to Tensor
  * @param image ROS Image message
- * @param allocator Memory allocator to use (uses CPU allocator if nullptr)
+ * @param allocator Memory allocator to use (required)
  * @param layout Tensor layout format (HWC or CHW)
  * @return Tensor with shape [1, height, width, channels] (HWC) or [1, channels, height, width] (CHW)
- * @throws std::runtime_error if image dimensions are invalid or data size mismatches
+ * @throws std::runtime_error if image dimensions are invalid, data size mismatches, or allocator is nullptr
  */
 Tensor from_image(
   const sensor_msgs::msg::Image & image,
-  std::shared_ptr<BackendMemoryAllocator> allocator = nullptr,
+  std::shared_ptr<BackendMemoryAllocator> allocator,
   TensorLayout layout = TensorLayout::HWC);

 /**
  * @brief Convert vector of sensor_msgs::msg::Image to batched Tensor
  * @param images Vector of ROS Image messages
- * @param allocator Memory allocator to use (uses CPU allocator if nullptr)
+ * @param allocator Memory allocator to use (required)
  * @param layout Tensor layout format (HWC or CHW)
  * @return Tensor with shape [batch_size, height, width, channels] (HWC) or [batch_size, channels, height, width] (CHW)
  * @throws std::invalid_argument if batch is empty or images have mismatched dimensions/encodings
+ * @throws std::runtime_error if allocator is nullptr
  */
 Tensor from_image(
   const std::vector<sensor_msgs::msg::Image> & images,
-  std::shared_ptr<BackendMemoryAllocator> allocator = nullptr,
+  std::shared_ptr<BackendMemoryAllocator> allocator,
   TensorLayout layout = TensorLayout::HWC);

 /**

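For orientation, a minimal call-site sketch of the updated signature (not part of this commit). The deep_ros:: namespace qualification, the get_backend_allocator() accessor, and rgb8 being a supported encoding are assumptions; only the from_image signature itself comes from the header above.

#include <deep_conversions/image_conversions.hpp>
#include <sensor_msgs/msg/image.hpp>

// Hypothetical: obtain an allocator from whichever backend plugin is loaded.
std::shared_ptr<deep_ros::BackendMemoryAllocator> allocator = get_backend_allocator();

sensor_msgs::msg::Image image;
image.height = 480;
image.width = 640;
image.encoding = "rgb8";                       // assumed supported: 3 channels, 1 byte each
image.step = image.width * 3;                  // bytes per row, must match exactly
image.data.resize(image.step * image.height);

// The allocator is now mandatory; passing nullptr throws std::runtime_error.
deep_ros::Tensor tensor =
  deep_ros::from_image(image, allocator, deep_ros::TensorLayout::CHW);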
deep_conversions/src/image_conversions.cpp

Lines changed: 78 additions & 74 deletions
@@ -127,26 +127,17 @@ ImageEncoding get_image_encoding_info(const std::string & encoding)
 Tensor from_image(
   const sensor_msgs::msg::Image & image, std::shared_ptr<BackendMemoryAllocator> allocator, TensorLayout layout)
 {
+  if (!allocator) {
+    throw std::runtime_error("Memory allocator is required for image conversion");
+  }
+
   if (image.height == 0 || image.width == 0) {
     throw std::runtime_error(
       "Invalid image dimensions: height=" + std::to_string(image.height) + ", width=" + std::to_string(image.width));
   }

   auto encoding_info = get_image_encoding_info(image.encoding);

-  // Create tensor with proper shape based on layout
-  std::vector<size_t> shape;
-  if (layout == TensorLayout::CHW) {
-    // CHW: [batch, channels, height, width]
-    shape = {1, encoding_info.channels, image.height, image.width};
-  } else {
-    // HWC: [batch, height, width, channels]
-    shape = {1, image.height, image.width};
-    if (encoding_info.channels > 1) {
-      shape.push_back(encoding_info.channels);
-    }
-  }
-
   // Validate step size (bytes per row)
   size_t expected_step = image.width * encoding_info.channels * encoding_info.bytes_per_channel;
   if (image.step != expected_step) {
@@ -162,30 +153,39 @@ Tensor from_image(
       std::to_string(image.data.size()));
   }

-  Tensor tensor(shape, encoding_info.dtype, allocator);
-
-  if (layout == TensorLayout::HWC) {
-    // Direct copy for HWC layout
-    if (allocator) {
-      allocator->copy_from_host(tensor.data(), image.data.data(), image.data.size());
-    } else {
-      std::memcpy(tensor.data(), image.data.data(), image.data.size());
+  // Create tensor with shape and copy data based on layout
+  std::vector<size_t> shape;
+  Tensor tensor;
+
+  switch (layout) {
+    case TensorLayout::CHW: {
+      // CHW: [batch, channels, height, width]
+      shape = {1, encoding_info.channels, image.height, image.width};
+      tensor = Tensor(shape, encoding_info.dtype, allocator);
+
+      // Use copy_from_host_permuted to copy and transpose in one operation
+      // Source is HWC: [1, height, width, channels]
+      // Permutation [0, 3, 1, 2] converts BHWC to BCHW
+      std::vector<size_t> src_shape = {1, image.height, image.width, encoding_info.channels};
+      std::vector<size_t> permutation = {0, 3, 1, 2};
+      allocator->copy_from_host_permuted(
+        tensor.data(), image.data.data(), src_shape, permutation, encoding_info.bytes_per_channel);
+      break;
     }
-  } else {
-    // Transpose HWC to CHW for CHW layout
-    const auto * src = image.data.data();
-    auto * dst = static_cast<uint8_t *>(tensor.data());
-    size_t pixel_bytes = encoding_info.bytes_per_channel;
-
-    for (size_t c = 0; c < encoding_info.channels; ++c) {
-      for (size_t h = 0; h < image.height; ++h) {
-        for (size_t w = 0; w < image.width; ++w) {
-          size_t src_idx = ((h * image.width + w) * encoding_info.channels + c) * pixel_bytes;
-          size_t dst_idx = ((c * image.height + h) * image.width + w) * pixel_bytes;
-          std::memcpy(dst + dst_idx, src + src_idx, pixel_bytes);
-        }
+    case TensorLayout::HWC: {
+      // HWC: [batch, height, width, channels]
+      shape = {1, image.height, image.width};
+      if (encoding_info.channels > 1) {
+        shape.push_back(encoding_info.channels);
       }
+      tensor = Tensor(shape, encoding_info.dtype, allocator);
+
+      // Direct copy for HWC layout
+      allocator->copy_from_host(tensor.data(), image.data.data(), image.data.size());
+      break;
     }
+    default:
+      throw std::invalid_argument("Unsupported tensor layout");
   }

   return tensor;
@@ -196,26 +196,17 @@ Tensor from_image(
   std::shared_ptr<BackendMemoryAllocator> allocator,
   TensorLayout layout)
 {
+  if (!allocator) {
+    throw std::runtime_error("Memory allocator is required for image conversion");
+  }
+
   if (images.empty()) {
     throw std::invalid_argument("Image batch is empty");
   }

   // Get encoding info from first image
   auto encoding_info = get_image_encoding_info(images[0].encoding);

-  // Create batch shape based on layout
-  std::vector<size_t> shape;
-  if (layout == TensorLayout::CHW) {
-    // CHW: [batch_size, channels, height, width]
-    shape = {images.size(), encoding_info.channels, images[0].height, images[0].width};
-  } else {
-    // HWC: [batch_size, height, width, channels]
-    shape = {images.size(), images[0].height, images[0].width};
-    if (encoding_info.channels > 1) {
-      shape.push_back(encoding_info.channels);
-    }
-  }
-
   // Validate all images have same dimensions and encoding
   size_t expected_size = images[0].height * images[0].width * encoding_info.channels * encoding_info.bytes_per_channel;
   for (size_t i = 0; i < images.size(); ++i) {
@@ -232,37 +223,50 @@ Tensor from_image(
     }
   }

-  Tensor tensor(shape, encoding_info.dtype, allocator);
-  auto * dst = static_cast<uint8_t *>(tensor.data());
-  size_t height = images[0].height;
-  size_t width = images[0].width;
-  size_t pixel_bytes = encoding_info.bytes_per_channel;
-
-  if (layout == TensorLayout::HWC) {
-    // Direct copy for HWC layout
-    for (size_t i = 0; i < images.size(); ++i) {
-      if (allocator) {
-        allocator->copy_from_host(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
-      } else {
-        std::memcpy(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
+  // Create batch tensor with shape and copy data based on layout
+  std::vector<size_t> shape;
+  Tensor tensor;
+
+  switch (layout) {
+    case TensorLayout::CHW: {
+      // CHW: [batch_size, channels, height, width]
+      shape = {images.size(), encoding_info.channels, images[0].height, images[0].width};
+      tensor = Tensor(shape, encoding_info.dtype, allocator);
+      auto * dst = static_cast<uint8_t *>(tensor.data());
+
+      // Use copy_from_host_permuted for each image
+      std::vector<size_t> src_shape = {1, images[0].height, images[0].width, encoding_info.channels};
+      std::vector<size_t> permutation = {0, 3, 1, 2};
+      size_t single_image_chw_size =
+        encoding_info.channels * images[0].height * images[0].width * encoding_info.bytes_per_channel;
+
+      for (size_t i = 0; i < images.size(); ++i) {
+        allocator->copy_from_host_permuted(
+          dst + i * single_image_chw_size,
+          images[i].data.data(),
+          src_shape,
+          permutation,
+          encoding_info.bytes_per_channel);
       }
+      break;
     }
-  } else {
-    // Transpose HWC to CHW for each image in batch
-    for (size_t b = 0; b < images.size(); ++b) {
-      const auto * src = images[b].data.data();
-      size_t batch_offset = b * encoding_info.channels * height * width * pixel_bytes;
-
-      for (size_t c = 0; c < encoding_info.channels; ++c) {
-        for (size_t h = 0; h < height; ++h) {
-          for (size_t w = 0; w < width; ++w) {
-            size_t src_idx = ((h * width + w) * encoding_info.channels + c) * pixel_bytes;
-            size_t dst_idx = batch_offset + ((c * height + h) * width + w) * pixel_bytes;
-            std::memcpy(dst + dst_idx, src + src_idx, pixel_bytes);
-          }
-        }
+    case TensorLayout::HWC: {
+      // HWC: [batch_size, height, width, channels]
+      shape = {images.size(), images[0].height, images[0].width};
+      if (encoding_info.channels > 1) {
+        shape.push_back(encoding_info.channels);
+      }
+      tensor = Tensor(shape, encoding_info.dtype, allocator);
+      auto * dst = static_cast<uint8_t *>(tensor.data());
+
+      // Direct copy for HWC layout
+      for (size_t i = 0; i < images.size(); ++i) {
+        allocator->copy_from_host(dst + i * images[i].data.size(), images[i].data.data(), images[i].data.size());
       }
+      break;
     }
+    default:
+      throw std::invalid_argument("Unsupported tensor layout");
   }

   return tensor;

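The permutation {0, 3, 1, 2} reads: destination axis 0 takes source axis 0 (batch), axis 1 takes source axis 3 (channels), axis 2 takes source axis 1 (height), axis 3 takes source axis 2 (width), i.e. BHWC becomes BCHW. For a single image this is equivalent to the hand-written transpose loop the commit removes; a standalone sketch of that index mapping, for illustration only:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Element-by-element HWC -> CHW copy for one image, matching what
// copy_from_host_permuted with src_shape {1, H, W, C} and permutation
// {0, 3, 1, 2} is expected to produce (mirrors the removed loop above).
void hwc_to_chw(
  uint8_t * dst, const uint8_t * src,
  size_t height, size_t width, size_t channels, size_t elem_size)
{
  for (size_t c = 0; c < channels; ++c) {
    for (size_t h = 0; h < height; ++h) {
      for (size_t w = 0; w < width; ++w) {
        size_t src_idx = ((h * width + w) * channels + c) * elem_size;
        size_t dst_idx = ((c * height + h) * width + w) * elem_size;
        std::memcpy(dst + dst_idx, src + src_idx, elem_size);
      }
    }
  }
}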
deep_core/include/deep_core/plugin_interfaces/backend_memory_allocator.hpp

Lines changed: 30 additions & 0 deletions
@@ -16,6 +16,7 @@

 #include <memory>
 #include <string>
+#include <vector>

 namespace deep_ros
 {
@@ -61,6 +62,25 @@ class BackendMemoryAllocator
    */
   void copy_from_host(void * dst, const void * src, size_t bytes);

+  /**
+   * @brief Copy data from host memory to allocated memory with permutation
+   *
+   * Copies and transposes data in a single operation.
+   *
+   * @param dst Destination pointer (allocated by this allocator)
+   * @param src Source pointer (host memory)
+   * @param src_shape Shape of the source data
+   * @param permutation Dimension permutation to apply during copy
+   * @param elem_size Size of each element in bytes
+   * @throws std::invalid_argument if parameters are invalid
+   */
+  void copy_from_host_permuted(
+    void * dst,
+    const void * src,
+    const std::vector<size_t> & src_shape,
+    const std::vector<size_t> & permutation,
+    size_t elem_size);
+
   /**
    * @brief Copy data from allocated memory to host (CPU) memory
    * @param dst Destination pointer (host memory)
@@ -85,6 +105,16 @@ class BackendMemoryAllocator
    */
   virtual void copy_from_host_impl(void * dst, const void * src, size_t bytes) = 0;

+  /**
+   * @brief Implementation of copy_from_host_permuted (to be overridden by backends)
+   */
+  virtual void copy_from_host_permuted_impl(
+    void * dst,
+    const void * src,
+    const std::vector<size_t> & src_shape,
+    const std::vector<size_t> & permutation,
+    size_t elem_size) = 0;
+
   /**
    * @brief Implementation of copy_to_host (to be overridden by backends)
    */

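The convention implied by the docstring and by the call sites in image_conversions.cpp: the destination buffer is row-major with the source axes reordered by the permutation. A small hypothetical helper, not part of this API, that spells out the relationship:

#include <cstddef>
#include <vector>

// dst_shape[i] = src_shape[permutation[i]]; e.g. src_shape {1, H, W, C} with
// permutation {0, 3, 1, 2} yields a destination shape of {1, C, H, W}.
std::vector<size_t> permuted_shape(
  const std::vector<size_t> & src_shape, const std::vector<size_t> & permutation)
{
  std::vector<size_t> dst_shape(permutation.size());
  for (size_t i = 0; i < permutation.size(); ++i) {
    dst_shape[i] = src_shape[permutation[i]];
  }
  return dst_shape;
}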
deep_core/src/backend_memory_allocator.cpp

Lines changed: 20 additions & 0 deletions
@@ -15,6 +15,7 @@
 #include "deep_core/plugin_interfaces/backend_memory_allocator.hpp"

 #include <stdexcept>
+#include <vector>

 namespace deep_ros
 {
@@ -27,6 +28,25 @@ void BackendMemoryAllocator::copy_from_host(void * dst, const void * src, size_t
   copy_from_host_impl(dst, src, bytes);
 }

+void BackendMemoryAllocator::copy_from_host_permuted(
+  void * dst,
+  const void * src,
+  const std::vector<size_t> & src_shape,
+  const std::vector<size_t> & permutation,
+  size_t elem_size)
+{
+  if (dst == nullptr || src == nullptr) {
+    throw std::invalid_argument("Null pointer passed to copy_from_host_permuted");
+  }
+  if (src_shape.empty() || permutation.empty()) {
+    throw std::invalid_argument("Empty shape or permutation passed to copy_from_host_permuted");
+  }
+  if (src_shape.size() != permutation.size()) {
+    throw std::invalid_argument("Shape and permutation size mismatch in copy_from_host_permuted");
+  }
+  copy_from_host_permuted_impl(dst, src, src_shape, permutation, elem_size);
+}
+
 void BackendMemoryAllocator::copy_to_host(void * dst, const void * src, size_t bytes)
 {
   if (bytes > 0 && (dst == nullptr || src == nullptr)) {

deep_ort_backend_plugin/include/deep_ort_backend_plugin/ort_cpu_memory_allocator.hpp

Lines changed: 16 additions & 0 deletions
@@ -18,6 +18,7 @@

 #include <memory>
 #include <string>
+#include <vector>

 #include <deep_core/plugin_interfaces/backend_memory_allocator.hpp>

@@ -90,6 +91,21 @@ class OrtCpuMemoryAllocator : public deep_ros::BackendMemoryAllocator
    */
   void copy_from_host_impl(void * dst, const void * src, size_t bytes) override;

+  /**
+   * @brief Copy from host memory with permutation
+   * @param dst Destination pointer
+   * @param src Source pointer
+   * @param src_shape Shape of source data
+   * @param permutation Dimension permutation
+   * @param elem_size Element size in bytes
+   */
+  void copy_from_host_permuted_impl(
+    void * dst,
+    const void * src,
+    const std::vector<size_t> & src_shape,
+    const std::vector<size_t> & permutation,
+    size_t elem_size) override;
+
   /**
    * @brief Copy to host memory (same as device for CPU)
    * @param dst Destination pointer

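The CPU definition of copy_from_host_permuted_impl lives in a .cpp that is among the 8 changed files but not shown in this view. As a rough sketch only (the actual OrtCpuMemoryAllocator code may differ), a generic element-wise permuted copy for host memory could look like this:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Generic row-major permuted copy: the element at source index (i0, ..., in)
// lands at destination index (i[p[0]], ..., i[p[n]]).
void copy_permuted_cpu(
  void * dst, const void * src,
  const std::vector<size_t> & src_shape,
  const std::vector<size_t> & permutation,
  size_t elem_size)
{
  const size_t rank = src_shape.size();

  // Total element count and destination strides (row-major over the permuted shape).
  size_t total = 1;
  for (size_t d : src_shape) {
    total *= d;
  }
  std::vector<size_t> dst_shape(rank), dst_strides(rank, 1);
  for (size_t i = 0; i < rank; ++i) {
    dst_shape[i] = src_shape[permutation[i]];
  }
  for (size_t i = rank - 1; i > 0; --i) {
    dst_strides[i - 1] = dst_strides[i] * dst_shape[i];
  }

  const auto * src_bytes = static_cast<const uint8_t *>(src);
  auto * dst_bytes = static_cast<uint8_t *>(dst);
  std::vector<size_t> idx(rank, 0);  // multi-index into the source, row-major order

  for (size_t linear = 0; linear < total; ++linear) {
    size_t dst_offset = 0;
    for (size_t i = 0; i < rank; ++i) {
      dst_offset += idx[permutation[i]] * dst_strides[i];
    }
    std::memcpy(dst_bytes + dst_offset * elem_size, src_bytes + linear * elem_size, elem_size);

    // Advance the source multi-index.
    for (size_t i = rank; i-- > 0;) {
      if (++idx[i] < src_shape[i]) {
        break;
      }
      idx[i] = 0;
    }
  }
}

With src_shape {1, H, W, C} and permutation {0, 3, 1, 2} this reproduces the same BHWC to BCHW reordering that from_image now requests from the allocator.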