@@ -28,6 +28,14 @@ namespace cvtool {
2828 }
2929 return default_device;
3030 }
31+
32+ bool can_get_default_device () {
33+ return default_device_set || !torch::mps::is_available ();
34+ }
35+
36+ torch::Device get_host_device () {
37+ return torch::Device (torch::kCPU );
38+ }
3139}
3240
3341static torch::Device default_device (torch::kCPU );
@@ -92,12 +100,47 @@ std::shared_ptr<at::Tensor> create_frame_buffer_tensor(int height,int width,torc
92100}
93101
94102at::Tensor to_tensor (cv::Mat &img) {
103+
104+
95105 auto t = torch::from_blob (img.data , {1 , img.rows , img.cols , 3 }, torch::kUInt8 ).clone ();
96106 t = t.to (default_device);
97107 t = t.to (torch::kFloat32 ).permute ({0 , 3 , 1 , 2 }) / 255.0 ;
98108 return t;// .to(default_device,true);
99109}
100110
111+ // --------------------------------------------------------------------
112+ // • img : any H×W×C OpenCV matrix (CV_8U, CV_32F, CV_16F …, planar or packed)
113+ // • device : torch::kCUDA, torch::kMPS or torch::kCPU (default = current CUDA if available)
114+ // --------------------------------------------------------------------
115+ at::Tensor to_tensor_ (const cv::Mat& img, torch::Device device = get_default_device())
116+ {
117+ // 1. Make sure the source data are contiguous
118+ cv::Mat contiguous = img.isContinuous () ? img : img.clone ();
119+
120+ // 2. Convert pixel type to 32‑bit float in [0,1] so we keep enough
121+ // head‑room for the later FP16 cast. (OpenCV has only limited
122+ // native FP16 support, so converting to CV_32F first is usually
123+ // safer and portable.)
124+ cv::Mat float32;
125+ contiguous.convertTo (float32, CV_32F, 1.0 / 255.0 ); // scale if img was CV_8U
126+
127+ // 3. Wrap the OpenCV buffer with a *view* tensor (no copy yet).
128+ auto tmp = torch::from_blob (
129+ float32.data , // raw pointer
130+ {float32.rows , float32.cols , float32.channels ()},
131+ torch::TensorOptions ().dtype (torch::kFloat32 ));
132+
133+ // 4. Re‑arrange to CHW, move to wanted device, cast to FP16 *and* copy
134+ // so that the returned tensor owns its storage (clone() is mandatory).
135+ auto t = tmp.permute ({2 , 0 , 1 }) // HWC → CHW
136+ .to (device, /* dtype=*/ torch::kFloat16 ,
137+ /* non_blocking=*/ true , /* copy=*/ true ) // copy = true ⇒ owns memory
138+ .clone (); // guarantees ownership
139+
140+ return t; // C×H×W, float16, on CUDA / MPS / CPU
141+ }
142+
143+
101144cv::Mat to_mat (at::Tensor &tensor) {
102145 // Ensure the tensor is on the CPU and not on the GPU
103146 // at::Tensor cpu_tensor = tensor.to(torch::kCPU);
0 commit comments