Iainmon
diff --git a/‎demos/video/include/cvtool.hpp‎
Lines changed: 43 additions & 0 deletions b/‎demos/video/include/cvtool.hpp‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎demos/video/style-transfer/models/mosaic_float16.pt‎
0 Bytes b/‎demos/video/style-transfer/models/mosaic_float16.pt‎
0 Bytes
diff --git a/‎demos/video/style-transfer/models/mosaic_float32.pt‎
6.47 MB b/‎demos/video/style-transfer/models/mosaic_float32.pt‎
6.47 MB
diff --git a/‎demos/video/style-transfer/style_transfer.cpp‎
Lines changed: 19 additions & 2 deletions b/‎demos/video/style-transfer/style_transfer.cpp‎
Lines changed: 19 additions & 2 deletions
diff --git a/‎examples/pytorch-examples/fast_neural_style/models/mosaic_float16.pt‎
3.27 MB b/‎examples/pytorch-examples/fast_neural_style/models/mosaic_float16.pt‎
3.27 MB
diff --git a/‎examples/pytorch-examples/fast_neural_style/models/mosaic_float32.pt‎
6.47 MB b/‎examples/pytorch-examples/fast_neural_style/models/mosaic_float32.pt‎
6.47 MB
diff --git a/‎examples/pytorch-examples/fast_neural_style/neural_style/neural_style.py‎
Lines changed: 3 additions & 2 deletions b/‎examples/pytorch-examples/fast_neural_style/neural_style/neural_style.py‎
Lines changed: 3 additions & 2 deletions
@@ -28,6 +28,14 @@ namespace cvtool {
         }
         return default_device;
     }
+
+    // Reports whether a default device can be handed out right now:
+    // true once `default_device_set` is set, and otherwise only when
+    // torch::mps::is_available() reports that MPS is unavailable.
+    bool can_get_default_device() {
+        if (default_device_set) {
+            return true;
+        }
+        return !torch::mps::is_available();
+    }
+
+    // Returns a torch::Device describing the CPU (the host).
+    torch::Device get_host_device() {
+        const torch::Device host_device(torch::kCPU);
+        return host_device;
+    }
 }
 
 // File-local device handle, initialised to the CPU.
 // NOTE(review): presumably reassigned elsewhere once a device is chosen -- confirm.
 static torch::Device default_device(torch::kCPU);
@@ -92,12 +100,47 @@ std::shared_ptr<at::Tensor> create_frame_buffer_tensor(int height,int width,torc
 }
 
 // Converts an 8-bit, 3-channel OpenCV image into a float32 tensor of shape
 // [1, 3, rows, cols] located on `default_device`, with every value divided
 // by 255.
 //
 // img : CV_8UC3 image (an empty Mat is passed through and yields a tensor
 //       with zero elements). Any other type is rejected via TORCH_CHECK,
 //       because the pixel buffer is interpreted as rows*cols*3 bytes.
 //
 // The returned tensor owns its storage and does not alias `img`.
 at::Tensor to_tensor(cv::Mat &img) {
     TORCH_CHECK(img.empty() || img.type() == CV_8UC3,
                 "to_tensor expects a CV_8UC3 image, got OpenCV type ", img.type());

     // from_blob reads rows*cols*3 consecutive bytes, so a Mat whose rows are
     // not densely packed (ROI view, padded rows) must be packed first.
     cv::Mat packed = img.isContinuous() ? img : img.clone();

     // clone() detaches the tensor from the OpenCV buffer before it is moved.
     auto t = torch::from_blob(packed.data, {1, packed.rows, packed.cols, 3}, torch::kUInt8).clone();
     t = t.to(default_device);
     // NHWC -> NCHW, scaled by 1/255.
     t = t.to(torch::kFloat32).permute({0, 3, 1, 2}) / 255.0;
     return t;
 }
 
+//--------------------------------------------------------------------
+// Converts an OpenCV image into a C x H x W float16 tensor on `device`.
+//
+// • img    : H x W x C OpenCV matrix. CV_8U input is rescaled by 1/255;
+//            every other depth is converted to float without rescaling.
+// • device : target device (default = get_default_device()).
+//
+// Returns a tensor that owns its storage and does not alias `img`.
+//--------------------------------------------------------------------
+at::Tensor to_tensor_(const cv::Mat& img, torch::Device device = get_default_device())
+{
+    // 1. Convert the pixels to 32-bit float first (OpenCV has only limited
+    //    native FP16 support). Only 8-bit input is rescaled by 1/255; other
+    //    depths keep their values. convertTo accepts a non-continuous source
+    //    and writes into a freshly allocated destination, so no separate
+    //    "make contiguous" copy of `img` is needed.
+    const double scale = (img.depth() == CV_8U) ? 1.0 / 255.0 : 1.0;
+    cv::Mat float32;
+    img.convertTo(float32, CV_32F, scale);
+
+    // 2. Wrap the OpenCV buffer with a *view* tensor (no copy yet).
+    auto hwc = torch::from_blob(
+                  float32.data,                             // raw pointer
+                  {float32.rows, float32.cols, float32.channels()},
+                  torch::TensorOptions().dtype(torch::kFloat32));
+
+    // 3. Re-arrange to CHW, move to the wanted device and cast to FP16.
+    //    copy = true makes the result own its storage, so no extra clone()
+    //    is required. The copy is blocking on purpose: `float32` is destroyed
+    //    when this function returns, so an asynchronous transfer could still
+    //    be reading from a released buffer.
+    return hwc.permute({2, 0, 1})                          // HWC -> CHW
+              .to(device, /*dtype=*/torch::kFloat16,
+                  /*non_blocking=*/false, /*copy=*/true);
+}
+
+
 cv::Mat to_mat(at::Tensor &tensor) {
     // Ensure the tensor is on the CPU and not on the GPU
     // at::Tensor cpu_tensor = tensor.to(torch::kCPU);
 
@@ -14,7 +14,7 @@
 
 // Forward declaration. NOTE(review): the definition is not visible in this hunk;
 // parameter meanings below are taken from their names only -- confirm.
 //   module        : TorchScript module to run
 //   cam_index     : presumably the capture-device index
 //   max_fps       : presumably an upper bound on the frame rate
 //   is_video_loop : presumably whether a video file is replayed in a loop
 //   vid_path      : presumably the path of the video file
 int run_webcam_model(torch::jit::Module& module, int cam_index, int max_fps, bool is_video_loop, std::string vid_path);
 
-static torch::Device default_device_st = torch::Device(torch::kCPU);
+// File-local device handle, fixed to MPS.
+// NOTE(review): hard-coded; presumably unusable on machines without MPS
+// support -- confirm whether a CPU fallback is wanted here.
+static torch::Device default_device_st = torch::Device(torch::kMPS);
 
 
 torch::jit::Module load_model(const std::string& model_path) {
@@ -48,6 +48,23 @@ torch::Tensor preprocess_input(const torch::Tensor& input) {
 }
 
 torch::Tensor run_model(torch::jit::Module& module, const torch::Tensor& input) {
+
+    auto input_dtype = input.dtype();
+    std::cout.flush();
+    std::cout << "Input dtype: " << input.dtype() << std::endl;
+    std::cout << "Input sizes: " << input.sizes() << std::endl;
+    std::cout << "Input device: " << input.device() << std::endl;
+    std::cout.flush();
+    std::system("pause");
+
+    // auto model_dtype = module.dtype();
+    // std::cout << "Module: " << module << std::endl;
+
+
+    module.to(torch::kMPS);
+    module.eval();
+
+
     std::vector<torch::jit::IValue> inputs;
     inputs.push_back(input);
 
@@ -80,7 +97,7 @@ int main() {
 
     // default_device = default_device_st;
 
-    std::string model_path = "style-transfer/models/mosaic.pt";
+    std::string model_path = "style-transfer/models/mosaic_float32.pt";
     torch::jit::Module module = load_model(model_path);
     torch::Tensor input = torch::randn({1, 3, 1428, 1904}, device);
     torch::Tensor output = run_model(module, input);
 
@@ -185,8 +185,9 @@ def stylize(args):
             utils.save_image(args.output_image, output[0])
             from pathlib import Path
             model_name = Path(args.model).stem
-            sm = torch.jit.script(style_model)
-            sm.save(f"models/{model_name}.pt")
+
+            sm = torch.jit.script(style_model.to(torch.float32))
+            sm.save(f"models/{model_name}_float32.pt")
 
             sm = torch.jit.script(style_model.to(torch.float16))
             sm.save(f"models/{model_name}_float16.pt")