ImageNet normalization now works for VGG preprocessing.

Iainmon · Iainmon · commit 5c469658222c · 2025-04-18T23:51:59.000-07:00
diff --git a/bridge/include/bridge.h b/bridge/include/bridge.h
@@ -34,6 +34,7 @@ bridge_tensor_t load_tensor_from_file(const uint8_t* file_path);
 bridge_tensor_t load_tensor_dict_from_file(const uint8_t* file_path,const uint8_t* tensor_key);
 bridge_tensor_t load_run_model(const uint8_t* model_path, bridge_tensor_t input);
 bridge_tensor_t resize(bridge_tensor_t input,int height,int width);
+bridge_tensor_t imagenet_normalize(bridge_tensor_t input); // returns (input - mean) / std using fixed 3-channel mean/std constants
 
 
 int baz(void);
diff --git a/bridge/lib/bridge.cpp b/bridge/lib/bridge.cpp
@@ -206,57 +206,6 @@ extern "C" bridge_tensor_t max_pool2d(
     return torch_to_bridge(output);
 }
 
-/*
-
- * Resize the last two dimensions of a tensor, mimicking torchvision.transforms.Resize.
- *
- *  • Works with 2‑D (H,W), 3‑D (C,H,W or N,H,W), 4‑D (N,C,H,W) … tensors – any rank ≥ 2.  
- *  • Leading dimensions are preserved; only the final H × W are resized.  
- *  • Defaults match torchvision: bilinear for floating tensors, align_corners=false.  
- *
- * @param input          Tensor on CPU or CUDA.
- * @param new_h          Target height.
- * @param new_w          Target width.
- * @param mode           Interpolation mode (“bilinear”, “nearest”, “bicubic”, …).
- * @param align_corners  Forwarded to F::interpolate (ignored for “nearest”).
- * @return               Tensor with shape …, new_h, new_w and same dtype / device.
-
-inline torch::Tensor resize_tensor_last2(
-        const torch::Tensor& input,
-        int64_t              new_h,
-        int64_t              new_w,
-        const std::string&   mode           = "bilinear",
-        bool                 align_corners  = false) {
-
-    // Keep dtype/device; cast to float if interpolate needs it
-    const bool need_cast = !input.is_floating_point() && mode != "nearest";
-    auto x = need_cast ? input.to(torch::kFloat32) : input;
-    x = x.contiguous();                         // guarantees a re‑view is safe
-
-    // Collapse every axis except the last two into a single batch dimension.
-    const int64_t h  = x.size(-2);
-    const int64_t w  = x.size(-1);
-    const int64_t flat = x.numel() / (h * w);   // product of leading dims
-
-    auto x4d = x.view({flat, 1, h, w});         // N=flat, C=1, H, W
-
-    // Interpolate – equivalent to torchvision.transforms.Resize for tensors.
-    auto y4d = torch::nn::functional::interpolate(
-        x4d,
-        torch::nn::functional::InterpolateFuncOptions()
-            .size(std::vector<int64_t>{new_h, new_w})
-            .mode(mode)
-            .align_corners(align_corners));
-
-    // Restore the original leading shape.
-    std::vector<int64_t> out_shape(input.sizes().begin(), input.sizes().end() - 2);
-    out_shape.push_back(new_h);
-    out_shape.push_back(new_w);
-
-    auto y = y4d.view(out_shape);
-    return need_cast ? y.to(input.scalar_type()) : y;
-}
-*/
 extern "C" bridge_tensor_t resize(
     bridge_tensor_t input,
     int height,
@@ -294,6 +243,26 @@ extern "C" bridge_tensor_t resize(
     }
 }
 
+// ImageNet normalization: (input - mean) / std with fixed 3-channel statistics.
+// The (3,1,1) statistics broadcast over H,W and any leading batch dimension, so
+// both (3,H,W) and (N,3,H,W) inputs work. NOTE(review): presumably expects values
+// already scaled to [0,1]; no scaling or channel-count check happens here.
+extern "C" bridge_tensor_t imagenet_normalize(bridge_tensor_t input) {
+    static const std::vector<float> kMean{0.485, 0.456, 0.406};
+    static const std::vector<float> kStd {0.229, 0.224, 0.225};
+
+    auto image = bridge_to_torch(input);
+
+    // Move the statistics to the image's device; they are created on the default
+    // device (normally CPU), which would not mix with a tensor living elsewhere.
+    auto mean   = torch::tensor(kMean).reshape({3, 1, 1}).to(image.device());
+    auto stddev = torch::tensor(kStd).reshape({3, 1, 1}).to(image.device());
+
+    auto output = (image - mean) / stddev;
+    return torch_to_bridge(output);
+}
+
+
 
 // extern "C"
 
diff --git a/examples/torch_model_loading/torch_load.chpl b/examples/torch_model_loading/torch_load.chpl
@@ -11,17 +11,24 @@ proc main(args: [] string) {
     writeln("Loaded image: ", args[1]);
     writeln("Image shape: ", image.shape);
 
-    image = image.resize(224,224);
-    writeln("Resized image: ", image.shape);
+    writeln("image : ", max reduce image.data);
 
-    var batchedImage = ndarray.loadFrom(args[1],3,real(32)).unsqueeze(0);
-    writeln("Batched image: ", batchedImage.shape);
+    image = image.imageNetNormalize();
+    writeln("image : ", max reduce image.data);
 
-    batchedImage = batchedImage.resize(224,224);
-    writeln("Batched image resized: ", batchedImage.shape);
 
-    image = batchedImage.squeeze(3);
-    writeln("Squeezed image: ", image.shape);
+
+    // image = image.resize(224,224).imageNetNormalize();
+    // writeln("Resized image: ", image.shape);
+
+    // var batchedImage = ndarray.loadFrom(args[1],3,real(32)).unsqueeze(0);
+    // writeln("Batched image: ", batchedImage.shape);
+
+    // batchedImage = batchedImage.resize(224,224);
+    // writeln("Batched image resized: ", batchedImage.shape);
+
+    // image = batchedImage.squeeze(3).imageNetNormalize();
+    // writeln("Squeezed image: ", image.shape);
 
     image.saveImage("test.jpg");
 }
diff --git a/lib/Bridge.chpl b/lib/Bridge.chpl
@@ -78,6 +78,9 @@ module Bridge {
         in input: bridge_tensor_t, 
         in height: int(32), 
         in width: int(32)): bridge_tensor_t;
+
+    extern "imagenet_normalize" proc imageNetNormalize( // binds the C symbol imagenet_normalize
+        in input: bridge_tensor_t): bridge_tensor_t;
     
 
 
diff --git a/lib/NDArray.chpl b/lib/NDArray.chpl
@@ -2230,6 +2230,11 @@ proc ndarray.resize(height: int,width: int) {
         width : int(32)) : ndarray(rank,eltType);
 }
 
+proc ndarray.imageNetNormalize() { // delegates to Bridge.imageNetNormalize (C symbol imagenet_normalize)
+    return Bridge.imageNetNormalize(
+        this : Bridge.tensorHandle(eltType)) : ndarray(rank,eltType); // result is cast back to the caller's rank and eltType
+}
+
 proc type ndarray.loadImage(imagePath: string, type eltType = defaultEltType): ndarray(3,eltType) throws {
     import Image;