Update forward & backward for rendered alpha image (#70)

Zhuoyang-Pan · web-flow · commit 0c305abd816c · 2024-01-16T10:46:07.000-08:00
* finish alpha forward & backward

* black format

* fix some merging issues

* remove unnecessary .cuda()

* add return_alpha keyword

* add some notes

* black reformat
diff --git a/examples/test_rasterize.py b/examples/test_rasterize.py
@@ -155,7 +155,7 @@ def train(self, iterations: int = 1000, lr: float = 0.01, save_imgs: bool = True
         frames = []
         for i in range(iterations):
             optimizer.zero_grad()
-            slow_out = self.forward_slow()
+            slow_out, _ = self.forward_slow()
 
             loss = mse_loss(slow_out, self.gt_image)
             loss.backward()
@@ -168,7 +168,7 @@ def train(self, iterations: int = 1000, lr: float = 0.01, save_imgs: bool = True
             ]
 
             optimizer.zero_grad()
-            new_out = self.forward_new()
+            new_out, _ = self.forward_new()
             loss = mse_loss(new_out, self.gt_image)
             loss.backward()
 
diff --git a/gsplat/cuda/csrc/backward.cu b/gsplat/cuda/csrc/backward.cu
@@ -18,6 +18,7 @@ __global__ void nd_rasterize_backward_kernel(
     const float* __restrict__ final_Ts,
     const int* __restrict__ final_index,
     const float* __restrict__ v_output,
+    const float* __restrict__ v_output_alpha,
     float2* __restrict__ v_xy,
     float3* __restrict__ v_conic,
     float* __restrict__ v_rgb,
@@ -45,6 +46,7 @@ __global__ void nd_rasterize_backward_kernel(
     int2 range = tile_bins[tile_id];
     // df/d_out for this pixel
     const float *v_out = &(v_output[channels * pix_id]);
+    const float v_out_alpha = v_output_alpha[pix_id];
     // this is the T AFTER the last gaussian in this pixel
     float T_final = final_Ts[pix_id];
     float T = T_final;
@@ -97,7 +99,7 @@ __global__ void nd_rasterize_backward_kernel(
             // update the running sum
             S[c] += rgbs[channels * g + c] * fac;
         }
-
+        v_alpha += T_final * ra * v_out_alpha;
         // update v_opacity for this gaussian
         atomicAdd(&(v_opacity[g]), vis * v_alpha);
 
@@ -146,6 +148,7 @@ __global__ void rasterize_backward_kernel(
     const float* __restrict__ final_Ts,
     const int* __restrict__ final_index,
     const float3* __restrict__ v_output,
+    const float* __restrict__ v_output_alpha,
     float2* __restrict__ v_xy,
     float3* __restrict__ v_conic,
     float3* __restrict__ v_rgb,
@@ -188,6 +191,7 @@ __global__ void rasterize_backward_kernel(
 
     // df/d_out for this pixel
     const float3 v_out = v_output[pix_id];
+    const float v_out_alpha = v_output_alpha[pix_id];
 
     // collect and process batches of gaussians
     // each thread loads one gaussian at a time before rasterizing
@@ -265,6 +269,8 @@ __global__ void rasterize_backward_kernel(
                 v_alpha += (rgb.x * T - buffer.x * ra) * v_out.x;
                 v_alpha += (rgb.y * T - buffer.y * ra) * v_out.y;
                 v_alpha += (rgb.z * T - buffer.z * ra) * v_out.z;
+
+                v_alpha += T_final * ra * v_out_alpha;
                 // contribution from background pixel
                 v_alpha += -T_final * ra * background.x * v_out.x;
                 v_alpha += -T_final * ra * background.y * v_out.y;
diff --git a/gsplat/cuda/csrc/backward.cuh b/gsplat/cuda/csrc/backward.cuh
@@ -43,6 +43,7 @@ __global__ void nd_rasterize_backward_kernel(
     const float* __restrict__ final_Ts,
     const int* __restrict__ final_index,
     const float* __restrict__ v_output,
+    const float* __restrict__ v_output_alpha,
     float2* __restrict__ v_xy,
     float3* __restrict__ v_conic,
     float* __restrict__ v_rgb,
@@ -63,6 +64,7 @@ __global__ void rasterize_backward_kernel(
     const float* __restrict__ final_Ts,
     const int* __restrict__ final_index,
     const float3* __restrict__ v_output,
+    const float* __restrict__ v_output_alpha,
     float2* __restrict__ v_xy,
     float3* __restrict__ v_conic,
     float3* __restrict__ v_rgb,
diff --git a/gsplat/cuda/csrc/bindings.cu b/gsplat/cuda/csrc/bindings.cu
@@ -486,7 +486,8 @@ std::
         const torch::Tensor &background,
         const torch::Tensor &final_Ts,
         const torch::Tensor &final_idx,
-        const torch::Tensor &v_output // dL_dout_color
+        const torch::Tensor &v_output, // dL_dout_color
+        const torch::Tensor &v_output_alpha // dL_dout_alpha
     ) {
 
     CHECK_INPUT(xys);
@@ -540,6 +541,7 @@ std::
         final_Ts.contiguous().data_ptr<float>(),
         final_idx.contiguous().data_ptr<int>(),
         v_output.contiguous().data_ptr<float>(),
+        v_output_alpha.contiguous().data_ptr<float>(),
         (float2 *)v_xy.contiguous().data_ptr<float>(),
         (float3 *)v_conic.contiguous().data_ptr<float>(),
         v_colors.contiguous().data_ptr<float>(),
@@ -569,7 +571,8 @@ std::
         const torch::Tensor &background,
         const torch::Tensor &final_Ts,
         const torch::Tensor &final_idx,
-        const torch::Tensor &v_output // dL_dout_color
+        const torch::Tensor &v_output, // dL_dout_color
+        const torch::Tensor &v_output_alpha // dL_dout_alpha
     ) {
 
     CHECK_INPUT(xys);
@@ -612,6 +615,7 @@ std::
         final_Ts.contiguous().data_ptr<float>(),
         final_idx.contiguous().data_ptr<int>(),
         (float3 *)v_output.contiguous().data_ptr<float>(),
+        v_output_alpha.contiguous().data_ptr<float>(),
         (float2 *)v_xy.contiguous().data_ptr<float>(),
         (float3 *)v_conic.contiguous().data_ptr<float>(),
         (float3 *)v_colors.contiguous().data_ptr<float>(),
diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h
@@ -157,7 +157,8 @@ std::
         const torch::Tensor &background,
         const torch::Tensor &final_Ts,
         const torch::Tensor &final_idx,
-        const torch::Tensor &v_output // dL_dout_color
+        const torch::Tensor &v_output, // dL_dout_color
+        const torch::Tensor &v_output_alpha
     );
 
 std::
@@ -179,5 +180,6 @@ std::
         const torch::Tensor &background,
         const torch::Tensor &final_Ts,
         const torch::Tensor &final_idx,
-        const torch::Tensor &v_output // dL_dout_color
+        const torch::Tensor &v_output, // dL_dout_color
+        const torch::Tensor &v_output_alpha
     );
diff --git a/gsplat/rasterize.py b/gsplat/rasterize.py
@@ -22,6 +22,7 @@ def rasterize_gaussians(
     img_height: int,
     img_width: int,
     background: Optional[Float[Tensor, "channels"]] = None,
+    return_alpha: Optional[bool] = False,
 ) -> Tensor:
     """Rasterizes 2D gaussians by sorting and binning gaussian intersections for each tile and returns an N-dimensional output using alpha-compositing.
 
@@ -39,11 +40,13 @@ def rasterize_gaussians(
         img_height (int): height of the rendered image.
         img_width (int): width of the rendered image.
         background (Tensor): background color
+        return_alpha (bool): whether to return alpha channel
 
     Returns:
         A Tensor:
 
         - **out_img** (Tensor): N-dimensional rendered output image.
+        - **out_alpha** (Optional[Tensor]): Alpha channel of the rendered output image.
     """
     if colors.dtype == torch.uint8:
         # make sure colors are float [0,1]
@@ -75,6 +78,7 @@ def rasterize_gaussians(
         img_height,
         img_width,
         background.contiguous(),
+        return_alpha,
     )
 
 
@@ -94,6 +98,7 @@ def forward(
         img_height: int,
         img_width: int,
         background: Optional[Float[Tensor, "channels"]] = None,
+        return_alpha: Optional[bool] = False,
     ) -> Tensor:
         num_points = xys.size(0)
         BLOCK_X, BLOCK_Y = 16, 16
@@ -148,13 +153,20 @@ def forward(
             final_idx,
         )
 
-        return out_img
+        if return_alpha:
+            out_alpha = 1 - final_Ts
+            return out_img, out_alpha
+        else:
+            return out_img
 
     @staticmethod
-    def backward(ctx, v_out_img):
+    def backward(ctx, v_out_img, v_out_alpha=None):
         img_height = ctx.img_height
         img_width = ctx.img_width
 
+        if v_out_alpha is None:
+            v_out_alpha = torch.zeros_like(v_out_img[..., 0])
+
         (
             gaussian_ids_sorted,
             tile_bins,
@@ -184,6 +196,7 @@ def backward(ctx, v_out_img):
             final_Ts,
             final_idx,
             v_out_img,
+            v_out_alpha,
         )
 
         return (
@@ -197,4 +210,5 @@ def backward(ctx, v_out_img):
             None,  # img_height
             None,  # img_width
             None,  # background
+            None,  # return_alpha
         )