Implement Camera Intrinsics & compute ray direction in camera frame (#22)

horizon-blue · web-flow · commit 1bb706cfbe71 · 2025-11-29T20:36:58.000-05:00
(Part of MET-48)

## Summary of Changes

This PR completes the camera `Intrinsics` and defines two functions for
it:

- `get_ray_direction`: given a pixel coordinate `(px, py)`, returns the
direction of the ray in camera frame
- `get_ray_directions`: returns a 2D array of all ray directions in the
camera frame. If a range is specified, then this method only computes a
subset of the rays.

Depending on how we implement the kernel at the end, we may not need both
functions, but let's still have them here now just in case...

## Test Plans

On the Python side, you can find a basic correctness test in
`test_camera.py`, which checks the value of our
`Intrinsics::get_ray_direction` against the ground truth function
refactored from `run_fmb.py`.

Due to the complexity of binding `Array2D<Vec3D, device>` to Python, I'm
leaving the testing of the batched `get_ray_directions` to the C++ side,
where `test_camera.cu` only performs a basic sanity check to ensure that
adjacent rays point in different directions (and, of
course, that the function runs without error).

As always, you can run the tests with

```bash
pixi run test
```
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -14,6 +14,8 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 ################
 
 add_library(genmetaballs_core
+  genmetaballs/src/cuda/core/camera.cu
+  genmetaballs/src/cuda/core/camera.cuh
   genmetaballs/src/cuda/core/utils.cu
   genmetaballs/src/cuda/core/utils.cuh
   genmetaballs/src/cuda/core/geometry.cuh
diff --git a/examples/run_fmb.py b/examples/run_fmb.py
@@ -33,7 +33,7 @@
 from tqdm import tqdm
 
 # Import local utilities
-from genmetaballs.fmb.utils import DegradeLR, image_grid
+from genmetaballs.fmb.utils import DegradeLR, get_camera_rays, image_grid
 
 CURRENT_DIR = Path(__file__).parent
 PROJECT_ROOT = CURRENT_DIR.parent
@@ -475,14 +475,12 @@ def main():
 
     # Setup camera rays
     height, width = image_size
-    K = np.array([[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]])
     pixel_list = (
         (np.array(np.meshgrid(np.arange(width), height - np.arange(height) - 1, [0]))[:, :, :, 0])
         .reshape((3, -1))
         .T
     )
-    camera_rays = (pixel_list - K[:, 2]) / np.diag(K)
-    camera_rays[:, -1] = -1
+    camera_rays = get_camera_rays(focal_length, focal_length, cx, cy, pixel_list)
     cameras_list = []
     for tran, quat in zip(trans, rand_quats, strict=False):
         R = transforms3d.quaternions.quat2mat(quat)
diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu
@@ -5,6 +5,7 @@
 #include <nanobind/stl/vector.h>
 
 #include "core/blender.cuh"
+#include "core/camera.cuh"
 #include "core/confidence.cuh"
 #include "core/geometry.cuh"
 #include "core/utils.cuh"
@@ -65,7 +66,24 @@ NB_MODULE(_genmetaballs_bindings, m) {
         .def_rw("direction", &Ray::direction);
 
     /*
-     * Confidence submodule bindings
+     * Camera module bindings
+     */
+    nb::module_ camera = m.def_submodule("camera", "Camera intrinsics and extrinsics");
+    nb::class_<Intrinsics>(camera, "Intrinsics")
+        .def(nb::init<uint32_t, uint32_t, float, float, float, float>(), nb::arg("height"),
+             nb::arg("width"), nb::arg("fx"), nb::arg("fy"), nb::arg("cx"), nb::arg("cy"))
+        .def_ro("height", &Intrinsics::height)
+        .def_ro("width", &Intrinsics::width)
+        .def_ro("fx", &Intrinsics::fx)
+        .def_ro("fy", &Intrinsics::fy)
+        .def_ro("cx", &Intrinsics::cx)
+        .def_ro("cy", &Intrinsics::cy)
+        .def("get_ray_direction", &Intrinsics::get_ray_direction,
+             "Get the direction of the ray going through pixel (px, py) in camera frame",
+             nb::arg("px"), nb::arg("py"));
+
+    /*
+     * Confidence module bindings
      */
 
     nb::module_ confidence = m.def_submodule("confidence");
diff --git a/genmetaballs/src/cuda/core/camera.cu b/genmetaballs/src/cuda/core/camera.cu
@@ -0,0 +1,13 @@
+#include <cstdint>
+#include <cuda/std/ranges>
+#include <cuda_runtime.h>
+
+#include "camera.cuh"
+#include "geometry.cuh"
+#include "utils.cuh"
+
+// Returns the camera-frame direction of the ray through pixel (px, py): the pixel is
+// offset by (cx, cy), divided by (fx, fy), and paired with z = -1. The result is not
+// normalized and the pixel is not checked against the image bounds.
+CUDA_CALLABLE Vec3D Intrinsics::get_ray_direction(uint32_t px, uint32_t py) const {
+    const float dir_x = (static_cast<float>(px) - cx) / fx;
+    const float dir_y = (static_cast<float>(py) - cy) / fy;
+    return Vec3D{dir_x, dir_y, -1.0f};
+}
diff --git a/genmetaballs/src/cuda/core/camera.cuh b/genmetaballs/src/cuda/core/camera.cuh
@@ -1,14 +1,36 @@
 #pragma once
 
 #include <cstdint>
+#include <cuda_runtime.h>
+
+#include "geometry.cuh"
+#include "utils.cuh"
 
 struct Intrinsics {
-    uint32_t height;
-    uint32_t width;
+    uint32_t height; // in x direction
+    uint32_t width;  // in y direction
     float fx;
     float fy;
     float cx;
     float cy;
-    float near;
-    float far;
+
+    // Returns the direction of the ray going through pixel (px, py) in camera frame.
+    // For efficiency, this function does not check if the pixel is within bounds.
+    CUDA_CALLABLE Vec3D get_ray_direction(uint32_t px, uint32_t py) const;
+
+    // Fills `buffer` with camera-frame ray directions for a sub-rectangle of the image and
+    // returns the buffer. With the default arguments the full image is covered.
+    //
+    // NOTE: despite their names, [px_start, px_end) bounds the first buffer index `i`
+    // (clamped to `height`) and [py_start, py_end) bounds the second index `j` (clamped to
+    // `width`); each entry is buffer[i][j] = get_ray_direction(j, i). End bounds are
+    // exclusive. The buffer's own extents are not checked here.
+    //
+    // The buffer is returned by value: returning a reference to the by-value parameter
+    // would dangle as soon as the call returns.
+    // NOTE(review): assumes Array2D is a cheap, non-owning view, so the returned copy still
+    // refers to the caller's storage -- confirm against utils.cuh.
+    template <MemoryLocation location>
+    CUDA_CALLABLE Array2D<Vec3D, location> get_ray_directions(Array2D<Vec3D, location> buffer,
+                                                              uint32_t px_start = 0,
+                                                              uint32_t px_end = UINT32_MAX,
+                                                              uint32_t py_start = 0,
+                                                              uint32_t py_end = UINT32_MAX) const {
+        // Clamp the exclusive end bounds once. The start bounds are unsigned, so they need
+        // no lower clamp.
+        const uint32_t i_end = min(height, px_end);
+        const uint32_t j_end = min(width, py_end);
+        for (uint32_t i = px_start; i < i_end; ++i) {
+            for (uint32_t j = py_start; j < j_end; ++j) {
+                buffer[i][j] = get_ray_direction(j, i);
+            }
+        }
+        return buffer;
+    }
 };
diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh
@@ -72,11 +72,11 @@ public:
         return {rot, tran};
     }
 
-    CUDA_CALLABLE Rotation get_rot() const {
+    CUDA_CALLABLE const Rotation& get_rot() const {
         return rot_;
     }
 
-    CUDA_CALLABLE Vec3D get_tran() const {
+    CUDA_CALLABLE const Vec3D& get_tran() const {
         return tran_;
     }
 
diff --git a/genmetaballs/src/genmetaballs/core/__init__.py b/genmetaballs/src/genmetaballs/core/__init__.py
@@ -3,6 +3,7 @@
     FourParameterBlender,
     ThreeParameterBlender,
 )
+from genmetaballs._genmetaballs_bindings.camera import Intrinsics
 from genmetaballs._genmetaballs_bindings.confidence import (
     TwoParameterConfidence,
     ZeroParameterConfidence,
@@ -30,6 +31,8 @@ def array2d_float(data, device) -> CPUFloatArray2D | GPUFloatArray2D:
     "ZeroParameterConfidence",
     "TwoParameterConfidence",
     "geometry",
+    "Camera",
+    "Intrinsics",
     "sigmoid",
     "FourParameterBlender",
     "ThreeParameterBlender",
diff --git a/genmetaballs/src/genmetaballs/fmb/utils.py b/genmetaballs/src/genmetaballs/fmb/utils.py
@@ -141,3 +141,12 @@ def compute_normals(camera_rays, depth_py_px, eps=1e-20):
     norms = nan_ddiff / (eps + jnp.linalg.norm(nan_ddiff, axis=1, keepdims=True))
 
     return norms
+
+
+def get_camera_rays(
+    fx: float, fy: float, cx: float, cy: float, pixel_list: np.ndarray[int]
+) -> np.ndarray[np.float32]:
+    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
+    camera_rays = (pixel_list - K[:, 2]) / np.diag(K)
+    camera_rays[:, -1] = -1
+    return camera_rays
diff --git a/tests/cpp_tests/test_camera.cu b/tests/cpp_tests/test_camera.cu
@@ -0,0 +1,54 @@
+#include <cmath>
+#include <cstdint>
+#include <cuda_runtime.h>
+#include <gtest/gtest.h>
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+
+#include "core/camera.cuh"
+#include "core/geometry.cuh"
+#include "core/utils.cuh"
+
+namespace test_camera_gpu {
+
+// CUDA kernel that calls get_ray_directions on device from multiple threads.
+// Thread (threadIdx.x, threadIdx.y) fills the 2x2 tile that starts at row
+// 2 * threadIdx.x and column 2 * threadIdx.y; blockIdx is not used, so a single
+// block is expected to cover the whole image.
+__global__ void get_ray_directions_kernel(Intrinsics intrinsics,
+                                          Array2D<Vec3D, MemoryLocation::DEVICE> ray_buffer) {
+    // Clamp each tile to the image with min(). Using max() here would stretch every
+    // tile to the image edge, so threads would redundantly rewrite each other's entries.
+    uint32_t row_start = threadIdx.x * 2;
+    uint32_t row_end = min(row_start + 2, intrinsics.height);
+    uint32_t col_start = threadIdx.y * 2;
+    uint32_t col_end = min(col_start + 2, intrinsics.width);
+    intrinsics.get_ray_directions(ray_buffer, row_start, row_end, col_start, col_end);
+}
+
+} // namespace test_camera_gpu
+
+// Test get_ray_directions on GPU (device): fill the whole ray buffer from a single
+// block of threads and check that neighbouring entries differ.
+TEST(CameraTest, GetRayDirectionsDevice) {
+    // Create a small camera intrinsics
+    Intrinsics intrinsics{4, 6, 100.0f, 100.0f, 3.0f, 2.0f};
+
+    // Create Array2D buffer on device
+    thrust::device_vector<Vec3D> data(intrinsics.height * intrinsics.width);
+    Array2D<Vec3D, MemoryLocation::DEVICE> ray_buffer(data.data(), intrinsics.height,
+                                                      intrinsics.width);
+
+    // Launch kernel with multiple threads -- divide into 2x2 tiles. The tile counts are
+    // rounded up so that an odd height or width would still be fully covered.
+    const dim3 tiles((intrinsics.height + 1) / 2, (intrinsics.width + 1) / 2);
+    test_camera_gpu::get_ray_directions_kernel<<<1, tiles>>>(intrinsics, ray_buffer);
+    CUDA_CHECK(cudaGetLastError());
+    CUDA_CHECK(cudaDeviceSynchronize());
+
+    // Copy data back to host for sanity check
+    thrust::host_vector<Vec3D> ray_data = data;
+
+    // Sanity check: adjacent rays should be different.
+    // `i + 1 < size()` keeps the comparison unsigned and cannot underflow on an empty
+    // vector the way `i < size() - 1` would.
+    constexpr float eps = 1e-6f;
+    for (size_t i = 0; i + 1 < ray_data.size(); ++i) {
+        auto diff = ray_data[i + 1] - ray_data[i];
+        float diff_mag = sqrtf(dot(diff, diff));
+        EXPECT_GT(diff_mag, eps) << "Adjacent rays are too similar at index " << i;
+    }
+}
diff --git a/tests/cpp_tests/test_confidence.cu b/tests/cpp_tests/test_confidence.cu
@@ -75,7 +75,6 @@ static std::vector<ConfidenceCase> confidence_cases() {
 
 TEST(GpuConfidenceTest, ConfidenceMultipleValuesGPU_AllTypes) {
     using test_float = float;
-    constexpr float rtol = 1e-6F;
 
     auto sizes = confidence_test_sizes();
     std::mt19937 master_gen(MASTER_SEED);
diff --git a/tests/python_tests/test_camera.py b/tests/python_tests/test_camera.py
@@ -0,0 +1,43 @@
+import numpy as np
+import pytest
+
+from genmetaballs.core import Intrinsics
+from genmetaballs.fmb.utils import get_camera_rays
+
+
+@pytest.fixture
+def rng() -> np.random.Generator:
+    return np.random.default_rng(0)
+
+
+@pytest.fixture
+def intrinsics() -> Intrinsics:
+    return Intrinsics(height=480, width=640, fx=500.0, fy=520.0, cx=320.0, cy=240.0)
+
+
+def test_get_ray_direction_in_camera_frame(intrinsics: Intrinsics):
+    # selet a few random pixels to test
+    pixel_list = np.array(
+        [
+            [0, 0],
+            [intrinsics.width - 1, 0],
+            [0, intrinsics.height - 1],
+            [intrinsics.width - 1, intrinsics.height - 1],
+            [intrinsics.width // 2, intrinsics.height // 2],
+        ]
+    )
+
+    reference_rays = get_camera_rays(
+        intrinsics.fx,
+        intrinsics.fy,
+        intrinsics.cx,
+        intrinsics.cy,
+        np.concatenate([pixel_list, np.zeros((pixel_list.shape[0], 1))], axis=1),
+    )
+
+    for (pixel_x, pixel_y), reference_ray in zip(pixel_list, reference_rays, strict=True):
+        ray_direction = intrinsics.get_ray_direction(pixel_x, pixel_y)
+        assert np.allclose(
+            [ray_direction.x, ray_direction.y, ray_direction.z],
+            reference_ray,
+        )

Original file line number	Diff line number	Diff line change
`@@ -72,11 +72,11 @@ public:`
`72`	`72`	`return {rot, tran};`
`73`	`73`	`}`
`74`	`74`
`75`		`- CUDA_CALLABLE Rotation get_rot() const {`
	`75`	`+ CUDA_CALLABLE const Rotation& get_rot() const {`
`76`	`76`	`return rot_;`
`77`	`77`	`}`
`78`	`78`
`79`		`- CUDA_CALLABLE Vec3D get_tran() const {`
	`79`	`+ CUDA_CALLABLE const Vec3D& get_tran() const {`
`80`	`80`	`return tran_;`
`81`	`81`	`}`
`82`	`82`