nerfstudio-project · npu-chenlin · Jan 16, 2025 · Jan 17, 2025 · May 26, 2025 · Jul 7, 2025
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import json
 import os
 from typing import Any, Dict, List, Optional
@@ -6,8 +7,9 @@
 import imageio.v2 as imageio
 import numpy as np
 import torch
+
 from PIL import Image
-from pycolmap import SceneManager
+from pycolmap import Reconstruction, CameraModelId
 from tqdm import tqdm
 from typing_extensions import assert_never
 
@@ -75,25 +77,27 @@ def __init__(
             colmap_dir
         ), f"COLMAP directory {colmap_dir} does not exist."
 
-        manager = SceneManager(colmap_dir)
-        manager.load_cameras()
-        manager.load_images()
-        manager.load_points3D()
+        manager = Reconstruction(colmap_dir)
+
+        # Map each COLMAP point3D id to a contiguous 0-based index.
+        point3D_id_contiguous = dict()
+        for i, point_id in enumerate(manager.points3D.keys()):
+            point3D_id_contiguous[point_id] = i
 
         # Extract extrinsic matrices in world-to-camera format.
         imdata = manager.images
         w2c_mats = []
         camera_ids = []
         Ks_dict = dict()
+        point_indices = defaultdict(list) # image_name -> [point_idx]
         params_dict = dict()
         imsize_dict = dict()  # width, height
         mask_dict = dict()
         bottom = np.array([0, 0, 0, 1]).reshape(1, 4)
         for k in imdata:
             im = imdata[k]
-            rot = im.R()
-            trans = im.tvec.reshape(3, 1)
-            w2c = np.concatenate([np.concatenate([rot, trans], 1), bottom], axis=0)
+            w2c = im.cam_from_world().matrix()
+            w2c = np.concatenate([w2c, bottom], axis=0)
             w2c_mats.append(w2c)
 
             # support different camera intrinsics
@@ -102,30 +106,40 @@ def __init__(
 
             # camera intrinsics
             cam = manager.cameras[camera_id]
-            fx, fy, cx, cy = cam.fx, cam.fy, cam.cx, cam.cy
-            K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+
+            K = cam.calibration_matrix()
             K[:2, :] /= factor
             Ks_dict[camera_id] = K
+
+            # Record, per image name, the contiguous indices of the 3D points it observes.
+            for obs_point2d in im.get_observation_points2D():
+                point_indices[im.name].append(point3D_id_contiguous[obs_point2d.point3D_id])
 
             # Get distortion parameters.
-            type_ = cam.camera_type
-            if type_ == 0 or type_ == "SIMPLE_PINHOLE":
+            type_ = cam.model
+            # SIMPLE_PINHOLE:     f, cx, cy
+            if type_ == CameraModelId.SIMPLE_PINHOLE:
                 params = np.empty(0, dtype=np.float32)
                 camtype = "perspective"
-            elif type_ == 1 or type_ == "PINHOLE":
+            # PINHOLE:            fx, fy, cx, cy
+            elif type_ == CameraModelId.PINHOLE:
                 params = np.empty(0, dtype=np.float32)
                 camtype = "perspective"
-            if type_ == 2 or type_ == "SIMPLE_RADIAL":
-                params = np.array([cam.k1, 0.0, 0.0, 0.0], dtype=np.float32)
+            # SIMPLE_RADIAL:      f, cx, cy, k
+            if type_ == CameraModelId.SIMPLE_RADIAL:
+                params = np.array([cam.params[3], 0.0, 0.0, 0.0], dtype=np.float32)
                 camtype = "perspective"
-            elif type_ == 3 or type_ == "RADIAL":
-                params = np.array([cam.k1, cam.k2, 0.0, 0.0], dtype=np.float32)
+            # RADIAL:             f, cx, cy, k1, k2
+            elif type_ == CameraModelId.RADIAL:
+                params = np.array([cam.params[3], cam.params[4], 0.0, 0.0], dtype=np.float32)
                 camtype = "perspective"
-            elif type_ == 4 or type_ == "OPENCV":
-                params = np.array([cam.k1, cam.k2, cam.p1, cam.p2], dtype=np.float32)
+            # OPENCV:             fx, fy, cx, cy, k1, k2, p1, p2
+            elif type_ == CameraModelId.OPENCV:
+                params = np.array([cam.params[4], cam.params[5], cam.params[6], cam.params[7]], dtype=np.float32)
                 camtype = "perspective"
-            elif type_ == 5 or type_ == "OPENCV_FISHEYE":
-                params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32)
+            # OPENCV_FISHEYE:     fx, fy, cx, cy, k1, k2, k3, k4
+            elif type_ == CameraModelId.OPENCV_FISHEYE:
+                params = np.array([cam.params[4], cam.params[5], cam.params[6], cam.params[7]], dtype=np.float32)
                 camtype = "fisheye"
             assert (
                 camtype == "perspective" or camtype == "fisheye"
@@ -140,9 +154,8 @@ def __init__(
 
         if len(imdata) == 0:
             raise ValueError("No images found in COLMAP.")
-        if not (type_ == 0 or type_ == 1):
+        if not (type_ == CameraModelId.PINHOLE or type_ == CameraModelId.SIMPLE_PINHOLE):
             print("Warning: COLMAP Camera is not PINHOLE. Images have distortion.")
-
         w2c_mats = np.stack(w2c_mats, axis=0)
 
         # Convert extrinsics to camera-to-world.
@@ -198,18 +211,12 @@ def __init__(
         colmap_to_image = dict(zip(colmap_files, image_files))
         image_paths = [os.path.join(image_dir, colmap_to_image[f]) for f in image_names]
 
-        # 3D points and {image_name -> [point_idx]}
-        points = manager.points3D.astype(np.float32)
-        points_err = manager.point3D_errors.astype(np.float32)
-        points_rgb = manager.point3D_colors.astype(np.uint8)
-        point_indices = dict()
-
-        image_id_to_name = {v: k for k, v in manager.name_to_image_id.items()}
-        for point_id, data in manager.point3D_id_to_images.items():
-            for image_id, _ in data:
-                image_name = image_id_to_name[image_id]
-                point_idx = manager.point3D_id_to_point3D_idx[point_id]
-                point_indices.setdefault(image_name, []).append(point_idx)
+        # 3D points
+        points3D = manager.points3D.values()
+        points_err = np.array([p.error for p in points3D], dtype=np.float32)
+        points_rgb = np.array([p.color for p in points3D], dtype=np.uint8)
+        points = np.array([p.xyz for p in points3D], dtype=np.float32)
+
         point_indices = {
             k: np.array(v).astype(np.int32) for k, v in point_indices.items()
         }

diff --git a/examples/requirements.txt b/examples/requirements.txt
@@ -1,7 +1,7 @@
 # assume torch is already installed
 
 # pycolmap for data parsing
-git+https://github.com/rmbrualla/pycolmap@cc7ea4b7301720ac29287dbe450952511b32125e
+pycolmap
 # (optional) nerfacc for torch version rasterization 
 # git+https://github.com/nerfstudio-project/nerfacc