Skip to content

Support existing colmap setups #6

@dokipen3d

Description

@dokipen3d

Hi. It would be incredibly useful to be able to use existing COLMAP setups (created with something like VGGT or VGGSfM) in this project.

I used ChatGPT to create a Python script that does this and generates the transforms.json, in case that is useful for you. My results haven't been successful, though: the resulting GLB mesh is just a big blob.

Image
#!/usr/bin/env python3
import os
import sys
import json
import math
import struct
from pathlib import Path

try:
    from PIL import Image
except ImportError:
    print("Please install Pillow: pip install pillow")
    sys.exit(1)


# -----------------------------
# COLMAP binary readers
# -----------------------------
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    """Read exactly ``num_bytes`` from ``fid`` and unpack them with ``struct``.

    Args:
        fid: Binary file-like object positioned at the data to read.
        num_bytes: Number of bytes to consume from ``fid``.
        format_char_sequence: ``struct`` format characters (without the
            byte-order prefix) describing those bytes.
        endian_character: ``struct`` byte-order prefix; little-endian by
            default.

    Returns:
        The tuple produced by ``struct.unpack``.

    Raises:
        EOFError: If fewer than ``num_bytes`` bytes could be read.
    """
    raw = fid.read(num_bytes)
    if len(raw) == num_bytes:
        layout = endian_character + format_char_sequence
        return struct.unpack(layout, raw)
    # A short read means the file ended before the record was complete.
    raise EOFError("Unexpected end of file while reading COLMAP binary.")


# One row per supported camera model: (numeric model id, model name,
# number of intrinsic parameters). Both lookup tables below are derived
# from this single list so they cannot drift apart.
_CAMERA_MODEL_TABLE = (
    (0, "SIMPLE_PINHOLE", 3),
    (1, "PINHOLE", 4),
    (2, "SIMPLE_RADIAL", 4),
    (3, "RADIAL", 5),
    (4, "OPENCV", 8),
    (5, "OPENCV_FISHEYE", 8),
    (6, "FULL_OPENCV", 12),
    (7, "FOV", 5),
    (8, "SIMPLE_RADIAL_FISHEYE", 4),
    (9, "RADIAL_FISHEYE", 5),
    (10, "THIN_PRISM_FISHEYE", 12),
)

# Numeric model id -> model name.
CAMERA_MODEL_IDS = {
    model_id: model_name for model_id, model_name, _ in _CAMERA_MODEL_TABLE
}

# Model name -> number of intrinsic parameters for that model.
CAMERA_MODEL_NUM_PARAMS = {
    model_name: param_count for _, model_name, param_count in _CAMERA_MODEL_TABLE
}


def read_cameras_binary(path):
    """Load every camera record from a COLMAP ``cameras.bin`` file.

    The file starts with a uint64 camera count. Each record is then a
    24-byte header (int32 camera id, int32 model id, uint64 width,
    uint64 height) followed by one float64 per model parameter.

    Args:
        path: Location of the ``cameras.bin`` file.

    Returns:
        Dict mapping camera id to a dict with the keys ``id``, ``model``,
        ``width``, ``height`` and ``params`` (a tuple of floats).

    Raises:
        EOFError: If the file ends before a record is complete.
        KeyError: If a record uses a model id missing from
            ``CAMERA_MODEL_IDS``.
    """
    cameras = {}
    with open(path, "rb") as stream:
        (camera_count,) = read_next_bytes(stream, 8, "Q")
        for _ in range(camera_count):
            camera_id, model_id, width, height = read_next_bytes(
                stream, 24, "iiQQ"
            )
            model_name = CAMERA_MODEL_IDS[model_id]
            # The parameter count depends on the model, so it is looked up
            # before the variable-length tail of the record can be read.
            param_count = CAMERA_MODEL_NUM_PARAMS[model_name]
            params = read_next_bytes(stream, 8 * param_count, "d" * param_count)
            cameras[camera_id] = dict(
                id=camera_id,
                model=model_name,
                width=width,
                height=height,
                params=params,
            )
    return cameras


def read_images_binary(path):
    """Load every image (pose) record from a COLMAP ``images.bin`` file.

    The file starts with a uint64 image count. Each record is a 64-byte
    header (int32 image id, four float64 quaternion components, three
    float64 translation components, int32 camera id), a NUL-terminated
    UTF-8 image name, a uint64 count of 2D observations, and 24 bytes per
    observation. The observations are skipped, not parsed.

    Args:
        path: Location of the ``images.bin`` file.

    Returns:
        Dict mapping image id to a dict with the keys ``id``, ``qvec``
        (4-tuple), ``tvec`` (3-tuple), ``camera_id`` and ``name``.

    Raises:
        EOFError: If the file ends inside a header or inside an image name.
        UnicodeDecodeError: If an image name is not valid UTF-8.
    """
    images = {}
    with open(path, "rb") as fid:
        num_images = read_next_bytes(fid, 8, "Q")[0]
        print("ni: " + str(num_images))
        for _ in range(num_images):
            binary_image_props = read_next_bytes(fid, 64, "idddddddi")
            image_id = binary_image_props[0]
            qvec = binary_image_props[1:5]
            tvec = binary_image_props[5:8]
            camera_id = binary_image_props[8]

            name_bytes = bytearray()
            while True:
                char = fid.read(1)
                if not char:
                    # read() returns b"" at end of file; without this check
                    # a truncated file would spin here forever because the
                    # NUL terminator never arrives.
                    raise EOFError(
                        "Unexpected end of file while reading COLMAP binary."
                    )
                if char == b"\x00":
                    break
                name_bytes += char
            name = name_bytes.decode("utf-8")

            num_points2D = read_next_bytes(fid, 8, "Q")[0]
            # Skip x, y, point3D_id (24 bytes each) without loading the
            # whole observation block into memory.
            fid.seek(num_points2D * 24, os.SEEK_CUR)

            images[image_id] = {
                "id": image_id,
                "qvec": qvec,
                "tvec": tvec,
                "camera_id": camera_id,
                "name": name,
            }
    return images


# -----------------------------
# Math helpers
# -----------------------------
def qvec_to_rotmat(qvec):
    """Convert a quaternion ``(w, x, y, z)`` into a 3x3 rotation matrix.

    Args:
        qvec: Four numbers, scalar part first. The input is not normalised
            here; the formula assumes it already has unit length.

    Returns:
        The matrix as a list of three 3-element rows.
    """
    w, x, y, z = qvec

    first_row = [
        1 - 2 * y * y - 2 * z * z,
        2 * x * y - 2 * w * z,
        2 * z * x + 2 * w * y,
    ]
    second_row = [
        2 * x * y + 2 * w * z,
        1 - 2 * x * x - 2 * z * z,
        2 * y * z - 2 * w * x,
    ]
    third_row = [
        2 * z * x - 2 * w * y,
        2 * y * z + 2 * w * x,
        1 - 2 * x * x - 2 * y * y,
    ]
    return [first_row, second_row, third_row]


def mat3_transpose(m):
    """Return the transpose of the 3x3 matrix ``m`` as a new list of rows.

    Only the top-left 3x3 entries of ``m`` are read; ``m`` itself is left
    unmodified.
    """
    return [[m[row][col] for row in range(3)] for col in range(3)]


def mat3_vec_mul(m, v):
    """Multiply the 3x3 matrix ``m`` by the 3-vector ``v``.

    Returns:
        A new 3-element list holding ``m @ v``.
    """
    # The products are summed left to right so floating-point results do
    # not depend on how the expression is grouped.
    return [
        m[row][0] * v[0] + m[row][1] * v[1] + m[row][2] * v[2]
        for row in range(3)
    ]


def negate(v):
    """Return a new list holding the negation of every element of ``v``."""
    flipped = []
    for component in v:
        flipped.append(-component)
    return flipped


def colmap_image_to_c2w_opengl(qvec, tvec):
    """Build a 4x4 camera-to-world matrix in OpenGL axes from a COLMAP pose.

    COLMAP stores the world-to-camera transform::

        X_cam = R * X_world + t

    Inverting it gives the camera-to-world rotation ``R^T`` and the camera
    centre ``C = -R^T * t``.

    COLMAP camera axes follow the OpenCV convention (x right, y down,
    z forward). The output uses the OpenGL convention (x right, y up,
    z backward), so the camera's Y and Z basis vectors are negated. These
    are the second and third columns of the camera-to-world rotation.

    Args:
        qvec: World-to-camera rotation quaternion, scalar part first.
        tvec: World-to-camera translation (three numbers).

    Returns:
        The 4x4 matrix as a list of four rows of Python floats.
    """
    cam_to_world = mat3_transpose(qvec_to_rotmat(qvec))
    centre = mat3_vec_mul(cam_to_world, negate(tvec))

    pose = []
    for basis_row, position in zip(cam_to_world, centre):
        x_axis, y_axis, z_axis = basis_row
        # Negating the Y and Z columns switches the camera frame from
        # OpenCV to OpenGL; the X column and the translation are unchanged.
        pose.append(
            [float(x_axis), float(y_axis * -1), float(z_axis * -1), float(position)]
        )
    pose.append([0.0, 0.0, 0.0, 1.0])
    return pose


# -----------------------------
# Camera intrinsics helpers
# -----------------------------
def get_intrinsics(camera):
    """Extract ``(fx, fy, cx, cy)`` from a camera record.

    ``camera["params"]`` is unpacked according to ``camera["model"]``.
    Models with a single focal length report it as both ``fx`` and ``fy``.
    Any parameters after the focal length(s) and principal point are
    ignored.

    Args:
        camera: Mapping with at least the keys ``model`` and ``params``.

    Returns:
        Tuple ``(fx, fy, cx, cy)`` of Python floats.

    Raises:
        ValueError: If the model name is not supported, or if ``params``
            has the wrong number of entries for the model.
    """
    model = camera["model"]
    values = camera["params"]

    if model == "PINHOLE":
        fx, fy, cx, cy = values
    elif model == "FOV":
        fx, fy, cx, cy, _ = values
    elif model in ("OPENCV", "OPENCV_FISHEYE"):
        fx, fy, cx, cy, _, _, _, _ = values
    elif model in ("FULL_OPENCV", "THIN_PRISM_FISHEYE"):
        # Only the leading four entries matter; the tail is not unpacked.
        fx, fy, cx, cy = values[:4]
    else:
        # Remaining supported models share one focal length for both axes.
        if model == "SIMPLE_PINHOLE":
            focal, cx, cy = values
        elif model in ("SIMPLE_RADIAL", "SIMPLE_RADIAL_FISHEYE"):
            focal, cx, cy, _ = values
        elif model in ("RADIAL", "RADIAL_FISHEYE"):
            focal, cx, cy, _, _ = values
        else:
            raise ValueError(f"Unsupported camera model: {model}")
        fx = fy = focal

    return float(fx), float(fy), float(cx), float(cy)


def image_size(path, fallback_width=None, fallback_height=None):
    """Return ``(width, height)`` of the image stored at ``path``.

    The size is read by opening the file with Pillow. If that fails for
    any reason and both fallback values were supplied, the fallbacks are
    returned instead; otherwise the original exception propagates.

    Args:
        path: Image file to inspect.
        fallback_width: Width to report when the image cannot be opened.
        fallback_height: Height to report when the image cannot be opened.
    """
    try:
        with Image.open(path) as picture:
            dimensions = (picture.width, picture.height)
    except Exception:
        # Best effort: a missing or unreadable image is tolerated only
        # when the caller provided a complete fallback size.
        if fallback_width is None or fallback_height is None:
            raise
        dimensions = (fallback_width, fallback_height)
    return dimensions


def parse_view_index(filename, fallback):
    """Derive a view index from the stem of an image file name.

    Args:
        filename: Image file name; any directory part and the final
            suffix are dropped before parsing.
        fallback: Value returned when the stem is not a valid ``int``.

    Returns:
        ``int(stem)`` when that conversion succeeds, else ``fallback``.
    """
    try:
        index = int(Path(filename).stem)
    except ValueError:
        index = fallback
    return index


# -----------------------------
# Main conversion
# -----------------------------
def convert(parent_folder, output_json):
    """Convert a COLMAP reconstruction folder into a transforms JSON file.

    ``parent_folder`` must contain an ``images/`` directory and a
    ``sparse/`` directory holding ``cameras.bin`` and ``images.bin``.
    One frame is emitted per COLMAP image, carrying its camera-to-world
    matrix, field of view (radians) and principal point. Frames are
    written in ascending ``view_index`` order.

    Args:
        parent_folder: Root of the COLMAP setup.
        output_json: Path of the JSON file to write.

    Raises:
        FileNotFoundError: If the images directory or either ``.bin`` file
            is missing.
    """
    root = Path(parent_folder)
    images_dir = root / "images"
    cameras_bin = root / "sparse" / "cameras.bin"
    images_bin = root / "sparse" / "images.bin"

    # Validate the expected layout up front so failures are explicit.
    if not images_dir.is_dir():
        raise FileNotFoundError(f"Missing images directory: {images_dir}")
    if not cameras_bin.is_file():
        raise FileNotFoundError(f"Missing file: {cameras_bin}")
    if not images_bin.is_file():
        raise FileNotFoundError(
            f"Missing file: {images_bin}\n"
            "cameras.bin alone is not enough to build frame transform matrices."
        )

    cameras = read_cameras_binary(cameras_bin)
    images = read_images_binary(images_bin)
    print(len(images))

    def field_of_view(extent, focal):
        # Full angle, in radians, subtended by `extent` pixels at `focal`.
        return 2.0 * math.atan(extent / (2.0 * focal))

    records_by_name = sorted(images.values(), key=lambda rec: rec["name"])
    frames = []
    for position, record in enumerate(records_by_name):
        print(position)
        name = record["name"]
        print(name)
        path_on_disk = images_dir / name
        print(path_on_disk)
        camera = cameras[record["camera_id"]]

        fx, fy, cx, cy = get_intrinsics(camera)
        # Prefer the real image size; fall back to the size recorded in
        # the camera entry when the image cannot be opened.
        width, height = image_size(
            path_on_disk,
            fallback_width=camera["width"],
            fallback_height=camera["height"],
        )
        fov_x = field_of_view(width, fx)
        fov_y = field_of_view(height, fy)
        pose = colmap_image_to_c2w_opengl(record["qvec"], record["tvec"])

        frames.append(
            {
                # Numeric file stems become the view index; otherwise the
                # position in name-sorted order is used.
                "view_index": parse_view_index(name, position),
                "file_path": f"images/{name}",
                "width": int(width),
                "height": int(height),
                "transform_matrix": pose,
                "camera_fov": [float(fov_x), float(fov_y)],
                "camera_principal_point": [float(cx), float(cy)],
            }
        )

    frames = sorted(frames, key=lambda entry: entry["view_index"])

    document = {
        "object_uid": "Camera_01",
        "illumination_index": 0,
        "illumination": {
            "type": "environment_illumination",
            "z_rotation": 0.0,
            "img_name": "",
        },
        "coordinate_system": "ogl",
        "frames": frames,
    }

    with open(output_json, "w", encoding="utf-8") as handle:
        json.dump(document, handle, indent=2)

    print(f"Wrote {output_json}")


if __name__ == "__main__":
    # Command-line entry point: exactly two positional arguments are
    # required, the COLMAP parent folder and the output JSON path.
    cli_args = sys.argv[1:]
    if len(cli_args) == 2:
        convert(cli_args[0], cli_args[1])
    else:
        print(
            "Usage:\n"
            "  python colmap_to_json.py /path/to/parent_folder output.json\n\n"
            "Expected structure:\n"
            "  parent_folder/\n"
            "    images/\n"
            "    sparse/\n"
            "      cameras.bin\n"
            "      images.bin"
        )
        sys.exit(1)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions