-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
Hi. It would be incredibly useful to be able to use existing COLMAP reconstructions (created with something like VGGT or VGGSfM) in this project.
I used ChatGPT to create a Python script that does this and writes the transforms.json, in case that is useful for you. My results haven't been successful so far, though — the resulting mesh GLB is just a big blob.
#!/usr/bin/env python3
import os
import sys
import json
import math
import struct
from pathlib import Path
# Pillow is the only third-party dependency; it is used to read each
# image's true pixel dimensions. Fail fast with an install hint when absent.
try:
    from PIL import Image
except ImportError:
    print("Please install Pillow: pip install pillow")
    sys.exit(1)
# -----------------------------
# COLMAP binary readers
# -----------------------------
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    """Read exactly *num_bytes* from *fid* and unpack them with struct.

    Returns the unpacked tuple; raises EOFError when the stream ends
    before the requested number of bytes could be read.
    """
    chunk = fid.read(num_bytes)
    if len(chunk) < num_bytes:
        raise EOFError("Unexpected end of file while reading COLMAP binary.")
    return struct.unpack(endian_character + format_char_sequence, chunk)
# (model name, number of intrinsic parameters), indexed by COLMAP model id.
# Order matches COLMAP's camera-model enum, so list position == model id.
_COLMAP_CAMERA_MODELS = [
    ("SIMPLE_PINHOLE", 3),
    ("PINHOLE", 4),
    ("SIMPLE_RADIAL", 4),
    ("RADIAL", 5),
    ("OPENCV", 8),
    ("OPENCV_FISHEYE", 8),
    ("FULL_OPENCV", 12),
    ("FOV", 5),
    ("SIMPLE_RADIAL_FISHEYE", 4),
    ("RADIAL_FISHEYE", 5),
    ("THIN_PRISM_FISHEYE", 12),
]
# COLMAP model id -> model name.
CAMERA_MODEL_IDS = {i: name for i, (name, _) in enumerate(_COLMAP_CAMERA_MODELS)}
# Model name -> number of parameters stored per camera in cameras.bin.
CAMERA_MODEL_NUM_PARAMS = dict(_COLMAP_CAMERA_MODELS)
def read_cameras_binary(path):
    """Parse a COLMAP cameras.bin file.

    Returns a dict mapping camera id to a record with the model name,
    sensor width/height and the model's intrinsic parameter tuple.
    """
    result = {}
    with open(path, "rb") as stream:
        (count,) = read_next_bytes(stream, 8, "Q")
        for _ in range(count):
            # Fixed header per camera: id (i), model id (i), width/height (Q, Q).
            cam_id, model_id, cam_width, cam_height = read_next_bytes(
                stream, 24, "iiQQ"
            )
            model = CAMERA_MODEL_IDS[model_id]
            n_params = CAMERA_MODEL_NUM_PARAMS[model]
            # Intrinsics follow as n_params little-endian doubles.
            params = read_next_bytes(stream, 8 * n_params, "d" * n_params)
            result[cam_id] = {
                "id": cam_id,
                "model": model,
                "width": cam_width,
                "height": cam_height,
                "params": params,
            }
    return result
def read_images_binary(path):
    """Parse a COLMAP images.bin file into ``{image_id: record}``.

    Each record holds the world-to-camera pose (``qvec`` as w, x, y, z and
    ``tvec``), the owning ``camera_id`` and the image file ``name``.
    Per-image 2D keypoint observations are skipped.

    Raises:
        EOFError: if the file is truncated mid-record.
    """
    images = {}
    with open(path, "rb") as fid:
        num_images = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_images):
            # image_id (i) + qvec (4 d) + tvec (3 d) + camera_id (i) = 64 bytes.
            binary_image_props = read_next_bytes(fid, 64, "idddddddi")
            image_id = binary_image_props[0]
            qvec = binary_image_props[1:5]
            tvec = binary_image_props[5:8]
            camera_id = binary_image_props[8]
            # The image name is a NUL-terminated byte string.
            name_bytes = b""
            while True:
                char = fid.read(1)
                if not char:
                    # Guard against truncated files: the original loop would
                    # spin forever on b"" (EOF never equals b"\x00").
                    raise EOFError(
                        "Unexpected end of file while reading COLMAP binary."
                    )
                if char == b"\x00":
                    break
                name_bytes += char
            name = name_bytes.decode("utf-8")
            # Skip the 2D observations: num_points2D * (x: d, y: d, point3D_id: q).
            num_points2D = read_next_bytes(fid, 8, "Q")[0]
            fid.read(num_points2D * 24)
            images[image_id] = {
                "id": image_id,
                "qvec": qvec,
                "tvec": tvec,
                "camera_id": camera_id,
                "name": name,
            }
    return images
# -----------------------------
# Math helpers
# -----------------------------
def qvec_to_rotmat(qvec):
    """Convert a unit quaternion (w, x, y, z) into a 3x3 rotation matrix.

    Uses the standard quaternion-to-matrix expansion; the input is assumed
    to be normalized (COLMAP stores unit quaternions).
    """
    w, x, y, z = qvec
    row0 = [1 - 2 * y * y - 2 * z * z, 2 * x * y - 2 * w * z, 2 * z * x + 2 * w * y]
    row1 = [2 * x * y + 2 * w * z, 1 - 2 * x * x - 2 * z * z, 2 * y * z - 2 * w * x]
    row2 = [2 * z * x - 2 * w * y, 2 * y * z + 2 * w * x, 1 - 2 * x * x - 2 * y * y]
    return [row0, row1, row2]
def mat3_transpose(m):
    """Return the transpose of a 3x3 matrix given as nested lists."""
    return [list(column) for column in zip(*m)]
def mat3_vec_mul(m, v):
    """Multiply a 3x3 matrix *m* by a 3-vector *v* (row-major, nested lists)."""
    return [m[i][0] * v[0] + m[i][1] * v[1] + m[i][2] * v[2] for i in range(3)]
def negate(v):
    """Return a new vector with every component of *v* negated."""
    result = []
    for component in v:
        result.append(-component)
    return result
def colmap_image_to_c2w_opengl(qvec, tvec):
    """Build a 4x4 camera-to-world matrix (OpenGL axes) from a COLMAP pose.

    COLMAP stores world-to-camera extrinsics ``X_cam = R @ X_world + t``
    with OpenCV camera axes (x right, y down, z forward).  The
    camera-to-world rotation is ``R^T`` and the camera center is
    ``C = -R^T @ t``.  Negating the Y and Z columns of ``R^T`` converts
    the camera frame to OpenGL axes (x right, y up, z backward).
    """
    w, x, y, z = qvec
    # World-to-camera rotation from the unit quaternion (w, x, y, z).
    r_wc = [
        [1 - 2 * y * y - 2 * z * z, 2 * x * y - 2 * w * z, 2 * z * x + 2 * w * y],
        [2 * x * y + 2 * w * z, 1 - 2 * x * x - 2 * z * z, 2 * y * z - 2 * w * x],
        [2 * z * x - 2 * w * y, 2 * y * z + 2 * w * x, 1 - 2 * x * x - 2 * y * y],
    ]
    # Camera-to-world rotation is the transpose.
    r_cw = [[r_wc[j][i] for j in range(3)] for i in range(3)]
    # Camera center C = R^T @ (-t), computed before the axis flip.
    neg_t = [-tvec[0], -tvec[1], -tvec[2]]
    center = [
        r_cw[i][0] * neg_t[0] + r_cw[i][1] * neg_t[1] + r_cw[i][2] * neg_t[2]
        for i in range(3)
    ]
    # Flip the Y and Z camera axes (columns 1 and 2): OpenCV -> OpenGL.
    return [
        [float(r_cw[0][0]), float(-r_cw[0][1]), float(-r_cw[0][2]), float(center[0])],
        [float(r_cw[1][0]), float(-r_cw[1][1]), float(-r_cw[1][2]), float(center[1])],
        [float(r_cw[2][0]), float(-r_cw[2][1]), float(-r_cw[2][2]), float(center[2])],
        [0.0, 0.0, 0.0, 1.0],
    ]
# -----------------------------
# Camera intrinsics helpers
# -----------------------------
def get_intrinsics(camera):
    """Extract pinhole intrinsics (fx, fy, cx, cy) from a COLMAP camera record.

    Distortion parameters are ignored.  Models with a single focal length
    replicate it into both fx and fy.

    Raises:
        ValueError: for camera models not listed below.
    """
    model = camera["model"]
    p = camera["params"]
    # Models whose parameter vector starts with one shared focal length f.
    single_focal = {
        "SIMPLE_PINHOLE",
        "SIMPLE_RADIAL",
        "RADIAL",
        "SIMPLE_RADIAL_FISHEYE",
        "RADIAL_FISHEYE",
    }
    # Models whose parameter vector starts with fx, fy.
    double_focal = {
        "PINHOLE",
        "OPENCV",
        "OPENCV_FISHEYE",
        "FULL_OPENCV",
        "FOV",
        "THIN_PRISM_FISHEYE",
    }
    if model in single_focal:
        fx = fy = p[0]
        cx, cy = p[1], p[2]
    elif model in double_focal:
        fx, fy, cx, cy = p[0], p[1], p[2], p[3]
    else:
        raise ValueError(f"Unsupported camera model: {model}")
    return float(fx), float(fy), float(cx), float(cy)
def image_size(path, fallback_width=None, fallback_height=None):
    """Return (width, height) of the image file at *path*.

    When the image cannot be opened, the supplied fallback dimensions are
    returned if both are given; otherwise the original exception is
    re-raised.
    """
    try:
        with Image.open(path) as img:
            return img.width, img.height
    except Exception:
        if fallback_width is None or fallback_height is None:
            raise
        return fallback_width, fallback_height
def parse_view_index(filename, fallback):
    """Return the integer view index encoded in *filename*'s stem.

    Falls back to *fallback* when the stem is not a plain integer
    (e.g. "frame_001a.png").
    """
    try:
        return int(Path(filename).stem)
    except ValueError:
        return fallback
# -----------------------------
# Main conversion
# -----------------------------
def convert(parent_folder, output_json):
    """Convert a COLMAP sparse reconstruction into a transforms-style JSON file.

    Expects ``parent_folder`` to contain an ``images/`` directory and a
    ``sparse/`` directory holding ``cameras.bin`` and ``images.bin``.
    A ``sparse/0/`` subfolder — the layout COLMAP's mapper usually
    produces — is accepted as a fallback.  Writes one frame per registered
    image with an OpenGL-convention camera-to-world transform.

    Args:
        parent_folder: path to the dataset root described above.
        output_json: path of the JSON file to write.

    Raises:
        FileNotFoundError: if the images directory or either binary is missing.
    """
    parent = Path(parent_folder)
    images_dir = parent / "images"
    sparse_dir = parent / "sparse"
    # COLMAP's mapper usually writes models into numbered subfolders
    # (sparse/0, sparse/1, ...); fall back to sparse/0 when sparse/ itself
    # does not hold the binaries.  The original behavior for a flat
    # sparse/ layout is unchanged.
    if not (sparse_dir / "cameras.bin").is_file() and (
        sparse_dir / "0" / "cameras.bin"
    ).is_file():
        sparse_dir = sparse_dir / "0"
    cameras_bin = sparse_dir / "cameras.bin"
    images_bin = sparse_dir / "images.bin"
    if not images_dir.is_dir():
        raise FileNotFoundError(f"Missing images directory: {images_dir}")
    if not cameras_bin.is_file():
        raise FileNotFoundError(f"Missing file: {cameras_bin}")
    if not images_bin.is_file():
        raise FileNotFoundError(
            f"Missing file: {images_bin}\n"
            "cameras.bin alone is not enough to build frame transform matrices."
        )
    cameras = read_cameras_binary(cameras_bin)
    images = read_images_binary(images_bin)
    frames = []
    # Iterate in file-name order so the enumerate fallback index is stable.
    for i, image_rec in enumerate(sorted(images.values(), key=lambda x: x["name"])):
        img_name = image_rec["name"]
        img_path = images_dir / img_name
        camera = cameras[image_rec["camera_id"]]
        fx, fy, cx, cy = get_intrinsics(camera)
        # Prefer the actual image file's size; fall back to cameras.bin values.
        width, height = image_size(
            img_path,
            fallback_width=camera["width"],
            fallback_height=camera["height"],
        )
        # Full pinhole field of view: fov = 2 * atan(extent / (2 * focal)).
        fov_x = 2.0 * math.atan(width / (2.0 * fx))
        fov_y = 2.0 * math.atan(height / (2.0 * fy))
        transform_matrix = colmap_image_to_c2w_opengl(
            image_rec["qvec"], image_rec["tvec"]
        )
        frames.append(
            {
                "view_index": parse_view_index(img_name, i),
                "file_path": f"images/{img_name}",
                "width": int(width),
                "height": int(height),
                "transform_matrix": transform_matrix,
                "camera_fov": [float(fov_x), float(fov_y)],
                "camera_principal_point": [float(cx), float(cy)],
            }
        )
    frames.sort(key=lambda x: x["view_index"])
    output = {
        "object_uid": "Camera_01",
        "illumination_index": 0,
        "illumination": {
            "type": "environment_illumination",
            "z_rotation": 0.0,
            "img_name": ""
        },
        "coordinate_system": "ogl",
        "frames": frames,
    }
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)
    print(f"Wrote {output_json}")
if __name__ == "__main__":
    # Command-line entry point: exactly two positional arguments required.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print(
            "Usage:\n"
            " python colmap_to_json.py /path/to/parent_folder output.json\n\n"
            "Expected structure:\n"
            " parent_folder/\n"
            " images/\n"
            " sparse/\n"
            " cameras.bin\n"
            " images.bin"
        )
        sys.exit(1)
    convert(cli_args[0], cli_args[1])
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels