Skip to content

The generated multi-view data exhibits significant errors #351

@wwsource

Description

@wwsource

Hi, I found that the generated multi-view data does not satisfy the depth-to-disparity conversion formula, i.e., disparity = fx * baseline / depth.
I used the following script to generate a binocular (stereo) image pair and marked corresponding point pairs in the two images based on the depth map of the left image, but the result was clearly incorrect. I tried using both the depth and the z values, but there isn't much difference between them.

# Request GPU rendering; this env var must be set before kubric/bpy are imported.
import os
os.environ["KUBRIC_USE_GPU"] = "1"

import logging

import bpy
import kubric as kb
from kubric.simulator import PyBullet
from kubric.renderer import Blender
import numpy as np


# --- Some configuration values
# the region in which to place objects [(min), (max)]
# NOTE(review): these spawn/velocity constants are defined but never used in
# this script (the single object is placed explicitly at the origin below).
STATIC_SPAWN_REGION = [(-7, -7, 0), (7, 7, 10)]
DYNAMIC_SPAWN_REGION = [(-5, -5, 1), (5, 5, 5)]
VELOCITY_RANGE = [(-4., -4., 0.), (4., 4., 0.)]

# --- CLI arguments
# Standard MOVi-style flag set; only resolution/frame settings actually affect
# this reproduction (the object-count and camera-movement flags are unused here).
parser = kb.ArgumentParser()
parser.add_argument("--objects_split", choices=["train", "test"],
                    default="train")
# Configuration for the objects of the scene
parser.add_argument("--min_num_static_objects", type=int, default=10,
                    help="minimum number of static (distractor) objects")
parser.add_argument("--max_num_static_objects", type=int, default=20,
                    help="maximum number of static (distractor) objects")
parser.add_argument("--min_num_dynamic_objects", type=int, default=1,
                    help="minimum number of dynamic (tossed) objects")
parser.add_argument("--max_num_dynamic_objects", type=int, default=3,
                    help="maximum number of dynamic (tossed) objects")
# Configuration for the floor and background
parser.add_argument("--floor_friction", type=float, default=0.3)
parser.add_argument("--floor_restitution", type=float, default=0.5)
parser.add_argument("--backgrounds_split", choices=["train", "test"],
                    default="train")

parser.add_argument("--camera", choices=["fixed_random", "linear_movement", "linear_movement_linear_lookat"],
                    default="fixed_random")
parser.add_argument("--max_camera_movement", type=float, default=4.0)
parser.add_argument("--max_motion_blur", type=float, default=0.0)


# Configuration for the source of the assets
parser.add_argument("--kubasic_assets", type=str,
                    default="gs://kubric-public/assets/KuBasic/KuBasic.json")
parser.add_argument("--hdri_assets", type=str,
                    default="gs://kubric-public/assets/HDRI_haven/HDRI_haven.json")
parser.add_argument("--gso_assets", type=str,
                    default="gs://kubric-public/assets/GSO/GSO.json")
parser.add_argument("--save_state", dest="save_state", action="store_true")
# Single frame (frame_end=1) is enough for a stereo-pair reproduction.
parser.set_defaults(save_state=False, frame_end=1, frame_rate=12,
                    resolution=256)
FLAGS = parser.parse_args()

# --- Common setups & resources
# kb.setup creates the Scene plus a seeded RNG and output/scratch directories.
scene, rng, output_dir, scratch_dir = kb.setup(FLAGS)

# Motion blur defaults to 0 (max_motion_blur=0.0), so it is disabled unless
# explicitly requested on the command line.
motion_blur = rng.uniform(0, FLAGS.max_motion_blur)
if motion_blur > 0.0:
  logging.info(f"Using motion blur strength {motion_blur}")

simulator = PyBullet(scene, scratch_dir)
renderer = Blender(scene, scratch_dir, use_denoising=True, samples_per_pixel=64,
                   motion_blur=motion_blur)
# kubasic = kb.AssetSource.from_manifest(FLAGS.kubasic_assets)
# gso = kb.AssetSource.from_manifest(FLAGS.gso_assets)
# hdri_source = kb.AssetSource.from_manifest(FLAGS.hdri_assets)
# NOTE(review): the FLAGS-based (gs://) manifests above are bypassed in favor
# of local copies; the --*_assets CLI flags therefore have no effect here.
kubasic = kb.AssetSource.from_manifest('assets/KuBasic.json')
gso = kb.AssetSource.from_manifest('assets/GSO.json')
hdri_source = kb.AssetSource.from_manifest('assets/HDRI_haven.json')


hdri_id = 'pond'
background_hdri = hdri_source.create(asset_id='pond')
#assert isinstance(background_hdri, kb.Texture)
logging.info("Using background %s", hdri_id)
scene.metadata["background"] = hdri_id
renderer._set_ambient_light_hdri(background_hdri.filename)

# Dome
dome = kubasic.create(asset_id="dome", name="dome",
                      friction=1.0,
                      restitution=0.0,
                      static=True, background=True)
assert isinstance(dome, kb.FileBasedObject)
scene += dome
dome_blender = dome.linked_objects[renderer]
texture_node = dome_blender.data.materials[0].node_tree.nodes["Image Texture"]
texture_node.image = bpy.data.images.load(background_hdri.filename)

obj = gso.create(asset_id='ACE_Coffee_Mug_Kristen_16_oz_cup')
scale = 1.0
obj.scale = scale / np.max(obj.bounds[1] - obj.bounds[0])
obj.metadata["scale"] = scale
scene += obj
obj.position = (0, 0, 0)
obj.metadata["is_dynamic"] = False
logging.info("    Added %s at %s with %s", obj.asset_id, obj.position, obj.velocity)

# Run dynamic objects simulation
logging.info("Running the simulation ...")
animation, collisions = simulator.run(frame_start=0, frame_end=scene.frame_end+1)

# Camera
# Left view: fx in pixels = resolution * focal_length / sensor_width
# (e.g. 512 * 35 / 32 at --resolution=512).
logging.info("Setting up the Camera...")
scene.camera = kb.PerspectiveCamera(focal_length=35., sensor_width=32)
scene.camera.position = (0.5, 3, 1)
scene.camera.look_at((0.5, 0, 1))
scene.camera.keyframe_insert("position", 0)
scene.camera.keyframe_insert("quaternion", 0)
logging.info("Rendering the scene ...")
# NOTE(review): Kubric's 'depth' layer is (by its docs) the Euclidean distance
# along the camera ray, not the planar z-depth. disparity = fx * baseline / z
# requires planar z, so convert ray distance to z (per-pixel, using the
# intrinsics) before applying the stereo formula — TODO confirm against the
# kubric documentation; this likely explains the reported mismatch.
data_stack = renderer.render(return_layers=['rgba', 'depth'])
kb.write_image_dict(data_stack, 'output_left')

# Right view: identical intrinsics, translated by 1.0 along x
# (position 0.5 -> -0.5), with a parallel viewing direction, i.e. a
# rectified stereo pair with baseline = 1.0.
logging.info("Setting up the right Camera...")
scene.camera = kb.PerspectiveCamera(focal_length=35., sensor_width=32)
scene.camera.position = (-0.5, 3, 1)
scene.camera.look_at((-0.5, 0, 1))
scene.camera.keyframe_insert("position", 0)
scene.camera.keyframe_insert("quaternion", 0)

logging.info("Rendering the scene ...")
# Only rgba for the right eye; depth is taken from the left view above.
data_stack_second = renderer.render(return_layers=['rgba'])
kb.write_image_dict(data_stack_second, 'output_right')

kb.done()

You can run the script using the following command:

sudo docker run --rm --interactive --user $(id -u):$(id -g) --volume "$(pwd):/kubric" kubricdockerhub/kubruntu /usr/bin/python3 challenges/movi/wb_multi_view_movi_def_worker_move_camera_for_issue.py --camera=linear_movement --resolution=512

Based on the above script, we calculate that fx = 512 * 35 / 32 and baseline = 1.0. The marked corresponding point pairs are shown as follows:

Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions