Commit 2f66486

Adding render modalities (#596)
* Adding render modalities
* Adding section in documentation for isaac installation
* Adding isaac basic usage

Authored by Abhishek Joshi
1 parent 400ca11 commit 2f66486

File tree

4 files changed (+127, -16 lines)


docs/modules/renderers.md

Lines changed: 30 additions & 0 deletions
@@ -23,6 +23,8 @@ MuJoCo exposes users to an OpenGL context supported by [mujoco](https://mujoco.r
 
 Users are also able to render using photorealistic methods through Isaac Sim. Specifically, users can choose between two rendering modes: ray tracing and path tracing. For more information about Isaac Sim rendering options, please visit [here](https://docs.omniverse.nvidia.com/materials-and-rendering/latest/rtx-renderer.html). Isaac renderers are only available to those running on a Linux or Windows machine.
 
+To install Isaac on your local system, please follow the instructions listed [here](https://isaac-sim.github.io/IsaacLab/main/source/setup/installation/pip_installation.html). Make sure to follow the instructions to install both Isaac Sim and Isaac Lab.
+
 ### Ray tracing
 
 ![Ray tracing](../images/gr1_cereal_ray_tracing.png "Ray tracing")

@@ -33,6 +35,34 @@ Ray tracing can be performed in real time. We are currently working on enhancing
 
 Path tracing typically offers higher quality and is ideal for offline learning. If you have the time to collect data and plan to train algorithms using offline data, we recommend using path tracing for its photorealistic results.
 
+### Basic usage
+
+Once all dependencies for Isaac rendering have been installed, users can run `robosuite/scripts/render_dataset_with_omniverse.py` to render previously collected demonstrations using either ray tracing or path tracing. Below we highlight the arguments that can be passed into the script.
+
+- **dataset**: Path to the hdf5 dataset with the demonstrations to render.
+- **ds_format**: Dataset format (options include `robosuite` and `robomimic`, depending on whether the dataset was collected using robosuite or robomimic, respectively).
+- **episode**: Episode/demonstration to render. If no episode is provided, all demonstrations will be rendered.
+- **output_directory**: Directory to store outputs from Isaac rendering and USD generation.
+- **cameras**: List of cameras to render images from. Cameras must be defined in robosuite.
+- **width**: Width of the rendered output.
+- **height**: Height of the rendered output.
+- **renderer**: Renderer mode to use (options include `RayTracedLighting` and `PathTracing`).
+- **save_video**: Whether to save the output renderings as a video.
+- **online**: Enables online rendering and does not save the USD for future offline rendering.
+- **skip_frames**: Renders every nth frame.
+- **hide_sites**: Hides all sites in the scene.
+- **reload_model**: Reloads the model from the MuJoCo XML file.
+- **keep_models**: List of names of models to keep from the original MuJoCo XML file.
+- **rgb**: Render with the RGB modality. If no other modality is selected, we default to rendering with RGB.
+- **normals**: Render with normals.
+- **semantic_segmentation**: Render with semantic segmentation.
+
+Here is an example command to render a video of a demonstration using ray tracing with the RGB and normals modalities.
+
+```bash
+$ python robosuite/scripts/render_dataset_with_omniverse.py --dataset /home/abhishek/Documents/research/rpl/robosuite/robosuite/models/assets/demonstrations_private/1734107564_9898326/demo.hdf5 --ds_format robosuite --episode 1 --camera agentview frontview --width 1920 --height 1080 --renderer RayTracedLighting --save_video --hide_sites --rgb --normals
+```
+
 ### Rendering Speed
 
 Below, we present a table showing the estimated frames per second when using these renderers. Note that the exact speed of rendering might depend on your machine and scene size. Larger scenes may take longer to render. Additionally, changing renderer inputs such as samples per pixel (spp) or max bounces might affect rendering speeds. The values below are estimates using the `Lift` task with an NVIDIA GeForce RTX 4090. We use an spp of 64 when rendering with path tracing.
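For each camera, the script writes frames into one subfolder per selected modality, named `<modality>_<frame_id>.<ext>`. A minimal sketch of that layout (the `frame_path` helper is hypothetical; the path components mirror the writer added in this commit):

```python
import os

def frame_path(output_dir, camera, modality, frame_id, ext="png"):
    # Mirrors the writer's output layout:
    # <output_dir>/<camera>/<modality>/<modality>_<frame_id>.<ext>
    return os.path.join(output_dir, camera, modality, f"{modality}_{frame_id}.{ext}")

print(frame_path("renders", "agentview", "normals", 12))
```

This is where `process_folders` later looks for frames when assembling per-modality videos.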

robosuite/scripts/render_dataset_with_omniverse.py

Lines changed: 89 additions & 13 deletions
@@ -74,6 +74,23 @@
     "--keep_models", type=str, nargs="+", default=[], help="(optional) keep the model from the Mujoco XML file"
 )
 
+# adding rendering types
+parser.add_argument(
+    "--rgb",
+    action="store_true",
+    default=False,
+)
+parser.add_argument(
+    "--normals",
+    action="store_true",
+    default=False,
+)
+parser.add_argument(
+    "--semantic_segmentation",
+    action="store_true",
+    default=False,
+)
+
 # Add arguments for launch
 AppLauncher.add_app_launcher_args(parser)
 # Parse the arguments
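The three new flags are plain boolean switches: present means `True`, absent means `False`. A self-contained sketch of the same pattern (the loop and sample argv are illustrative, not from the script):

```python
import argparse

parser = argparse.ArgumentParser()
# store_true with default=False: passing the flag sets it to True
for flag in ("--rgb", "--normals", "--semantic_segmentation"):
    parser.add_argument(flag, action="store_true", default=False)

args = parser.parse_args(["--rgb", "--normals"])
print(args.rgb, args.normals, args.semantic_segmentation)  # True True False
```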
@@ -95,11 +112,13 @@
 import cv2
 import h5py
 import lxml.etree as ET
+import numpy as np
 import omni
 import omni.isaac.core.utils.stage as stage_utils
 import omni.kit.app
 import omni.replicator.core as rep
 import omni.timeline
+from pxr import Semantics
 from termcolor import colored
 from tqdm import tqdm

@@ -124,6 +143,8 @@
 scene_option = mujoco.MjvOption()
 scene_option.geomgroup = [0, 1, 0, 0, 0, 0]
 
+render_modalities = {"rgb": args.rgb, "normals": args.normals, "semantic_segmentation": args.semantic_segmentation}
+
 
 def make_sites_invisible(mujoco_xml):
     """
@@ -343,6 +364,24 @@ def link_env_with_ov(self):
         )
         exp.update_scene(data=data, scene_option=scene_option)
         exp.add_light(pos=[0, 0, 0], intensity=1500, light_type="dome", light_name="dome_1")
+
+        # adds semantic information to objects in the scene
+        if args.semantic_segmentation:
+            for geom in exp.scene.geoms:
+                geom_id = geom.objid
+                geom_name = exp._get_geom_name(geom)
+                if geom_id in self.env.model._geom_ids_to_classes:
+                    semantic_value = self.env.model._geom_ids_to_classes[geom_id]
+                    if "site" in geom_name or "None" in geom_name:
+                        continue
+                    prim = exp.geom_refs[geom_name].usd_prim
+                    instance_name = f"class_{semantic_value}"
+                    sem = Semantics.SemanticsAPI.Apply(prim, instance_name)
+                    sem.CreateSemanticTypeAttr()
+                    sem.CreateSemanticDataAttr()
+                    sem.GetSemanticTypeAttr().Set("class")
+                    sem.GetSemanticDataAttr().Set(semantic_value)
+
         return exp
 
     def update_simulation(self, index):
@@ -369,6 +408,8 @@ def __init__(
         output_dir: str = None,
         image_output_format: str = "png",
         rgb: bool = False,
+        normals: bool = False,
+        semantic_segmentation: bool = False,
         frame_padding: int = 4,
     ):
         self._output_dir = output_dir
@@ -385,9 +426,16 @@ def __init__(
         self.data_structure = "annotator"
         self.write_ready = False
 
-        # RGB
+        self.rgb = rgb
+        self.normals = normals
+        self.semantic_segmentation = semantic_segmentation
+
         if rgb:
             self.annotators.append(rep.AnnotatorRegistry.get_annotator("rgb"))
+        if normals:
+            self.annotators.append(rep.AnnotatorRegistry.get_annotator("normals"))
+        if semantic_segmentation:
+            self.annotators.append(rep.AnnotatorRegistry.get_annotator("semantic_segmentation", {"colorize": True}))
 
     def write(self, data: dict):
         """Write function called from the OgnWriter node on every frame to process annotator output.
@@ -399,7 +447,25 @@
         for annotator_name, annotator_data in data["annotators"].items():
             for idx, (render_product_name, anno_rp_data) in enumerate(annotator_data.items()):
                 if annotator_name == "rgb":
-                    filepath = os.path.join(args.cameras[idx], f"rgb_{self._frame_id}.{self._image_output_format}")
+                    filepath = os.path.join(
+                        args.cameras[idx], "rgb", f"rgb_{self._frame_id}.{self._image_output_format}"
+                    )
+                    self._backend.write_image(filepath, anno_rp_data["data"])
+                elif annotator_name == "normals":
+                    normals = anno_rp_data["data"][..., :3]
+                    norm_lengths = np.linalg.norm(normals, axis=-1, keepdims=True)
+                    normals_normalized = normals / np.clip(norm_lengths, 1e-8, None)
+                    img = ((normals_normalized + 1) / 2 * 255).astype(np.uint8)
+                    filepath = os.path.join(
+                        args.cameras[idx], "normals", f"normals_{self._frame_id}.{self._image_output_format}"
+                    )
+                    self._backend.write_image(filepath, img)
+                elif annotator_name == "semantic_segmentation":
+                    filepath = os.path.join(
+                        args.cameras[idx],
+                        "semantic_segmentation",
+                        f"semantic_segmentation_{self._frame_id}.{self._image_output_format}",
+                    )
                     self._backend.write_image(filepath, anno_rp_data["data"])
 
         self._frame_id += 1
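The normals branch above normalizes each 3-vector (with a small epsilon to guard zero-length vectors) and then maps components from [-1, 1] to displayable [0, 255] pixels. That conversion in isolation, NumPy only (the function name and sample input are illustrative):

```python
import numpy as np

def normals_to_image(normals: np.ndarray) -> np.ndarray:
    # Normalize each 3-vector, guarding against zero-length normals
    lengths = np.linalg.norm(normals, axis=-1, keepdims=True)
    unit = normals / np.clip(lengths, 1e-8, None)
    # Map components from [-1, 1] to [0, 255]
    return ((unit + 1) / 2 * 255).astype(np.uint8)

img = normals_to_image(np.array([[[0.0, 0.0, 2.0]]]))  # a +z normal of any length
print(img)  # [[[127 127 255]]]
```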
@@ -481,7 +547,12 @@ def init_recorder(self):
 
         # Create writer for capturing generated data
         self.writer = rep.WriterRegistry.get(self.writer_name)
-        self.writer.initialize(output_dir=self.output_dir, rgb=True)
+        self.writer.initialize(
+            output_dir=self.output_dir,
+            rgb=args.rgb,
+            normals=args.normals,
+            semantic_segmentation=args.semantic_segmentation,
+        )
 
         print("Writer Initiazed")

@@ -589,22 +660,27 @@ def create_video_from_frames(self, frame_folder, output_path, fps=30):
         video.release()
         print(f"Video saved: {output_path}")
 
+    def create_video(self, videos_folder, camera, data_type):
+        camera_folder_path = os.path.join(self.output_dir, camera, data_type)  # temp, change to render type
+        if not os.path.isdir(camera_folder_path):
+            return
+
+        # Construct output filename and path
+        output_filename = f"{camera}_{data_type}.mp4"
+        output_path = os.path.join(videos_folder, output_filename)
+
+        # Create the video from the frames in the camera folder
+        self.create_video_from_frames(camera_folder_path, output_path)
+
     def process_folders(self):
        videos_folder = os.path.join(self.output_dir, "videos")
        os.makedirs(videos_folder, exist_ok=True)
 
        # Iterate over each camera folder in the output directory
        for camera in args.cameras:
-            camera_folder_path = os.path.join(self.output_dir, camera)
-            if not os.path.isdir(camera_folder_path):
-                continue
-
-            # Construct output filename and path
-            output_filename = f"{camera}_rgb.mp4"
-            output_path = os.path.join(videos_folder, output_filename)
-
-            # Create the video from the frames in the camera folder
-            self.create_video_from_frames(camera_folder_path, output_path)
+            for render_modality, selected in render_modalities.items():
+                if selected:
+                    self.create_video(videos_folder=videos_folder, camera=camera, data_type=render_modality)
 
 
 def main():
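`process_folders` now fans out over the `render_modalities` dict instead of hard-coding a single RGB video per camera. The selection logic can be sketched on its own (flag values and camera names below are made-up stand-ins for `args`):

```python
# hypothetical stand-ins for args.rgb / args.normals / args.semantic_segmentation
render_modalities = {"rgb": True, "normals": True, "semantic_segmentation": False}
cameras = ["agentview", "frontview"]

# one video per (camera, enabled modality) pair, named as in create_video
videos = [
    f"{camera}_{modality}.mp4"
    for camera in cameras
    for modality, selected in render_modalities.items()
    if selected
]
print(videos)
```

Disabled modalities are simply skipped, and `create_video` additionally returns early if a camera/modality folder was never written.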

robosuite/utils/usd/exporter.py

Lines changed: 2 additions & 0 deletions
@@ -527,6 +527,8 @@ def _get_geom_name(self, geom) -> str:
         geom_name = mujoco.mj_id2name(self.model, geom.objtype, geom.objid)
         if not geom_name:
             geom_name = "None"
+        geom_name = geom_name.replace("-", "m_")
+        geom_name = geom_name.replace("+", "p_")
         geom_name += f"_id{geom.objid}"
 
         # adding additional naming information to differentiate
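`-` and `+` are not legal in USD prim identifiers, which is presumably why the exporter now rewrites them before building prim paths. The substitution in isolation (the helper name is illustrative):

```python
def sanitize_usd_name(name: str) -> str:
    # Replace characters that are illegal in USD prim names,
    # mirroring the two replace() calls added in exporter.py
    return name.replace("-", "m_").replace("+", "p_")

print(sanitize_usd_name("gripper0-finger+tip"))  # gripper0m_fingerp_tip
```

The same substitution is applied to `obj_name` in `robosuite/utils/usd/objects.py` so that `Mesh_Xform_*` and `Mesh_*` paths stay consistent with the geom names.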

robosuite/utils/usd/objects.py

Lines changed: 6 additions & 3 deletions
@@ -59,7 +59,10 @@ def __init__(
         self.rgba = rgba
         self.texture_file = texture_file
 
-        self.xform_path = f"/World/Mesh_Xform_{obj_name}"
+        self.obj_name = self.obj_name.replace("-", "m_")
+        self.obj_name = self.obj_name.replace("+", "p_")
+
+        self.xform_path = f"/World/Mesh_Xform_{self.obj_name}"
         self.usd_xform = UsdGeom.Xform.Define(stage, self.xform_path)
 
         # defining ops required by update function
@@ -199,7 +202,7 @@ def __init__(
 
         self.dataid = dataid
 
-        mesh_path = f"{self.xform_path}/Mesh_{obj_name}"
+        mesh_path = f"{self.xform_path}/Mesh_{self.obj_name}"
         self.usd_mesh = UsdGeom.Mesh.Define(stage, mesh_path)
         self.usd_prim = stage.GetPrimAtPath(mesh_path)
@@ -288,7 +291,7 @@ def __init__(
         self.mesh_config = mesh_config
         self.prim_mesh = self.generate_primitive_mesh()
 
-        mesh_path = f"{self.xform_path}/Mesh_{obj_name}"
+        mesh_path = f"{self.xform_path}/Mesh_{self.obj_name}"
         self.usd_mesh = UsdGeom.Mesh.Define(stage, mesh_path)
         self.usd_prim = stage.GetPrimAtPath(mesh_path)
0 commit comments
