2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Safe API to override `__init__`'s arguments saved in checkpoint file with
`Module.from_checkpoint("chkpt.mdlus", override_args=set(...))`.
- PyTorch Geometric MeshGraphNet backend.
- Transient Mesh Dataset.
- Functionality in DoMINO to take an arbitrary number of `scalar` or `vector`
  global parameters and encode them using the `ParameterModel` class.

@@ -30,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Existing DGL-based vortex shedding example has been renamed to `vortex_shedding_mgn_dgl`.
Added new `vortex_shedding_mgn` example that uses PyTorch Geometric instead.
- HEALPixLayer can now use earth2grid HEALPix padding ops, if desired
- Mesh Dataset now supports `.vtm` (multi-block) files
- Migrated Vortex Shedding Reduced Mesh example to PyTorch Geometric.
- CorrDiff example: fixed bugs when training regression `UNet`.
- Diffusion models: fixed bugs related to gradient checkpointing on non-square
1 change: 1 addition & 0 deletions physicsnemo/datapipes/cae/__init__.py
@@ -16,3 +16,4 @@

from .domino_datapipe import DoMINODataPipe
from .mesh_datapipe import MeshDatapipe
from .transient_mesh_datapipe import TransientMeshDatapipe
131 changes: 32 additions & 99 deletions physicsnemo/datapipes/cae/mesh_datapipe.py
@@ -17,7 +17,6 @@

import numpy as np
import torch
import vtk

try:
import nvidia.dali as dali
@@ -38,7 +37,14 @@
from physicsnemo.datapipes.datapipe import Datapipe
from physicsnemo.datapipes.meta import DatapipeMetaData

from .readers import read_cgns, read_vtp, read_vtu
from .readers import (
parse_vtk_polydata,
parse_vtk_unstructuredgrid,
read_cgns,
read_vtm,
read_vtp,
read_vtu,
)


@dataclass
@@ -57,7 +63,7 @@ class MeshDatapipe(Datapipe):
Parameters
----------
data_dir : str
Directory where ERA5 data is stored
Directory where data is stored
    variables : Union[List[str], None]
Ordered list of variables to be loaded from the files
num_variables : int
@@ -70,8 +76,8 @@
If provided, the statistics are used to normalize the attributes
batch_size : int, optional
Batch size, by default 1
num_steps : int, optional
[Review comment from the PR author: Is this ever needed? This seems like copy pasta from ERA5 or something?]
        Number of timesteps included in the output variables, by default 1
num_samples : int, optional
Number of samples to be loaded from the files, by default 1
shuffle : bool, optional
Shuffle dataset, by default True
num_workers : int, optional
@@ -84,6 +90,20 @@
Number of training processes, by default 1
    cache_data : bool, optional
Whether to cache the data in memory for faster access in subsequent epochs, by default False

Note
----
The data is expected to be stored in the following format:
data_dir/
├── mesh_0001.vtp
├── mesh_0002.vtp
└── ...

The data is returned as a tuple of vertices, attributes, and edges.
    An example of the data output, a tuple of tensors:
vertices: torch.Size([batch_size, num_vertices, dim])
ux: torch.Size([batch_size, num_vertices, 1])
edges: torch.Size([batch_size, num_edges, 2])
"""

def __init__(
@@ -150,6 +170,8 @@ def parse_dataset_files(self) -> None:
pattern = "*.vtp"
case "vtu":
pattern = "*.vtu"
case "vtm":
pattern = "*.vtm"
case "cgns":
pattern = "*.cgns"
case _:
@@ -378,6 +400,8 @@ def mesh_reader(self):
return read_vtp
if self.file_format == "vtu":
return read_vtu
if self.file_format == "vtm":
return read_vtm
if self.file_format == "cgns":
return read_cgns
else:
@@ -387,101 +411,10 @@ def mesh_reader(self):

def parse_vtk_data(self):
if self.file_format == "vtp":
return _parse_vtk_polydata
elif self.file_format in ["vtu", "cgns"]:
return _parse_vtk_unstructuredgrid
return parse_vtk_polydata
elif self.file_format in ["vtu", "cgns", "vtm"]:
return parse_vtk_unstructuredgrid
else:
raise NotImplementedError(
f"Data type {self.file_format} is not supported yet"
)


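`mesh_reader` and `parse_vtk_data` pair a format-specific reader with a matching parser. The call site that combines them is outside this diff, so the following composition is only a plausible illustration, not the datapipe's actual wiring:

```python
# Hypothetical composition; the real call site is not shown in this diff.
reader = self.mesh_reader()     # e.g. read_vtm when self.file_format == "vtm"
parser = self.parse_vtk_data()  # e.g. parse_vtk_unstructuredgrid for "vtm"
vertices, attributes, edges = parser(reader(file_path), self.variables)
```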
def _parse_vtk_polydata(polydata, variables):
# Fetch vertices
points = polydata.GetPoints()
if points is None:
raise ValueError("Failed to get points from the polydata.")
vertices = torch.tensor(
np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]),
dtype=torch.float32,
)

# Fetch node attributes # TODO modularize
attributes = []
point_data = polydata.GetPointData()
if point_data is None:
raise ValueError("Failed to get point data from the unstructured grid.")
for array_name in variables:
try:
array = point_data.GetArray(array_name)
except ValueError:
raise ValueError(
f"Failed to get array {array_name} from the unstructured grid."
)
array_data = np.zeros(
(points.GetNumberOfPoints(), array.GetNumberOfComponents())
)
for j in range(points.GetNumberOfPoints()):
array.GetTuple(j, array_data[j])
attributes.append(torch.tensor(array_data, dtype=torch.float32))
attributes = torch.cat(attributes, dim=-1)
# TODO torch.cat is usually very inefficient when the number of items is large.
# If possible, the resulting tensor should be pre-allocated and filled in during the loop.

# Fetch edges
polys = polydata.GetPolys()
if polys is None:
raise ValueError("Failed to get polygons from the polydata.")
polys.InitTraversal()
edges = []
id_list = vtk.vtkIdList()
for _ in range(polys.GetNumberOfCells()):
polys.GetNextCell(id_list)
num_ids = id_list.GetNumberOfIds()
edges = [
(id_list.GetId(j), id_list.GetId((j + 1) % num_ids)) for j in range(num_ids)
]
edges = torch.tensor(edges, dtype=torch.long)

return vertices, attributes, edges


def _parse_vtk_unstructuredgrid(grid, variables):
# Fetch vertices
points = grid.GetPoints()
if points is None:
raise ValueError("Failed to get points from the unstructured grid.")
vertices = torch.tensor(
np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]),
dtype=torch.float32,
)

# Fetch node attributes # TODO modularize
attributes = []
point_data = grid.GetPointData()
if point_data is None:
raise ValueError("Failed to get point data from the unstructured grid.")
for array_name in variables:
try:
array = point_data.GetArray(array_name)
except ValueError:
raise ValueError(
f"Failed to get array {array_name} from the unstructured grid."
)
array_data = np.zeros(
(points.GetNumberOfPoints(), array.GetNumberOfComponents())
)
for j in range(points.GetNumberOfPoints()):
array.GetTuple(j, array_data[j])
attributes.append(torch.tensor(array_data, dtype=torch.float32))
if variables:
attributes = torch.cat(attributes, dim=-1)
else:
attributes = torch.zeros((1,), dtype=torch.float32)

# Return a dummy tensor of zeros for edges since they are not directly computable
return (
vertices,
attributes,
torch.zeros((0, 2), dtype=torch.long),
) # Dummy tensor for edges
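For orientation, here is a minimal usage sketch of the datapipe with the newly supported VTM format. The constructor arguments are assumptions pieced together from the docstring and the `file_format` dispatch above, not a verbatim API, and the data directory is hypothetical:

```python
from physicsnemo.datapipes.cae import MeshDatapipe

# Argument names below are assumed from the documented parameters.
datapipe = MeshDatapipe(
    data_dir="/data/meshes/",     # hypothetical: mesh_0001.vtm, mesh_0002.vtm, ...
    variables=["ux", "uy", "p"],  # point-data arrays to load
    num_variables=3,
    file_format="vtm",            # newly supported by this PR
    batch_size=1,
    shuffle=True,
)

for batch in datapipe:
    # Per the docstring, each sample carries:
    #   vertices:   [batch_size, num_vertices, dim]
    #   attributes: [batch_size, num_vertices, num_variables]
    #   edges:      [batch_size, num_edges, 2] (empty placeholder for vtu/vtm/cgns)
    ...
```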
130 changes: 130 additions & 0 deletions physicsnemo/datapipes/cae/readers.py
@@ -17,12 +17,104 @@
import os
from typing import Any

import numpy as np
import torch
import vtk

Tensor = torch.Tensor


def parse_vtk_polydata(polydata, variables):
# Fetch vertices
points = polydata.GetPoints()
if points is None:
raise ValueError("Failed to get points from the polydata.")
vertices = torch.tensor(
np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]),
dtype=torch.float32,
)

# Fetch node attributes # TODO modularize
attributes = []
point_data = polydata.GetPointData()
if point_data is None:
raise ValueError("Failed to get point data from the unstructured grid.")
for array_name in variables:
        # vtkPointData.GetArray returns None (rather than raising) when the
        # array is missing, so check the result explicitly.
        array = point_data.GetArray(array_name)
        if array is None:
            raise ValueError(
                f"Failed to get array {array_name} from the polydata."
            )
array_data = np.zeros(
(points.GetNumberOfPoints(), array.GetNumberOfComponents())
)
for j in range(points.GetNumberOfPoints()):
array.GetTuple(j, array_data[j])
attributes.append(torch.tensor(array_data, dtype=torch.float32))
    if variables:
        attributes = torch.cat(attributes, dim=-1)
    else:
        attributes = torch.zeros((1,), dtype=torch.float32)
# TODO torch.cat is usually very inefficient when the number of items is large.
# If possible, the resulting tensor should be pre-allocated and filled in during the loop.

# Fetch edges
polys = polydata.GetPolys()
if polys is None:
raise ValueError("Failed to get polygons from the polydata.")
polys.InitTraversal()
edges = []
id_list = vtk.vtkIdList()
for _ in range(polys.GetNumberOfCells()):
polys.GetNextCell(id_list)
num_ids = id_list.GetNumberOfIds()
        # Accumulate edges from every cell; reassigning `edges` here would
        # keep only the last cell's edges.
        edges.extend(
            (id_list.GetId(j), id_list.GetId((j + 1) % num_ids))
            for j in range(num_ids)
        )
edges = torch.tensor(edges, dtype=torch.long)

return vertices, attributes, edges


def parse_vtk_unstructuredgrid(grid, variables):
# Fetch vertices
points = grid.GetPoints()
if points is None:
raise ValueError("Failed to get points from the unstructured grid.")
vertices = torch.tensor(
np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]),
dtype=torch.float32,
)

# Fetch node attributes # TODO modularize
attributes = []
point_data = grid.GetPointData()
if point_data is None:
raise ValueError("Failed to get point data from the unstructured grid.")
for array_name in variables:
        # vtkPointData.GetArray returns None (rather than raising) when the
        # array is missing, so check the result explicitly.
        array = point_data.GetArray(array_name)
        if array is None:
            raise ValueError(
                f"Failed to get array {array_name} from the unstructured grid."
            )
array_data = np.zeros(
(points.GetNumberOfPoints(), array.GetNumberOfComponents())
)
for j in range(points.GetNumberOfPoints()):
array.GetTuple(j, array_data[j])
attributes.append(torch.tensor(array_data, dtype=torch.float32))
if variables:
attributes = torch.cat(attributes, dim=-1)
else:
attributes = torch.zeros((1,), dtype=torch.float32)

# Return a dummy tensor of zeros for edges since they are not directly computable
return (
vertices,
attributes,
torch.zeros((0, 2), dtype=torch.long),
) # Dummy tensor for edges


def read_vtp(file_path: str) -> Any: # TODO add support for older format (VTK)
"""
Read a VTP file and return the polydata.
@@ -95,6 +187,44 @@ def read_vtu(file_path: str) -> Any:
return grid


def read_vtm(file_path: str) -> Any:
"""
Read a VTM (VTK MultiBlock) file and return the unstructured grid data.

Parameters
----------
file_path : str
Path to the VTM file.

Returns
-------
vtkUnstructuredGrid
The unstructured grid data extracted from the multi-block dataset.
"""
# Check if file exists
if not os.path.exists(file_path):
raise FileNotFoundError(f"{file_path} does not exist.")

# Check if file has .vtm extension
if not file_path.endswith(".vtm"):
raise ValueError(f"Expected a .vtm file, got {file_path}")

# Create a VTM reader
reader = vtk.vtkXMLMultiBlockDataReader()
reader.SetFileName(file_path)
reader.Update()

# Get the multi-block dataset
multi_block = reader.GetOutput()

# Check if the multi-block dataset is valid
if multi_block is None:
raise ValueError(f"Failed to read multi-block data from {file_path}")

# Extract and return the vtkUnstructuredGrid from the multi-block dataset
return _extract_unstructured_grid(multi_block)
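`_extract_unstructured_grid` is referenced here but its body is not part of this diff. Below is a minimal sketch of one plausible implementation, assuming the helper simply merges all leaf blocks into a single grid with `vtkAppendFilter`; the actual helper may differ:

```python
def _extract_unstructured_grid(multi_block):
    # Sketch under the assumption stated above; not necessarily the real helper.
    append = vtk.vtkAppendFilter()
    it = multi_block.NewIterator()  # visits leaf blocks by default
    it.InitTraversal()
    while not it.IsDoneWithAllItems():
        block = it.GetCurrentDataObject()
        if block is not None:
            append.AddInputData(block)
        it.GoToNextItem()
    append.Update()
    return append.GetOutput()  # a single merged vtkUnstructuredGrid
```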


def read_cgns(file_path: str) -> Any:
"""
Read a CGNS file and return the unstructured grid data.
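Taken together, the readers and parsers compose into a small pipeline. A usage sketch; the file paths are hypothetical:

```python
from physicsnemo.datapipes.cae.readers import (
    parse_vtk_polydata,
    parse_vtk_unstructuredgrid,
    read_vtm,
    read_vtp,
)

# Surface mesh: VTP polydata provides real edges from its polygon cells.
polydata = read_vtp("mesh_0001.vtp")  # hypothetical path
vertices, attrs, edges = parse_vtk_polydata(polydata, ["ux"])

# Multi-block mesh: the VTM is reduced to one unstructured grid; edges come
# back as an empty (0, 2) placeholder since they are not computed here.
grid = read_vtm("mesh_0001.vtm")  # hypothetical path
vertices, attrs, edges = parse_vtk_unstructuredgrid(grid, ["ux"])
```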