2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Safe API to override `__init__`'s arguments saved in checkpoint file with
`Module.from_checkpoint("chkpt.mdlus", override_args=set(...))`.
- PyTorch Geometric MeshGraphNet backend.
- Transient Mesh Dataset.
- Functionality in DoMINO to take an arbitrary number of `scalar` or `vector`
  global parameters and encode them using the `ParameterModel` class.

@@ -30,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Existing DGL-based vortex shedding example has been renamed to `vortex_shedding_mgn_dgl`.
Added new `vortex_shedding_mgn` example that uses PyTorch Geometric instead.
- HEALPixLayer can now use earth2grid HEALPix padding ops, if desired
- Mesh Dataset now supports `.vtm` (multi-block) files
- Migrated Vortex Shedding Reduced Mesh example to PyTorch Geometric.
- CorrDiff example: fixed bugs when training regression `UNet`.
- Diffusion models: fixed bugs related to gradient checkpointing on non-square
1 change: 1 addition & 0 deletions physicsnemo/datapipes/cae/__init__.py
@@ -16,3 +16,4 @@

from .domino_datapipe import DoMINODataPipe
from .mesh_datapipe import MeshDatapipe
from .transient_mesh_datapipe import TransientMeshDatapipe
131 changes: 32 additions & 99 deletions physicsnemo/datapipes/cae/mesh_datapipe.py
@@ -17,7 +17,6 @@

import numpy as np
import torch
import vtk

try:
import nvidia.dali as dali
@@ -38,7 +37,14 @@
from physicsnemo.datapipes.datapipe import Datapipe
from physicsnemo.datapipes.meta import DatapipeMetaData

from .readers import read_cgns, read_vtp, read_vtu
from .readers import (
parse_vtk_polydata,
parse_vtk_unstructuredgrid,
read_cgns,
read_vtm,
read_vtp,
read_vtu,
)


@dataclass
@@ -57,7 +63,7 @@ class MeshDatapipe(Datapipe):
Parameters
----------
data_dir : str
Directory where ERA5 data is stored
Directory where data is stored
    variables : Union[List[str], None]
Ordered list of variables to be loaded from the files
num_variables : int
@@ -70,8 +76,8 @@
If provided, the statistics are used to normalize the attributes
batch_size : int, optional
Batch size, by default 1
num_steps : int, optional
[Review comment from the PR author: Is this ever needed? This seems like copy pasta from ERA5 or something?]
        Number of timesteps included in the output variables, by default 1
num_samples : int, optional
Number of samples to be loaded from the files, by default 1
shuffle : bool, optional
Shuffle dataset, by default True
num_workers : int, optional
@@ -84,6 +90,20 @@
Number of training processes, by default 1
    cache_data : bool, optional
Whether to cache the data in memory for faster access in subsequent epochs, by default False

Note
----
The data is expected to be stored in the following format:
data_dir/
├── mesh_0001.vtp
├── mesh_0002.vtp
└── ...

The data is returned as a tuple of vertices, attributes, and edges.
    An example of the data output, a tuple of tensors:
vertices: torch.Size([batch_size, num_vertices, dim])
ux: torch.Size([batch_size, num_vertices, 1])
edges: torch.Size([batch_size, num_edges, 2])
"""

def __init__(
@@ -150,6 +170,8 @@ def parse_dataset_files(self) -> None:
pattern = "*.vtp"
case "vtu":
pattern = "*.vtu"
case "vtm":
pattern = "*.vtm"
case "cgns":
pattern = "*.cgns"
case _:
@@ -378,6 +400,8 @@ def mesh_reader(self):
return read_vtp
if self.file_format == "vtu":
return read_vtu
if self.file_format == "vtm":
return read_vtm
if self.file_format == "cgns":
return read_cgns
else:
@@ -387,101 +411,10 @@ def mesh_reader(self):

def parse_vtk_data(self):
if self.file_format == "vtp":
return _parse_vtk_polydata
elif self.file_format in ["vtu", "cgns"]:
return _parse_vtk_unstructuredgrid
return parse_vtk_polydata
elif self.file_format in ["vtu", "cgns", "vtm"]:
return parse_vtk_unstructuredgrid
else:
raise NotImplementedError(
f"Data type {self.file_format} is not supported yet"
)


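`mesh_reader` and `parse_vtk_data` pair a format-specific reader with a matching parser. The call site that combines them is outside this diff, so the following composition is only a plausible illustration, not the datapipe's actual wiring:

```python
# Hypothetical composition; the real call site is not shown in this diff.
reader = self.mesh_reader()     # e.g. read_vtm when self.file_format == "vtm"
parser = self.parse_vtk_data()  # e.g. parse_vtk_unstructuredgrid for "vtm"
vertices, attributes, edges = parser(reader(file_path), self.variables)
```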
def _parse_vtk_polydata(polydata, variables):
# Fetch vertices
points = polydata.GetPoints()
if points is None:
raise ValueError("Failed to get points from the polydata.")
vertices = torch.tensor(
np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]),
dtype=torch.float32,
)

# Fetch node attributes # TODO modularize
attributes = []
point_data = polydata.GetPointData()
if point_data is None:
raise ValueError("Failed to get point data from the unstructured grid.")
for array_name in variables:
try:
array = point_data.GetArray(array_name)
except ValueError:
raise ValueError(
f"Failed to get array {array_name} from the unstructured grid."
)
array_data = np.zeros(
(points.GetNumberOfPoints(), array.GetNumberOfComponents())
)
for j in range(points.GetNumberOfPoints()):
array.GetTuple(j, array_data[j])
attributes.append(torch.tensor(array_data, dtype=torch.float32))
attributes = torch.cat(attributes, dim=-1)
# TODO torch.cat is usually very inefficient when the number of items is large.
# If possible, the resulting tensor should be pre-allocated and filled in during the loop.

# Fetch edges
polys = polydata.GetPolys()
if polys is None:
raise ValueError("Failed to get polygons from the polydata.")
polys.InitTraversal()
edges = []
id_list = vtk.vtkIdList()
for _ in range(polys.GetNumberOfCells()):
polys.GetNextCell(id_list)
num_ids = id_list.GetNumberOfIds()
edges = [
(id_list.GetId(j), id_list.GetId((j + 1) % num_ids)) for j in range(num_ids)
]
edges = torch.tensor(edges, dtype=torch.long)

return vertices, attributes, edges


def _parse_vtk_unstructuredgrid(grid, variables):
# Fetch vertices
points = grid.GetPoints()
if points is None:
raise ValueError("Failed to get points from the unstructured grid.")
vertices = torch.tensor(
np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]),
dtype=torch.float32,
)

# Fetch node attributes # TODO modularize
attributes = []
point_data = grid.GetPointData()
if point_data is None:
raise ValueError("Failed to get point data from the unstructured grid.")
for array_name in variables:
try:
array = point_data.GetArray(array_name)
except ValueError:
raise ValueError(
f"Failed to get array {array_name} from the unstructured grid."
)
array_data = np.zeros(
(points.GetNumberOfPoints(), array.GetNumberOfComponents())
)
for j in range(points.GetNumberOfPoints()):
array.GetTuple(j, array_data[j])
attributes.append(torch.tensor(array_data, dtype=torch.float32))
if variables:
attributes = torch.cat(attributes, dim=-1)
else:
attributes = torch.zeros((1,), dtype=torch.float32)

# Return a dummy tensor of zeros for edges since they are not directly computable
return (
vertices,
attributes,
torch.zeros((0, 2), dtype=torch.long),
) # Dummy tensor for edges
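For orientation, here is a minimal usage sketch of the datapipe with the newly supported VTM format. The constructor arguments are assumptions pieced together from the docstring and the `file_format` dispatch above, not a verbatim API, and the data directory is hypothetical:

```python
from physicsnemo.datapipes.cae import MeshDatapipe

# Argument names below are assumed from the documented parameters.
datapipe = MeshDatapipe(
    data_dir="/data/meshes/",     # hypothetical: mesh_0001.vtm, mesh_0002.vtm, ...
    variables=["ux", "uy", "p"],  # point-data arrays to load
    num_variables=3,
    file_format="vtm",            # newly supported by this PR
    batch_size=1,
    shuffle=True,
)

for batch in datapipe:
    # Per the docstring, each sample carries:
    #   vertices:   [batch_size, num_vertices, dim]
    #   attributes: [batch_size, num_vertices, num_variables]
    #   edges:      [batch_size, num_edges, 2] (empty placeholder for vtu/vtm/cgns)
    ...
```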
130 changes: 130 additions & 0 deletions physicsnemo/datapipes/cae/readers.py
@@ -17,12 +17,104 @@
import os
from typing import Any

import numpy as np
import torch
import vtk

Tensor = torch.Tensor


def parse_vtk_polydata(polydata, variables):
# Fetch vertices
points = polydata.GetPoints()
if points is None:
raise ValueError("Failed to get points from the polydata.")
vertices = torch.tensor(
np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]),
dtype=torch.float32,
)

# Fetch node attributes # TODO modularize
attributes = []
point_data = polydata.GetPointData()
if point_data is None:
raise ValueError("Failed to get point data from the unstructured grid.")
for array_name in variables:
        # vtkPointData.GetArray returns None (rather than raising) when the
        # array is missing, so check the result explicitly.
        array = point_data.GetArray(array_name)
        if array is None:
            raise ValueError(
                f"Failed to get array {array_name} from the polydata."
            )
array_data = np.zeros(
(points.GetNumberOfPoints(), array.GetNumberOfComponents())
)
for j in range(points.GetNumberOfPoints()):
array.GetTuple(j, array_data[j])
attributes.append(torch.tensor(array_data, dtype=torch.float32))
    if variables:
        attributes = torch.cat(attributes, dim=-1)
    else:
        attributes = torch.zeros((1,), dtype=torch.float32)
# TODO torch.cat is usually very inefficient when the number of items is large.
# If possible, the resulting tensor should be pre-allocated and filled in during the loop.

# Fetch edges
polys = polydata.GetPolys()
if polys is None:
raise ValueError("Failed to get polygons from the polydata.")
polys.InitTraversal()
edges = []
id_list = vtk.vtkIdList()
for _ in range(polys.GetNumberOfCells()):
polys.GetNextCell(id_list)
num_ids = id_list.GetNumberOfIds()
        # Accumulate edges from every cell; reassigning `edges` here would
        # keep only the last cell's edges.
        edges.extend(
            (id_list.GetId(j), id_list.GetId((j + 1) % num_ids))
            for j in range(num_ids)
        )
edges = torch.tensor(edges, dtype=torch.long)

return vertices, attributes, edges


def parse_vtk_unstructuredgrid(grid, variables):
# Fetch vertices
points = grid.GetPoints()
if points is None:
raise ValueError("Failed to get points from the unstructured grid.")
vertices = torch.tensor(
np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]),
dtype=torch.float32,
)

# Fetch node attributes # TODO modularize
attributes = []
point_data = grid.GetPointData()
if point_data is None:
raise ValueError("Failed to get point data from the unstructured grid.")
for array_name in variables:
        # vtkPointData.GetArray returns None (rather than raising) when the
        # array is missing, so check the result explicitly.
        array = point_data.GetArray(array_name)
        if array is None:
            raise ValueError(
                f"Failed to get array {array_name} from the unstructured grid."
            )
array_data = np.zeros(
(points.GetNumberOfPoints(), array.GetNumberOfComponents())
)
for j in range(points.GetNumberOfPoints()):
array.GetTuple(j, array_data[j])
attributes.append(torch.tensor(array_data, dtype=torch.float32))
if variables:
attributes = torch.cat(attributes, dim=-1)
else:
attributes = torch.zeros((1,), dtype=torch.float32)

# Return a dummy tensor of zeros for edges since they are not directly computable
return (
vertices,
attributes,
torch.zeros((0, 2), dtype=torch.long),
) # Dummy tensor for edges


def read_vtp(file_path: str) -> Any: # TODO add support for older format (VTK)
"""
Read a VTP file and return the polydata.
@@ -95,6 +187,44 @@ def read_vtu(file_path: str) -> Any:
return grid


def read_vtm(file_path: str) -> Any:
"""
Read a VTM (VTK MultiBlock) file and return the unstructured grid data.

Parameters
----------
file_path : str
Path to the VTM file.

Returns
-------
vtkUnstructuredGrid
The unstructured grid data extracted from the multi-block dataset.
"""
# Check if file exists
if not os.path.exists(file_path):
raise FileNotFoundError(f"{file_path} does not exist.")

# Check if file has .vtm extension
if not file_path.endswith(".vtm"):
raise ValueError(f"Expected a .vtm file, got {file_path}")

# Create a VTM reader
reader = vtk.vtkXMLMultiBlockDataReader()
reader.SetFileName(file_path)
reader.Update()

# Get the multi-block dataset
multi_block = reader.GetOutput()

# Check if the multi-block dataset is valid
if multi_block is None:
raise ValueError(f"Failed to read multi-block data from {file_path}")

# Extract and return the vtkUnstructuredGrid from the multi-block dataset
return _extract_unstructured_grid(multi_block)
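`_extract_unstructured_grid` is referenced here but its body is not part of this diff. Below is a minimal sketch of one plausible implementation, assuming the helper simply merges all leaf blocks into a single grid with `vtkAppendFilter`; the actual helper may differ:

```python
def _extract_unstructured_grid(multi_block):
    # Sketch under the assumption stated above; not necessarily the real helper.
    append = vtk.vtkAppendFilter()
    it = multi_block.NewIterator()  # visits leaf blocks by default
    it.InitTraversal()
    while not it.IsDoneWithAllItems():
        block = it.GetCurrentDataObject()
        if block is not None:
            append.AddInputData(block)
        it.GoToNextItem()
    append.Update()
    return append.GetOutput()  # a single merged vtkUnstructuredGrid
```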


def read_cgns(file_path: str) -> Any:
"""
Read a CGNS file and return the unstructured grid data.
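Taken together, the readers and parsers compose into a small pipeline. A usage sketch; the file paths are hypothetical:

```python
from physicsnemo.datapipes.cae.readers import (
    parse_vtk_polydata,
    parse_vtk_unstructuredgrid,
    read_vtm,
    read_vtp,
)

# Surface mesh: VTP polydata provides real edges from its polygon cells.
polydata = read_vtp("mesh_0001.vtp")  # hypothetical path
vertices, attrs, edges = parse_vtk_polydata(polydata, ["ux"])

# Multi-block mesh: the VTM is reduced to one unstructured grid; edges come
# back as an empty (0, 2) placeholder since they are not computed here.
grid = read_vtm("mesh_0001.vtm")  # hypothetical path
vertices, attrs, edges = parse_vtk_unstructuredgrid(grid, ["ux"])
```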