Skip to content

Commit 8f74b9e

Browse files
mshuaibi and rayg1234
authored
nvidia graph gen support (#1737)
NVIDIA has released a suite of tools, which includes a neighborlist. Initial benchmarks look promising for both CPU and GPU performance. Currently our external cpu implementation uses pymatgen. - [x] cpu support - [x] gpu support <img width="4766" height="2060" alt="image" src="https://github.com/user-attachments/assets/5d514209-c876-4ac5-a59e-60e4f337b922" /> Note - we should bump minimum python version to `3.11` for the CI. # TODO * GP is broken for V3. A separate PR will abstract it out to support the other graph gen methods, including v3. see #1791 * `external_graph_gen_method` is unused; to use nvidia graph gen with this, we need a followup PR to add this option to the FairchemCalculator (and deprecate `external_graph_gen` from inference_settings) --------- Co-authored-by: Ray Gao <rgao@meta.com> Co-authored-by: Ray Gao <7001989+rayg1234@users.noreply.github.com>
1 parent 17b8cde commit 8f74b9e

17 files changed

Lines changed: 2091 additions & 820 deletions

File tree

.github/workflows/test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
strategy:
3636
max-parallel: 10
3737
matrix:
38-
python_version: ['3.10', '3.13']
38+
python_version: ['3.11', '3.13']
3939

4040
steps:
4141
- name: Checkout code
@@ -155,7 +155,7 @@ jobs:
155155
- name: Install core dependencies and package
156156
run: |
157157
python -m pip install --upgrade pip
158-
pip install packages/fairchem-core[dev] \
158+
pip install packages/fairchem-core[dev,extras] \
159159
packages/fairchem-data-omol[dev] \
160160
packages/fairchem-data-omat \
161161
-r tests/requirements.txt # pin test packages

packages/fairchem-core/pyproject.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ name = "fairchem-core"
77
description = "Machine learning models for chemistry and materials science by the FAIR Chemistry team"
88
license = {text = "MIT License"}
99
dynamic = ["version", "readme"]
10-
requires-python = ">=3.10, <3.14"
10+
requires-python = ">=3.11, <3.14"
1111
dependencies = [
1212
"torch~=2.8.0",
1313
"ray[serve]>=2.53.0",
@@ -36,8 +36,9 @@ dependencies = [
3636
[project.optional-dependencies] # add optional dependencies, e.g. to be installed as pip install fairchem.core[dev]
3737
dev = ["pre-commit", "pytest", "pytest-cov", "coverage", "syrupy", "ruff==0.5.1"]
3838
docs = ["jupyter-book", "jupytext", "sphinx","sphinx-autoapi==3.3.3", "astroid<4", "umap-learn", "vdict", "ipywidgets", "jupyter_book>=2.0", "torch-dftd"]
39-
adsorbml = ["dscribe","x3dase","scikit-image"]
40-
extras = ["ray[default]", "pymatgen", "quacc[phonons]>=0.15.3", "pandas"]
39+
adsorbml = ["dscribe", "x3dase", "scikit-image"]
40+
extras = ["ray[default]", "pymatgen", "quacc[phonons]>=0.15.3", "pandas", "nvalchemi-toolkit-ops"]
41+
4142

4243
[project.scripts]
4344
fairchem = "fairchem.core._cli:main"

src/fairchem/core/datasets/ase_datasets.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from functools import cache, partial
1717
from glob import glob
1818
from pathlib import Path
19-
from typing import Any, Callable
19+
from typing import TYPE_CHECKING, Any
2020

2121
import ase
2222
import numpy as np
@@ -28,6 +28,9 @@
2828
from fairchem.core.datasets.base_dataset import BaseDataset
2929
from fairchem.core.modules.transforms import DataTransforms
3030

31+
if TYPE_CHECKING:
32+
from collections.abc import Callable
33+
3134

3235
def apply_one_tags(
3336
atoms: ase.Atoms, skip_if_nonzero: bool = True, skip_always: bool = False

src/fairchem/core/datasets/atomic_data.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import logging
1414
import re
1515
from collections.abc import Sequence
16-
from typing import List, Optional, Union
16+
from typing import Union
1717

1818
import ase
1919
import ase.db.sqlite
@@ -25,6 +25,8 @@
2525
from ase.stress import full_3x3_to_voigt_6_stress, voigt_6_to_full_3x3_stress
2626
from monty.dev import requires
2727

28+
from fairchem.core.common.utils import StrEnum
29+
2830
try:
2931
from pymatgen.io.ase import AseAtomsAdaptor
3032

@@ -33,9 +35,18 @@
3335
AseAtomsAdaptor = None
3436
pmg_installed = False
3537

38+
from fairchem.core.graph.radius_graph_pbc_nvidia import get_neighbors_nvidia_atoms
3639

3740
IndexType = Union[slice, torch.Tensor, np.ndarray, Sequence]
3841

42+
43+
class ExternalGraphMethod(StrEnum):
44+
"""Enum for external graph generation methods."""
45+
46+
PYMATGEN = "pymatgen"
47+
NVIDIA = "nvidia"
48+
49+
3950
# these are all currently certainly output by the current a2g
4051
# except for tags, all fields are required for network inference.
4152
_REQUIRED_KEYS = [
@@ -83,7 +94,7 @@ def size_repr(key: str, item: torch.Tensor, indent=0) -> str:
8394
out = item.item()
8495
elif torch.is_tensor(item):
8596
out = str(list(item.size()))
86-
elif isinstance(item, (List, tuple)):
97+
elif isinstance(item, (list, tuple)):
8798
out = str([len(item)])
8899
elif isinstance(item, dict):
89100
lines = [indent_str + size_repr(k, v, 2) for k, v in item.items()]
@@ -300,10 +311,8 @@ def validate(self):
300311
assert self.forces.dtype == self.pos.dtype
301312
if hasattr(self, "stress"):
302313
# NOTE: usually decomposed. for EFS prediction right now we reshape to (9,). need to discuss, perhaps use (1,3,3)
303-
assert (
304-
self.stress.dim() == 3
305-
and self.stress.shape[1:] == (3, 3)
306-
or (self.stress.dim() == 2 and self.stress.shape[1:] == (9,))
314+
assert (self.stress.dim() == 3 and self.stress.shape[1:] == (3, 3)) or (
315+
self.stress.dim() == 2 and self.stress.shape[1:] == (9,)
307316
)
308317
assert self.stress.shape[0] == self.num_graphs
309318
assert self.stress.dtype == self.pos.dtype
@@ -332,6 +341,7 @@ def from_ase(
332341
r_data_keys: list[str] | None = None, # NOT USED, compat for now
333342
task_name: str | None = None,
334343
target_dtype: torch.dtype = torch.float32,
344+
external_graph_method: ExternalGraphMethod | str = ExternalGraphMethod.PYMATGEN,
335345
) -> AtomicData:
336346
atoms = input_atoms.copy()
337347
calc = input_atoms.calc
@@ -375,7 +385,16 @@ def from_ase(
375385
assert (
376386
max_neigh is not None
377387
), "max_neigh must be specified for cpu graph construction."
378-
split_idx_dist = get_neighbors_pymatgen(atoms, radius, max_neigh)
388+
389+
if external_graph_method == ExternalGraphMethod.PYMATGEN:
390+
split_idx_dist = get_neighbors_pymatgen(atoms, radius, max_neigh)
391+
elif external_graph_method == ExternalGraphMethod.NVIDIA:
392+
split_idx_dist = get_neighbors_nvidia_atoms(atoms, radius, max_neigh)
393+
else:
394+
raise ValueError(
395+
f"external_graph_method must be 'pymatgen' or 'nvidia', got {external_graph_method}"
396+
)
397+
379398
edge_index, cell_offsets = reshape_features(
380399
*split_idx_dist, target_dtype=target_dtype
381400
)
@@ -443,16 +462,20 @@ def from_ase(
443462
# TODO another way to specify this is to spcify a key. maybe total_charge
444463
charge = torch.LongTensor(
445464
[
446-
atoms.info.get("charge", 0)
447-
if r_data_keys is not None and "charge" in r_data_keys
448-
else 0
465+
(
466+
atoms.info.get("charge", 0)
467+
if r_data_keys is not None and "charge" in r_data_keys
468+
else 0
469+
)
449470
]
450471
)
451472
spin = torch.LongTensor(
452473
[
453-
atoms.info.get("spin", 0)
454-
if r_data_keys is not None and "spin" in r_data_keys
455-
else 0
474+
(
475+
atoms.info.get("spin", 0)
476+
if r_data_keys is not None and "spin" in r_data_keys
477+
else 0
478+
)
456479
]
457480
)
458481

@@ -844,7 +867,7 @@ def update_batch_edges(
844867

845868

846869
def atomicdata_list_to_batch(
847-
data_list: list[AtomicData], exclude_keys: Optional[list] = None
870+
data_list: list[AtomicData], exclude_keys: list | None = None
848871
) -> AtomicData:
849872
"""
850873
all data points must be single graphs and have the same set of keys.

src/fairchem/core/datasets/common_structures.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
from __future__ import annotations
22

33
import numpy as np
4-
from ase.build import bulk
4+
from ase import Atoms
5+
from ase.build import bulk, molecule
56
from ase.lattice.cubic import FaceCenteredCubic
67

78

89
def get_fcc_crystal_by_num_atoms(
910
num_atoms: int,
1011
lattice_constant: float = 3.8,
1112
atom_type: str = "C",
12-
):
13+
) -> Atoms:
1314
# lattice_constant = 3.8, fcc generates a supercell with ~50 edges/atom, used for benchmarking
1415
atoms = bulk(atom_type, "fcc", a=lattice_constant)
1516
n_cells = int(np.ceil(np.cbrt(num_atoms)))
@@ -24,7 +25,7 @@ def get_fcc_crystal_by_num_cells(
2425
n_cells: int,
2526
atom_type: str = "Cu",
2627
lattice_constant: float = 3.61,
27-
):
28+
) -> Atoms:
2829
atoms = FaceCenteredCubic(
2930
directions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]],
3031
symbol=atom_type,
@@ -34,3 +35,25 @@ def get_fcc_crystal_by_num_cells(
3435
)
3536
atoms.info = {"charge": 0, "spin": 0}
3637
return atoms
38+
39+
40+
def get_water_box(num_molecules=20, box_size=10.0, seed=42) -> Atoms:
41+
"""Create a random box of water molecules."""
42+
43+
rng = np.random.default_rng(seed)
44+
water = molecule("H2O")
45+
46+
all_positions = []
47+
all_symbols = []
48+
49+
for _ in range(num_molecules):
50+
# Random position and rotation for each water molecule
51+
offset = rng.random(3) * box_size
52+
positions = water.get_positions() + offset
53+
all_positions.extend(positions)
54+
all_symbols.extend(water.get_chemical_symbols())
55+
56+
atoms = Atoms(
57+
symbols=all_symbols, positions=all_positions, cell=[box_size] * 3, pbc=True
58+
)
59+
return atoms

src/fairchem/core/graph/compute.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99

1010
import torch
1111

12-
from fairchem.core.graph.radius_graph_pbc import radius_graph_pbc, radius_graph_pbc_v2
12+
from fairchem.core.graph.radius_graph_pbc import (
13+
radius_graph_pbc,
14+
radius_graph_pbc_v2,
15+
)
16+
from fairchem.core.graph.radius_graph_pbc_nvidia import radius_graph_pbc_nvidia
1317

1418

1519
def get_pbc_distances(
@@ -74,7 +78,7 @@ def generate_graph(
7478
cutoff (float): The maximum distance between atoms to consider them as neighbors.
7579
max_neighbors (int): The maximum number of neighbors to consider for each atom.
7680
enforce_max_neighbors_strictly (bool): Whether to strictly enforce the maximum number of neighbors.
77-
radius_pbc_version: the version of radius_pbc impl
81+
radius_pbc_version: the version of radius_pbc impl (1, 2, or 3 for NVIDIA)
7882
pbc (list[bool]): The periodic boundary conditions in 3 dimensions, defaults to [True,True,True] for 3D pbc
7983
8084
Returns:
@@ -90,6 +94,8 @@ def generate_graph(
9094
radius_graph_pbc_fn = radius_graph_pbc
9195
elif radius_pbc_version == 2:
9296
radius_graph_pbc_fn = radius_graph_pbc_v2
97+
elif radius_pbc_version == 3:
98+
radius_graph_pbc_fn = radius_graph_pbc_nvidia
9399
else:
94100
raise ValueError(f"Invalid radius_pbc version {radius_pbc_version}")
95101

src/fairchem/core/graph/radius_graph_pbc.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,21 @@
1515

1616

1717
def sum_partitions(x: torch.Tensor, partition_idxs: torch.Tensor) -> torch.Tensor:
18-
sums = torch.zeros(partition_idxs.shape[0] - 1, device=x.device, dtype=x.dtype)
19-
for idx in range(partition_idxs.shape[0] - 1):
20-
sums[idx] = x[partition_idxs[idx] : partition_idxs[idx + 1]].sum()
21-
return sums
18+
"""
19+
Sum values within partitions defined by indices.
20+
"""
21+
num_partitions = partition_idxs.shape[0] - 1
22+
if num_partitions == 0:
23+
return torch.zeros(0, device=x.device, dtype=x.dtype)
24+
25+
# Use cumsum-based approach for vectorization
26+
cumsum = torch.zeros(len(x) + 1, device=x.device, dtype=x.dtype)
27+
cumsum[1:] = torch.cumsum(x, dim=0)
28+
29+
# Gather cumsum at partition boundaries and compute differences
30+
starts = cumsum[partition_idxs[:-1]]
31+
ends = cumsum[partition_idxs[1:]]
32+
return ends - starts
2233

2334

2435
def get_counts(x: torch.Tensor, length: int):

0 commit comments

Comments (0)