Consolidate model and model_name args in FairChemModel (#377)

orionarcher · web-flow · commit 19787c1729d1 · 2025-12-11T15:38:50.000-05:00
diff --git a/examples/scripts/1_Introduction/1.3_fairchem.py b/examples/scripts/1_Introduction/1.3_fairchem.py
@@ -36,10 +36,9 @@
 si_dc = bulk("Si", "diamond", a=5.43).repeat((2, 2, 2))
 atomic_numbers = si_dc.get_atomic_numbers()
 model = FairChemModel(
-    model=None,
-    model_name=MODEL_NAME,
+    model=MODEL_NAME,
     task_name="omat",  # Open Materials task for crystalline systems
-    cpu=False,
+    device=device,
 )
 atoms_list = [si_dc, si_dc]
 state = ts.io.atoms_to_state(atoms_list, device=device, dtype=dtype)
diff --git a/tests/models/test_fairchem.py b/tests/models/test_fairchem.py
@@ -29,8 +29,7 @@
 @pytest.fixture
 def eqv2_uma_model_pbc() -> FairChemModel:
     """UMA model for periodic boundary condition systems."""
-    cpu = DEVICE.type == "cpu"
-    return FairChemModel(model=None, model_name="uma-s-1", task_name="omat", cpu=cpu)
+    return FairChemModel(model="uma-s-1", task_name="omat", device=DEVICE)
 
 
 @pytest.mark.skipif(
@@ -39,7 +38,9 @@ def eqv2_uma_model_pbc() -> FairChemModel:
 @pytest.mark.parametrize("task_name", ["omat", "omol", "oc20"])
 def test_task_initialization(task_name: str) -> None:
     """Test that different UMA task names work correctly."""
-    model = FairChemModel(model=None, model_name="uma-s-1", task_name=task_name, cpu=True)
+    model = FairChemModel(
+        model="uma-s-1", task_name=task_name, device=torch.device("cpu")
+    )
     assert model.task_name
     assert str(model.task_name.value) == task_name
     assert hasattr(model, "predictor")
@@ -75,9 +76,7 @@ def test_homogeneous_batching(task_name: str, systems_func: Callable) -> None:
         for mol in systems:
             mol.info |= {"charge": 0, "spin": 1}
 
-    model = FairChemModel(
-        model=None, model_name="uma-s-1", task_name=task_name, cpu=DEVICE.type == "cpu"
-    )
+    model = FairChemModel(model="uma-s-1", task_name=task_name, device=DEVICE)
     state = ts.io.atoms_to_state(systems, device=DEVICE, dtype=DTYPE)
     results = model(state)
 
@@ -109,10 +108,9 @@ def test_heterogeneous_tasks() -> None:
             systems[0].info |= {"charge": 0, "spin": 1}
 
         model = FairChemModel(
-            model=None,
-            model_name="uma-s-1",
+            model="uma-s-1",
             task_name=task_name,
-            cpu=DEVICE.type == "cpu",
+            device=DEVICE,
         )
         state = ts.io.atoms_to_state(systems, device=DEVICE, dtype=DTYPE)
         results = model(state)
@@ -151,9 +149,7 @@ def test_batch_size_variations(systems_func: Callable, expected_count: int) -> N
     """Test batching with different numbers and sizes of systems."""
     systems = systems_func()
 
-    model = FairChemModel(
-        model=None, model_name="uma-s-1", task_name="omat", cpu=DEVICE.type == "cpu"
-    )
+    model = FairChemModel(model="uma-s-1", task_name="omat", device=DEVICE)
     state = ts.io.atoms_to_state(systems, device=DEVICE, dtype=DTYPE)
     results = model(state)
 
@@ -173,10 +169,9 @@ def test_stress_computation(*, compute_stress: bool) -> None:
     systems = [bulk("Si", "diamond", a=5.43), bulk("Al", "fcc", a=4.05)]
 
     model = FairChemModel(
-        model=None,
-        model_name="uma-s-1",
+        model="uma-s-1",
         task_name="omat",
-        cpu=DEVICE.type == "cpu",
+        device=DEVICE,
         compute_stress=compute_stress,
     )
     state = ts.io.atoms_to_state(systems, device=DEVICE, dtype=DTYPE)
@@ -195,9 +190,7 @@ def test_stress_computation(*, compute_stress: bool) -> None:
 )
 def test_device_consistency() -> None:
     """Test device consistency between model and data."""
-    cpu = DEVICE.type == "cpu"
-
-    model = FairChemModel(model=None, model_name="uma-s-1", task_name="omat", cpu=cpu)
+    model = FairChemModel(model="uma-s-1", task_name="omat", device=DEVICE)
     system = bulk("Si", "diamond", a=5.43)
     state = ts.io.atoms_to_state([system], device=DEVICE, dtype=DTYPE)
 
@@ -211,7 +204,7 @@ def test_device_consistency() -> None:
 )
 def test_empty_batch_error() -> None:
     """Test that empty batches raise appropriate errors."""
-    model = FairChemModel(model=None, model_name="uma-s-1", task_name="omat", cpu=True)
+    model = FairChemModel(model="uma-s-1", task_name="omat", device=torch.device("cpu"))
     with pytest.raises((ValueError, RuntimeError, IndexError)):
         model(ts.io.atoms_to_state([], device=torch.device("cpu"), dtype=torch.float32))
 
@@ -223,7 +216,7 @@ def test_load_from_checkpoint_path() -> None:
     """Test loading model from a saved checkpoint file path."""
     checkpoint_path = pretrained_checkpoint_path_from_name("uma-s-1")
     loaded_model = FairChemModel(
-        model=str(checkpoint_path), task_name="omat", cpu=DEVICE == "cpu"
+        model=str(checkpoint_path), task_name="omat", device=DEVICE
     )
 
     # Verify the loaded model works
@@ -278,10 +271,9 @@ def test_fairchem_charge_spin(charge: float, spin: float) -> None:
 
     # Create model with UMA omol task (supports charge/spin for molecules)
     model = FairChemModel(
-        model=None,
-        model_name="uma-s-1",
+        model="uma-s-1",
         task_name="omol",
-        cpu=DEVICE.type == "cpu",
+        device=DEVICE,
     )
 
     # This should not raise an error
diff --git a/tests/models/test_fairchem_legacy.py b/tests/models/test_fairchem_legacy.py
@@ -35,16 +35,14 @@ def model_path_oc20(tmp_path_factory: pytest.TempPathFactory) -> str:
 
 @pytest.fixture
 def eqv2_oc20_model_pbc(model_path_oc20: str) -> FairChemV1Model:
-    cpu = DEVICE.type == "cpu"
-    return FairChemV1Model(model=model_path_oc20, cpu=cpu, seed=0, pbc=True)
+    return FairChemV1Model(model=model_path_oc20, device=DEVICE, seed=0, pbc=True)
 
 
 @pytest.fixture
 def eqv2_oc20_model_non_pbc(
     model_path_oc20: str,
 ) -> FairChemV1Model:
-    cpu = DEVICE.type == "cpu"
-    return FairChemV1Model(model=model_path_oc20, cpu=cpu, seed=0, pbc=False)
+    return FairChemV1Model(model=model_path_oc20, device=DEVICE, seed=0, pbc=False)
 
 
 if get_token():
@@ -59,8 +57,7 @@ def model_path_omat24(tmp_path_factory: pytest.TempPathFactory) -> str:
     def eqv2_omat24_model_pbc(
         model_path_omat24: str,
     ) -> FairChemV1Model:
-        cpu = DEVICE.type == "cpu"
-        return FairChemV1Model(model=model_path_omat24, cpu=cpu, seed=0, pbc=True)
+        return FairChemV1Model(model=model_path_omat24, device=DEVICE, seed=0, pbc=True)
 
 
 @pytest.fixture
@@ -106,10 +103,9 @@ def ocp_calculator(model_path_oc20: str) -> OCPCalculator:
 
 def test_fairchem_mixed_pbc_init_raises(model_path_oc20: str) -> None:
     """Test that initializing FairChemV1Model with mixed PBC raises ValueError."""
-    cpu = DEVICE.type == "cpu"
     mixed_pbc = torch.tensor([True, False, True], dtype=torch.bool)
     with pytest.raises(ValueError, match="FairChemV1Model does not support mixed PBC"):
-        FairChemV1Model(model=model_path_oc20, cpu=cpu, seed=0, pbc=mixed_pbc)
+        FairChemV1Model(model=model_path_oc20, device=DEVICE, seed=0, pbc=mixed_pbc)
 
 
 def test_fairchem_mixed_pbc_forward_raises(
diff --git a/torch_sim/models/fairchem.py b/torch_sim/models/fairchem.py
@@ -12,6 +12,7 @@
 import traceback
 import typing
 import warnings
+from pathlib import Path
 from typing import Any
 
 import torch
@@ -43,7 +44,6 @@ def __init__(self, err: ImportError = exc, *_args: Any, **_kwargs: Any) -> None:
 
 if typing.TYPE_CHECKING:
     from collections.abc import Callable
-    from pathlib import Path
 
     from torch_sim.typing import StateDict
 
@@ -71,34 +71,34 @@ class FairChemModel(ModelInterface):
 
     def __init__(
         self,
-        model: str | Path | None,
+        model: str | Path,
         neighbor_list_fn: Callable | None = None,
         *,  # force remaining arguments to be keyword-only
-        model_name: str | None = None,
         model_cache_dir: str | Path | None = None,
-        cpu: bool = False,
+        device: torch.device | None = None,
         dtype: torch.dtype | None = None,
         compute_stress: bool = False,
         task_name: UMATask | str | None = None,
     ) -> None:
         """Initialize the FairChem model.
 
         Args:
-            model (str | Path | None): Path to model checkpoint file
+            model (str | Path): Either a pretrained model name or path to model
+                checkpoint file. The function will first check if the input matches
+                a known pretrained model name, then check if it's a valid file path.
             neighbor_list_fn (Callable | None): Function to compute neighbor lists
                 (not currently supported)
-            model_name (str | None): Name of pretrained model to load
             model_cache_dir (str | Path | None): Path where to save the model
-            cpu (bool): Whether to use CPU instead of GPU for computation
+            device (torch.device | None): Device to use for computation. If None,
+                defaults to CUDA if available, otherwise CPU.
             dtype (torch.dtype | None): Data type to use for computation
             compute_stress (bool): Whether to compute stress tensor
             task_name (UMATask | str | None): Task type for UMA models (optional,
                 only needed for UMA models)
 
         Raises:
-            RuntimeError: If both model_name and model are specified
             NotImplementedError: If custom neighbor list function is provided
-            ValueError: If neither model nor model_name is provided
+            ValueError: If model is not a known model name or valid file path
         """
         setup_imports()
         setup_logging()
@@ -114,24 +114,19 @@ def __init__(
                 "Custom neighbor list is not supported for FairChemModel."
             )
 
-        if model_name is not None:
-            if model is not None:
-                raise RuntimeError(
-                    "model_name and checkpoint_path were both specified, "
-                    "please use only one at a time"
-                )
-            model = model_name
-
-        if model is None:
-            raise ValueError("Either model or model_name must be provided")
+        # Convert Path to string for consistency
+        if isinstance(model, Path):
+            model = str(model)
 
         # Convert task_name to UMATask if it's a string (only for UMA models)
         if isinstance(task_name, str):
             task_name = UMATask(task_name)
 
         # Use the efficient predictor API for optimal performance
-        device_str = "cpu" if cpu else "cuda" if torch.cuda.is_available() else "cpu"
-        self._device = torch.device(device_str)
+        self._device = device or torch.device(
+            "cuda" if torch.cuda.is_available() else "cpu"
+        )
+        device_str = str(self._device)
         self.task_name = task_name
 
         # Create efficient batch predictor for fast inference
diff --git a/torch_sim/models/fairchem_legacy.py b/torch_sim/models/fairchem_legacy.py
@@ -19,9 +19,11 @@
 from __future__ import annotations
 
 import copy
+import os
 import traceback
 import typing
 import warnings
+from pathlib import Path
 from types import MappingProxyType
 from typing import Any
 
@@ -71,7 +73,6 @@ def __init__(self, err: ImportError = exc, *_args: Any, **_kwargs: Any) -> None:
 
 if typing.TYPE_CHECKING:
     from collections.abc import Callable
-    from pathlib import Path
 
     from torch_sim.typing import StateDict
 
@@ -110,6 +111,7 @@ class FairChemV1Model(ModelInterface):
     Examples:
         >>> model = FairChemV1Model(model="path/to/checkpoint.pt", compute_stress=True)
         >>> results = model(state)
+
     """
 
     _reshaped_props = MappingProxyType(
@@ -118,14 +120,13 @@ class FairChemV1Model(ModelInterface):
 
     def __init__(  # noqa: C901, PLR0915
         self,
-        model: str | Path | None,
+        model: str | Path | None = None,
         neighbor_list_fn: Callable | None = None,
         *,  # force remaining arguments to be keyword-only
         config_yml: str | None = None,
-        model_name: str | None = None,
         local_cache: str | None = None,
         trainer: str | None = None,
-        cpu: bool = False,
+        device: torch.device | None = None,
         seed: int | None = None,
         dtype: torch.dtype | None = None,
         compute_stress: bool = False,
@@ -139,24 +140,28 @@ def __init__(  # noqa: C901, PLR0915
         in energy and force calculations.
 
         Args:
-            model (str | Path | None): Path to model checkpoint file
+            model (str | Path | None): Either a pretrained model name or path to model
+                checkpoint file. The function will first check if it's a valid file
+                path, and if not, will attempt to load it as a pretrained model name
+                (requires local_cache to be set). If None, config_yml must be provided.
             neighbor_list_fn (Callable | None): Function to compute neighbor lists
                 (not currently supported)
             config_yml (str | None): Path to configuration YAML file
-            model_name (str | None): Name of pretrained model to load
-            local_cache (str | None): Path to local model cache directory
+            local_cache (str | None): Path to local model cache directory (required
+                when using pretrained model names)
             trainer (str | None): Name of trainer class to use
-            cpu (bool): Whether to use CPU instead of GPU for computation
+            device (torch.device | None): Device to use for computation. If None,
+                defaults to CUDA if available, otherwise CPU.
             seed (int | None): Random seed for reproducibility
             dtype (torch.dtype | None): Data type to use for computation
             compute_stress (bool): Whether to compute stress tensor
             pbc (torch.Tensor | bool): Whether to use periodic boundary conditions
             disable_amp (bool): Whether to disable AMP
         Raises:
-            RuntimeError: If both model_name and model are specified
-            NotImplementedError: If local_cache is not set when model_name is used
             NotImplementedError: If custom neighbor list function is provided
             ValueError: If stress computation is requested but not supported by model
+            ValueError: If neither config_yml nor model is provided
+            ValueError: If model cannot be loaded as file or pretrained model
 
         Notes:
             Either config_yml or model must be provided. The model loads configuration
@@ -178,19 +183,25 @@ def __init__(  # noqa: C901, PLR0915
             )
         self.pbc = pbc
 
-        if model_name is not None:
-            if model is not None:
-                raise RuntimeError(
-                    "model_name and checkpoint_path were both specified, "
-                    "please use only one at a time"
-                )
-            if local_cache is None:
-                raise NotImplementedError(
-                    "Local cache must be set when specifying a model name"
+        # Process model parameter if provided
+        if model is not None:
+            # Convert Path to string for consistency
+            if isinstance(model, Path):
+                model = str(model)
+
+            # Determine if model is a file path or a pretrained model name
+            # First check if it's a valid file path
+            if not os.path.isfile(model):
+                # If not a file, try to load as pretrained model name
+                if local_cache is None:
+                    raise ValueError(
+                        f"Model '{model}' is not a valid file path. "
+                        "If using a pretrained model name, local_cache must be set."
+                    )
+                # Attempt to load as pretrained model name
+                model = model_name_to_local_file(
+                    model_name=model, local_cache=local_cache
                 )
-            model = model_name_to_local_file(
-                model_name=model_name, local_cache=local_cache
-            )
 
         # Either the config path or the checkpoint path needs to be provided
         if not config_yml and model is None:
@@ -276,6 +287,11 @@ def __init__(  # noqa: C901, PLR0915
         self.config["checkpoint"] = str(model)
         del config["dataset"]["src"]
 
+        # Determine if CPU should be used (for the legacy trainer API)
+        cpu = device is not None and device.type == "cpu"
+        if device is None:
+            cpu = not torch.cuda.is_available()
+
         self.trainer = registry.get_trainer_class(config["trainer"])(
             task=config.get("task", {}),
             model=config["model"],