UCL-CCS
diff --git a/‎CHANGELOG.md‎
Lines changed: 8 additions & 4 deletions b/‎CHANGELOG.md‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎docs/notebooks/publications/A Scalable Approach to Quantum Simulation via Projection-based Embedding.ipynb‎
Lines changed: 37 additions & 37 deletions b/‎docs/notebooks/publications/A Scalable Approach to Quantum Simulation via Projection-based Embedding.ipynb‎
Lines changed: 37 additions & 37 deletions
diff --git a/‎docs/source/config.rst‎
Lines changed: 9 additions & 0 deletions b/‎docs/source/config.rst‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎nbed/config.py‎
Lines changed: 110 additions & 0 deletions b/‎nbed/config.py‎
Lines changed: 110 additions & 0 deletions
@@ -5,13 +5,17 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Unreleased
+### Added
+- `NbedConfig` pydantic model to validate user input.
+
 ## [0.0.8]
-## Fixed
+### Fixed
 - `SPADELocalizer` now outputs whole c matrix when virtual localization is stopped early.
 - `ACELocalizer` was returning 1 too few molecular orbitals.
 - Fixed a bug causing embedded FCI calculations to fail for open shell systems.
 
-## Changed
+### Changed
 - 'nbed.scf.huzinaga_hf' and 'nbed.scf.huzinaga_rks' combined into 'nbed.scf.huzinaga_scf'
 - Combined `scf/huzinaga_` HF and KS methods into `huzinaga_scf`
 - python version requirement changed to `>=3.11, <4`
@@ -20,11 +24,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `localizers` now comprised of `occupied` and `virtual`, with `Localizer` now `OccupiedLocalizer`
 - concentric localization moved from `SPADELocalizer` to its own class `ConcentricLocalizer(VirtualLocalizer)`
 
-## Added
+### Added
 - `.pre-commit-config.yaml` added
 - added `ACELocalizer` which implements ace-of-spade method for multiple reaction geometries.
 
-## Removed
+### Removed
 - `mol_plot.py` removed as not required for/by main uses of package
 - dropped support for Pennylane, as they are pinned to numpy <2
 - Removed function to convert from fermionic hamiltonian to qubit hamiltonian, which was in `ham_builder.py`.
 
@@ -238,7 +238,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -337,13 +337,13 @@
     "            transform=transform,\n",
     "        )\n",
     "        qham = huz_builder.build()\n",
-    "        result[active][\"huz\"] = {}\n",
-    "        result[active][\"huz\"][\"qham\"] = HamiltonianConverter(qham)._intermediate\n",
-    "        result[active][\"huz\"][\"terms\"] = len(qham.terms)\n",
-    "        result[active][\"huz\"][\"n_qubits\"] = count_qubits(qham)\n",
-    "        result[active][\"huz\"][\"classical_energy\"] = driver._huzinaga[\"classical_energy\"]\n",
-    "        result[active][\"huz\"][\"ground\"] = None\n",
-    "        result[active][\"huz\"][\"e_ccsd\"] = driver._huzinaga[\"e_ccsd\"]\n",
+    "        result[active][\"huzinaga\"] = {}\n",
+    "        result[active][\"huzinaga\"][\"qham\"] = HamiltonianConverter(qham)._intermediate\n",
+    "        result[active][\"huzinaga\"][\"terms\"] = len(qham.terms)\n",
+    "        result[active][\"huzinaga\"][\"n_qubits\"] = count_qubits(qham)\n",
+    "        result[active][\"huzinaga\"][\"classical_energy\"] = driver._huzinaga[\"classical_energy\"]\n",
+    "        result[active][\"huzinaga\"][\"ground\"] = None\n",
+    "        result[active][\"huzinaga\"][\"e_ccsd\"] = driver._huzinaga[\"e_ccsd\"]\n",
     "        print(\"Huzinaga finished.\")\n",
     "\n",
     "        # untapered_mu = mu_builder.build(taper=False)\n",
@@ -366,7 +366,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -382,11 +382,11 @@
     "    embeddings = pd.concat([threes, twos], axis=0)\n",
     "    full_vals = pd.DataFrame([v for v in df[\"full\"].to_list()], index=df[\"mol_name\"])\n",
     "    mu_vals = pd.DataFrame([v for v in embeddings[\"mu\"]], index=embeddings.index)\n",
-    "    huz_vals = pd.DataFrame([v for v in embeddings[\"huz\"]], index=embeddings.index)\n",
+    "    huz_vals = pd.DataFrame([v for v in embeddings[\"huzinaga\"]], index=embeddings.index)\n",
     "\n",
     "    energies = pd.concat(\n",
     "        [df[\"e_dft\"], full_vals[\"e_ccsd\"], mu_vals[\"e_ccsd\"], huz_vals[\"e_ccsd\"]],\n",
-    "        keys=[\"DFT\", \"Full\", \"Mu\", \"Huz\"],\n",
+    "        keys=[\"DFT\", \"Full\", \"Mu\", \"huzinaga\"],\n",
     "        axis=1,\n",
     "    )\n",
     "    energies[\"dft_diffs\"] = (\n",
@@ -396,7 +396,7 @@
     "        (energies[\"Mu\"] - energies[\"Full\"]) / energies[\"Full\"]\n",
     "    ).apply(lambda x: np.log10(abs(x)))\n",
     "    energies[\"huz_diffs\"] = (\n",
-    "        (energies[\"Huz\"] - energies[\"Full\"]) / energies[\"Full\"]\n",
+    "        (energies[\"huzinaga\"] - energies[\"Full\"]) / energies[\"Full\"]\n",
     "    ).apply(lambda x: np.log10(abs(x)))\n",
     "    energies = energies.reindex(\n",
     "        [\n",
@@ -581,7 +581,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -591,15 +591,15 @@
     "    print(\"\\nQUBITS\")\n",
     "    qubits = pd.concat(\n",
     "        [full_vals[\"n_qubits\"], mu_vals[\"n_qubits\"], huz_vals[\"n_qubits\"]],\n",
-    "        keys=[\"Full\", \"Mu\", \"Huz\"],\n",
+    "        keys=[\"Full\", \"Mu\", \"huzinaga\"],\n",
     "        axis=1,\n",
     "    )\n",
     "    print(qubits)\n",
     "\n",
     "    print(\"\\nTERMS\")\n",
     "    terms = pd.concat(\n",
     "        [full_vals[\"terms\"], mu_vals[\"terms\"], huz_vals[\"terms\"]],\n",
-    "        keys=[\"Full\", \"Mu\", \"Huz\"],\n",
+    "        keys=[\"Full\", \"Mu\", \"huzinaga\"],\n",
     "        axis=1,\n",
     "    )\n",
     "    print(terms)\n",
@@ -615,11 +615,11 @@
     "    print(\"\\nMolecule Results\")\n",
     "    mol_results = pd.concat(\n",
     "        [\n",
-    "            energies[\"Full\"] - energies[\"Huz\"],\n",
+    "            energies[\"Full\"] - energies[\"huzinaga\"],\n",
     "            energies[\"Full\"] - energies[\"Mu\"],\n",
-    "            qubits[\"Huz\"],\n",
+    "            qubits[\"huzinaga\"],\n",
     "            qubits[\"Mu\"],\n",
-    "            terms[\"Huz\"],\n",
+    "            terms[\"huzinaga\"],\n",
     "            terms[\"Mu\"],\n",
     "        ],\n",
     "        axis=1,\n",
@@ -1122,7 +1122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1206,14 +1206,14 @@
     "            transform=transform,\n",
     "        )\n",
     "        qham = huz_builder.build(qubits, taper=False)\n",
-    "        result[active][\"huz\"] = {}\n",
-    "        result[active][\"huz\"][\"qham\"] = HamiltonianConverter(qham)._intermediate\n",
-    "        result[active][\"huz\"][\"terms\"] = len(qham.terms)\n",
-    "        result[active][\"huz\"][\"n_qubits\"] = count_qubits(qham)\n",
-    "        result[active][\"huz\"][\"classical_energy\"] = driver._huzinaga[\"classical_energy\"]\n",
-    "        result[active][\"huz\"][\"ground\"] = None\n",
-    "        result[active][\"huz\"][\"e_ccsd\"] = driver._huzinaga[\"e_ccsd\"]\n",
-    "        result[active][\"huz\"][\"nmos\"] = len(driver.localized_system.active_MO_inds)\n",
+    "        result[active][\"huzinaga\"] = {}\n",
+    "        result[active][\"huzinaga\"][\"qham\"] = HamiltonianConverter(qham)._intermediate\n",
+    "        result[active][\"huzinaga\"][\"terms\"] = len(qham.terms)\n",
+    "        result[active][\"huzinaga\"][\"n_qubits\"] = count_qubits(qham)\n",
+    "        result[active][\"huzinaga\"][\"classical_energy\"] = driver._huzinaga[\"classical_energy\"]\n",
+    "        result[active][\"huzinaga\"][\"ground\"] = None\n",
+    "        result[active][\"huzinaga\"][\"e_ccsd\"] = driver._huzinaga[\"e_ccsd\"]\n",
+    "        result[active][\"huzinaga\"][\"nmos\"] = len(driver.localized_system.active_MO_inds)\n",
     "        print(\"Huzinaga finished.\")\n",
     "\n",
     "        # untapered_mu = mu_builder.build(taper=False)\n",
@@ -1236,7 +1236,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1245,15 +1245,15 @@
     "    active_atoms = range(1, 6)\n",
     "    mu_qubits = [cyclopentane[str(i)][\"mu\"][\"n_qubits\"] for i in active_atoms]\n",
     "    mu_terms = [cyclopentane[str(i)][\"mu\"][\"terms\"] for i in active_atoms]\n",
-    "    huz_qubits = [cyclopentane[str(i)][\"huz\"][\"n_qubits\"] for i in active_atoms]\n",
-    "    huz_terms = [cyclopentane[str(i)][\"huz\"][\"terms\"] for i in active_atoms]\n",
+    "    huz_qubits = [cyclopentane[str(i)][\"huzinaga\"][\"n_qubits\"] for i in active_atoms]\n",
+    "    huz_terms = [cyclopentane[str(i)][\"huzinaga\"][\"terms\"] for i in active_atoms]\n",
     "    full_terms = cyclopentane[\"full\"][\"terms\"]\n",
     "    full_n_qubits = cyclopentane[\"full\"][\"n_qubits\"]\n",
     "    full_nmos = cyclopentane[\"full\"][\"nmos\"]\n",
     "    mu_energies = [cyclopentane[str(i)][\"mu\"][\"e_ccsd\"] for i in active_atoms]\n",
-    "    huz_energies = [cyclopentane[str(i)][\"huz\"][\"e_ccsd\"] for i in active_atoms]\n",
+    "    huz_energies = [cyclopentane[str(i)][\"huzinaga\"][\"e_ccsd\"] for i in active_atoms]\n",
     "    mu_orbitals = [cyclopentane[str(i)][\"mu\"][\"nmos\"] for i in active_atoms]\n",
-    "    huz_orbitals = [cyclopentane[str(i)][\"huz\"][\"nmos\"] for i in active_atoms]\n",
+    "    huz_orbitals = [cyclopentane[str(i)][\"huzinaga\"][\"nmos\"] for i in active_atoms]\n",
     "\n",
     "    active_atoms = [0, *active_atoms]\n",
     "\n",
@@ -1944,7 +1944,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -2008,8 +2008,8 @@
     "        if n_data:\n",
     "            if n_data[\"mu\"].get(\"qham\", False):\n",
     "                n_data[\"mu\"].pop(\"qham\")\n",
-    "            if n_data[\"huz\"].get(\"qham\", False):\n",
-    "                n_data[\"huz\"].pop(\"qham\")"
+    "            if n_data[\"huzinaga\"].get(\"qham\", False):\n",
+    "                n_data[\"huzinaga\"].pop(\"qham\")"
    ]
   },
   {
@@ -3899,7 +3899,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "nbed-1_9TTDE1-py3.10",
+   "display_name": ".venv",
    "language": "python",
    "name": "python3"
   },
@@ -3913,7 +3913,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.13.1"
   }
  },
  "nbformat": 4,
 
@@ -0,0 +1,9 @@
+Configuration
+-------------
+
+Input data is validated against the `NbedConfig` Pydantic model. The validated configuration is then passed to the `NbedDriver`.
+
+.. automodule:: nbed.config
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -0,0 +1,110 @@
+"""Custom Types and Enums."""
+
+import os
+from enum import Enum
+from typing import Annotated, Any
+
+from pydantic import (
+    BaseModel,
+    BeforeValidator,
+    Field,
+    NonNegativeInt,
+    PositiveFloat,
+    PositiveInt,
+    TypeAdapter,
+)
+
+
+class Projector(Enum):
+    """Implemented Projectors."""
+
+    MU = "mu"
+    HUZ = "huzinaga"
+    BOTH = "both"
+
+
+class Localizer(Enum):
+    """Implemented Occupied Localizers."""
+
+    SPADE = "spade"
+    BOYS = "boys"
+    IBO = "ibo"
+    PM = "pm"
+
+
+XYZGeometry = Annotated[
+    str, Field(pattern="^\\d+\n\\s?\n(?:\\w(?:\\s+\\-?\\d\\.\\d+){3}\n?)*")
+]
+
+
+def validate_xyz_file(maybe_xyz: Any) -> str:
+    """Validates that the filepath given leads to a valid XYZ formatted file.
+
+    Args:
+        maybe_xyz (Any): A path to an existing file; any value that is not an existing path is returned unchanged.
+
+    Returns:
+        str: an XYZ geometry string.
+    """
+    if os.path.exists(maybe_xyz):
+        with open(maybe_xyz) as file:
+            content = file.read()
+        TypeAdapter(XYZGeometry).validate_strings(content)
+        return content
+    else:
+        return maybe_xyz
+
+
+class NbedConfig(BaseModel):
+    """Config for Nbed.
+
+    Args:
+        geometry (str): Path to .xyz file containing molecular geometry or raw xyz string.
+        n_active_atoms (int): The number of atoms to include in the active region.
+        basis (str): The name of an atomic orbital basis set to use for chemistry calculations.
+        xc_functional (str): The name of an Exchange-Correlation functional to be used for DFT.
+        projector (str): Projector to screen out environment orbitals. One of 'mu', 'huzinaga' or 'both'.
+        localization (str): Orbital localization method to use. One of 'spade', 'pm' (Pipek-Mezey), 'boys' or 'ibo'.
+        convergence (float): The convergence tolerance for energy calculations.
+        charge (int): Charge of molecular species
+        mu_level_shift (float): Level shift parameter to use for mu-projector.
+        run_ccsd_emb (bool): Whether or not to find the CCSD energy of embedded system for reference.
+        run_fci_emb (bool): Whether or not to find the FCI energy of embedded system for reference.
+        run_virtual_localization (bool): Whether or not to localize virtual orbitals.
+        n_mo_overwrite (tuple[None| int, None | int]): Optional overwrite values for occupied localizers.
+        max_ram_memory (int): Amount of RAM memory in MB available for PySCF calculation
+        pyscf_print_level (int): Amount of information PySCF prints
+        unit (str): molecular geometry unit 'Angstrom' or 'Bohr'
+        max_hf_cycles (int): max number of Hartree-Fock iterations allowed (for global and local HFock)
+        max_dft_cycles (int): max number of DFT iterations allowed in scf calc
+        init_huzinaga_rhf_with_mu (bool): Hidden flag to seed huzinaga RHF with mu shift result (for developers only)
+    """
+
+    geometry: Annotated[XYZGeometry, BeforeValidator(validate_xyz_file)]
+    n_active_atoms: PositiveInt
+    basis: str
+    xc_functional: str
+    projector: Projector = Field(default=Projector.MU)
+    localization: Localizer = Field(default=Localizer.SPADE)
+    convergence: PositiveFloat = 1e-6
+    charge: NonNegativeInt = Field(default=0)
+    spin: NonNegativeInt = Field(default=0)
+    unit: str = "angstrom"
+    symmetry: bool = False
+    mu_level_shift: PositiveFloat = 1e6
+    run_ccsd_emb: bool = False
+    run_fci_emb: bool = False
+    run_virtual_localization: bool = True
+    run_dft_in_dft: bool = False
+    n_mo_overwrite: tuple[None | NonNegativeInt, None | NonNegativeInt] = (None, None)
+    max_ram_memory: PositiveInt = 4000
+    occupied_threshold: float = Field(default=0.95, gt=0, lt=1)
+    virtual_threshold: float = Field(default=0.95, gt=0, lt=1)
+    max_shells: PositiveInt = 4
+    init_huzinaga_rhf_with_mu: bool = False
+    max_hf_cycles: PositiveInt = Field(default=50)
+    max_dft_cycles: PositiveInt = Field(default=50)
+    force_unrestricted: bool = False
+    mm_coords: list | None = None
+    mm_charges: list | None = None
+    mm_radii: list | None = None