remove old periodic dependency

enjyashraf18 · enjyashraf18 · commit 9be8ea954185 · 2025-07-21T15:53:16.000+03:00
diff --git a/atomdb/datasets/slater/run.py b/atomdb/datasets/slater/run.py
@@ -19,15 +19,15 @@
 import re
 import atomdb
 
-from atomdb.periodic import Element
 from grid.onedgrid import UniformInteger
 from grid.rtransform import ExpRTransform
 
 # from importlib_resources import files
 from atomdb.utils import DEFAULT_DATAPATH
 from scipy.special import factorial
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Optional, Dict
+from atomdb.periodic_test import element_symbol_map, get_scalar_data
 
 __all__ = ["AtomicDensity", "load_slater_wfn", "run"]
 
@@ -1123,7 +1123,7 @@ def run(elem, charge, mult, nexc, dataset, datapath):
 
     # Set up internal variables
     elem = atomdb.element_symbol(elem)
-    atnum = atomdb.element_number(elem)
+    atnum = element_symbol_map[elem][0]
     nelec = atnum - charge
     nspin = mult - 1
 
@@ -1177,23 +1177,13 @@ def run(elem, charge, mult, nexc, dataset, datapath):
     mo_ked_a = species.eval_orbs_ked_positive_definite(rs)[:norba, :]
     mo_ked_b = species.eval_orbs_ked_positive_definite(rs)[:norba, :]
 
-
-
-
-    # Get information about the element --> (dont forget) needs to be refactored
-    atom = Element(elem)
-    atmass = atom.mass
-    cov_radius, vdw_radius, at_radius, polarizability, dispersion = [
-        None,
-    ] * 5
-    # overwrite values for neutral atomic species
-    if charge == 0:
-        cov_radius, vdw_radius, at_radius = (atom.cov_radius, atom.vdw_radius, atom.at_radius)
-        polarizability = atom.pold
-        dispersion = {"C6": atom.c6}
-
-
-
+    # Get periodic data
+    cov_radius = get_scalar_data('cov_radius', atnum, nelec)
+    vdw_radius = get_scalar_data('vdw_radius', atnum, nelec)
+    at_radius = get_scalar_data('at_radius', atnum, nelec)
+    polarizability = get_scalar_data('polarizability', atnum, nelec)
+    dispersion = get_scalar_data('dispersion', atnum, nelec)
+    atmass = get_scalar_data('atmass', atnum, nelec)
 
 
 
@@ -1211,12 +1201,12 @@ def run(elem, charge, mult, nexc, dataset, datapath):
         nelec=nelec,
         nspin=nspin,
         nexc=nexc,
-        atmass=atmass,  #
-        cov_radius=cov_radius,  #
-        vdw_radius=vdw_radius,  #
-        at_radius=at_radius,  #
-        polarizability=polarizability,  #
-        dispersion=dispersion,  #
+        atmass=atmass,
+        cov_radius=cov_radius,
+        vdw_radius=vdw_radius,
+        at_radius=at_radius,
+        polarizability=polarizability,
+        dispersion=dispersion,
         energy=energy,
         mo_energy_a=mo_e_up,
         mo_energy_b=mo_e_dn,
diff --git a/atomdb/migration/periodic/elements_data.py b/atomdb/migration/periodic/elements_data.py
@@ -14,20 +14,6 @@
 hdf5_file = files("atomdb.data").joinpath("elements_data.h5")
 
 
-PROPERTY_NAME_MAP = {
-    "atmass": "atmass",
-    "cov_radius": "cov_radius",
-    "vdw_radius": "vdw_radius",
-    "at_radius": "at_radius",
-    "polarizability": "polarizability",
-    "dispersion_c6": "dispersion_c6",
-    "dispersion": "dispersion_c6",
-    "elem": "symbol",
-    "atnum": "atnum",
-    "name": "name",
-}
-
-
 # Properties of each element in the HDF5 file.
 PROPERTY_CONFIGS = [
     {
@@ -362,20 +348,6 @@ def write_data_info_to_hdf5(data_info_list):
 
 
 
-def map_element_symbol(ELEMENTS_H5FILE):
-    element_symbol_map = {}
-    for element_group in ELEMENTS_H5FILE.root.Elements:
-        symbol = element_group.symbol[0]['value'].decode('utf-8').strip()
-        atnum = element_group.atnum[0]['value']
-        name = element_group.name[0]['value'].decode('utf-8').strip()
-        element_symbol_map[symbol] = (atnum, name)
-
-    return element_symbol_map
-
-
-
-
-
 if __name__ == "__main__":
     # Read the elements data from the CSV file
     data, unique_headers, sources_data, units_data = read_elements_data_csv(elements_data_csv)
diff --git a/atomdb/periodic_test.py b/atomdb/periodic_test.py
@@ -0,0 +1,84 @@
+from enum import IntEnum
+from numbers import Integral
+import tables as pt
+import numpy as np
+from importlib_resources import files
+
+
+__all__ = [
+    "PROPERTY_NAME_MAP",
+    "get_scalar_data",
+    "element_symbol_map",
+    "ElementAttr",
+]
+
+class ElementAttr(IntEnum):
+    atnum = 0  # position of the atomic number in each element_symbol_map value tuple
+    name = 1  # position of the element name in each element_symbol_map value tuple
+
+
+elements_hdf5_file = files("atomdb.data").joinpath("elements_data.h5")
+ELEMENTS_H5FILE = pt.open_file(elements_hdf5_file, mode="r")  # opened read-only at import time
+
+PROPERTY_NAME_MAP = {  # property name -> table name under /Elements/<atnum>
+    "atmass": "atmass",
+    "cov_radius": "cov_radius",
+    "vdw_radius": "vdw_radius",
+    "at_radius": "at_radius",
+    "polarizability": "polarizability",
+    "dispersion_c6": "dispersion_c6",
+    "dispersion": "dispersion_c6",  # alias: run() requests this property as "dispersion"
+    "elem": "symbol",
+    "atnum": "atnum",
+    "name": "name",
+}
+
+def get_scalar_data(prop_name, atnum, nelec):
+    """Return periodic-table data for one property of an element.
+
+    ``prop_name`` is a key of PROPERTY_NAME_MAP. Single-column tables yield
+    one scalar; tables with sources yield ``{source: float}`` (or None if
+    every value is NaN). For charged species (``atnum != nelec``) only
+    "atmass", "elem", "atnum" and "name" are looked up; others return None.
+    """
+    charge_independent = ("atmass", "elem", "atnum", "name")
+    if atnum - nelec != 0 and prop_name not in charge_independent:
+        return None
+
+    # Each element lives in a group named by its zero-padded atomic number.
+    table_path = f"/Elements/{atnum:03d}/{PROPERTY_NAME_MAP[prop_name]}"
+    table = ELEMENTS_H5FILE.get_node(table_path)
+
+    # Basic properties: a single "value" column and no sources.
+    if len(table.colnames) == 1 and table.colnames[0] == "value":
+        value = table[0]["value"]
+        if isinstance(value, Integral):
+            return int(value)
+        if isinstance(value, bytes):
+            return value.decode("utf-8")
+        # Any other scalar (e.g. a float) is handed back unchanged instead
+        # of falling off the end of the function as an implicit None.
+        return value
+
+    # Properties with multiple sources: keep every non-NaN entry.
+    result = {}
+    for row in table:
+        value = row["value"]
+        if not np.isnan(value):
+            result[row["source"].decode("utf-8")] = float(value)
+    return result if result else None
+
+
+
+def map_element_symbol():
+    """Map each element symbol to ``(atnum, name)`` read from the HDF5 file."""
+    symbol_map = {}
+    for group in ELEMENTS_H5FILE.root.Elements:
+        symbol = group.symbol[0]["value"].decode("utf-8").strip()
+        name = group.name[0]["value"].decode("utf-8").strip()
+        # Store a plain int, matching what get_scalar_data returns for ints.
+        symbol_map[symbol] = (int(group.atnum[0]["value"]), name)
+    return symbol_map
+
+
+element_symbol_map = map_element_symbol()  # symbol -> (atnum, name), built once at import
diff --git a/atomdb/species.py b/atomdb/species.py
@@ -24,27 +24,20 @@
 import numpy as np
 import pooch
 import requests
-from msgpack import packb, unpackb
-from msgpack_numpy import decode, encode
 from numpy import ndarray
 from scipy.interpolate import CubicSpline
 
-from atomdb.periodic import Element, element_symbol
+from atomdb.periodic_test import element_symbol_map, PROPERTY_NAME_MAP, get_scalar_data, ElementAttr
 from atomdb.utils import DEFAULT_DATAPATH, DEFAULT_DATASET, DEFAULT_REMOTE
 from importlib_resources import \
 files
 import tables as pt
 from numbers import Integral
-from migration.periodic.elements_data import PROPERTY_NAME_MAP, map_element_symbol
 
-elements_hdf5_file = files("atomdb.data").joinpath("elements_data.h5")
 datasets_hdf5_file = files("atomdb.data").joinpath("datasets_data.h5")
-
-
-ELEMENTS_H5FILE = pt.open_file(elements_hdf5_file, mode="r")
 DATASETS_H5FILE = pt.open_file(datasets_hdf5_file, mode="r")
 
-element_symbol_map = map_element_symbol(ELEMENTS_H5FILE)
+
 
 __all__ = [
     "Species",
@@ -90,41 +83,6 @@ def wrapper(self):
     return wrapper
 
 
-def get_scalar_data(prop_name, atnum, nelec):
-    charge = atnum - nelec
-
-    if charge != 0 and prop_name != "atmass":
-        return None
-
-    # get the element group
-    element_group = f"/Elements/{atnum:03d}"
-
-    table_name = PROPERTY_NAME_MAP[prop_name]
-    table_path = f"{element_group}/{table_name}"
-
-    # get the table node from the HDF5 file
-    table = ELEMENTS_H5FILE.get_node(table_path)
-
-    # Handle basic properties (single column --> no sources)
-    if len(table.colnames) == 1 and table.colnames[0] == "value":
-        value = table[0]["value"]
-        # if the value is an int, return it as an int
-        if isinstance(value, Integral):
-            return int(value)
-        # if the value is a string, decode from bytes
-        elif isinstance(value, bytes):
-            return value.decode("utf-8")
-    else:
-        # handle properties with multiple sources
-        result = {}
-        for row in table:
-            source = row["source"].decode("utf-8")
-            value = row["value"]
-            # exclude none values
-            if not np.isnan(value):
-                result[source] = float(value)
-        return result if result else None
-
 
 def _remove_suffix(input_string, suffix):
     if suffix and input_string.endswith(suffix):
@@ -781,19 +739,19 @@ def compile_species(
     fields = submodule.run(elem, charge, mult, nexc, dataset, datapath)
 
     # dump the data to the HDF5 file
-    dump(fields, dataset, elem, charge, mult, nexc)
+    # dump(fields, dataset, elem, charge, mult, nexc)
 
 
-    # fields = asdict(fields)
-    # # print all fields
-    # for key, value in fields.items():
-    #     if isinstance(value, np.ndarray):
-    #         print(f"{key}: shape={value.shape}, first 5 elements={value.flat[:5]}")
-    #     else:
-    #         print(f"{key}: {value}")
-    #
-    # species = Species(dataset, fields)
-    # return species
+    fields = asdict(fields)
+    # print all fields
+    for key, value in fields.items():
+        if isinstance(value, np.ndarray):
+            print(f"{key}: shape={value.shape}, first 5 elements={value.flat[:5]}")
+        else:
+            print(f"{key}: {value}")
+
+    species = Species(dataset, fields)
+    return species
 
 
 
@@ -1014,7 +972,8 @@ def get_species_data(folder_path, elem, DATASET_PROPERTY_CONFIGS):
             fields[config['Carray_property']] = table[:]
 
 
-    fields['atnum'] = element_symbol_map[elem][0]
+    fields['atnum'] = element_symbol_map[elem][ElementAttr.atnum]
+
 
     # Add scalar properties
     for prop in ('atmass', 'cov_radius', 'vdw_radius', 'at_radius', 'polarizability', 'dispersion'):
@@ -1063,7 +1022,8 @@ def raw_datafile(
     str
         Path to the raw data file.
     """
-    elem = "*" if elem is Ellipsis else element_symbol(elem)
+    # TODO(review): the former element_symbol(elem) normalization was dropped here; confirm callers always pass a symbol.
+    elem = "*" if elem is Ellipsis else elem
     charge = "*" if charge is Ellipsis else f"{charge:03d}"
     mult = "*" if mult is Ellipsis else f"{mult:03d}"
     nexc = "*" if nexc is Ellipsis else f"{nexc:03d}"
diff --git a/atomdb/test.py b/atomdb/test.py
@@ -12,20 +12,20 @@
 #     print(f"Element: {hydrogen.elem}, Charge: {hydrogen.charge}, Multiplicity: {hydrogen.mult}, vdw_radius: {hydrogen.vdw_radius}")
 # except Exception as e:
 #     print(f"Error during compilation: {e}")
-
+#
 
 
 
 from atomdb import load
 # hydrogen = load("H", ..., ...,  dataset="slater")
 
-hydrogen = load(..., ..., 2, ...,  dataset="slater")
+# hydrogen = load(..., ..., 2, ...,  dataset="slater")
 
-# hydrogen = load('C', 0, 3, 0,  dataset="slater")
+hydrogen = load('C', 0, 3, 0,  dataset="slater")
 
 
-for species in hydrogen:
-    print(f"\nSpecies: {species.elem} (Charge: {species.charge}, Mult: {species.mult})")
-    print("Attributes available:", vars(species).keys())
-    print("Energy:", getattr(species, "energy", "Not available"))
-
+# for species in hydrogen:
+#     print(f"\nSpecies: {species.elem} (Charge: {species.charge}, Mult: {species.mult})")
+#     print("Attributes available:", vars(species).keys())
+#     print("Energy:", getattr(species, "energy", "Not available"))
+#