Merge pull request #63 from Stuermer/master

polyanskiy · web-flow · commit 28c7707c4ffd · 2025-04-06T16:02:20.000-04:00
Extend testing + add pyproject.toml
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -1,15 +1,14 @@
 # This `test.yml` file defines a GitHub action that will run for all pull requests,
-# and for all pushes to main. It installs the python test libraries `parameterized`
-# and `pytest`, and then runs the `pytest` command. This automatically detects
-# `test_*.py` files and executes tests therein.
+# and for all pushes to dev. It installs the project and its dependencies with `uv` and then runs
+# `pytest` on the `tests` directory, which detects `test_*.py` files and executes the tests therein.
 
 name: CI
 
 on:
   pull_request:
   push:
     branches:
-      - master
+      - dev
   # schedule:
   #     - cron: "0 13 * * 1"  # Every Monday at 9AM EST
 
@@ -20,11 +19,16 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v4
 
-      - name: Set up environment
-        run: |
-          python -m pip install --upgrade pip
-          pip install parameterized
-          pip install pytest
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: "Set up Python"
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: "pyproject.toml"
+
+      - name: Install the project
+        run: uv sync --all-extras --dev
 
       - name: Run tests
-        run: pytest
+        run: uv run pytest tests
diff --git a/database/data/main/WSe2/nk/Ushkov-e.yml b/database/data/main/WSe2/nk/Ushkov-e.yml
@@ -9,7 +9,7 @@ REFERENCES: |
     (Numerical data kindly provided by Georgy Ermolaev)
 COMMENTS: "WSe<sub>2</sub> nanoparticles with diameters from 5 to 150 nm synthesized by femtosecond laser ablation."
 CONDITIONS:
-    direcrion: e
+    direction: e
 DATA:
   - type: tabulated nk
     data: | 
diff --git a/database/data/main/WSe2/nk/Ushkov-o.yml b/database/data/main/WSe2/nk/Ushkov-o.yml
@@ -9,7 +9,7 @@ REFERENCES: |
     (Numerical data kindly provided by Georgy Ermolaev)
 COMMENTS: "WSe<sub>2</sub> nanoparticles with diameters from 5 to 150 nm synthesized by femtosecond laser ablation."
 CONDITIONS:
-    direcrion: o
+    direction: o
 DATA:
   - type: tabulated nk
     data: | 
diff --git a/database/data/specs/schott/misc/B270.yml b/database/data/specs/schott/misc/B270.yml
@@ -8,7 +8,6 @@ COMMENTS: |
     20 °C. Clear high transmission crown glass (modified soda-lime glass) available in form of sheets, profile rods strips and blanks.
 CONDITIONS:
     temperature: 293
-PROPERTIES:
 DATA:
   - type: tabulated n
     data: |
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "refractiveindex-info-database"
+version = "2025.02.23"
+description = "Database of optical constants"
+requires-python = ">=3.10"
+dependencies = [
+    "cerberus>=1.3.7",
+    "matplotlib>=3.10.1",
+    "numpy>=2.2.4",
+    "pyqt6>=6.8.1",
+    "pytest>=8.3.5",
+    "pytest-xdist>=3.6.1",
+    "pyyaml>=6.0.2",
+]
+
+[tool.pytest.ini_options]
+addopts = "-n auto"  # let pytest-xdist size the worker pool to the machine instead of a fixed 12
diff --git a/tests/test_parse.py b/tests/test_parse.py
diff --git a/tests/test_yaml.py b/tests/test_yaml.py
@@ -0,0 +1,136 @@
+"""Tests to verify that there is a data yaml file for each entry in the catalog-nk.yml and catalog-n2.yml files,
+and that all files can be parsed and conform to the expected schema.
+
+This test can be run by the command
+
+    pytest tests/test_yaml.py
+
+or simply `pytest` from the repo directory. (The latter will automatically discover
+all test files.)
+"""
+
+from __future__ import annotations
+
+import pathlib
+import re
+
+import pytest
+import yaml
+from cerberus import Validator
+
+
+# Extract all YAML paths from the catalog
+def extract_paths(catalog: str | list, base_path: str = "") -> list:
+    paths = []
+    for item in catalog:
+        if isinstance(item, dict):
+            for key, value in item.items():
+                if key == "data":
+                    paths.append(base_path + value)
+                elif key == "content":
+                    paths.extend(extract_paths(value, base_path))
+                elif key == "DIVIDER":
+                    continue
+                else:
+                    paths.extend(extract_paths([value], base_path))
+    return paths
+
+
+# Paths to the catalog files, resolved relative to this test file (<repo>/tests/ -> <repo>/database/)
+CATALOG_ROOT_PATH = pathlib.Path(__file__).resolve().parent.parent / "database"
+CATALOG_NK_PATH = CATALOG_ROOT_PATH / "catalog-nk.yml"
+CATALOG_N2_PATH = CATALOG_ROOT_PATH / "catalog-n2.yml"
+
+# Load the catalog files. This happens at import time because the path lists built from them
+# are passed to the `parametrize` decorator of `test_paths_exist` further down.
+with open(CATALOG_NK_PATH, "r", encoding="utf-8") as stream:
+    catalog_nk = yaml.safe_load(stream)
+
+with open(CATALOG_N2_PATH, "r", encoding="utf-8") as stream:
+    catalog_n2 = yaml.safe_load(stream)
+
+# Get all paths from the catalogs (the values of the `data` keys, with no prefix added)
+ALL_NK_PATHS = extract_paths(catalog_nk)
+ALL_N2_PATHS = extract_paths(catalog_n2)
+
+# Define the regex pattern for URLs. Its `.pattern` string is used by the about.yml schema
+# below to validate the `url` entries under LINKS.
+url_regex = re.compile(
+    r"(?i)^(?:http|ftp)s?://"  # scheme: http://, https://, ftp:// or ftps://; (?i) ignores case
+    r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"  # domain...
+    r"localhost|"  # localhost...
+    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|"  # ...or ipv4
+    r"\[?[A-F0-9]*:[A-F0-9:]+\]?)"  # ...or ipv6
+    r"(?::\d+)?"  # optional port
+    r"(?:/?|[/?]\S+)$"  # optional trailing slash, or a path / query without whitespace
+)
+
+# Define the schema for the YAML nk data files. This could be extended to test other levels of the yaml files and/or test
+# the content of each field more thoroughly. E.g. check that temperature is a number, that 'tabulated n' is a list with two columns, etc.
+# REFERENCES and DATA are mandatory; COMMENTS, CONDITIONS and PROPERTIES are optional.
+schema = {
+    "REFERENCES": {"type": "string", "required": True},
+    "COMMENTS": {"type": "string", "required": False},
+    "DATA": {"type": "list", "required": True},
+    # CONDITIONS and PROPERTIES may be given as a string, a list or a mapping
+    "CONDITIONS": {
+        "oneof": [{"type": "string"}, {"type": "list"}, {"type": "dict"}],
+        "required": False,
+    },
+    "PROPERTIES": {
+        "oneof": [{"type": "string"}, {"type": "list"}, {"type": "dict"}],
+        "required": False,
+    },
+}
+
+# Validator used by test_yaml_schema for every file that is not named about.yml.
+# NOTE(review): created with Cerberus' default options, so unknown top-level keys are presumably rejected - confirm.
+validator = Validator(schema)
+
+# Define the schema for the `about.yml` files: NAMES is a list of strings, ABOUT is a string and
+# the optional LINKS is a list of mappings with a `url` (checked against url_regex) and a `text`.
+about_scheme = {
+    "NAMES": {"type": "list", "schema": {"type": "string"}},
+    "ABOUT": {"type": "string"},
+    "LINKS": {
+        "type": "list",
+        "schema": {
+            "type": "dict",
+            "schema": {
+                "url": {"type": "string", "regex": url_regex.pattern},
+                "text": {"type": "string"},
+            },
+        },
+        "required": False,
+    },
+}
+# Validator used by test_yaml_schema for files named about.yml
+validator_about = Validator(about_scheme)
+
+# Discover the paths for all `.yml` files.
+DATABASE_PATH = pathlib.Path(__file__).resolve().parent.parent / "database" / "data"
+ALL_YAML_FILES = list(DATABASE_PATH.rglob("*.yml"))
+
+
+# Verify that each about.yml file conforms to the expected schema
+@pytest.mark.parametrize(
+    "yaml_file",
+    ALL_YAML_FILES,
+    ids=lambda x: str(x).replace(str(DATABASE_PATH), "")[1:].replace(".yml", ""),
+)
+def test_yaml_schema(yaml_file):
+    with open(yaml_file, "r") as file:
+        data = yaml.safe_load(file)
+        if yaml_file.name == "about.yml":
+            assert validator_about.validate(data), (
+                f"Schema validation failed for {yaml_file.name}: {validator_about.errors}"
+            )
+        else:
+            assert validator.validate(data), (
+                f"Schema validation failed for {yaml_file.name}: {validator.errors}"
+            )
+
+
+# Verify that each yaml path referenced in the catalog files exists in the database/data directory
+@pytest.mark.parametrize(
+    "path",
+    ALL_NK_PATHS + ALL_N2_PATHS,
+    ids=lambda x: str(x).replace(str(DATABASE_PATH), "").replace(".yml", ""),
+)
+def test_paths_exist(path):
+    full_path = (
+        pathlib.Path(__file__).resolve().parent.parent / "database" / "data" / path
+    )
+    assert full_path.exists(), f"Path does not exist: {full_path}"
diff --git a/uv.lock b/uv.lock