
Commit c291f90

Merge pull request #33 from GeoOcean/update/pca-rbf
Merge tests and docu for pca and rbf
2 parents b7a4a13 + 2a18ce3

16 files changed: +99 additions, -8 deletions


.github/workflows/python-tests.yml

Lines changed: 1 addition & 0 deletions
@@ -31,3 +31,4 @@ jobs:
     source /usr/share/miniconda/etc/profile.d/conda.sh
     conda activate bluemath
     python -m unittest discover tests/datamining/
+    python -m unittest discover tests/interpolation/

bluemath_tk/core/decorators.py

Lines changed: 5 additions & 0 deletions
@@ -263,6 +263,7 @@ def wrapper(
         subset_custom_scale_factor: dict = {},
         normalize_target_data: bool = True,
         target_custom_scale_factor: dict = {},
+        num_threads: int = None,
     ):
         if subset_data is None:
             raise ValueError("Subset data cannot be None")
@@ -292,6 +293,9 @@ def wrapper(
             raise TypeError("Normalize target data must be a bool")
         if not isinstance(target_custom_scale_factor, dict):
             raise TypeError("Target custom scale factor must be a dict")
+        if num_threads is not None:
+            if not isinstance(num_threads, int) or num_threads <= 0:
+                raise ValueError("Number of threads must be integer and > 0")
         return func(
             self,
             subset_data,
@@ -301,6 +305,7 @@ def wrapper(
             subset_custom_scale_factor,
             normalize_target_data,
             target_custom_scale_factor,
+            num_threads,
         )

     return wrapper
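
For context on how the new guard behaves at call time, here is a minimal, self-contained sketch of the same validating-decorator pattern. The name validate_fit_args and the trimmed argument list are illustrative, not the module's actual signature; only the num_threads check mirrors the commit.

```python
import functools

def validate_fit_args(func):
    """Reject invalid fit arguments before the wrapped method runs (hypothetical name)."""
    @functools.wraps(func)
    def wrapper(self, subset_data, *, num_threads: int = None, **kwargs):
        if subset_data is None:
            raise ValueError("Subset data cannot be None")
        # Mirrors the check added in this commit: None means "serial",
        # anything else must be a strictly positive integer.
        if num_threads is not None:
            if not isinstance(num_threads, int) or num_threads <= 0:
                raise ValueError("Number of threads must be integer and > 0")
        return func(self, subset_data, num_threads=num_threads, **kwargs)
    return wrapper

class Model:
    @validate_fit_args
    def fit(self, subset_data, *, num_threads=None, **kwargs):
        return f"fitting with num_threads={num_threads}"

print(Model().fit([1, 2, 3], num_threads=4))   # passes validation
# Model().fit([1, 2, 3], num_threads=0)        # would raise ValueError
```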

bluemath_tk/interpolation/rbf.py

Lines changed: 17 additions & 3 deletions
@@ -349,7 +349,7 @@ def _preprocess_subset_data(

         self.logger.info("Preprocessing subset data")
         for directional_variable in self.subset_directional_variables:
-            var_u_component, var_y_component = self._get_uv_components(
+            var_u_component, var_y_component = self.get_uv_components(
                 x_deg=subset_data[directional_variable].values
             )
             subset_data[f"{directional_variable}_u"] = var_u_component
@@ -416,7 +416,7 @@ def _preprocess_target_data(

         self.logger.info("Preprocessing target data")
         for directional_variable in self.target_directional_variables:
-            var_u_component, var_y_component = self._get_uv_components(
+            var_u_component, var_y_component = self.get_uv_components(
                 x_deg=target_data[directional_variable].values
             )
             target_data[f"{directional_variable}_u"] = var_u_component
@@ -682,6 +682,7 @@ def fit(
         subset_custom_scale_factor: dict = {},
         normalize_target_data: bool = True,
         target_custom_scale_factor: dict = {},
+        num_threads: int = None,
     ) -> None:
         """
         Fits the model to the data.
@@ -702,14 +703,22 @@
             Whether to normalize the target data. Default is True.
         target_custom_scale_factor : dict, optional
             The custom scale factor for the target data. Default is {}.
+        num_threads : int, optional
+            The number of threads to use for the optimization. Default is None.

         Notes
         -----
         - This function fits the RBF model to the data by:
             1. Preprocessing the subset and target data.
             2. Calculating the optimal sigma for the target variables.
+            3. Storing the RBF coefficients and optimal sigmas.
+        - The number of threads to use for the optimization can be specified.
         """

+        if num_threads is not None:
+            self.set_num_processors_to_use(num_processors=num_threads)
+            self.logger.info(f"Using {num_threads} threads for optimization.")
+
         self._subset_directional_variables = subset_directional_variables
         self._target_directional_variables = target_directional_variables
         self._subset_custom_scale_factor = subset_custom_scale_factor
@@ -780,7 +789,7 @@ def predict(self, dataset: pd.DataFrame) -> pd.DataFrame:
             )
         for directional_variable in self.target_directional_variables:
             self.logger.info(f"Calculating target degrees for {directional_variable}")
-            interpolated_target[directional_variable] = self._get_degrees_from_uv(
+            interpolated_target[directional_variable] = self.get_degrees_from_uv(
                 xu=interpolated_target[f"{directional_variable}_u"].values,
                 xv=interpolated_target[f"{directional_variable}_v"].values,
             )
@@ -796,6 +805,7 @@ def fit_predict(
         subset_custom_scale_factor: dict = {},
         normalize_target_data: bool = True,
         target_custom_scale_factor: dict = {},
+        num_threads: int = None,
     ) -> pd.DataFrame:
         """
         Fits the model to the subset and predicts the interpolated dataset.
@@ -818,6 +828,8 @@
             Whether to normalize the target data. Default is True.
         target_custom_scale_factor : dict, optional
             The custom scale factor for the target data. Default is {}.
+        num_threads : int, optional
+            The number of threads to use for the optimization. Default is None.

         Returns
         -------
@@ -837,5 +849,7 @@
             subset_custom_scale_factor=subset_custom_scale_factor,
             normalize_target_data=normalize_target_data,
             target_custom_scale_factor=target_custom_scale_factor,
+            num_threads=num_threads,
         )
+
         return self.predict(dataset=dataset)
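
Taken together, callers can now bound the sigma-optimization parallelism per call. A hypothetical usage sketch follows: the RBF class name is assumed from the module path and the keyword names come from the diff above, but the full constructor and fit signatures are not shown in this commit, so treat the details as placeholders.

```python
import pandas as pd
from bluemath_tk.interpolation.rbf import RBF  # class name assumed from the module path

# Toy subset/target pairs and points to interpolate (invented data)
subset = pd.DataFrame({"hs": [1.0, 2.0, 3.0, 4.0], "dir": [10.0, 120.0, 200.0, 350.0]})
target = pd.DataFrame({"runup": [0.4, 0.9, 1.3, 1.8]})
dataset = pd.DataFrame({"hs": [1.5, 3.5], "dir": [90.0, 270.0]})

model = RBF()
model.fit(
    subset_data=subset,
    target_data=target,
    subset_directional_variables=["dir"],  # split into u/v via get_uv_components
    num_threads=4,  # validated by the decorator; None keeps the previous serial behavior
)
predictions = model.predict(dataset=dataset)
```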

docs/contribute.md

Lines changed: 1 addition & 1 deletion
@@ -69,4 +69,4 @@ By contributing to the BlueMath package, you agree that your contributions will

 If you have any questions or need further assistance, feel free to reach out to the maintainers.

-Thank you for your contributions and support!
+Thank you for your contributions and support!

docs/datamining/base_datamining.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+::: bluemath_tk.datamining._base_datamining
docs/datamining/clustering_datamining.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+::: bluemath_tk.datamining.mda
+
+::: bluemath_tk.datamining.kma
+
+::: bluemath_tk.datamining.som

docs/datamining/intro.md

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+# Datamining
+
+The DataMining package in this repository provides tools and algorithms for extracting valuable insights from large datasets. It includes functionalities for data preprocessing, clustering, classification, and visualization, making it a comprehensive solution for data analysis tasks.
+
+For more detailed information, refer to the specific class implementations and their docstrings.
+
+## Sampling Models
+
+### LHS
+
+The [`Latin Hypercube Sampling (LHS)`](sampling_datamining.md) model is used for generating a distribution of plausible collections of parameter values from a multidimensional distribution. It ensures that the entire range of each parameter is explored by dividing the range into intervals of equal probability and sampling from each interval.
+
+## Clustering Models
+
+### MDA
+
+The [`Maximum Dissimilarity Algorithm (MDA)`](clustering_datamining.md) model is a sampling technique used to select a subset of data points that are maximally dissimilar from each other, ensuring a diverse representation of the dataset.
+
+### KMA
+
+The [`K-Means Algorithm (KMA)`](clustering_datamining.md) model is a clustering method that partitions the dataset into K distinct, non-overlapping subsets.
+
+### SOM
+
+The [`Self-Organizing Map (SOM)`](clustering_datamining.md) model is a type of artificial neural network used for unsupervised learning to produce a low-dimensional representation of the input space.
+
+## Reduction Models
+
+### PCA
+
+The [`Principal Component Analysis (PCA)`](reduction_datamining.md) model is a dimensionality reduction technique that transforms the data into a set of orthogonal components, capturing the most variance.
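
The MDA and KMA descriptions in the new intro.md map to compact reference implementations. A minimal numpy/scikit-learn sketch of both ideas (illustrative only, not the bluemath_tk.datamining code):

```python
import numpy as np
from sklearn.cluster import KMeans

def maxdiss_select(data: np.ndarray, n_select: int, seed_index: int = 0) -> np.ndarray:
    """Greedy max-min (maximum dissimilarity) selection: each new point is the
    one farthest from its nearest already-selected point."""
    selected = [seed_index]
    # Distance from every point to its nearest selected point so far
    min_dist = np.linalg.norm(data - data[seed_index], axis=1)
    for _ in range(n_select - 1):
        next_idx = int(np.argmax(min_dist))  # farthest from the current subset
        selected.append(next_idx)
        min_dist = np.minimum(min_dist, np.linalg.norm(data - data[next_idx], axis=1))
    return np.array(selected)

rng = np.random.default_rng(42)
points = rng.random((500, 2))

mda_idx = maxdiss_select(points, n_select=10)  # diverse subset, favors extremes
kma = KMeans(n_clusters=10, n_init=10).fit(points)
kma_centroids = kma.cluster_centers_           # representative subset, favors dense regions
```

The contrast is the point of having both: MDA picks boundary-heavy, maximally diverse cases, while K-Means picks centroids of dense regions.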
docs/datamining/reduction_datamining.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+::: bluemath_tk.datamining.pca
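
As a quick concept reference for the new reduction page, the same orthogonal-components idea in scikit-learn (a stand-in, not the bluemath_tk.datamining.pca API):

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
data = rng.normal(size=(100, 5))      # 100 samples, 5 features (invented data)

pca = PCA(n_components=2)             # keep the two highest-variance directions
reduced = pca.fit_transform(data)     # shape (100, 2)
print(pca.explained_variance_ratio_)  # variance captured per component
```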
docs/datamining/sampling_datamining.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+::: bluemath_tk.datamining.lhs
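
Likewise for the new sampling page, SciPy's quasi-Monte Carlo module illustrates the equal-probability stratification described in intro.md (a stand-in, not the bluemath_tk.datamining.lhs API; the bounds below are invented wave-parameter ranges):

```python
from scipy.stats import qmc

sampler = qmc.LatinHypercube(d=3, seed=0)  # one stratum per sample along each of 3 dimensions
unit_samples = sampler.random(n=8)         # 8 points in the unit cube [0, 1)^3
# Rescale to physical ranges, e.g. wave height (m), period (s), direction (deg)
samples = qmc.scale(unit_samples, l_bounds=[0.5, 4.0, 0.0], u_bounds=[5.0, 16.0, 360.0])
print(samples)
```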

docs/index.md

Lines changed: 1 addition & 1 deletion
@@ -12,4 +12,4 @@ Demo codes have been developed through a collaborative effort by members of the

 ## Project status

-- UNDER DEVELOPMENT
+- UNDER DEVELOPMENT
