
Commit c291f90

Merge pull request #33 from GeoOcean/update/pca-rbf
Merge tests and docu for pca and rbf
2 parents b7a4a13 + 2a18ce3

16 files changed: +99 additions, -8 deletions


.github/workflows/python-tests.yml

Lines changed: 1 addition & 0 deletions
@@ -31,3 +31,4 @@ jobs:
     source /usr/share/miniconda/etc/profile.d/conda.sh
     conda activate bluemath
     python -m unittest discover tests/datamining/
+    python -m unittest discover tests/interpolation/

bluemath_tk/core/decorators.py

Lines changed: 5 additions & 0 deletions
@@ -263,6 +263,7 @@ def wrapper(
         subset_custom_scale_factor: dict = {},
         normalize_target_data: bool = True,
         target_custom_scale_factor: dict = {},
+        num_threads: int = None,
     ):
         if subset_data is None:
             raise ValueError("Subset data cannot be None")
@@ -292,6 +293,9 @@ def wrapper(
             raise TypeError("Normalize target data must be a bool")
         if not isinstance(target_custom_scale_factor, dict):
             raise TypeError("Target custom scale factor must be a dict")
+        if num_threads is not None:
+            if not isinstance(num_threads, int) or num_threads <= 0:
+                raise ValueError("Number of threads must be integer and > 0")
         return func(
             self,
             subset_data,
@@ -301,6 +305,7 @@ def wrapper(
             subset_custom_scale_factor,
             normalize_target_data,
             target_custom_scale_factor,
+            num_threads,
         )

     return wrapper
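
For context on how the new guard behaves at call time, here is a minimal, self-contained sketch of the same validating-decorator pattern. The name validate_fit_args and the trimmed argument list are illustrative, not the module's actual signature; only the num_threads check mirrors the commit.

```python
import functools

def validate_fit_args(func):
    """Reject invalid fit arguments before the wrapped method runs (hypothetical name)."""
    @functools.wraps(func)
    def wrapper(self, subset_data, *, num_threads: int = None, **kwargs):
        if subset_data is None:
            raise ValueError("Subset data cannot be None")
        # Mirrors the check added in this commit: None means "serial",
        # anything else must be a strictly positive integer.
        if num_threads is not None:
            if not isinstance(num_threads, int) or num_threads <= 0:
                raise ValueError("Number of threads must be integer and > 0")
        return func(self, subset_data, num_threads=num_threads, **kwargs)
    return wrapper

class Model:
    @validate_fit_args
    def fit(self, subset_data, *, num_threads=None, **kwargs):
        return f"fitting with num_threads={num_threads}"

print(Model().fit([1, 2, 3], num_threads=4))   # passes validation
# Model().fit([1, 2, 3], num_threads=0)        # would raise ValueError
```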

bluemath_tk/interpolation/rbf.py

Lines changed: 17 additions & 3 deletions
@@ -349,7 +349,7 @@ def _preprocess_subset_data(

         self.logger.info("Preprocessing subset data")
         for directional_variable in self.subset_directional_variables:
-            var_u_component, var_y_component = self._get_uv_components(
+            var_u_component, var_y_component = self.get_uv_components(
                 x_deg=subset_data[directional_variable].values
             )
             subset_data[f"{directional_variable}_u"] = var_u_component
@@ -416,7 +416,7 @@ def _preprocess_target_data(

         self.logger.info("Preprocessing target data")
         for directional_variable in self.target_directional_variables:
-            var_u_component, var_y_component = self._get_uv_components(
+            var_u_component, var_y_component = self.get_uv_components(
                 x_deg=target_data[directional_variable].values
             )
             target_data[f"{directional_variable}_u"] = var_u_component
@@ -682,6 +682,7 @@ def fit(
         subset_custom_scale_factor: dict = {},
         normalize_target_data: bool = True,
         target_custom_scale_factor: dict = {},
+        num_threads: int = None,
     ) -> None:
         """
         Fits the model to the data.
@@ -702,14 +703,22 @@
             Whether to normalize the target data. Default is True.
         target_custom_scale_factor : dict, optional
             The custom scale factor for the target data. Default is {}.
+        num_threads : int, optional
+            The number of threads to use for the optimization. Default is None.

         Notes
         -----
         - This function fits the RBF model to the data by:
             1. Preprocessing the subset and target data.
             2. Calculating the optimal sigma for the target variables.
+            3. Storing the RBF coefficients and optimal sigmas.
+        - The number of threads to use for the optimization can be specified.
         """

+        if num_threads is not None:
+            self.set_num_processors_to_use(num_processors=num_threads)
+            self.logger.info(f"Using {num_threads} threads for optimization.")
+
         self._subset_directional_variables = subset_directional_variables
         self._target_directional_variables = target_directional_variables
         self._subset_custom_scale_factor = subset_custom_scale_factor
@@ -780,7 +789,7 @@ def predict(self, dataset: pd.DataFrame) -> pd.DataFrame:
             )
         for directional_variable in self.target_directional_variables:
             self.logger.info(f"Calculating target degrees for {directional_variable}")
-            interpolated_target[directional_variable] = self._get_degrees_from_uv(
+            interpolated_target[directional_variable] = self.get_degrees_from_uv(
                 xu=interpolated_target[f"{directional_variable}_u"].values,
                 xv=interpolated_target[f"{directional_variable}_v"].values,
             )
@@ -796,6 +805,7 @@ def fit_predict(
         subset_custom_scale_factor: dict = {},
         normalize_target_data: bool = True,
         target_custom_scale_factor: dict = {},
+        num_threads: int = None,
     ) -> pd.DataFrame:
         """
         Fits the model to the subset and predicts the interpolated dataset.
@@ -818,6 +828,8 @@
             Whether to normalize the target data. Default is True.
         target_custom_scale_factor : dict, optional
             The custom scale factor for the target data. Default is {}.
+        num_threads : int, optional
+            The number of threads to use for the optimization. Default is None.

         Returns
         -------
@@ -837,5 +849,7 @@
             subset_custom_scale_factor=subset_custom_scale_factor,
             normalize_target_data=normalize_target_data,
             target_custom_scale_factor=target_custom_scale_factor,
+            num_threads=num_threads,
         )
+
         return self.predict(dataset=dataset)
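
Taken together, callers can now bound the sigma-optimization parallelism per call. A hypothetical usage sketch follows: the RBF class name is assumed from the module path and the keyword names come from the diff above, but the full constructor and fit signatures are not shown in this commit, so treat the details as placeholders.

```python
import pandas as pd
from bluemath_tk.interpolation.rbf import RBF  # class name assumed from the module path

# Toy subset/target pairs and points to interpolate (invented data)
subset = pd.DataFrame({"hs": [1.0, 2.0, 3.0, 4.0], "dir": [10.0, 120.0, 200.0, 350.0]})
target = pd.DataFrame({"runup": [0.4, 0.9, 1.3, 1.8]})
dataset = pd.DataFrame({"hs": [1.5, 3.5], "dir": [90.0, 270.0]})

model = RBF()
model.fit(
    subset_data=subset,
    target_data=target,
    subset_directional_variables=["dir"],  # split into u/v via get_uv_components
    num_threads=4,  # validated by the decorator; None keeps the previous serial behavior
)
predictions = model.predict(dataset=dataset)
```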

docs/contribute.md

Lines changed: 1 addition & 1 deletion
@@ -69,4 +69,4 @@ By contributing to the BlueMath package, you agree that your contributions will

 If you have any questions or need further assistance, feel free to reach out to the maintainers.

-Thank you for your contributions and support!
+Thank you for your contributions and support!

docs/datamining/base_datamining.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+::: bluemath_tk.datamining._base_datamining
docs/datamining/clustering_datamining.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+::: bluemath_tk.datamining.mda
+
+::: bluemath_tk.datamining.kma
+
+::: bluemath_tk.datamining.som

docs/datamining/intro.md

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+# Datamining
+
+The DataMining package in this repository provides tools and algorithms for extracting valuable insights from large datasets. It includes functionalities for data preprocessing, clustering, classification, and visualization, making it a comprehensive solution for data analysis tasks.
+
+For more detailed information, refer to the specific class implementations and their docstrings.
+
+## Sampling Models
+
+### LHS
+
+The [`Latin Hypercube Sampling (LHS)`](sampling_datamining.md) model is used for generating a distribution of plausible collections of parameter values from a multidimensional distribution. It ensures that the entire range of each parameter is explored by dividing the range into intervals of equal probability and sampling from each interval.
+
+## Clustering Models
+
+### MDA
+
+The [`Maximum Dissimilarity Algorithm (MDA)`](clustering_datamining.md) model is a sampling technique used to select a subset of data points that are maximally dissimilar from each other, ensuring a diverse representation of the dataset.
+
+### KMA
+
+The [`K-Means Algorithm (KMA)`](clustering_datamining.md) model is a clustering method that partitions the dataset into K distinct, non-overlapping subsets.
+
+### SOM
+
+The [`Self-Organizing Map (SOM)`](clustering_datamining.md) model is a type of artificial neural network used for unsupervised learning to produce a low-dimensional representation of the input space.
+
+## Reduction Models
+
+### PCA
+
+The [`Principal Component Analysis (PCA)`](reduction_datamining.md) model is a dimensionality reduction technique that transforms the data into a set of orthogonal components, capturing the most variance.
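
The MDA and KMA descriptions in the new intro.md map to compact reference implementations. A minimal numpy/scikit-learn sketch of both ideas (illustrative only, not the bluemath_tk.datamining code):

```python
import numpy as np
from sklearn.cluster import KMeans

def maxdiss_select(data: np.ndarray, n_select: int, seed_index: int = 0) -> np.ndarray:
    """Greedy max-min (maximum dissimilarity) selection: each new point is the
    one farthest from its nearest already-selected point."""
    selected = [seed_index]
    # Distance from every point to its nearest selected point so far
    min_dist = np.linalg.norm(data - data[seed_index], axis=1)
    for _ in range(n_select - 1):
        next_idx = int(np.argmax(min_dist))  # farthest from the current subset
        selected.append(next_idx)
        min_dist = np.minimum(min_dist, np.linalg.norm(data - data[next_idx], axis=1))
    return np.array(selected)

rng = np.random.default_rng(42)
points = rng.random((500, 2))

mda_idx = maxdiss_select(points, n_select=10)  # diverse subset, favors extremes
kma = KMeans(n_clusters=10, n_init=10).fit(points)
kma_centroids = kma.cluster_centers_           # representative subset, favors dense regions
```

The contrast is the point of having both: MDA picks boundary-heavy, maximally diverse cases, while K-Means picks centroids of dense regions.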
docs/datamining/reduction_datamining.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+::: bluemath_tk.datamining.pca
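
As a quick concept reference for the new reduction page, the same orthogonal-components idea in scikit-learn (a stand-in, not the bluemath_tk.datamining.pca API):

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
data = rng.normal(size=(100, 5))      # 100 samples, 5 features (invented data)

pca = PCA(n_components=2)             # keep the two highest-variance directions
reduced = pca.fit_transform(data)     # shape (100, 2)
print(pca.explained_variance_ratio_)  # variance captured per component
```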
docs/datamining/sampling_datamining.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+::: bluemath_tk.datamining.lhs
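
Likewise for the new sampling page, SciPy's quasi-Monte Carlo module illustrates the equal-probability stratification described in intro.md (a stand-in, not the bluemath_tk.datamining.lhs API; the bounds below are invented wave-parameter ranges):

```python
from scipy.stats import qmc

sampler = qmc.LatinHypercube(d=3, seed=0)  # one stratum per sample along each of 3 dimensions
unit_samples = sampler.random(n=8)         # 8 points in the unit cube [0, 1)^3
# Rescale to physical ranges, e.g. wave height (m), period (s), direction (deg)
samples = qmc.scale(unit_samples, l_bounds=[0.5, 4.0, 0.0], u_bounds=[5.0, 16.0, 360.0])
print(samples)
```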

docs/index.md

Lines changed: 1 addition & 1 deletion
@@ -12,4 +12,4 @@ Demo codes have been developed through a collaborative effort by members of the

 ## Project status

-- UNDER DEVELOPMENT
+- UNDER DEVELOPMENT
