[JTH] add more documentation to clustering techniques

tausiaj · tausiaj · commit 32943683b41f · 2025-05-28T10:02:09.000+02:00
diff --git a/bluemath_tk/core/decorators.py b/bluemath_tk/core/decorators.py
@@ -216,7 +216,14 @@ def wrapper(
             raise ValueError("Number of iterations must be integer and > 0")
         if not isinstance(normalize_data, bool):
             raise TypeError("Normalize data must be a boolean")
-        return func(self, data, directional_variables, num_iteration)
+        return func(
+            self,
+            data,
+            directional_variables,
+            custom_scale_factor,
+            num_iteration,
+            normalize_data,
+        )
 
     return wrapper
 
diff --git a/bluemath_tk/datamining/kma.py b/bluemath_tk/datamining/kma.py
@@ -377,10 +377,14 @@ def fit_predict(
         data : pd.DataFrame
             The input data to be used for the KMA algorithm.
         directional_variables : List[str], optional
-            A list of directional variables (will be transformed to u and v).
+            A list of directional variables that will be transformed to u and v components.
+            To apply custom_scale_factor to them, use the variable names with the _u and _v suffixes.
+            Example: directional_variables=["Dir"], custom_scale_factor={"Dir_u": [0, 1], "Dir_v": [0, 1]}.
             Default is [].
-        custom_scale_factor : dict
+        custom_scale_factor : dict, optional
             A dictionary specifying custom scale factors for normalization.
+            If normalize_data is True, this will be used to normalize the data.
+            Example: {"Hs": [0, 10], "Tp": [0, 10]}.
             Default is {}.
         min_number_of_points : int, optional
             The minimum number of points to consider a cluster.
@@ -390,10 +394,13 @@ def fit_predict(
             This is used when min_number_of_points is not None.
             Default is 10.
         normalize_data : bool, optional
-            A flag to normalize the data. Default is False.
+            A flag to normalize the data.
+            If True, the data will be normalized using the custom_scale_factor.
+            Default is False.
         regression_guided: dict, optional
             A dictionary specifying regression-guided clustering variables and relative weights.
-            Example: {"vars":["Fe"],"alpha":[0.6]}. Default is {}.
+            Example: {"vars": ["Fe"], "alpha": [0.6]}.
+            Default is {}.
 
         Returns
         -------
diff --git a/bluemath_tk/datamining/mda.py b/bluemath_tk/datamining/mda.py
@@ -297,16 +297,22 @@ def fit(
         data : pd.DataFrame
             The input data to be used for the MDA algorithm.
         directional_variables : List[str], optional
-            A list of names of the directional variables within the data.
+            A list of directional variables that will be transformed to u and v components.
+            To apply custom_scale_factor to them, use the variable names with the _u and _v suffixes.
+            Example: directional_variables=["Dir"], custom_scale_factor={"Dir_u": [0, 1], "Dir_v": [0, 1]}.
             Default is [].
         custom_scale_factor : dict, optional
             A dictionary specifying custom scale factors for normalization.
+            If normalize_data is True, this will be used to normalize the data.
+            Example: {"Hs": [0, 10], "Tp": [0, 10]}.
             Default is {}.
         first_centroid_seed : int, optional
             The index of the first centroid to use in the MDA algorithm.
             Default is None.
         normalize_data : bool, optional
-            A flag to normalize the data. Default is False.
+            A flag to normalize the data.
+            If True, the data will be normalized using the custom_scale_factor.
+            Default is False.
 
         Notes
         -----
@@ -432,16 +438,22 @@ def fit_predict(
         data : pd.DataFrame
             The input data to be used for the MDA algorithm.
         directional_variables : List[str], optional
-            A list of names of the directional variables within the data.
+            A list of directional variables that will be transformed to u and v components.
+            To apply custom_scale_factor to them, use the variable names with the _u and _v suffixes.
+            Example: directional_variables=["Dir"], custom_scale_factor={"Dir_u": [0, 1], "Dir_v": [0, 1]}.
             Default is [].
         custom_scale_factor : dict, optional
             A dictionary specifying custom scale factors for normalization.
+            If normalize_data is True, this will be used to normalize the data.
+            Example: {"Hs": [0, 10], "Tp": [0, 10]}.
             Default is {}.
         first_centroid_seed : int, optional
             The index of the first centroid to use in the MDA algorithm.
             Default is None.
         normalize_data : bool, optional
-            A flag to normalize the data. Default is False.
+            A flag to normalize the data.
+            If True, the data will be normalized using the custom_scale_factor.
+            Default is False.
 
         Returns
         -------
diff --git a/bluemath_tk/datamining/som.py b/bluemath_tk/datamining/som.py
@@ -294,18 +294,24 @@ def fit(
         Parameters
         ----------
         data : pd.DataFrame
-            The input data to be used for the fitting.
+            The input data to be used for the SOM algorithm.
         directional_variables : List[str], optional
-            A list with the directional variables (will be transformed to u and v).
+            A list of directional variables that will be transformed to u and v components.
+            To apply custom_scale_factor to them, use the variable names with the _u and _v suffixes.
+            Example: directional_variables=["Dir"], custom_scale_factor={"Dir_u": [0, 1], "Dir_v": [0, 1]}.
             Default is [].
         custom_scale_factor : dict, optional
             A dictionary specifying custom scale factors for normalization.
+            If normalize_data is True, this will be used to normalize the data.
+            Example: {"Hs": [0, 10], "Tp": [0, 10]}.
             Default is {}.
         num_iteration : int, optional
             The number of iterations for the SOM fitting.
             Default is 1000.
         normalize_data : bool, optional
-            A flag to normalize the data. Default is False.
+            A flag to normalize the data.
+            If True, the data will be normalized using the custom_scale_factor.
+            Default is False.
         """
 
         super().fit(
@@ -382,16 +388,22 @@ def fit_predict(
         data : pd.DataFrame
             The input data to be used for the SOM algorithm.
         directional_variables : List[str], optional
-            A list of directional variables (will be transformed to u and v).
+            A list of directional variables that will be transformed to u and v components.
+            To apply custom_scale_factor to them, use the variable names with the _u and _v suffixes.
+            Example: directional_variables=["Dir"], custom_scale_factor={"Dir_u": [0, 1], "Dir_v": [0, 1]}.
             Default is [].
         custom_scale_factor : dict, optional
             A dictionary specifying custom scale factors for normalization.
+            If normalize_data is True, this will be used to normalize the data.
+            Example: {"Hs": [0, 10], "Tp": [0, 10]}.
             Default is {}.
         num_iteration : int, optional
             The number of iterations for the SOM fitting.
             Default is 1000.
         normalize_data : bool, optional
-            A flag to normalize the data. Default is False.
+            A flag to normalize the data.
+            If True, the data will be normalized using the custom_scale_factor.
+            Default is False.
 
         Returns
         -------