Fraunhofer-IESE
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/tests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 1 deletion b/‎README.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎badgers/generators/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎badgers/generators/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎badgers/generators/graph/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎badgers/generators/graph/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎badgers/generators/graph/missingness.py‎
Lines changed: 40 additions & 10 deletions b/‎badgers/generators/graph/missingness.py‎
Lines changed: 40 additions & 10 deletions
diff --git a/‎badgers/generators/tabular_data/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎badgers/generators/tabular_data/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎badgers/generators/tabular_data/drift.py‎
Lines changed: 30 additions & 21 deletions b/‎badgers/generators/tabular_data/drift.py‎
Lines changed: 30 additions & 21 deletions
diff --git a/‎badgers/generators/tabular_data/imbalance.py‎
Lines changed: 72 additions & 24 deletions b/‎badgers/generators/tabular_data/imbalance.py‎
Lines changed: 72 additions & 24 deletions
@@ -37,4 +37,4 @@ jobs:
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
     - name: Run tests with tox
       # Run tox using the version of Python in `PATH`
-      run: tox -e py
+      run: tox -e py
@@ -162,3 +162,6 @@ cython_debug/
 .idea/
 
 /notebooks/
+/badgers/uncertainty-main-uncertainty-generate-augmentation/
+/experiments/
+/.continue/
@@ -21,7 +21,8 @@ from badgers.generators.tabular_data.noise import GaussianNoiseGenerator
 
 X, y = make_blobs()
 trf = GaussianNoiseGenerator()
-Xt, yt = trf.generate(X,y,noise_std=0.5)
+
+Xt, yt = trf.generate(X, y, noise_std=0.5)
 ```
 
 More examples are available in the [tutorials](https://fraunhofer-iese.github.io/badgers/tutorials/Imbalance-Tabular-Data/) section.
 
@@ -1,3 +1,3 @@
 """
-Module containing all transformers
+Module containing all generators
 """
@@ -1,3 +1,3 @@
 """
-Module containing all the transformers that accept graph data as input
+This module contains all the generators (classes exposing a `generate` method) that accept graph data as input.
 """
@@ -24,6 +24,9 @@ def __init__(self, random_generator: numpy.random.Generator = default_rng(seed=0
 
     @abc.abstractmethod
     def generate(self, X, y=None, **params) -> Tuple:
+        """
+        This method should be overridden by subclasses.
+        """
         pass
 
 
@@ -33,15 +36,28 @@ class NodesMissingCompletelyAtRandom(MissingGenerator):
     """
 
     def __init__(self, random_generator: numpy.random.Generator = default_rng(seed=0)):
+        """
+        Initialize the missingness generator.
+
+        :param random_generator: A NumPy random number generator.
+                               Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
+        """
         super().__init__(random_generator=random_generator)
 
     def generate(self, X, y=None, percentage_missing: float = 0.1) -> Tuple:
         """
-
-        :param X:
-        :param y:
-        :param percentage_missing: The percentage of missing nodes (float value between 0 and 1 excluded)
-        :return:
+        Generate a graph with a specified percentage of missing nodes.
+
+        :param X: The input graph from which nodes will be removed.
+        :type X: nx.Graph
+        :param y: Optional target array associated with the nodes in the graph.
+                  If provided, the corresponding elements will also be removed.
+        :type y: np.ndarray, optional
+        :param percentage_missing: The fraction of nodes to be removed (float value strictly between 0 and 1, both bounds excluded).
+        :type percentage_missing: float
+        :return: A tuple containing the modified graph with missing nodes and the modified target array (if provided).
+        :rtype: Tuple[nx.Graph, Optional[np.ndarray]]
         """
         assert 0 < percentage_missing < 1
         if not isinstance(X, nx.Graph):
@@ -70,15 +86,29 @@ class EdgesMissingCompletelyAtRandom(MissingGenerator):
     """
 
     def __init__(self, random_generator: numpy.random.Generator = default_rng(seed=0)):
+        """
+        Initialize the missingness generator.
+
+        :param random_generator: A NumPy random number generator.
+                                 Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
+        """
         super().__init__(random_generator=random_generator)
 
     def generate(self, X, y=None, percentage_missing: float = 0.1) -> Tuple:
         """
-
-        :param X:
-        :param y:
-        :param percentage_missing: The percentage of missing nodes (float value between 0 and 1 excluded)
-        :return:
+        Generate a graph with a specified percentage of missing edges.
+
+        :param X: The input graph from which edges will be removed.
+        :type X: nx.Graph
+        :param y: Optional target data associated with the edges in the graph.
+                  If provided, the corresponding elements will also be removed.
+                  Can be a dictionary where keys are edge tuples and values are target values.
+        :type y: dict, optional
+        :param percentage_missing: The fraction of edges to be removed (float value strictly between 0 and 1, both bounds excluded).
+        :type percentage_missing: float
+        :return: A tuple containing the modified graph with missing edges and the modified target data (if provided).
+        :rtype: Tuple[nx.Graph, Optional[dict]]
         """
         assert 0 < percentage_missing < 1
         if not isinstance(X, nx.Graph):
 
@@ -1,3 +1,3 @@
 """
-Module containing all the transformers that accept tabular data as input
+This module contains all the generators (classes exposing a `generate` method) that accept tabular data as input.
 """
@@ -17,8 +17,10 @@ class DriftGenerator(GeneratorMixin):
 
     def __init__(self, random_generator=default_rng(seed=0)):
         """
-        :param random_generator: numpy.random.Generator, default default_rng(seed=0)
-            A random generator
+        Initialize the drift generator.
+        :param random_generator: A NumPy random number generator used to generate random numbers.
+                                 Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
         """
         self.random_generator = random_generator
 
@@ -27,7 +29,6 @@ def generate(self, X, y, **params):
         pass
 
 
-
 class RandomShiftGenerator(DriftGenerator):
     """
     Randomly shift (geometrical translation) values of each column independently of one another.
@@ -37,24 +38,27 @@ class RandomShiftGenerator(DriftGenerator):
 
     def __init__(self, random_generator=default_rng(seed=0)):
         """
+        Initialize the RandomShiftGenerator.
 
-        :param random_generator: A random generator
-        :param shift_std: The standard deviation of the amount of shift applied (shift is chosen from a normal distribution)
+        :param random_generator: A NumPy random number generator used to generate random numbers.
+                                 Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
         """
         super().__init__(random_generator=random_generator)
 
     @preprocess_inputs
-    def generate(self, X, y=None, shift_std: Union[float,np.array] = 0.1):
+    def generate(self, X, y=None, shift_std: Union[float, np.array] = 0.1):
         """
         Randomly shift (geometrical translation) values of each column independently of one another.
-        Data are first standardized (mean = 0, var = 1) and a random number is added to each column.
-        The ith columns is simply translated: `$x_i \left arrow x_i + \epsilon_i$`
-
-
-        :param X:
-        :param y:
-        :param shift_std:
-        :return:
+        Data are first standardized (mean = 0, var = 1), and a random number drawn from a normal distribution
+        with mean 0 and standard deviation `shift_std` is added to each column.
+        The ith column is simply translated: `$x_i \leftarrow x_i + \epsilon_i$`, where `$\epsilon_i \sim \mathcal{N}(0, \sigma^2)$` and `$\sigma$` is `shift_std`.
+
+        :param X: Input features, a 2D array-like object (e.g., a Pandas DataFrame or a NumPy array).
+        :param y: Target variable, a 1D array-like object (optional). Not used in this implementation.
+        :param shift_std: Standard deviation of the normal distribution from which the random shifts are drawn.
+                          Can be a single float (applied to all columns) or an array of floats (one per column).
+        :return: A tuple containing the modified feature matrix `X'` and the original target `y`.
         """
         # normalize X
         scaler = StandardScaler()
@@ -77,20 +81,25 @@ class RandomShiftClassesGenerator(DriftGenerator):
 
     def __init__(self, random_generator=default_rng(seed=0)):
         """
-        :param random_generator: A random generator
+        Initialize the RandomShiftClassesGenerator.
+
+        :param random_generator: A NumPy random number generator used to generate random numbers.
+                                 Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
         """
         super().__init__(random_generator=random_generator)
 
     @preprocess_inputs
-    def generate(self, X, y, shift_std: Union[float,np.array] = 0.1):
+    def generate(self, X, y, shift_std: Union[float, np.array] = 0.1):
         """
         Randomly shift (geometrical translation) values of each class independently of one another.
-        Data are first standardized (mean = 0, var = 1) and
-        for each class a random number is added to all instances.
+        Data are first standardized (mean = 0, var = 1) and for each class a random number is added to all instances.
 
-        :param X:
-        :param y:
-        :param shift_std: The standard deviation of the amount of shift applied (shift is chosen from a normal distribution)
+        :param X: Input features, a 2D array-like object (e.g., a Pandas DataFrame or a NumPy array).
+        :param y: Target variable, a 1D array-like object representing the class labels.
+        :param shift_std: Standard deviation of the normal distribution from which the random shifts are drawn.
+                          Can be a single float (applied to all classes) or an array of floats (one per class).
+        :return: A tuple containing the modified feature matrix `X'` and the original target `y`.
         """
         # extract unique labels
         classes = np.unique(y)
 
@@ -16,32 +16,61 @@ class ImbalanceGenerator(GeneratorMixin):
 
     def __init__(self, random_generator=default_rng(seed=0)):
         """
-        :param random_generator: A random generator
+        Initialize the ImbalanceGenerator with a specified random number generator.
+
+        :param random_generator: A NumPy random number generator used to generate random numbers.
+                                 Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
         """
         self.random_generator = random_generator
 
     @abc.abstractmethod
     def generate(self, X, y=None, **params):
+        """
+        Abstract method to generate imbalanced data from the input data.
+        This method should be overridden by subclasses.
+
+        :param X: Input features, can be a pandas DataFrame or a numpy array.
+        :type X: Union[pandas.DataFrame, numpy.ndarray]
+        :param y: Target variable, can be a pandas Series or a numpy array.
+                  If None, it is assumed that the target is not provided.
+        :type y: Union[pandas.Series, numpy.ndarray, None], optional
+        :param params: Additional keyword arguments that might be required for specific implementations.
+        :type params: dict
+        """
         pass
 
 
 class RandomSamplingFeaturesGenerator(ImbalanceGenerator):
 
     def __init__(self, random_generator=default_rng(seed=0), ):
         """
-        :param random_generator: A random generator
+        Initialize the RandomSamplingFeaturesGenerator with a specified random number generator.
+        :param random_generator: A NumPy random number generator used to generate random numbers.
+                                 Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
         """
         super().__init__(random_generator=random_generator)
 
     @preprocess_inputs
     def generate(self, X, y=None, sampling_proba_func=lambda X: normalize_proba(X.iloc[:, 0])):
         """
-        Randomly samples instances based on the features values in X
-
-        :param X:
-        :param y:
-        :param sampling_proba_func: A function that takes as input data and returns a sampling probability
-        :return: Xt, yt
+        Randomly samples instances based on the feature values in X using a specified sampling probability function.
+
+        The sampling probability function is applied to the input features X to determine the probability of each instance being sampled.
+        By default, the first column of X is used to compute the normalized sampling probabilities.
+
+        :param X: Input features, can be a pandas DataFrame or a numpy array.
+        :type X: Union[pandas.DataFrame, numpy.ndarray]
+        :param y: Target variable, can be a pandas Series or a numpy array.
+                  If None, it is assumed that the target is not provided.
+        :type y: Union[pandas.Series, numpy.ndarray, None], optional
+        :param sampling_proba_func: A function that takes as input data (X) and returns a series of sampling probabilities.
+                                    The function should ensure that the probabilities are normalized.
+        :type sampling_proba_func: callable
+        :return: A tuple containing the sampled features (Xt) and the corresponding target values (yt).
+                 If y is None, the second element of the tuple (yt) is None as well.
+        :rtype: Tuple[Union[pandas.DataFrame, numpy.ndarray], Union[pandas.Series, numpy.ndarray, None]]
         """
         # total number of instances that will be missing
         # sampling
@@ -59,23 +88,31 @@ class RandomSamplingClassesGenerator(ImbalanceGenerator):
 
     def __init__(self, random_generator=default_rng(seed=0), ):
         """
+        Initialize the RandomSamplingClassesGenerator with a specified random number generator.
 
-        :param random_generator: A random generator
-
+        :param random_generator: A NumPy random number generator used to generate random numbers.
+                                 Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
         """
         super().__init__(random_generator=random_generator)
         self.transformed_labels_ = None
 
     @preprocess_inputs
     def generate(self, X, y, proportion_classes: dict = None):
         """
-        Randomly samples instances for each classes
-
-        :param X:
-        :param y:
-        :param proportion_classes: Example for having in total 50% of class 'A', 30% of class 'B', and 20% of class 'C'
-            proportion_classes={'A':0.5, 'B':0.3, 'C':0.2}
-        :return:
+        Randomly samples instances for each class based on the specified proportions.
+
+        :param X: Input features, can be a pandas DataFrame or a numpy array.
+        :type X: Union[pandas.DataFrame, numpy.ndarray]
+        :param y: Target variable, must be a pandas Series or a numpy array.
+        :type y: Union[pandas.Series, numpy.ndarray]
+        :param proportion_classes: A dictionary specifying the desired proportion of each class.
+                                   The keys are class labels and the values are the desired proportions.
+                                   For example, to have 50% of class 'A', 30% of class 'B', and 20% of class 'C',
+                                   use `proportion_classes={'A': 0.5, 'B': 0.3, 'C': 0.2}`.
+        :type proportion_classes: dict, optional
+        :return: A tuple containing the sampled features (Xt) and the corresponding target values (yt).
+        :rtype: Tuple[Union[pandas.DataFrame, numpy.ndarray], Union[pandas.Series, numpy.ndarray]]
         """
         # local variables
         Xt = []
@@ -103,21 +140,32 @@ class RandomSamplingTargetsGenerator(ImbalanceGenerator):
 
     def __init__(self, random_generator=default_rng(seed=0)):
         """
+        Initialize the RandomSamplingTargetsGenerator with a specified random number generator.
 
-        :param random_generator: A random generator
-        :param sampling_proba_func: A function that takes y as input and returns a sampling probability
+        :param random_generator: A NumPy random number generator used to generate random numbers.
+                                 Defaults to a default random number generator seeded with 0.
+        :type random_generator: numpy.random.Generator
         """
         super().__init__(random_generator=random_generator)
         self.transformed_labels_ = None
 
     @preprocess_inputs
     def generate(self, X, y, sampling_proba_func=lambda y: normalize_proba(y)):
         """
-        Randomly samples instances for each classes
-
-        :param X:
-        :param y:
-        :return:
+        Randomly samples instances based on the target values in y using a specified sampling probability function.
+
+        The sampling probability function is applied to the target values y to determine the probability of each instance being sampled.
+        By default, the target values are used to compute the normalized sampling probabilities.
+
+        :param X: Input features, can be a pandas DataFrame or a numpy array.
+        :type X: Union[pandas.DataFrame, numpy.ndarray]
+        :param y: Target variable, must be a pandas Series or a numpy array.
+        :type y: Union[pandas.Series, numpy.ndarray]
+        :param sampling_proba_func: A function that takes as input target values (y) and returns a series of sampling probabilities.
+                                    The function should ensure that the probabilities are normalized.
+        :type sampling_proba_func: callable
+        :return: A tuple containing the sampled features (Xt) and the corresponding target values (yt).
+        :rtype: Tuple[Union[pandas.DataFrame, numpy.ndarray], Union[pandas.Series, numpy.ndarray]]
         """
         sampling_probabilities_ = sampling_proba_func(y)
         sampling_mask = self.random_generator.choice(X.shape[0], p=sampling_probabilities_, size=X.shape[0],
-Original file line number
+Diff line change
@@ @@ -1,3 +1,3 @@ @@
 """
 -Module containing all transformers
 +Module containing all generators
 """