@@ -16,32 +16,61 @@ class ImbalanceGenerator(GeneratorMixin):
1616
1717 def __init__ (self , random_generator = default_rng (seed = 0 )):
1818 """
19- :param random_generator: A random generator
19+ Initialize the ImbalanceGenerator with a specified random number generator.
20+
21+ :param random_generator: A NumPy random number generator used to generate random numbers.
22+ Defaults to a generator seeded with 0 that is created once, at function definition time, and is therefore shared by every instance that relies on the default.
23+ :type random_generator: numpy.random.Generator
2024 """
2125 self .random_generator = random_generator
2226
2327 @abc .abstractmethod
2428 def generate (self , X , y = None , ** params ):
29+ """
30+ Abstract method to generate imbalanced data from the input data.
31+ This should be overridden by every concrete subclass.
32+
33+ :param X: Input features, can be a pandas DataFrame or a numpy array.
34+ :type X: Union[pandas.DataFrame, numpy.ndarray]
35+ :param y: Target variable, can be a pandas Series or a numpy array.
36+ If None, it is assumed that the target is not provided.
37+ :type y: Union[pandas.Series, numpy.ndarray, None], optional
38+ :param params: Additional keyword arguments that might be required for specific implementations.
39+ :type params: dict
40+ """
2541 pass
2642
2743
2844class RandomSamplingFeaturesGenerator (ImbalanceGenerator ):
2945
3046 def __init__ (self , random_generator = default_rng (seed = 0 ), ):
3147 """
32- :param random_generator: A random generator
48+ Initialize the RandomSamplingFeaturesGenerator with a specified random number generator.
49+ :param random_generator: A NumPy random number generator used to generate random numbers.
50+ Defaults to a generator seeded with 0 that is created once, at function definition time, and is therefore shared by every instance that relies on the default.
51+ :type random_generator: numpy.random.Generator
3352 """
3453 super ().__init__ (random_generator = random_generator )
3554
3655 @preprocess_inputs
3756 def generate (self , X , y = None , sampling_proba_func = lambda X : normalize_proba (X .iloc [:, 0 ])):
3857 """
39- Randomly samples instances based on the features values in X
40-
41- :param X:
42- :param y:
43- :param sampling_proba_func: A function that takes as input data and returns a sampling probability
44- :return: Xt, yt
58+ Randomly samples instances based on the feature values in X using a specified sampling probability function.
59+
60+ The sampling probability function is applied to the input features X to determine the probability of each instance being sampled.
61+ By default, the first column of X is used to compute the normalized sampling probabilities.
62+
63+ :param X: Input features, can be a pandas DataFrame or a numpy array.
64+ :type X: Union[pandas.DataFrame, numpy.ndarray]
65+ :param y: Target variable, can be a pandas Series or a numpy array.
66+ If None, it is assumed that the target is not provided.
67+ :type y: Union[pandas.Series, numpy.ndarray, None], optional
68+ :param sampling_proba_func: A function that takes as input data (X) and returns a series of sampling probabilities.
69+ The function should ensure that the probabilities are normalized.
70+ :type sampling_proba_func: callable
71+ :return: A tuple containing the sampled features (Xt) and the corresponding target values (yt).
72+ If y is None, only the sampled features (Xt) are returned.
73+ :rtype: Tuple[Union[pandas.DataFrame, numpy.ndarray], Union[pandas.Series, numpy.ndarray, None]]
4574 """
4675 # total number of instances that will be missing
4776 # sampling
@@ -59,23 +88,31 @@ class RandomSamplingClassesGenerator(ImbalanceGenerator):
5988
6089 def __init__ (self , random_generator = default_rng (seed = 0 ), ):
6190 """
91+ Initialize the RandomSamplingClassesGenerator with a specified random number generator.
6292
63- :param random_generator: A random generator
64-
93+ :param random_generator: A NumPy random number generator used to generate random numbers.
94+ Defaults to a generator seeded with 0 that is created once, at function definition time, and is therefore shared by every instance that relies on the default.
95+ :type random_generator: numpy.random.Generator
6596 """
6697 super ().__init__ (random_generator = random_generator )
6798 self .transformed_labels_ = None
6899
69100 @preprocess_inputs
70101 def generate (self , X , y , proportion_classes : dict = None ):
71102 """
72- Randomly samples instances for each classes
73-
74- :param X:
75- :param y:
76- :param proportion_classes: Example for having in total 50% of class 'A', 30% of class 'B', and 20% of class 'C'
77- proportion_classes={'A':0.5, 'B':0.3, 'C':0.2}
78- :return:
103+ Randomly samples instances for each class based on the specified proportions.
104+
105+ :param X: Input features, can be a pandas DataFrame or a numpy array.
106+ :type X: Union[pandas.DataFrame, numpy.ndarray]
107+ :param y: Target variable, must be a pandas Series or a numpy array.
108+ :type y: Union[pandas.Series, numpy.ndarray]
109+ :param proportion_classes: A dictionary specifying the desired proportion of each class.
110+ The keys are class labels and the values are the desired proportions.
111+ For example, to have 50% of class 'A', 30% of class 'B', and 20% of class 'C',
112+ use `proportion_classes={'A': 0.5, 'B': 0.3, 'C': 0.2}`.
113+ :type proportion_classes: dict, optional
114+ :return: A tuple containing the sampled features (Xt) and the corresponding target values (yt).
115+ :rtype: Tuple[Union[pandas.DataFrame, numpy.ndarray], Union[pandas.Series, numpy.ndarray]]
79116 """
80117 # local variables
81118 Xt = []
@@ -103,21 +140,32 @@ class RandomSamplingTargetsGenerator(ImbalanceGenerator):
103140
104141 def __init__ (self , random_generator = default_rng (seed = 0 )):
105142 """
143+ Initialize the RandomSamplingTargetsGenerator with a specified random number generator.
106144
107- :param random_generator: A random generator
108- :param sampling_proba_func: A function that takes y as input and returns a sampling probability
145+ :param random_generator: A NumPy random number generator used to generate random numbers.
146+ Defaults to a generator seeded with 0 that is created once, at function definition time, and is therefore shared by every instance that relies on the default.
147+ :type random_generator: numpy.random.Generator
109148 """
110149 super ().__init__ (random_generator = random_generator )
111150 self .transformed_labels_ = None
112151
113152 @preprocess_inputs
114153 def generate (self , X , y , sampling_proba_func = lambda y : normalize_proba (y )):
115154 """
116- Randomly samples instances for each classes
117-
118- :param X:
119- :param y:
120- :return:
155+ Randomly samples instances based on the target values in y using a specified sampling probability function.
156+
157+ The sampling probability function is applied to the target values y to determine the probability of each instance being sampled.
158+ By default, the target values are passed to normalize_proba and the result is used as the sampling probabilities.
159+
160+ :param X: Input features, can be a pandas DataFrame or a numpy array.
161+ :type X: Union[pandas.DataFrame, numpy.ndarray]
162+ :param y: Target variable, must be a pandas Series or a numpy array.
163+ :type y: Union[pandas.Series, numpy.ndarray]
164+ :param sampling_proba_func: A function that takes as input target values (y) and returns a series of sampling probabilities.
165+ The returned probabilities must sum to 1, because they are passed as `p` to the random generator's `choice`.
166+ :type sampling_proba_func: callable
167+ :return: A tuple containing the sampled features (Xt) and the corresponding target values (yt).
168+ :rtype: Tuple[Union[pandas.DataFrame, numpy.ndarray], Union[pandas.Series, numpy.ndarray]]
121169 """
122170 sampling_probabilities_ = sampling_proba_func (y )
123171 sampling_mask = self .random_generator .choice (X .shape [0 ], p = sampling_probabilities_ , size = X .shape [0 ],
0 commit comments