diff --git a/aeon/classification/dictionary_based/_redcomets.py b/aeon/classification/dictionary_based/_redcomets.py
index 4e782f2e5b..bdc19ff113 100644
--- a/aeon/classification/dictionary_based/_redcomets.py
+++ b/aeon/classification/dictionary_based/_redcomets.py
@@ -44,6 +44,17 @@ class REDCOMETS(BaseClassifier):
         If ``RandomState`` instance, ``random_state`` is the random number generator;
         If ``None``, the random number generator is the ``RandomState`` instance
         used by ``np.random``.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `predict`.
         ``-1`` means using all processors.
@@ -102,6 +113,7 @@ def __init__(
         perc_length=5,
         n_trees=100,
         random_state=None,
+        class_weight=None,
         n_jobs=1,
         parallel_backend=None,
     ):
@@ -114,6 +126,7 @@ def __init__(
         self.n_trees = n_trees
         self.random_state = random_state
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.parallel_backend = parallel_backend
@@ -260,6 +273,7 @@ def _build_univariate_ensemble(self, X, y):
             rf = RandomForestClassifier(
                 n_estimators=self.n_trees,
                 random_state=self.random_state,
+                class_weight=self.class_weight,
                 n_jobs=self.n_jobs,
             )
             rf.fit(X_sfa, y_smote)
@@ -285,6 +299,7 @@ def _build_univariate_ensemble(self, X, y):
             rf = RandomForestClassifier(
                 n_estimators=self.n_trees,
                 random_state=self.random_state,
+                class_weight=self.class_weight,
                 n_jobs=self.n_jobs,
             )
             rf.fit(X_sax, y_smote)
diff --git a/aeon/classification/early_classification/_probability_threshold.py b/aeon/classification/early_classification/_probability_threshold.py
index efce24c59f..c6f386bd7c 100644
--- a/aeon/classification/early_classification/_probability_threshold.py
+++ b/aeon/classification/early_classification/_probability_threshold.py
@@ -49,6 +49,17 @@ class ProbabilityThresholdEarlyClassifier(BaseEarlyClassifier):
         in the _classification_points List. Duplicate values will be removed, and the
         full series length will be appeneded if not present. If None, will use 20
         thresholds linearly spaces from 0 to the series length.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `predict`.
         ``-1`` means using all processors.
@@ -106,6 +117,7 @@ def __init__(
         probability_threshold=0.85,
         consecutive_predictions=1,
         classification_points=None,
+        class_weight=None,
         n_jobs=1,
         random_state=None,
     ):
@@ -114,6 +126,7 @@ def __init__(
         self.consecutive_predictions = consecutive_predictions
         self.classification_points = classification_points
 
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.random_state = random_state
diff --git a/aeon/classification/early_classification/_teaser.py b/aeon/classification/early_classification/_teaser.py
index 4df620eeb6..66b548716e 100644
--- a/aeon/classification/early_classification/_teaser.py
+++ b/aeon/classification/early_classification/_teaser.py
@@ -58,6 +58,17 @@ class TEASER(BaseEarlyClassifier):
         in the _classification_points List. Duplicate values will be removed, and the
         full series length will be appeneded if not present. If None, will use 20
         thresholds linearly spaces from 0 to the series length.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `predict`.
         ``-1`` means using all processors.
@@ -118,6 +129,7 @@ def __init__(
         one_class_classifier=None,
         one_class_param_grid=None,
         classification_points=None,
+        class_weight=None,
         n_jobs=1,
         random_state=None,
     ):
@@ -125,7 +137,8 @@ def __init__(
         self.one_class_classifier = one_class_classifier
         self.one_class_param_grid = one_class_param_grid
         self.classification_points = classification_points
-
+
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.random_state = random_state
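The parameter documentation added in the hunks above quotes the sklearn semantics. As an illustrative sketch (not part of this patch), the "balanced" formula can be reproduced with sklearn's public helper:

```python
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Imbalanced labels: 8 cases of class 0, 2 cases of class 1.
y = np.array([0] * 8 + [1] * 2)

# "balanced" weights follow n_samples / (n_classes * np.bincount(y)):
# class 0 -> 10 / (2 * 8) = 0.625, class 1 -> 10 / (2 * 2) = 2.5
print(compute_class_weight("balanced", classes=np.unique(y), y=y))
# [0.625 2.5  ]
```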
diff --git a/aeon/classification/feature_based/_catch22.py b/aeon/classification/feature_based/_catch22.py
index c99475cf33..8b82f0bf8c 100644
--- a/aeon/classification/feature_based/_catch22.py
+++ b/aeon/classification/feature_based/_catch22.py
@@ -59,6 +59,17 @@ class Catch22Classifier(BaseClassifier):
         If `RandomState` instance, random_state is the random number generator;
         If `None`, the random number generator is the `RandomState` instance used
         by `np.random`.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `predict`.
         ``-1`` means using all processors.
@@ -128,6 +139,7 @@ def __init__(
         use_pycatch22=False,
         estimator=None,
         random_state=None,
+        class_weight=None,
         n_jobs=1,
         parallel_backend=None,
     ):
@@ -138,6 +150,7 @@ def __init__(
         self.use_pycatch22 = use_pycatch22
         self.estimator = estimator
         self.random_state = random_state
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.parallel_backend = parallel_backend
@@ -167,6 +180,7 @@ def _fit(self, X, y):
             outlier_norm=self.outlier_norm,
             replace_nans=self.replace_nans,
             use_pycatch22=self.use_pycatch22,
+            class_weight=self.class_weight,
             n_jobs=self._n_jobs,
             parallel_backend=self.parallel_backend,
         )
diff --git a/aeon/classification/feature_based/_signature_classifier.py b/aeon/classification/feature_based/_signature_classifier.py
index 445efb7b40..6fd079824a 100644
--- a/aeon/classification/feature_based/_signature_classifier.py
+++ b/aeon/classification/feature_based/_signature_classifier.py
@@ -61,6 +61,17 @@ class SignatureClassifier(BaseClassifier):
         Signature truncation depth.
     random_state : int, default=None
         If `int`, random_state is the seed used by the random number generator;
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
 
     Attributes
     ----------
@@ -105,6 +116,7 @@ def __init__(
         sig_tfm="signature",
         depth=4,
         random_state=None,
+        class_weight=None,
     ):
         self.estimator = estimator
         self.augmentation_list = augmentation_list
@@ -116,6 +128,7 @@ def __init__(
         self.sig_tfm = sig_tfm
         self.depth = depth
         self.random_state = random_state
+        self.class_weight = class_weight
 
         super().__init__()
diff --git a/aeon/classification/feature_based/_summary.py b/aeon/classification/feature_based/_summary.py
index 4be24a0b7c..d73e1484f5 100644
--- a/aeon/classification/feature_based/_summary.py
+++ b/aeon/classification/feature_based/_summary.py
@@ -35,6 +35,17 @@ class SummaryClassifier(BaseClassifier):
     estimator : sklearn classifier, default=None
         An sklearn estimator to be built using the transformed data.
         Defaults to a Random Forest with 200 trees.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `predict`.
         ``-1`` means using all processors.
@@ -79,12 +90,14 @@ def __init__(
         self,
         summary_stats="default",
         estimator=None,
+        class_weight=None,
         n_jobs=1,
         random_state=None,
     ):
         self.summary_stats = summary_stats
         self.estimator = estimator
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.random_state = random_state
diff --git a/aeon/classification/feature_based/_tsfresh.py b/aeon/classification/feature_based/_tsfresh.py
index fcb9159f2b..b87479c410 100644
--- a/aeon/classification/feature_based/_tsfresh.py
+++ b/aeon/classification/feature_based/_tsfresh.py
@@ -38,6 +38,17 @@ class TSFreshClassifier(BaseClassifier):
         Random Forest with 200 trees.
     verbose : int, default=0
         Level of output printed to the console (for information only).
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `predict`.
         ``-1`` means using all processors.
@@ -84,6 +95,7 @@ def __init__(
         relevant_feature_extractor=True,
         estimator=None,
         verbose=0,
+        class_weight=None,
         n_jobs=1,
         chunksize=None,
         random_state=None,
@@ -93,6 +105,7 @@ def __init__(
         self.estimator = estimator
         self.verbose = verbose
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.chunksize = chunksize
         self.random_state = random_state
@@ -127,12 +140,14 @@ def _fit(self, X, y):
         self._transformer = (
             TSFreshRelevantFeatureExtractor(
                 default_fc_parameters=self.default_fc_parameters,
+                class_weight=self.class_weight,
                 n_jobs=self._n_jobs,
                 chunksize=self.chunksize,
             )
             if self.relevant_feature_extractor
             else TSFreshFeatureExtractor(
                 default_fc_parameters=self.default_fc_parameters,
+                class_weight=self.class_weight,
                 n_jobs=self._n_jobs,
                 chunksize=self.chunksize,
             )
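With the change applied, the feature-based classifiers above expose the keyword directly. Below is a minimal usage sketch, assuming this branch is installed and that the keyword reaches the underlying estimator as the docstrings describe; the synthetic data and seed are arbitrary:

```python
import numpy as np
from aeon.classification.feature_based import Catch22Classifier

rng = np.random.default_rng(0)

# Toy imbalanced collection: 20 univariate series of length 50 (16 vs 4 cases).
X = rng.normal(size=(20, 1, 50))
y = np.array([0] * 16 + [1] * 4)

# class_weight accepts the same values as sklearn's forests; "balanced"
# up-weights the minority class during fitting.
clf = Catch22Classifier(class_weight="balanced", random_state=0)
clf.fit(X, y)
print(clf.predict(X[:2]))
```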
diff --git a/aeon/classification/hybrid/_rist.py b/aeon/classification/hybrid/_rist.py
index b991ba4f18..5d303abb74 100644
--- a/aeon/classification/hybrid/_rist.py
+++ b/aeon/classification/hybrid/_rist.py
@@ -60,6 +60,17 @@ class RISTClassifier(BaseRIST, BaseClassifier):
         If `RandomState` instance, random_state is the random number generator;
         If `None`, the random number generator is the `RandomState` instance used
         by `np.random`.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `predict`.
         ``-1`` means using all processors.
@@ -110,6 +121,7 @@ def __init__(
         use_pycatch22=False,
         use_pyfftw=False,
         estimator=None,
+        class_weight=None,
         n_jobs=1,
         random_state=None,
     ):
@@ -129,6 +141,7 @@ def __init__(
             use_pyfftw=use_pyfftw,
             estimator=estimator,
             random_state=random_state,
+            class_weight=class_weight,
             n_jobs=n_jobs,
         )
diff --git a/aeon/classification/interval_based/_interval_pipelines.py b/aeon/classification/interval_based/_interval_pipelines.py
index dc514f352d..f624de2af1 100644
--- a/aeon/classification/interval_based/_interval_pipelines.py
+++ b/aeon/classification/interval_based/_interval_pipelines.py
@@ -53,6 +53,17 @@ class RandomIntervalClassifier(BaseClassifier):
         Seed or RandomState object used for random number generation.
         If random_state is None, use the RandomState singleton used by np.random.
         If random_state is an int, use a new RandomState instance seeded with seed.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `transform`
         functions. `-1` means using all processors.
@@ -111,6 +122,7 @@ def __init__(
         features=None,
         dilation=None,
         estimator=None,
+        class_weight=None,
         n_jobs=1,
         random_state=None,
         parallel_backend=None,
     ):
@@ -122,6 +134,7 @@ def __init__(
         self.dilation = dilation
         self.estimator = estimator
         self.random_state = random_state
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.parallel_backend = parallel_backend
@@ -151,6 +164,7 @@ def _fit(self, X, y):
             features=self.features,
             dilation=self.dilation,
             random_state=self.random_state,
+            class_weight=self.class_weight,
             n_jobs=self._n_jobs,
             parallel_backend=self.parallel_backend,
         )
@@ -290,6 +304,17 @@ class SupervisedIntervalClassifier(BaseClassifier):
         Seed or RandomState object used for random number generation.
         If random_state is None, use the RandomState singleton used by np.random.
         If random_state is an int, use a new RandomState instance seeded with seed.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `transform`
         functions. `-1` means using all processors.
@@ -350,6 +375,7 @@ def __init__(
         normalise_for_search=True,
         estimator=None,
         random_state=None,
+        class_weight=None,
         n_jobs=1,
         parallel_backend=None,
     ):
@@ -361,6 +387,7 @@ def __init__(
         self.normalise_for_search = normalise_for_search
         self.estimator = estimator
         self.random_state = random_state
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.parallel_backend = parallel_backend
@@ -391,6 +418,7 @@ def _fit(self, X, y):
             randomised_split_point=self.randomised_split_point,
             normalise_for_search=self.normalise_for_search,
             random_state=self.random_state,
+            class_weight=self.class_weight,
             n_jobs=self.n_jobs,
             parallel_backend=self.parallel_backend,
         )
diff --git a/aeon/classification/interval_based/_rstsf.py b/aeon/classification/interval_based/_rstsf.py
index 34c77cefdc..fa82997bc2 100644
--- a/aeon/classification/interval_based/_rstsf.py
+++ b/aeon/classification/interval_based/_rstsf.py
@@ -45,6 +45,17 @@ class RSTSF(BaseClassifier):
         Seed or RandomState object used for random number generation.
         If random_state is None, use the RandomState singleton used by np.random.
         If random_state is an int, use a new RandomState instance seeded with seed.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both `fit` and `predict`
         functions. `-1` means using all processors.
@@ -86,6 +97,7 @@ def __init__(
         min_interval_length=3,
         use_pyfftw=False,
         random_state=None,
+        class_weight=None,
         n_jobs=1,
     ):
         self.n_estimators = n_estimators
@@ -93,6 +105,7 @@ def __init__(
         self.min_interval_length = min_interval_length
         self.use_pyfftw = use_pyfftw
         self.random_state = random_state
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
 
         super().__init__()
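The values accepted here are exactly sklearn's, so the explicit dict form works as well wherever the parameter ends up on a forest. The sketch below is plain sklearn, independent of this patch:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 8))
y = np.array([0] * 90 + [1] * 10)

# Dict form: make errors on the rare class 1 cost ten times as much.
# "balanced_subsample" instead recomputes "balanced" weights per bootstrap.
rf = RandomForestClassifier(n_estimators=100, class_weight={0: 1.0, 1: 10.0})
rf.fit(X, y)
```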
diff --git a/aeon/classification/shapelet_based/_rsast.py b/aeon/classification/shapelet_based/_rsast.py
index c1623dc7c3..50cd3bf147 100644
--- a/aeon/classification/shapelet_based/_rsast.py
+++ b/aeon/classification/shapelet_based/_rsast.py
@@ -34,6 +34,17 @@ class RSASTClassifier(BaseClassifier):
         the seed of the random generator
     classifier : sklearn compatible classifier, default = None
         if None, a RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)) is used.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default -1
         Number of threads to use for the transform.
@@ -70,12 +81,14 @@ def __init__(
         nb_inst_per_class=10,
         seed=None,
         classifier=None,
+        class_weight=None,
         n_jobs=-1,
     ):
         super().__init__()
         self.n_random_points = n_random_points
         self.len_method = len_method
         self.nb_inst_per_class = nb_inst_per_class
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.seed = seed
         self.classifier = classifier
@@ -101,12 +114,15 @@ def _fit(self, X, y):
             self.len_method,
             self.nb_inst_per_class,
             self.seed,
+            self.class_weight,
             self.n_jobs,
         )
 
         self._classifier = _clone_estimator(
             (
-                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
+                RidgeClassifierCV(
+                    alphas=np.logspace(-3, 3, 10), class_weight=self.class_weight
+                )
                 if self.classifier is None
                 else self.classifier
             ),
diff --git a/aeon/classification/shapelet_based/_sast.py b/aeon/classification/shapelet_based/_sast.py
index e12bf8110b..756c20674a 100644
--- a/aeon/classification/shapelet_based/_sast.py
+++ b/aeon/classification/shapelet_based/_sast.py
@@ -34,6 +34,17 @@ class SASTClassifier(BaseClassifier):
         the seed of the random generator
     classifier : sklearn compatible classifier, default = None
         if None, a RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)) is used.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default -1
         Number of threads to use for the transform.
@@ -71,12 +82,14 @@ def __init__(
         nb_inst_per_class: int = 1,
         seed: int = None,
         classifier=None,
+        class_weight=None,
         n_jobs: int = -1,
     ) -> None:
         super().__init__()
         self.length_list = length_list
         self.stride = stride
         self.nb_inst_per_class = nb_inst_per_class
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.seed = seed
@@ -103,12 +116,15 @@ def _fit(self, X, y):
             self.stride,
             self.nb_inst_per_class,
             self.seed,
+            self.class_weight,
             self.n_jobs,
         )
 
         self._classifier = _clone_estimator(
             (
-                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
+                RidgeClassifierCV(
+                    alphas=np.logspace(-3, 3, 10), class_weight=self.class_weight
+                )
                 if self.classifier is None
                 else self.classifier
             ),
diff --git a/aeon/classification/sklearn/_rotation_forest_classifier.py b/aeon/classification/sklearn/_rotation_forest_classifier.py
index 37142c723a..fe7d11d716 100644
--- a/aeon/classification/sklearn/_rotation_forest_classifier.py
+++ b/aeon/classification/sklearn/_rotation_forest_classifier.py
@@ -56,6 +56,17 @@ class RotationForestClassifier(ClassifierMixin, BaseEstimator):
         Default of `0` means ``n_estimators`` is used.
     contract_max_n_estimators : int, default=500
         Max number of estimators to build when ``time_limit_in_minutes`` is set.
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
+        From the sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        ``n_samples / (n_classes * np.bincount(y))``.
+        The "balanced_subsample" mode is the same as "balanced", except that
+        weights are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
     n_jobs : int, default=1
         The number of jobs to run in parallel for both ``fit`` and ``predict``.
         `-1` means using all processors.
@@ -110,6 +121,7 @@ def __init__(
         pca_solver: str = "auto",
         time_limit_in_minutes: float = 0.0,
         contract_max_n_estimators: int = 500,
+        class_weight: Union[str, dict, None] = None,
         n_jobs: int = 1,
         random_state: Union[int, Type[np.random.RandomState], None] = None,
     ):
@@ -121,6 +133,7 @@ def __init__(
         self.pca_solver = pca_solver
         self.time_limit_in_minutes = time_limit_in_minutes
         self.contract_max_n_estimators = contract_max_n_estimators
+        self.class_weight = class_weight
         self.n_jobs = n_jobs
         self.random_state = random_state
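Finally, a reviewer-style smoke check (not part of the patch, and assuming this branch is importable) that the new keyword is registered as a constructor parameter, so sklearn-style get_params and clone can see it:

```python
# The new keyword should be visible via get_params on the touched estimators.
from aeon.classification.dictionary_based import REDCOMETS
from aeon.classification.feature_based import Catch22Classifier, SummaryClassifier
from aeon.classification.sklearn import RotationForestClassifier

for est in [
    REDCOMETS(),
    Catch22Classifier(),
    SummaryClassifier(),
    RotationForestClassifier(),
]:
    assert "class_weight" in est.get_params(), type(est).__name__
```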