@@ -427,7 +427,13 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame:
427427 data [group_name ] = self .transform_values (name , data [name ], inverse = False , group_id = True )
428428
429429 # encode categoricals
430- for name in set (self .group_ids + self .categoricals ):
430+ if isinstance (
431+ self .target_normalizer , GroupNormalizer
432+ ): # if we use a group normalizer, group_ids must be encoded as well
433+ group_ids_to_encode = self .group_ids
434+ else :
435+ group_ids_to_encode = []
436+ for name in set (group_ids_to_encode + self .categoricals ):
431437 allow_nans = name in self .dropout_categoricals
432438 if name in self .variable_groups : # fit groups
433439 columns = self .variable_groups [name ]
@@ -452,7 +458,7 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame:
452458 self .categorical_encoders [name ] = self .categorical_encoders [name ].fit (data [name ])
453459
454460 # encode them
455- for name in set (self . group_ids + self .flat_categoricals ):
461+ for name in set (group_ids_to_encode + self .flat_categoricals ):
456462 data [name ] = self .transform_values (name , data [name ], inverse = False )
457463
458464 # save special variables
@@ -494,6 +500,10 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame:
494500 data [self .target ], scales = self .target_normalizer .transform (data [self .target ], data , return_norm = True )
495501 elif isinstance (self .target_normalizer , NaNLabelEncoder ):
496502 data [self .target ] = self .target_normalizer .transform (data [self .target ])
503+ data ["__target__" ] = data [
504+ self .target
505+ ] # overwrite target because it requires encoding (continuous targets should not be normalized)
506+ scales = "no target scales available for categorical target"
497507 else :
498508 data [self .target ], scales = self .target_normalizer .transform (data [self .target ], return_norm = True )
499509
@@ -510,6 +520,8 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame:
510520
511521 if self .target in self .reals :
512522 self .scalers [self .target ] = self .target_normalizer
523+ else :
524+ self .categorical_encoders [self .target ] = self .target_normalizer
513525
514526 # rescale continuous variables apart from target
515527 for name in self .reals :
@@ -830,8 +842,8 @@ def _construct_index(self, data: pd.DataFrame, predict_mode: bool) -> pd.DataFra
830842 if not group_ids .isin (df_index .group_id ).all ():
831843 missing_groups = data .loc [~ group_ids .isin (df_index .group_id ), self ._group_ids ].drop_duplicates ()
832844 # decode values
833- for name in missing_groups . columns :
834- missing_groups [name ] = self .transform_values (name , missing_groups [name ], inverse = True , group_id = True )
845+ for name , id in self . _group_ids_mapping . items () :
846+ missing_groups [id ] = self .transform_values (name , missing_groups [id ], inverse = True , group_id = True )
835847 warnings .warn (
836848 "Min encoder length and/or min_prediction_idx and/or min prediction length is too large for "
837849 f"{ len (missing_groups )} series/groups which therefore are not present in the dataset index. "
0 commit comments