-
Notifications
You must be signed in to change notification settings - Fork 889
Description
Hi, this is my code, but I am receiving an error. Could you please help me with this error?
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool, EShapCalcType, EFeaturesSelectionAlgorithm
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
# Reproduction script for the error reported below: backward sequential
# feature selection (mlxtend) wrapped around an XGBoost regressor, run on a
# DataFrame that mixes float columns with pandas "category" columns.

# Generating numerical features
X, y = make_regression(n_samples=100, n_features=9, n_informative=2, noise=0.1, random_state=0)

# Generating categorical features
X_categorical = np.random.choice(['pooh', 'rabbit', 'piglet', 'Christopher'], size=(100, 2))

# Combine into a DataFrame
# The numeric and string arrays are stacked into a single array, so the
# numeric columns are explicitly converted back to numbers further down.
X_combined = np.hstack((X, X_categorical))
feature_names = ['F{}'.format(i) for i in range(X_combined.shape[1])]
df = pd.DataFrame(X_combined, columns=feature_names)

# F0..F8 are the numeric features; F9 and F10 are the two categorical ones.
df[['F9', 'F10']] = df[['F9', 'F10']].astype("category")
df.loc[:, ~df.columns.isin(['F9', 'F10'])] = df.loc[:, ~df.columns.isin(['F9', 'F10'])].astype("float")
num_col = df.columns.drop(['F9', 'F10'])
for column in num_col:
    df[column] = pd.to_numeric(df[column], errors='coerce')

X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.25, random_state=0)

# Baseline: fit XGBoost directly on the DataFrame with categorical support on.
import xgboost as xgb
clf = xgb.XGBRegressor(enable_categorical=True)
clf.fit(X_train, y_train)

# Backward sequential feature selection around a fresh XGBoost regressor.
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
import xgboost as xgb
clf = xgb.XGBRegressor(enable_categorical=True)
# NOTE(review): the traceback below was produced with n_jobs=-1 and cv=5,
# while this snippet uses n_jobs=1 and cv=0 -- confirm which settings were run.
# NOTE(review): the traceback shows the selector indexing its input as
# X[:, IDX], so the DataFrame is presumably handled as a plain array there and
# the "category" dtype is lost before XGBoost sees it -- confirm against the
# mlxtend source.
sfs1 = SFS(
    clf,
    k_features="best",
    forward=False,
    floating=False,
    verbose=2,
    scoring="neg_mean_absolute_error",
    clone_estimator=False,
    n_jobs=1,
    cv=0,
)
sfs1 = sfs1.fit(X_train, y_train)
And this is the error:
ValueError Traceback (most recent call last)
Cell In[47], line 13
3 clf = xgb.XGBRegressor( enable_categorical=True)
4 sfs1 = SFS(clf,
5 k_features="best",
6 forward=False,
(...)
10 n_jobs=-1,
11 cv=5)
---> 13 sfs1 = sfs1.fit(X_train, y_train)
File c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\mlxtend\feature_selection\sequential_feature_selector.py:518, in SequentialFeatureSelector.fit(self, X, y, groups, **fit_params)
516 k = len(k_idx)
517 if k > 0:
--> 518 k_idx, k_score = calc_score(
519 self,
520 X,
521 y,
522 k_idx,
523 groups=groups,
524 feature_groups=self.feature_groups_,
525 **fit_params,
526 )
527 self.subsets_[k] = {
528 "feature_idx": k_idx,
529 "cv_scores": k_score,
530 "avg_score": np.nanmean(k_score),
531 }
533 orig_set = set(range(self.k_ub))
File c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\mlxtend\feature_selection\utilities.py:98, in calc_score(selector, X, y, indices, groups, feature_groups, **fit_params)
96 IDX = merge_lists(feature_groups, indices)
97 if selector.cv:
---> 98 scores = cross_val_score(
99 selector.est,
100 X[:, IDX],
101 y,
102 groups=groups,
103 cv=selector.cv,
104 scoring=selector.scorer,
105 n_jobs=1,
106 pre_dispatch=selector.pre_dispatch,
107 fit_params=fit_params,
108 )
109 else:
110 selector.est.fit(X[:, IDX], y, **fit_params)
File c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
207 try:
208 with config_context(
209 skip_parameter_validation=(
210 prefer_skip_nested_validation or global_skip_validation
211 )
212 ):
--> 213 return func(*args, **kwargs)
214 except InvalidParameterError as e:
215 # When the function is just a wrapper around an estimator, we allow
216 # the function to delegate validation to the estimator, but we replace
217 # the name of the estimator by the name of the function in the error
218 # message to avoid confusion.
219 msg = re.sub(
220 r"parameter of \w+ must be",
221 f"parameter of {func.__qualname__} must be",
222 str(e),
223 )
File c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py:719, in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, params, pre_dispatch, error_score)
716 # To ensure multimetric format is not supported
717 scorer = check_scoring(estimator, scoring=scoring)
--> 719 cv_results = cross_validate(
720 estimator=estimator,
721 X=X,
722 y=y,
723 groups=groups,
724 scoring={"score": scorer},
725 cv=cv,
726 n_jobs=n_jobs,
727 verbose=verbose,
728 fit_params=fit_params,
729 params=params,
730 pre_dispatch=pre_dispatch,
731 error_score=error_score,
732 )
733 return cv_results["test_score"]
File c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
207 try:
208 with config_context(
209 skip_parameter_validation=(
210 prefer_skip_nested_validation or global_skip_validation
211 )
212 ):
--> 213 return func(*args, **kwargs)
214 except InvalidParameterError as e:
215 # When the function is just a wrapper around an estimator, we allow
216 # the function to delegate validation to the estimator, but we replace
217 # the name of the estimator by the name of the function in the error
218 # message to avoid confusion.
219 msg = re.sub(
220 r"parameter of \w+ must be",
221 f"parameter of {func.__qualname__} must be",
222 str(e),
223 )
File c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py:450, in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, params, pre_dispatch, return_train_score, return_estimator, return_indices, error_score)
429 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
430 results = parallel(
431 delayed(_fit_and_score)(
432 clone(estimator),
(...)
447 for train, test in indices
448 )
--> 450 _warn_or_raise_about_fit_failures(results, error_score)
452 # For callable scoring, the return type is only know after calling. If the
453 # return type is a dictionary, the error scores can now be inserted with
454 # the correct key.
455 if callable(scoring):
File c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py:536, in _warn_or_raise_about_fit_failures(results, error_score)
529 if num_failed_fits == num_fits:
530 all_fits_failed_message = (
531 f"\nAll the {num_fits} fits failed.\n"
532 "It is very likely that your model is misconfigured.\n"
533 "You can try to debug the error by setting error_score='raise'.\n\n"
534 f"Below are more details about the failures:\n{fit_errors_summary}"
535 )
--> 536 raise ValueError(all_fits_failed_message)
538 else:
539 some_fits_failed_message = (
540 f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n"
541 "The score on these train-test partitions for these parameters"
(...)
545 f"Below are more details about the failures:\n{fit_errors_summary}"
546 )
ValueError:
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.
Below are more details about the failures:
1 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
return func(**kwargs)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 1055, in fit
train_dmatrix, evals = _wrap_evaluation_matrices(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 521, in _wrap_evaluation_matrices
train_dmatrix = create_dmatrix(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 958, in _create_dmatrix
return QuantileDMatrix(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
return func(**kwargs)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 1529, in __init__
self._init(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 1588, in _init
it.reraise()
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 576, in reraise
raise exc # pylint: disable=raising-bad-type
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 557, in _handle_exception
return fn()
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 641, in <lambda>
return self._handle_exception(lambda: self.next(input_data), 0)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 1280, in next
input_data(**self.kwargs)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
return func(**kwargs)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 624, in input_data
new, cat_codes, feature_names, feature_types = _proxy_transform(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 1305, in _proxy_transform
data, _ = _ensure_np_dtype(data, data.dtype)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 176, in _ensure_np_dtype
data = data.astype(dtype, copy=False)
ValueError: could not convert string to float: 'piglet'
4 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
return func(**kwargs)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 1055, in fit
train_dmatrix, evals = _wrap_evaluation_matrices(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 521, in _wrap_evaluation_matrices
train_dmatrix = create_dmatrix(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 958, in _create_dmatrix
return QuantileDMatrix(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
return func(**kwargs)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 1529, in __init__
self._init(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 1588, in _init
it.reraise()
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 576, in reraise
raise exc # pylint: disable=raising-bad-type
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 557, in _handle_exception
return fn()
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 641, in <lambda>
return self._handle_exception(lambda: self.next(input_data), 0)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 1280, in next
input_data(**self.kwargs)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
return func(**kwargs)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 624, in input_data
new, cat_codes, feature_names, feature_types = _proxy_transform(
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 1305, in _proxy_transform
data, _ = _ensure_np_dtype(data, data.dtype)
File "c:\Users\Aligo\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 176, in _ensure_np_dtype
data = data.astype(dtype, copy=False)
ValueError: could not convert string to float: 'Christopher'
Why does this package not work with categorical features?