Description
I have a net that does some string processing as its first step. It works with the deprecated wrapper, but not with this (very nice looking) library.
model = Sequential(
[
layers.Input(shape=(1,), dtype=tf.string),
text_vectorizer,
layers.Embedding(max_features + 1, 50 * multiplier),
layers.Dropout(.1),
layers.GlobalAveragePooling1D(),
layers.Dropout(.1),
layers.Dense(20, kernel_initializer=initializers.random_uniform, activation=activations.swish),
]
)
Traceback
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File ~/Library/Caches/pypoetry/virtualenvs/tensorflow-part-2-JIKxCiSF-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:787, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
786 try:
--> 787 array = array.astype(np.float64)
788 except ValueError as e:
ValueError: could not convert string to float: 'two fires american indians civil war laurence hauptman reveals several hundred thousand indians affected civil war twenty thousand indians enlisted sides attempt gain legitimacy autonomy simply land'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Input In [28], in
9 classifier = KerasClassifier(X=feature.values.astype(np.str_), y=label.values, model=make_model, batch_size=-1, validation_split=.2, verbose=1, sample_weight=1, )
10 grid = GridSearchCV(
11 estimator=classifier,
12 param_grid={},
13 verbose=1,
14 )
---> 15 grid_result = grid.fit(feature.values.astype(np.str_), label.values, callbacks=[callback], verbose=1)
16 history = grid_result.history_
18 grid_result
File ~/Library/Caches/pypoetry/virtualenvs/tensorflow-part-2-JIKxCiSF-py3.10/lib/python3.10/site-packages/sklearn/model_selection/search.py:926, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
924 refit_start_time = time.time()
925 if y is not None:
--> 926 self.best_estimator_.fit(X, y, **fit_params)
927 else:
928 self.best_estimator_.fit(X, **fit_params)
File ~/Library/Caches/pypoetry/virtualenvs/tensorflow-part-2-JIKxCiSF-py3.10/lib/python3.10/site-packages/scikeras/wrappers.py:523, in BaseWrapper.fit(self, X, y, sample_weight, warm_start, **kwargs)
520 else:
521 # No warm start requested
522 reset = True
--> 523 X, y = self._validate_data(X=X, y=y, reset=reset)
525 if sample_weight is not None:
526 sample_weight = _check_sample_weight(
527 sample_weight, X, dtype=["float64", "int"]
528 )
File ~/Library/Caches/pypoetry/virtualenvs/tensorflow-part-2-JIKxCiSF-py3.10/lib/python3.10/site-packages/scikeras/wrappers.py:383, in BaseWrapper._validate_data(self, X, y, reset)
363 """Validate input data and set or check the `n_features_in_` attribute.
364 Parameters
365 ----------
(...)
380 The validated input. A tuple is returned if `y` is not None.
381 """
382 if y is not None:
--> 383 X, y = check_X_y(
384 X,
385 y,
386 allow_nd=True, # allow X to have more than 2 dimensions
387 multi_output=True, # allow y to be 2D
388 )
389 X = check_array(X, allow_nd=True, dtype=["float64", "int"])
391 n_features = X.shape[1]
File ~/Library/Caches/pypoetry/virtualenvs/tensorflow-part-2-JIKxCiSF-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:964, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
961 if y is None:
962 raise ValueError("y cannot be None")
--> 964 X = check_array(
965 X,
966 accept_sparse=accept_sparse,
967 accept_large_sparse=accept_large_sparse,
968 dtype=dtype,
969 order=order,
970 copy=copy,
971 force_all_finite=force_all_finite,
972 ensure_2d=ensure_2d,
973 allow_nd=allow_nd,
974 ensure_min_samples=ensure_min_samples,
975 ensure_min_features=ensure_min_features,
976 estimator=estimator,
977 )
979 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric)
981 check_consistent_length(X, y)
File ~/Library/Caches/pypoetry/virtualenvs/tensorflow-part-2-JIKxCiSF-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:789, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
787 array = array.astype(np.float64)
788 except ValueError as e:
--> 789 raise ValueError(
790 "Unable to convert array of bytes/strings "
791 "into decimal numbers with dtype='numeric'"
792 ) from e
793 if not allow_nd and array.ndim >= 3:
794 raise ValueError(
795 "Found array with dim %d. %s expected <= 2."
796 % (array.ndim, estimator_name)
797 )
ValueError: Unable to convert array of bytes/strings into decimal numbers with dtype='numeric'
There's also a bunch of related warnings:
/Users/billie/Library/Caches/pypoetry/virtualenvs/tensorflow-part-2-JIKxCiSF-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:964: FutureWarning: Arrays of bytes/strings is being converted to decimal numbers if dtype='numeric'. This behavior is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Please convert your data to numeric values explicitly instead.
X = check_array(
/Users/billie/Library/Caches/pypoetry/virtualenvs/tensorflow-part-2-JIKxCiSF-py3.10/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:372: FitFailedWarning:
5 fits failed out of a total of 5.
Version
scikeras 0.6.0 Scikit-Learn API wrapper for Keras.