Open
Description
I may have found a bug related to how IbisML integrates with grid search from sklearn, but it could be that this is out of scope of the project.
import ibis_ml as iml
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
tfm = iml.Recipe(
iml.Cast(iml.numeric(), "float32")
)
pipe = make_pipeline(tfm, GradientBoostingRegressor())
GridSearchCV(pipe, param_grid={}, cv=5).fit(X_train, y_train).cv_results_
This gave me this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File <timed exec>:7
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/base.py:1473](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/base.py#line=1472), in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1466 estimator._validate_params()
1468 with config_context(
1469 skip_parameter_validation=(
1470 prefer_skip_nested_validation or global_skip_validation
1471 )
1472 ):
-> 1473 return fit_method(estimator, *args, **kwargs)
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/model_selection/_search.py:968](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/model_selection/_search.py#line=967), in BaseSearchCV.fit(self, X, y, **params)
962 results = self._format_results(
963 all_candidate_params, n_splits, all_out, all_more_results
964 )
966 return results
--> 968 self._run_search(evaluate_candidates)
970 # multimetric is determined here because in the case of a callable
971 # self.scoring the return type is only known after calling
972 first_test_score = all_out[0]["test_scores"]
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/model_selection/_search.py:1543](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/model_selection/_search.py#line=1542), in GridSearchCV._run_search(self, evaluate_candidates)
1541 def _run_search(self, evaluate_candidates):
1542 """Search all candidates in param_grid"""
-> 1543 evaluate_candidates(ParameterGrid(self.param_grid))
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/model_selection/_search.py:914](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/model_selection/_search.py#line=913), in BaseSearchCV.fit.<locals>.evaluate_candidates(candidate_params, cv, more_results)
906 if self.verbose > 0:
907 print(
908 "Fitting {0} folds for each of {1} candidates,"
909 " totalling {2} fits".format(
910 n_splits, n_candidates, n_candidates * n_splits
911 )
912 )
--> 914 out = parallel(
915 delayed(_fit_and_score)(
916 clone(base_estimator),
917 X,
918 y,
919 train=train,
920 test=test,
921 parameters=parameters,
922 split_progress=(split_idx, n_splits),
923 candidate_progress=(cand_idx, n_candidates),
924 **fit_and_score_kwargs,
925 )
926 for (cand_idx, parameters), (split_idx, (train, test)) in product(
927 enumerate(candidate_params),
928 enumerate(cv.split(X, y, **routed_params.splitter.split)),
929 )
930 )
932 if len(out) < 1:
933 raise ValueError(
934 "No fits were performed. "
935 "Was the CV iterator empty? "
936 "Were there no candidates?"
937 )
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/parallel.py:67](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/parallel.py#line=66), in Parallel.__call__(self, iterable)
62 config = get_config()
63 iterable_with_config = (
64 (_with_config(delayed_func, config), args, kwargs)
65 for delayed_func, args, kwargs in iterable
66 )
---> 67 return super().__call__(iterable_with_config)
File [~/Development/probabl/venv/lib/python3.11/site-packages/joblib/parallel.py:1863](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/joblib/parallel.py#line=1862), in Parallel.__call__(self, iterable)
1861 output = self._get_sequential_output(iterable)
1862 next(output)
-> 1863 return output if self.return_generator else list(output)
1865 # Let's create an ID that uniquely identifies the current call. If the
1866 # call is interrupted early and that the same instance is immediately
1867 # re-used, this id will be used to prevent workers that were
1868 # concurrently finalizing a task from the previous call to run the
1869 # callback.
1870 with self._lock:
File [~/Development/probabl/venv/lib/python3.11/site-packages/joblib/parallel.py:1792](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/joblib/parallel.py#line=1791), in Parallel._get_sequential_output(self, iterable)
1790 self.n_dispatched_batches += 1
1791 self.n_dispatched_tasks += 1
-> 1792 res = func(*args, **kwargs)
1793 self.n_completed_tasks += 1
1794 self.print_progress()
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/parallel.py:129](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/parallel.py#line=128), in _FuncWrapper.__call__(self, *args, **kwargs)
127 config = {}
128 with config_context(**config):
--> 129 return self.function(*args, **kwargs)
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py:880](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py#line=879), in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, score_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
876 estimator = estimator.set_params(**clone(parameters, safe=False))
878 start_time = time.time()
--> 880 X_train, y_train = _safe_split(estimator, X, y, train)
881 X_test, y_test = _safe_split(estimator, X, y, test, train)
883 result = {}
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/metaestimators.py:161](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/metaestimators.py#line=160), in _safe_split(estimator, X, y, indices, train_indices)
158 X_subset = _safe_indexing(X, indices)
160 if y is not None:
--> 161 y_subset = _safe_indexing(y, indices)
162 else:
163 y_subset = None
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/_indexing.py:269](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/_indexing.py#line=268), in _safe_indexing(X, indices, axis)
267 return _array_indexing(X, indices, indices_dtype, axis=axis)
268 else:
--> 269 return _list_indexing(X, indices, indices_dtype)
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/_indexing.py:60](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/_indexing.py#line=59), in _list_indexing(X, key, key_dtype)
58 return list(compress(X, key))
59 # key is a integer array-like of key
---> 60 return [X[idx] for idx in key]
File [~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/_indexing.py:60](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/sklearn/utils/_indexing.py#line=59), in <listcomp>(.0)
58 return list(compress(X, key))
59 # key is a integer array-like of key
---> 60 return [X[idx] for idx in key]
File [~/Development/probabl/venv/lib/python3.11/site-packages/ibis/expr/types/generic.py:1374](http://localhost:8888/lab/tree/~/Development/probabl/venv/lib/python3.11/site-packages/ibis/expr/types/generic.py#line=1373), in Column.__getitem__(self, _)
1373 def __getitem__(self, _):
-> 1374 raise TypeError(
1375 f"{self.__class__.__name__!r} is not subscriptable: "
1376 "see https://ibis-project.org/tutorial/ibis-for-pandas-users/#ibis-for-pandas-users for details."
1377 )
TypeError: 'IntegerColumn' is not subscriptable: see https://ibis-project.org/tutorial/ibis-for-pandas-users/#ibis-for-pandas-users for details.
It seems that the issue originates from the Ibis side of things, hence the ping. If this is out of scope for this project, though, I will gladly hear it.
Metadata
Metadata
Assignees
Labels
No labels
Type
Projects
Status
backlog