
Commit feecf41

Replace dask_ml.wrappers.ParallelPostFit with custom ParallelPostFit class (#832)
* create ParallelPostFit class
* _timer
* update create_experiment
* update comment
* migrate changes from 799
* predict_proba_meta
* fix gpu?
* fix TypeError?
* trying again
* meta to output_meta
* remove _timer
* try import sklearn
* style fix
* Update wrappers.py
* use ImportError
1 parent e9ff9cd commit feecf41
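
In short, the custom class in dask_sql/physical/rel/custom/wrappers.py takes over the role of dask_ml.wrappers.ParallelPostFit so that dask-ml is no longer a hard requirement for these code paths. Below is a minimal, hypothetical stub of the constructor surface the changed call sites rely on; it is not the real implementation, which also ships the parallel predict/transform machinery.

# Hypothetical stub, not the actual wrappers.py: only the keyword surface
# exercised by the diffs below.
class ParallelPostFitStub:
    def __init__(
        self,
        estimator=None,
        predict_meta=None,
        predict_proba_meta=None,
        transform_meta=None,
    ):
        # The already-fitted estimator whose predict / predict_proba /
        # transform should later run partition-wise on Dask collections.
        self.estimator = estimator
        # Optional "meta" hints describing each call's output, so Dask does
        # not have to probe the estimator with empty data to infer it.
        self.predict_meta = predict_meta
        self.predict_proba_meta = predict_proba_meta
        self.transform_meta = transform_meta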

File tree: 5 files changed (+589, -19 lines)


Diff for: dask_sql/physical/rel/custom/create_experiment.py (+2, -12)

@@ -168,12 +168,7 @@ def convert(self, rel: "LogicalPlan", context: "dask_sql.Context") -> DataContainer:
                 f"Can not import tuner {experiment_class}. Make sure you spelled it correctly and have installed all packages."
             )
 
-        try:
-            from dask_ml.wrappers import ParallelPostFit
-        except ImportError:  # pragma: no cover
-            raise ValueError(
-                "dask_ml must be installed to use automl and tune hyperparameters"
-            )
+        from dask_sql.physical.rel.custom.wrappers import ParallelPostFit
 
         model = ModelClass()
 
@@ -199,12 +194,7 @@ def convert(self, rel: "LogicalPlan", context: "dask_sql.Context") -> DataContainer:
                 f"Can not import automl model {automl_class}. Make sure you spelled it correctly and have installed all packages."
             )
 
-        try:
-            from dask_ml.wrappers import ParallelPostFit
-        except ImportError:  # pragma: no cover
-            raise ValueError(
-                "dask_ml must be installed to use automl and tune hyperparameters"
-            )
+        from dask_sql.physical.rel.custom.wrappers import ParallelPostFit
 
         automl = AutoMLClass(**automl_kwargs)
         # should be avoided if data doesn't fit in memory
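
The net effect of this pair of hunks is that hyperparameter tuning and AutoML no longer need dask-ml installed just to wrap their result. A hedged sketch of the pattern, with GridSearchCV and the toy data chosen purely for illustration (the plugin actually builds the tuner from the experiment_class and kwargs given in SQL):

# Illustrative only: wrap a tuned estimator with the vendored ParallelPostFit.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

from dask_sql.physical.rel.custom.wrappers import ParallelPostFit

X = np.random.rand(200, 4)
y = (X[:, 0] > 0.5).astype(int)

search = GridSearchCV(LogisticRegression(), {"C": [0.1, 1.0, 10.0]})
search.fit(X, y)

# Wrapping the winner lets later PREDICT statements run block-wise on Dask
# data instead of collecting it onto a single worker.
wrapped = ParallelPostFit(estimator=search.best_estimator_)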

Diff for: dask_sql/physical/rel/custom/create_model.py (+13, -6)

@@ -1,6 +1,7 @@
 import logging
 from typing import TYPE_CHECKING
 
+import numpy as np
 from dask import delayed
 
 from dask_sql.datacontainer import DataContainer
 
@@ -43,7 +44,7 @@ class CreateModelPlugin(BaseRelPlugin):
           unsupervised algorithms). This means, you typically
           want to set this parameter.
         * wrap_predict: Boolean flag, whether to wrap the selected
-          model with a :class:`dask_ml.wrappers.ParallelPostFit`.
+          model with a :class:`dask_sql.physical.rel.custom.wrappers.ParallelPostFit`.
           Have a look into the
           [dask-ml docu](https://ml.dask.org/meta-estimators.html#parallel-prediction-and-transformation)
           to learn more about it. Defaults to false. Typically you set
 
@@ -165,10 +166,7 @@ def convert(self, rel: "LogicalPlan", context: "dask_sql.Context") -> DataContainer:
             model = Incremental(estimator=model)
 
         if wrap_predict:
-            try:
-                from dask_ml.wrappers import ParallelPostFit
-            except ImportError:  # pragma: no cover
-                raise ValueError("Wrapping requires dask-ml to be installed.")
+            from dask_sql.physical.rel.custom.wrappers import ParallelPostFit
 
             # When `wrap_predict` is set to True we train on single partition frames
             # because this is only useful for non dask distributed models
 
@@ -183,7 +181,16 @@ def convert(self, rel: "LogicalPlan", context: "dask_sql.Context") -> DataContainer:
 
             delayed_model = [delayed(model.fit)(x_p, y_p) for x_p, y_p in zip(X_d, y_d)]
             model = delayed_model[0].compute()
-            model = ParallelPostFit(estimator=model)
+            if "sklearn" in model_class:
+                output_meta = np.array([])
+                model = ParallelPostFit(
+                    estimator=model,
+                    predict_meta=output_meta,
+                    predict_proba_meta=output_meta,
+                    transform_meta=output_meta,
+                )
+            else:
+                model = ParallelPostFit(estimator=model)
 
         else:
             model.fit(X, y, **fit_kwargs)
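
The new sklearn-only branch above passes empty NumPy arrays as output hints. A hedged usage sketch, assuming the vendored class mirrors the dask-ml ParallelPostFit API it replaces (an estimator plus lazy, block-wise predict on Dask collections); the model and data below are made up for illustration:

# Illustrative only: with predict_meta and friends set, the wrapper can tell
# Dask the output type up front instead of calling the estimator on empty data.
import dask.array as da
import numpy as np
from sklearn.linear_model import LogisticRegression

from dask_sql.physical.rel.custom.wrappers import ParallelPostFit

X_small = np.random.rand(100, 4)
y_small = (X_small[:, 0] > 0.5).astype(int)

output_meta = np.array([])
model = ParallelPostFit(
    estimator=LogisticRegression().fit(X_small, y_small),
    predict_meta=output_meta,
    predict_proba_meta=output_meta,
    transform_meta=output_meta,
)

X_big = da.random.random((10_000, 4), chunks=(1_000, 4))
y_pred = model.predict(X_big)  # stays a lazy Dask array, evaluated per block
print(y_pred.compute()[:5])

Whether predict_proba or transform is actually available still depends on the wrapped estimator; the meta keywords only describe the output when those methods exist.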
