Skip to content

Commit e4b1a43

Browse files
committed
fix: convert cuDF arrays to NumPy for sklearn models
- sklearn GradientBoostingRegressor and Ridge don't support cuDF arrays
- Convert cuDF arrays to NumPy using .get() or .to_numpy() before sklearn models
- Keep cuDF arrays for cuML models (RandomForest, LinearRegression, SVR)
- Resolves 'Implicit conversion to a NumPy array is not allowed' error
1 parent e8b4d1f commit e4b1a43

File tree

1 file changed

+35
-7
lines changed

1 file changed

+35
-7
lines changed

scripts/forecasting/rapids_gpu_forecasting.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,34 @@ async def train_models(self, X, y):
291291
X_train_scaled = self.scaler.fit_transform(X_train)
292292
X_test_scaled = self.scaler.transform(X_test)
293293

294+
# Convert cuDF arrays to NumPy for sklearn models that don't support cuDF
295+
if RAPIDS_AVAILABLE:
296+
# Check if arrays are cuDF/cuML arrays and convert to NumPy
297+
if hasattr(X_train_scaled, 'get'):
298+
X_train_scaled_np = X_train_scaled.get()
299+
X_test_scaled_np = X_test_scaled.get()
300+
elif hasattr(X_train_scaled, 'to_numpy'):
301+
X_train_scaled_np = X_train_scaled.to_numpy()
302+
X_test_scaled_np = X_test_scaled.to_numpy()
303+
else:
304+
X_train_scaled_np = X_train_scaled
305+
X_test_scaled_np = X_test_scaled
306+
307+
if hasattr(y_train, 'get'):
308+
y_train_np = y_train.get()
309+
y_test_np = y_test.get()
310+
elif hasattr(y_train, 'to_numpy'):
311+
y_train_np = y_train.to_numpy()
312+
y_test_np = y_test.to_numpy()
313+
else:
314+
y_train_np = y_train
315+
y_test_np = y_test
316+
else:
317+
X_train_scaled_np = X_train_scaled
318+
X_test_scaled_np = X_test_scaled
319+
y_train_np = y_train
320+
y_test_np = y_test
321+
294322
models = {}
295323
metrics = {}
296324

@@ -392,21 +420,21 @@ async def train_models(self, X, y):
392420
'mae': mean_absolute_error(y_test, xgb_pred)
393421
}
394422

395-
# 4. Gradient Boosting
423+
# 4. Gradient Boosting (sklearn - needs NumPy arrays)
396424
logger.info("🌳 Training Gradient Boosting...")
397425
gb_model = GradientBoostingRegressor(
398426
n_estimators=100,
399427
max_depth=5,
400428
learning_rate=0.1,
401429
random_state=self.config['random_state']
402430
)
403-
gb_model.fit(X_train_scaled, y_train)
404-
gb_pred = gb_model.predict(X_test_scaled)
431+
gb_model.fit(X_train_scaled_np, y_train_np)
432+
gb_pred = gb_model.predict(X_test_scaled_np)
405433

406434
models['gradient_boosting'] = gb_model
407435
metrics['gradient_boosting'] = {
408-
'mse': mean_squared_error(y_test, gb_pred),
409-
'mae': mean_absolute_error(y_test, gb_pred)
436+
'mse': mean_squared_error(y_test_np, gb_pred),
437+
'mae': mean_absolute_error(y_test_np, gb_pred)
410438
}
411439

412440
# 5. Ridge Regression
@@ -439,8 +467,8 @@ async def train_models(self, X, y):
439467
}
440468
else:
441469
metrics['svr'] = {
442-
'mse': mean_squared_error(y_test, svr_pred),
443-
'mae': mean_absolute_error(y_test, svr_pred)
470+
'mse': mean_squared_error(y_test_np, svr_pred),
471+
'mae': mean_absolute_error(y_test_np, svr_pred)
444472
}
445473

446474
self.models = models

0 commit comments

Comments (0)