191 changes: 59 additions & 132 deletions onedal/linear_model/logistic_regression.py
@@ -24,18 +24,13 @@
from onedal.common._backend import bind_default_backend
from onedal.utils import _sycl_queue_manager as QM

from .._config import _get_config
from ..common._estimator_checks import _check_is_fitted
from ..common._mixin import ClassifierMixin
from ..datatypes import from_table, to_table
from ..utils._array_api import _get_sycl_namespace
from ..utils.validation import (
_check_array,
_check_n_features,
_check_X_y,
_is_csr,
_num_features,
_type_of_target,
_num_features
)


@@ -49,14 +44,14 @@ def __init__(self, tol, C, fit_intercept, solver, max_iter, algorithm):
self.max_iter = max_iter
self.algorithm = algorithm

@abstractmethod
def train(self, params, X, y): ...
@bind_default_backend("logistic_regression.classification")
def train(self, *args, **kwargs): ...

@abstractmethod
def infer(self, params, X): ...
@bind_default_backend("logistic_regression.classification")
def infer(self, params, model, X): ...

# direct access to the backend model constructor
@abstractmethod
# direct access to the backend model class
@bind_default_backend("logistic_regression.classification")
def model(self): ...
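
For orientation, a conceptual sketch of what such a binding decorator could do: replace the stub body with a dispatch into a backend module, matched by method name. This is an assumption for illustration only, not the real onedal.common._backend implementation, which also handles queue and policy selection.

import functools
import types

def bind_backend_sketch(backend_module):
    # Hypothetical decorator: forward the call to the function of the
    # same name in the given backend module.
    def decorator(stub):
        @functools.wraps(stub)
        def bound(self, *args, **kwargs):
            return getattr(backend_module, stub.__name__)(*args, **kwargs)
        return bound
    return decorator

# Toy backend standing in for "logistic_regression.classification".
toy_backend = types.SimpleNamespace(train=lambda params, X, y: "trained")

class Toy:
    @bind_backend_sketch(toy_backend)
    def train(self, params, X, y): ...

print(Toy().train(None, None, None))  # -> "trained"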

def _get_onedal_params(self, is_csr, dtype=np.float32):
@@ -76,62 +71,48 @@ def _get_onedal_params(self, is_csr, dtype=np.float32):
),
}

def _fit(self, X, y):
use_raw_input = _get_config()["use_raw_input"] is True

sparsity_enabled = daal_check_version((2024, "P", 700))
if not use_raw_input:
X, y = _check_X_y(
X,
y,
accept_sparse=sparsity_enabled,
force_all_finite=True,
accept_2d_y=False,
dtype=[np.float64, np.float32],
)
if _type_of_target(y) != "binary":
raise ValueError("Only binary classification is supported")

self.classes_, y = np.unique(y, return_inverse=True)
y = y.astype(dtype=np.int32)
else:
_, xp, _ = _get_sycl_namespace(X)
# try/except needed for raw_input + array API data, where, unlike
# numpy, unique values are obtained via `unique_values`.
# This should be removed when refactored for GPU zero-copy.
try:
self.classes_ = xp.unique(y)
except AttributeError:
self.classes_ = xp.unique_values(y)

n_classes = len(self.classes_)
if n_classes != 2:
raise ValueError("Only binary classification is supported")
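
For reference, the np.unique(..., return_inverse=True) idiom in the removed branch above both collects the class labels and re-encodes y as indices into them, which is why the int32 cast follows it:

import numpy as np

y = np.array(["b", "a", "b", "a"])
classes_, y_enc = np.unique(y, return_inverse=True)
# classes_ == array(['a', 'b']); y_enc == array([1, 0, 1, 0])
y_enc = y_enc.astype(np.int32)  # the backend expects int32 labels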
@supports_queue
def fit(self, X, y, queue=None):

# Is the sparsity check fine here? - Same in BasicStatistics
Contributor: Answer is: yes, provided that the sklearnex validation already happened before.

is_csr = _is_csr(X)
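
A sketch of what the _is_csr check presumably reduces to (the real helper lives in onedal.utils.validation; this simplification is an assumption):

import scipy.sparse as sp

def _is_csr_sketch(X):
    # True only for SciPy CSR inputs; dense arrays and other sparse
    # formats (CSC, COO, ...) return False.
    return sp.issparse(X) and X.format == "csr"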

# Is this a good place? - Same in LinReg
Contributor: Yes, but remember that this attribute also needs to be present in the sklearnex object, because sklearn has it.

self.n_features_in_ = _num_features(X, fallback_1d=True)
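
A minimal sketch of the fallback_1d behavior, assuming the helper treats a 1-d input as a single feature (a simplification; the real _num_features handles more input types):

import numpy as np

def _num_features_sketch(X, fallback_1d=False):
    shape = np.shape(X)
    if len(shape) == 1:
        if not fallback_1d:
            raise ValueError("1d input requires fallback_1d=True")
        return 1  # a 1-d sample vector counts as one feature
    return shape[1]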
X_table, y_table = to_table(X, y, queue=QM.get_global_queue())

X_table, y_table = to_table(X, y, queue=queue)
params = self._get_onedal_params(is_csr, X_table.dtype)

result = self.train(params, X_table, y_table)

self._onedal_model = result.model

# For now it's fine to keep n_iter_ as a numpy variable
Contributor: I guess the answer should be yes for scikit-learn compatibility, since they do not fully support array API for logistic regression at the moment. But that might change in the future.

self.n_iter_ = np.array([result.iterations_count])
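
For context on the scikit-learn convention the review comment refers to: stock sklearn also exposes n_iter_ as a numpy array, of shape (1,) in the binary case, e.g.:

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.random.rand(20, 2)
y = np.repeat([0, 1], 10)
clf = LogisticRegression(max_iter=100).fit(X, y)
print(type(clf.n_iter_), clf.n_iter_.shape)  # <class 'numpy.ndarray'> (1,)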

# _n_inner_iter is the total number of cg-solver iterations
if daal_check_version((2024, "P", 300)) and self.solver == "newton-cg":
self._n_inner_iter = result.inner_iterations_count

coeff = from_table(result.model.packed_coefficients)
coeff = from_table(result.model.packed_coefficients, like=X)
self.coef_, self.intercept_ = coeff[:, 1:], coeff[:, 0]

return self
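
With the @supports_queue signature above, device dispatch would look roughly like the following. This is a hypothetical usage sketch: it assumes dpctl and a SYCL GPU device are available, and the constructor values (including algorithm="dense_batch") are illustrative guesses, only the parameter names are taken from the __init__ shown above.

import dpctl  # assumption: dpctl provides the SyclQueue used for dispatch
import numpy as np

X = np.random.rand(100, 4).astype(np.float32)
y = np.repeat([0, 1], 50).astype(np.float32)

queue = dpctl.SyclQueue("gpu")  # requires a SYCL GPU device
model = LogisticRegression(
    tol=1e-4, C=1.0, fit_intercept=True,
    solver="newton-cg", max_iter=100, algorithm="dense_batch",
)
model.fit(X, y, queue=queue)          # computation dispatched to the queue's device
pred = model.predict(X, queue=queue)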

# TODO check if we need to pass queue as an argument
def _create_model(self):
# TODO revise create_model implementation here and in LinearRegression


# TODO: do we need to support the case when the model was fitted with sklearn
# (e.g. with a torch tensor or similar) and this method is then run?
# Currently it can't.
Contributor: It should be possible, because the oneDAL model object would only require coefficients and intercepts.
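
The packed layout that _create_model must reproduce can be read off the fit path above (coeff[:, 0] is the intercept, coeff[:, 1:] are the weights); a sketch for the binary case:

import numpy as np

coef = np.array([[0.5, -1.2, 3.0]])  # shape (1, n_features), as in self.coef_
intercept = np.array([0.25])         # as in self.intercept_

# packed_coefficients layout: column 0 = intercept, columns 1.. = weights
packed = np.concatenate([intercept.reshape(1, 1), coef], axis=1)
assert packed.shape == (1, coef.shape[1] + 1)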

m = self.model()

coefficients = self.coef_
# TODO is it fine to use get_dtype
dtype = get_dtype(coefficients)
# TODO check if it's fine to use numpy for coefs
coefficients = np.asarray(coefficients, dtype=dtype)

if coefficients.ndim == 2:
@@ -145,19 +126,7 @@ def _create_model(self):
intercept = np.asarray(intercept, dtype=dtype)
assert intercept.size == 1

intercept = _check_array(
intercept,
dtype=[np.float64, np.float32],
force_all_finite=True,
ensure_2d=False,
)
coefficients = _check_array(
coefficients,
dtype=[np.float64, np.float32],
force_all_finite=True,
ensure_2d=False,
)

# TODO is it fine to use this func?
coefficients, intercept = make2d(coefficients), make2d(intercept)
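
A plausible reading of make2d, consistent with the shape assert right below (an assumption for illustration, not the actual helper):

import numpy as np

def make2d_sketch(arr):
    arr = np.asarray(arr)
    # Promote scalars / 1-d arrays to a single row; leave 2-d untouched.
    return arr.reshape(1, -1) if arr.ndim < 2 else arr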

assert coefficients.shape == (1, n_features_in)
@@ -176,72 +145,38 @@

return m

def _infer(self, X):
def _infer(self, X, queue=None):
_check_is_fitted(self)

sparsity_enabled = daal_check_version((2024, "P", 700))

if not _get_config()["use_raw_input"]:
X = _check_array(
X,
dtype=[np.float64, np.float32],
accept_sparse=sparsity_enabled,
force_all_finite=True,
ensure_2d=False,
accept_large_sparse=sparsity_enabled,
)
# Is the sparsity check fine here? - Same in BasicStatistics
is_csr = _is_csr(X)
_check_n_features(self, X, False)

X = make2d(X)
# Is this check fine? - Same in LinReg
_check_n_features(self, X, False)

if hasattr(self, "_onedal_model"):
model = self._onedal_model
else:
model = self._create_model()
if not hasattr(self, "_onedal_model"):
self._onedal_model = self._create_model()

X_table = to_table(X, queue=QM.get_global_queue())
params = self._get_onedal_params(is_csr, X.dtype)
X_table = to_table(X, queue=queue)
params = self._get_onedal_params(is_csr, X_table.dtype)

result = self.infer(params, model, X_table)
result = self.infer(params, self._onedal_model, X_table)
return result

def _predict(self, X):
result = self._infer(X)
_, xp, _ = _get_sycl_namespace(X)
@supports_queue
def predict(self, X, queue=None):
result = self._infer(X, queue)
y = from_table(result.responses, like=X)
y = xp.take(xp.asarray(self.classes_), xp.reshape(y, (-1,)), axis=0)
return y

def _predict_proba(self, X):
result = self._infer(X)
_, xp, _ = _get_sycl_namespace(X)
@supports_queue
def predict_proba(self, X, queue=None):
result = self._infer(X, queue)
y = from_table(result.probabilities, like=X)
y = xp.reshape(y, -1)
return xp.stack([1 - y, y], axis=1)

def _predict_log_proba(self, X):
_, xp, _ = _get_sycl_namespace(X)
y_proba = self._predict_proba(X)
# These are the same thresholds used by oneDAL during the model fitting procedure
if y_proba.dtype == np.float32:
min_prob = 1e-7
max_prob = 1.0 - 1e-7
else:
min_prob = 1e-15
max_prob = 1.0 - 1e-15
y_proba = xp.clip(y_proba, min_prob, max_prob)
return xp.log(y_proba)

def _decision_function(self, X):
_, xp, _ = _get_sycl_namespace(X)
raw = xp.matmul(X, xp.reshape(self.coef_, -1))
if self.fit_intercept:
raw += self.intercept_
return raw
return y


class LogisticRegression(ClassifierMixin, BaseLogisticRegression):
class LogisticRegression(BaseLogisticRegression):

def __init__(
self,
@@ -263,31 +198,23 @@ def __init__(
algorithm=algorithm,
)

@bind_default_backend("logistic_regression.classification")
def train(self, params, X, y, queue=None): ...

@bind_default_backend("logistic_regression.classification")
def infer(self, params, X, model, queue=None): ...

@bind_default_backend("logistic_regression.classification")
def model(self): ...
# @bind_default_backend("logistic_regression.classification")
# def train(self, params, X, y, queue=None): ...

@supports_queue
def fit(self, X, y, queue=None):
return self._fit(X, y)
# @bind_default_backend("logistic_regression.classification")
# def infer(self, params, X, model, queue=None): ...

@supports_queue
def predict(self, X, queue=None):
return self._predict(X)
# @bind_default_backend("logistic_regression.classification")
# def model(self): ...

@supports_queue
def predict_proba(self, X, queue=None):
return self._predict_proba(X)
# @supports_queue
# def fit(self, X, y, queue=None):
# return self._fit(X, y)

@supports_queue
def predict_log_proba(self, X, queue=None):
return self._predict_log_proba(X)
# @supports_queue
# def predict(self, X, queue=None):
# return self._predict(X)

@supports_queue
def decision_function(self, X, queue=None):
return self._decision_function(X)
# @supports_queue
# def predict_proba(self, X, queue=None):
# return self._predict_proba(X)
6 changes: 3 additions & 3 deletions onedal/spmd/linear_model/logistic_regression.py
@@ -39,6 +39,6 @@ def predict(self, X, queue=None):
def predict_proba(self, X, queue=None):
return super().predict_proba(X, queue=queue)

@support_input_format
def predict_log_proba(self, X, queue=None):
return super().predict_log_proba(X, queue=queue)
# @support_input_format
# def predict_log_proba(self, X, queue=None):
# return super().predict_log_proba(X, queue=queue)