From 8bedde1a45d31b3fff8caff90f313eadf417d388 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 2 Oct 2024 03:20:56 -0700 Subject: [PATCH 01/38] ENH: array api dispatching added array-api-compat to test env --- requirements-test.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-test.txt b/requirements-test.txt index d9de92c2da..8cc7049d33 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -11,4 +11,5 @@ xgboost==2.1.1 lightgbm==4.5.0 catboost==1.2.7 ; python_version < '3.11' # TODO: Remove 3.11 condition when catboost supports numpy 2.0 shap==0.46.0 +array-api-compat==1.8.0 array-api-strict==2.0.1 From b11fcf3c3e5e4b61116039f748f84405bfbc5f0c Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 4 Oct 2024 05:18:57 -0700 Subject: [PATCH 02/38] Deselect some scikit-learn Array API tests --- deselected_tests.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 4f10264586..e658ad0ae2 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -25,6 +25,12 @@ # will exclude deselection in versions 0.18.1, and 0.18.2 only. deselected_tests: + # Array API support + # sklearnex functional Array API support doesn't guaranty namespace consistency for the estimator's array attributes. 
+ - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_input_and_values-array_api_strict-None-None] + - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_input_and_values-array_api_strict-None-None] + - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_input_and_values-array_api_strict-None-None] + # 'kulsinski' distance was deprecated in scipy 1.11 but still marked as supported in scikit-learn < 1.3 - neighbors/tests/test_neighbors.py::test_kneighbors_brute_backend[float64-kulsinski] <1.3 - neighbors/tests/test_neighbors.py::test_radius_neighbors_brute_backend[kulsinski] <1.3 From 943796e3710617e459a4579ea2f339d44787fc2f Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 8 Oct 2024 08:35:01 -0500 Subject: [PATCH 03/38] deselect more tests --- deselected_tests.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 2ab5fb6fa2..42fcc22978 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -29,6 +29,8 @@ deselected_tests: # sklearnex functional Array API support doesn't guaranty namespace consistency for the estimator's array attributes. 
- decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_input_and_values-array_api_strict-None-None] - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_input_and_values-array_api_strict-None-None] + - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_get_precision-array_api_strict-None-None] + - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_get_precision-array_api_strict-None-None] - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_input_and_values-array_api_strict-None-None] # 'kulsinski' distance was deprecated in scipy 1.11 but still marked as supported in scikit-learn < 1.3 From ef42daa0c909d39058c8bf005eef7654685d67c5 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 8 Oct 2024 08:53:17 -0500 Subject: [PATCH 04/38] deselect more tests --- deselected_tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 42fcc22978..d46c9ded6d 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -31,6 +31,7 @@ deselected_tests: - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_input_and_values-array_api_strict-None-None] - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_get_precision-array_api_strict-None-None] - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_get_precision-array_api_strict-None-None] + - 
linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_attributes-array_api_strict-None-None] - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_input_and_values-array_api_strict-None-None] # 'kulsinski' distance was deprecated in scipy 1.11 but still marked as supported in scikit-learn < 1.3 From 3bc755d874e6cf44fdc64b852b8c815ccc8e9aa9 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 8 Oct 2024 09:03:33 -0500 Subject: [PATCH 05/38] disabled tests for --- deselected_tests.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index d46c9ded6d..934b6bb21a 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -33,6 +33,10 @@ deselected_tests: - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_get_precision-array_api_strict-None-None] - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_attributes-array_api_strict-None-None] - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_input_and_values-array_api_strict-None-None] + # `test_array_api_train_test_split` inconsistency for Array API inputs. 
+ - model_selection/tests/test_split.py::test_array_api_train_test_split[True-None-array_api_strict-None-None] + - model_selection/tests/test_split.py::test_array_api_train_test_split[True-stratify1-array_api_strict-None-None] + - model_selection/tests/test_split.py::test_array_api_train_test_split[False-None-array_api_strict-None-None] # 'kulsinski' distance was deprecated in scipy 1.11 but still marked as supported in scikit-learn < 1.3 - neighbors/tests/test_neighbors.py::test_kneighbors_brute_backend[float64-kulsinski] <1.3 From 76f1876be95d1cf356b788c3b71ceea4da361373 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 8 Oct 2024 09:04:58 -0500 Subject: [PATCH 06/38] fix the deselection comment --- deselected_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 934b6bb21a..edcb4db49e 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -33,7 +33,7 @@ deselected_tests: - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_get_precision-array_api_strict-None-None] - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_attributes-array_api_strict-None-None] - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_input_and_values-array_api_strict-None-None] - # `test_array_api_train_test_split` inconsistency for Array API inputs. + # `train_test_split` inconsistency for Array API inputs. 
- model_selection/tests/test_split.py::test_array_api_train_test_split[True-None-array_api_strict-None-None] - model_selection/tests/test_split.py::test_array_api_train_test_split[True-stratify1-array_api_strict-None-None] - model_selection/tests/test_split.py::test_array_api_train_test_split[False-None-array_api_strict-None-None] From ce0b8e1d7aa01809a84402253c64f3594ec156fb Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 8 Oct 2024 13:15:56 -0500 Subject: [PATCH 07/38] disabled test for Ridge regression --- deselected_tests.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index edcb4db49e..6da88115c5 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -37,6 +37,8 @@ deselected_tests: - model_selection/tests/test_split.py::test_array_api_train_test_split[True-None-array_api_strict-None-None] - model_selection/tests/test_split.py::test_array_api_train_test_split[True-stratify1-array_api_strict-None-None] - model_selection/tests/test_split.py::test_array_api_train_test_split[False-None-array_api_strict-None-None] + # Ridge regression. Array API functionally supported for all solvers. Not raising error for non-svd solvers. 
+ - linear_model/tests/test_ridge.py::test_array_api_error_and_warnings_for_solver_parameter[array_api_strict] # 'kulsinski' distance was deprecated in scipy 1.11 but still marked as supported in scikit-learn < 1.3 - neighbors/tests/test_neighbors.py::test_kneighbors_brute_backend[float64-kulsinski] <1.3 From 404e8c0abf708f2a0c8d82b04ef5e7a83bf24b3e Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 8 Oct 2024 15:03:28 -0500 Subject: [PATCH 08/38] Disabled tests and added comment --- deselected_tests.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 6da88115c5..364e597393 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -37,6 +37,11 @@ deselected_tests: - model_selection/tests/test_split.py::test_array_api_train_test_split[True-None-array_api_strict-None-None] - model_selection/tests/test_split.py::test_array_api_train_test_split[True-stratify1-array_api_strict-None-None] - model_selection/tests/test_split.py::test_array_api_train_test_split[False-None-array_api_strict-None-None] + # PCA. Array API functionally supported for all factorizations. power_iteration_normalizer=["LU", "QR"] + - decomposition/tests/test_pca.py::test_array_api_error_and_warnings_on_unsupported_params + # PCA. InvalidParameterError: The 'M' parameter of randomized_svd must be an instance of 'numpy.ndarray' or a sparse matrix. + - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_input_and_values-array_api_strict-None-None] + - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_get_precision-array_api_strict-None-None] # Ridge regression. Array API functionally supported for all solvers. Not raising error for non-svd solvers. 
- linear_model/tests/test_ridge.py::test_array_api_error_and_warnings_for_solver_parameter[array_api_strict] From ced43bf9ea4df4b4cddb33624377efad03781012 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 8 Oct 2024 15:36:53 -0700 Subject: [PATCH 09/38] ENH: Array API dispatching --- sklearnex/dispatcher.py | 44 +++++++++++ sklearnex/utils/_array_api.py | 133 +++++++++++++++++++++++++++------- 2 files changed, 150 insertions(+), 27 deletions(-) diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index a4a62556f6..b84abe9135 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -128,6 +128,11 @@ def get_patch_map_core(preview=False): from ._config import get_config as get_config_sklearnex from ._config import set_config as set_config_sklearnex + # TODO: + # check the version of skl. + if sklearn_check_version("1.4"): + import sklearn.utils._array_api as _array_api_module + if sklearn_check_version("1.2.1"): from .utils.parallel import _FuncWrapper as _FuncWrapper_sklearnex else: @@ -165,6 +170,15 @@ def get_patch_map_core(preview=False): from .svm import NuSVC as NuSVC_sklearnex from .svm import NuSVR as NuSVR_sklearnex + # TODO: + # check the version of skl. + if sklearn_check_version("1.4"): + from .utils._array_api import _convert_to_numpy as _convert_to_numpy_sklearnex + from .utils._array_api import get_namespace as get_namespace_sklearnex + from .utils._array_api import ( + yield_namespace_device_dtype_combinations as yield_namespace_device_dtype_combinations_sklearnex, + ) + # DBSCAN mapping.pop("dbscan") mapping["dbscan"] = [[(cluster_module, "DBSCAN", DBSCAN_sklearnex), None]] @@ -440,6 +454,36 @@ def get_patch_map_core(preview=False): mapping["_funcwrapper"] = [ [(parallel_module, "_FuncWrapper", _FuncWrapper_sklearnex), None] ] + # TODO: + # check the version of skl. 
+ if sklearn_check_version("1.4"): + # Necessary for array_api support + mapping["get_namespace"] = [ + [ + ( + _array_api_module, + "get_namespace", + get_namespace_sklearnex, + ), + None, + ] + ] + mapping["_convert_to_numpy"] = [ + [ + (_array_api_module, "_convert_to_numpy", _convert_to_numpy_sklearnex), + None, + ] + ] + mapping["yield_namespace_device_dtype_combinations"] = [ + [ + ( + _array_api_module, + "yield_namespace_device_dtype_combinations", + yield_namespace_device_dtype_combinations_sklearnex, + ), + None, + ] + ] return mapping diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index bc30be5021..a0054d0903 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -16,47 +16,124 @@ """Tools to support array_api.""" +import itertools + import numpy as np from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils._array_api import _get_sycl_namespace +from onedal.utils._array_api import _asarray, _get_sycl_namespace +# TODO: +# check the version of skl. if sklearn_check_version("1.2"): from sklearn.utils._array_api import get_namespace as sklearn_get_namespace + from sklearn.utils._array_api import _convert_to_numpy as _sklearn_convert_to_numpy +from onedal._device_offload import dpctl_available, dpnp_available + +if dpctl_available: + import dpctl.tensor as dpt + +if dpnp_available: + import dpnp + +_NUMPY_NAMESPACE_NAMES = {"numpy", "array_api_compat.numpy"} -def get_namespace(*arrays): - """Get namespace of arrays. - Introspect `arrays` arguments and return their common Array API - compatible namespace object, if any. NumPy 1.22 and later can - construct such containers using the `numpy.array_api` namespace - for instance. +def yield_namespaces(include_numpy_namespaces=True): + """Yield supported namespace. 
- This function will return the namespace of SYCL-related arrays - which define the __sycl_usm_array_interface__ attribute - regardless of array_api support, the configuration of - array_api_dispatch, or scikit-learn version. + This is meant to be used for testing purposes only. + + Parameters + ---------- + include_numpy_namespaces : bool, default=True + If True, also yield numpy namespaces. + + Returns + ------- + array_namespace : str + The name of the Array API namespace. + """ + for array_namespace in [ + # The following is used to test the array_api_compat wrapper when + # array_api_dispatch is enabled: in particular, the arrays used in the + # tests are regular numpy arrays without any "device" attribute. + "numpy", + # Stricter NumPy-based Array API implementation. The + # array_api_strict.Array instances always have a dummy "device" attribute. + "array_api_strict", + "dpctl.tensor", + "cupy", + "torch", + ]: + if not include_numpy_namespaces and array_namespace in _NUMPY_NAMESPACE_NAMES: + continue + yield array_namespace + + +def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True): + """Yield supported namespace, device, dtype tuples for testing. + + Use this to test that an estimator works with all combinations. - See: https://numpy.org/neps/nep-0047-array-api-standard.html + Parameters + ---------- + include_numpy_namespaces : bool, default=True + If True, also yield numpy namespaces. - If `arrays` are regular numpy arrays, an instance of the - `_NumPyApiWrapper` compatibility wrapper is returned instead. + Returns + ------- + array_namespace : str + The name of the Array API namespace. - Namespace support is not enabled by default. To enabled it - call: + device : str + The name of the device on which to allocate the arrays. Can be None to + indicate that the default value should be used. - sklearn.set_config(array_api_dispatch=True) + dtype_name : str + The name of the data type to use for arrays. 
Can be None to indicate + that the default value should be used. + """ + for array_namespace in yield_namespaces( + include_numpy_namespaces=include_numpy_namespaces + ): + if array_namespace == "torch": + for device, dtype in itertools.product( + ("cpu", "cuda"), ("float64", "float32") + ): + yield array_namespace, device, dtype + yield array_namespace, "mps", "float32" + elif array_namespace == "dpctl.tensor": + for device, dtype in itertools.product( + ("cpu", "gpu"), ("float64", "float32") + ): + yield array_namespace, device, dtype + else: + yield array_namespace, None, None + + +def _convert_to_numpy(array, xp): + """Convert X into a NumPy ndarray on the CPU.""" + xp_name = xp.__name__ + + # if dpctl_available and isinstance(array, dpctl.tensor): + if dpctl_available and xp_name in { + "dpctl.tensor", + }: + return dpt.to_numpy(array) + elif dpnp_available and isinstance(array, dpnp.ndarray): + return dpnp.asnumpy(array) + elif sklearn_check_version("1.2"): + return _sklearn_convert_to_numpy(array, xp) + else: + return _asarray(array, xp) - or: - with sklearn.config_context(array_api_dispatch=True): - # your code here +def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): + """Get namespace of arrays. - Otherwise an instance of the `_NumPyApiWrapper` - compatibility wrapper is always returned irrespective of - the fact that arrays implement the `__array_namespace__` - protocol or not. + TBD Parameters ---------- @@ -72,11 +149,13 @@ def get_namespace(*arrays): True of the arrays are containers that implement the Array API spec. 
""" - sycl_type, xp, is_array_api_compliant = _get_sycl_namespace(*arrays) + sycl_type, xp_sycl_namespace, is_array_api_compliant = _get_sycl_namespace(*arrays) if sycl_type: - return xp, is_array_api_compliant + return xp_sycl_namespace, is_array_api_compliant elif sklearn_check_version("1.2"): - return sklearn_get_namespace(*arrays) + return sklearn_get_namespace( + *arrays, remove_none=remove_none, remove_types=remove_types, xp=xp + ) else: return np, False From c395d03654fe358a14139012b6a87dd8bba77a78 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 03:34:02 -0700 Subject: [PATCH 10/38] Revert adding dpctl into Array PI conformance testing added versioning for the get_nnamespace --- sklearnex/dispatcher.py | 25 +----- sklearnex/utils/_array_api.py | 157 +++++++++++++--------------------- 2 files changed, 61 insertions(+), 121 deletions(-) diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index b84abe9135..e49b29eb80 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -128,9 +128,7 @@ def get_patch_map_core(preview=False): from ._config import get_config as get_config_sklearnex from ._config import set_config as set_config_sklearnex - # TODO: - # check the version of skl. - if sklearn_check_version("1.4"): + if sklearn_check_version("1.2"): import sklearn.utils._array_api as _array_api_module if sklearn_check_version("1.2.1"): @@ -170,14 +168,9 @@ def get_patch_map_core(preview=False): from .svm import NuSVC as NuSVC_sklearnex from .svm import NuSVR as NuSVR_sklearnex - # TODO: - # check the version of skl. 
- if sklearn_check_version("1.4"): + if sklearn_check_version("1.2"): from .utils._array_api import _convert_to_numpy as _convert_to_numpy_sklearnex from .utils._array_api import get_namespace as get_namespace_sklearnex - from .utils._array_api import ( - yield_namespace_device_dtype_combinations as yield_namespace_device_dtype_combinations_sklearnex, - ) # DBSCAN mapping.pop("dbscan") @@ -454,9 +447,7 @@ def get_patch_map_core(preview=False): mapping["_funcwrapper"] = [ [(parallel_module, "_FuncWrapper", _FuncWrapper_sklearnex), None] ] - # TODO: - # check the version of skl. - if sklearn_check_version("1.4"): + if sklearn_check_version("1.2"): # Necessary for array_api support mapping["get_namespace"] = [ [ @@ -474,16 +465,6 @@ def get_patch_map_core(preview=False): None, ] ] - mapping["yield_namespace_device_dtype_combinations"] = [ - [ - ( - _array_api_module, - "yield_namespace_device_dtype_combinations", - yield_namespace_device_dtype_combinations_sklearnex, - ), - None, - ] - ] return mapping diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index a0054d0903..de7e3dbed6 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -16,8 +16,6 @@ """Tools to support array_api.""" -import itertools - import numpy as np from daal4py.sklearn._utils import sklearn_check_version @@ -37,81 +35,6 @@ if dpnp_available: import dpnp -_NUMPY_NAMESPACE_NAMES = {"numpy", "array_api_compat.numpy"} - - -def yield_namespaces(include_numpy_namespaces=True): - """Yield supported namespace. - - This is meant to be used for testing purposes only. - - Parameters - ---------- - include_numpy_namespaces : bool, default=True - If True, also yield numpy namespaces. - - Returns - ------- - array_namespace : str - The name of the Array API namespace. 
- """ - for array_namespace in [ - # The following is used to test the array_api_compat wrapper when - # array_api_dispatch is enabled: in particular, the arrays used in the - # tests are regular numpy arrays without any "device" attribute. - "numpy", - # Stricter NumPy-based Array API implementation. The - # array_api_strict.Array instances always have a dummy "device" attribute. - "array_api_strict", - "dpctl.tensor", - "cupy", - "torch", - ]: - if not include_numpy_namespaces and array_namespace in _NUMPY_NAMESPACE_NAMES: - continue - yield array_namespace - - -def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True): - """Yield supported namespace, device, dtype tuples for testing. - - Use this to test that an estimator works with all combinations. - - Parameters - ---------- - include_numpy_namespaces : bool, default=True - If True, also yield numpy namespaces. - - Returns - ------- - array_namespace : str - The name of the Array API namespace. - - device : str - The name of the device on which to allocate the arrays. Can be None to - indicate that the default value should be used. - - dtype_name : str - The name of the data type to use for arrays. Can be None to indicate - that the default value should be used. 
- """ - for array_namespace in yield_namespaces( - include_numpy_namespaces=include_numpy_namespaces - ): - if array_namespace == "torch": - for device, dtype in itertools.product( - ("cpu", "cuda"), ("float64", "float32") - ): - yield array_namespace, device, dtype - yield array_namespace, "mps", "float32" - elif array_namespace == "dpctl.tensor": - for device, dtype in itertools.product( - ("cpu", "gpu"), ("float64", "float32") - ): - yield array_namespace, device, dtype - else: - yield array_namespace, None, None - def _convert_to_numpy(array, xp): """Convert X into a NumPy ndarray on the CPU.""" @@ -130,32 +53,68 @@ def _convert_to_numpy(array, xp): return _asarray(array, xp) -def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): - """Get namespace of arrays. +if sklearn_check_version("1.5"): - TBD + def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): + """Get namespace of arrays. - Parameters - ---------- - *arrays : array objects - Array objects. + TBD - Returns - ------- - namespace : module - Namespace shared by array objects. + Parameters + ---------- + *arrays : array objects + Array objects. - is_array_api : bool - True of the arrays are containers that implement the Array API spec. - """ + Returns + ------- + namespace : module + Namespace shared by array objects. - sycl_type, xp_sycl_namespace, is_array_api_compliant = _get_sycl_namespace(*arrays) + is_array_api : bool + True of the arrays are containers that implement the Array API spec. 
+ """ - if sycl_type: - return xp_sycl_namespace, is_array_api_compliant - elif sklearn_check_version("1.2"): - return sklearn_get_namespace( - *arrays, remove_none=remove_none, remove_types=remove_types, xp=xp + usm_iface, xp_sycl_namespace, is_array_api_compliant = _get_sycl_namespace( + *arrays ) - else: - return np, False + + if usm_iface: + return xp_sycl_namespace, is_array_api_compliant + elif sklearn_check_version("1.2"): + return sklearn_get_namespace( + *arrays, remove_none=remove_none, remove_types=remove_types, xp=xp + ) + else: + return np, False + +else: + + def get_namespace(*arrays): + """Get namespace of arrays. + + TBD + + Parameters + ---------- + *arrays : array objects + Array objects. + + Returns + ------- + namespace : module + Namespace shared by array objects. + + is_array_api : bool + True of the arrays are containers that implement the Array API spec. + """ + + usm_iface, xp_sycl_namespace, is_array_api_compliant = _get_sycl_namespace( + *arrays + ) + + if usm_iface: + return xp_sycl_namespace, is_array_api_compliant + elif sklearn_check_version("1.2"): + return sklearn_get_namespace(*arrays) + else: + return np, False From 5784c25a4c16cf2723325431be43a8d752a0709c Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 05:57:20 -0700 Subject: [PATCH 11/38] minor refactoring onedal _array_api --- onedal/utils/_array_api.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 4accdd3ac0..9f8fa6c7c1 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -73,19 +73,19 @@ def _get_sycl_namespace(*arrays): """Get namespace of sycl arrays.""" # sycl support designed to work regardless of array_api_dispatch sklearn global value - sycl_type = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")} + sua_iface = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")} - if len(sycl_type) 
> 1: - raise ValueError(f"Multiple SYCL types for array inputs: {sycl_type}") + if len(sua_iface) > 1: + raise ValueError(f"Multiple SYCL types for array inputs: {sua_iface}") - if sycl_type: - (X,) = sycl_type.values() + if sua_iface: + (X,) = sua_iface.values() if hasattr(X, "__array_namespace__"): - return sycl_type, X.__array_namespace__(), True + return sua_iface, X.__array_namespace__(), True elif dpnp_available and isinstance(X, dpnp.ndarray): - return sycl_type, dpnp, False + return sua_iface, dpnp, False else: - raise ValueError(f"SYCL type not recognized: {sycl_type}") + raise ValueError(f"SYCL type not recognized: {sua_iface}") - return sycl_type, None, False + return sua_iface, None, False From 8d7f664454fd0c80a75b59ff1dc7ac14b1d2bd2f Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 12:07:00 -0700 Subject: [PATCH 12/38] add tests --- sklearnex/_device_offload.py | 5 +- sklearnex/utils/_array_api.py | 1 - sklearnex/utils/tests/test_array_api.py | 152 ++++++++++++++++++++++++ 3 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 sklearnex/utils/tests/test_array_api.py diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 06f97aa679..2d1d197746 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -16,13 +16,14 @@ from functools import wraps +from daal4py.sklearn._utils import sklearn_check_version from onedal._device_offload import ( _copy_to_usm, _get_global_queue, _transfer_to_host, dpnp_available, ) -from onedal.utils._array_api import _asarray, _is_numpy_namespace +from onedal.utils._array_api import _asarray if dpnp_available: import dpnp @@ -74,7 +75,7 @@ def dispatch(obj, method_name, branches, *args, **kwargs): return branches[backend](obj, *hostargs, **hostkwargs, queue=q) if backend == "sklearn": if ( - "array_api_dispatch" in get_config() + sklearn_check_version("1.2") and get_config()["array_api_dispatch"] and "array_api_support" in obj._get_tags() and 
obj._get_tags()["array_api_support"] diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index de7e3dbed6..ba53b72d0b 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -40,7 +40,6 @@ def _convert_to_numpy(array, xp): """Convert X into a NumPy ndarray on the CPU.""" xp_name = xp.__name__ - # if dpctl_available and isinstance(array, dpctl.tensor): if dpctl_available and xp_name in { "dpctl.tensor", }: diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py new file mode 100644 index 0000000000..059282bce6 --- /dev/null +++ b/sklearnex/utils/tests/test_array_api.py @@ -0,0 +1,152 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +from daal4py.sklearn._utils import sklearn_check_version +from onedal.tests.utils._dataframes_support import ( + _convert_to_dataframe, + get_dataframes_and_queues, +) + +array_api_dataframes_and_namespaces = { + "dpctl": "dpctl.tensor", +} + + +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues( + dataframe_filter_="numpy,dpctl,array_api", device_filter_="cpu,gpu" + ), +) +def test_get_namespace_with_config_context(dataframe, queue): + """Test get_namespace TBD""" + from sklearnex import config_context + from sklearnex.utils._array_api import get_namespace + + array_api_compat = pytest.importorskip("array_api_compat") + + X_np = np.asarray([[1, 2, 3]]) + X = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) + + with config_context(array_api_dispatch=True): + xp_out, is_array_api_compliant = get_namespace(X) + assert is_array_api_compliant + assert xp_out is array_api_compat.get_namespace(X) + + +@pytest.mark.skipif( + not sklearn_check_version("1.2"), + reason="array api dispatch requires sklearn 1.2 version", +) +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues( + dataframe_filter_="numpy,dpctl,array_api", device_filter_="cpu,gpu" + ), +) +def test_get_namespace_with_patching(dataframe, queue): + """Test get_namespace TBD + with `patch_sklearn` + """ + array_api_compat = pytest.importorskip("array_api_compat") + + from sklearnex import patch_sklearn + + patch_sklearn() + + from sklearn import config_context + from sklearn.utils._array_api import get_namespace + + X_np = np.asarray([[1, 2, 3]]) + X = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) + + with config_context(array_api_dispatch=True): + xp_out, is_array_api_compliant = get_namespace(X) + assert is_array_api_compliant + assert xp_out is 
array_api_compat.get_namespace(X) + + +@pytest.mark.skipif( + not sklearn_check_version("1.2"), + reason="array api dispatch requires sklearn 1.2 version", +) +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues( + dataframe_filter_="numpy,dpctl,array_api", device_filter_="cpu,gpu" + ), +) +def test_convert_to_numpy_with_patching(dataframe, queue): + """Test _convert_to_numpy TBD with `patch_sklearn`""" + pytest.importorskip("array_api_compat") + + from sklearnex import patch_sklearn + + patch_sklearn() + + from sklearn import config_context + from sklearn.utils._array_api import _convert_to_numpy, get_namespace + + X_np = np.asarray([[1, 2, 3]]) + X = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) + + with config_context(array_api_dispatch=True): + xp, _ = get_namespace(X) + x_np = _convert_to_numpy(X, xp) + assert type(X_np) == type(x_np) + assert_allclose(X_np, x_np) + + +@pytest.mark.skipif( + not sklearn_check_version("1.2"), + reason="array api dispatch requires sklearn 1.2 version", +) +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues( + dataframe_filter_="numpy,dpctl,array_api", device_filter_="cpu,gpu" + ), +) +@pytest.mark.parametrize( + "dtype", + [ + pytest.param(np.float32, id=np.dtype(np.float32).name), + pytest.param(np.float64, id=np.dtype(np.float64).name), + ], +) +def test_check_array_with_patching(dataframe, queue, dtype): + """Test check_array TBD with `patch_sklearn`""" + pytest.importorskip("array_api_compat") + + from sklearnex import patch_sklearn + + patch_sklearn() + + from sklearn import config_context + from sklearn.utils import check_array + from sklearn.utils._array_api import _convert_to_numpy, get_namespace + + X_np = np.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype) + X_df = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) + with config_context(array_api_dispatch=True): + xp, _ = get_namespace(X_df) + X_df_res = check_array(X_df, 
accept_sparse="csr", dtype=[xp.float64, xp.float32]) + assert type(X_df) == type(X_df_res) + assert_allclose(_convert_to_numpy(X_df, xp), _convert_to_numpy(X_df_res, xp)) From 63d8f3034aee99168fb01859612b5ac7ce0f1a4c Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 13:04:57 -0700 Subject: [PATCH 13/38] addressed memory usage tests --- sklearnex/tests/test_memory_usage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 012086507b..a94de1fa4f 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -45,6 +45,7 @@ CPU_SKIP_LIST = ( + "_convert_to_numpy", # additional memory allocation is expected proportional to the input data "TSNE", # too slow for using in testing on common data size "config_context", # does not malloc "get_config", # does not malloc @@ -59,6 +60,7 @@ ) GPU_SKIP_LIST = ( + "_convert_to_numpy", # additional memory allocation is expected proportional to the input data "TSNE", # too slow for using in testing on common data size "RandomForestRegressor", # too slow for using in testing on common data size "KMeans", # does not support GPU offloading From 6bd028076ccdba1e1e833c525f6da9d266a58a09 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 13:34:28 -0700 Subject: [PATCH 14/38] Address some array api test fails --- sklearnex/utils/tests/test_array_api.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py index 059282bce6..66b5705f9b 100644 --- a/sklearnex/utils/tests/test_array_api.py +++ b/sklearnex/utils/tests/test_array_api.py @@ -90,7 +90,7 @@ def test_get_namespace_with_patching(dataframe, queue): @pytest.mark.parametrize( "dataframe,queue", get_dataframes_and_queues( - dataframe_filter_="numpy,dpctl,array_api", device_filter_="cpu,gpu" + dataframe_filter_="dpctl,array_api", 
device_filter_="cpu,gpu" ), ) def test_convert_to_numpy_with_patching(dataframe, queue): @@ -149,4 +149,8 @@ def test_check_array_with_patching(dataframe, queue, dtype): xp, _ = get_namespace(X_df) X_df_res = check_array(X_df, accept_sparse="csr", dtype=[xp.float64, xp.float32]) assert type(X_df) == type(X_df_res) - assert_allclose(_convert_to_numpy(X_df, xp), _convert_to_numpy(X_df_res, xp)) + if dataframe != "numpy": + # _convert_to_numpy not designed for numpy.ndarray inputs. + assert_allclose(_convert_to_numpy(X_df, xp), _convert_to_numpy(X_df_res, xp)) + else: + assert_allclose(X_df, X_df_res) From 90411e79b9d07b6e3f36ccd7a956d2c93059ef8e Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 13:43:39 -0700 Subject: [PATCH 15/38] linting --- sklearnex/tests/test_memory_usage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index a94de1fa4f..8a1c740aa4 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -45,7 +45,7 @@ CPU_SKIP_LIST = ( - "_convert_to_numpy", # additional memory allocation is expected proportional to the input data + "_convert_to_numpy", # additional memory allocation is expected proportional to the input data "TSNE", # too slow for using in testing on common data size "config_context", # does not malloc "get_config", # does not malloc @@ -60,7 +60,7 @@ ) GPU_SKIP_LIST = ( - "_convert_to_numpy", # additional memory allocation is expected proportional to the input data + "_convert_to_numpy", # additional memory allocation is expected proportional to the input data "TSNE", # too slow for using in testing on common data size "RandomForestRegressor", # too slow for using in testing on common data size "KMeans", # does not support GPU offloading From 2b7bbc5e6594cc74807916e6b1fb8be5504305aa Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 13:53:48 -0700 Subject: [PATCH 16/38] 
addressed test_get_namespace --- sklearnex/utils/tests/test_array_api.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py index 66b5705f9b..abafabdd3e 100644 --- a/sklearnex/utils/tests/test_array_api.py +++ b/sklearnex/utils/tests/test_array_api.py @@ -28,6 +28,9 @@ "dpctl": "dpctl.tensor", } +# TODO: +# add test suit for dpctl.tensor, dpnp.ndarray, numpy.ndarray without config_context(array_api_dispatch=True)). + @pytest.mark.parametrize( "dataframe,queue", @@ -48,7 +51,11 @@ def test_get_namespace_with_config_context(dataframe, queue): with config_context(array_api_dispatch=True): xp_out, is_array_api_compliant = get_namespace(X) assert is_array_api_compliant - assert xp_out is array_api_compat.get_namespace(X) + if not dataframe in "numpy,array_api": + # Rather than array_api_compat.get_namespace raw output + # `get_namespace` has specific wrapper classes for `numpy.ndarray` + # or `array-api-strict`. + assert xp_out == array_api_compat.get_namespace(X) @pytest.mark.skipif( @@ -80,7 +87,11 @@ def test_get_namespace_with_patching(dataframe, queue): with config_context(array_api_dispatch=True): xp_out, is_array_api_compliant = get_namespace(X) assert is_array_api_compliant - assert xp_out is array_api_compat.get_namespace(X) + if not dataframe in "numpy,array_api": + # Rather than array_api_compat.get_namespace raw output + # `get_namespace` has specific wrapper classes for `numpy.ndarray` + # or `array-api-strict`. 
+ assert xp_out == array_api_compat.get_namespace(X) @pytest.mark.skipif( From b7b8f0329d49089ebf56c5fb0ac66ff2eacb8e09 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 14:45:03 -0700 Subject: [PATCH 17/38] adding test case for validate_data check with Array API inputs --- sklearnex/utils/tests/test_array_api.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py index abafabdd3e..c8915ac1b2 100644 --- a/sklearnex/utils/tests/test_array_api.py +++ b/sklearnex/utils/tests/test_array_api.py @@ -60,7 +60,7 @@ def test_get_namespace_with_config_context(dataframe, queue): @pytest.mark.skipif( not sklearn_check_version("1.2"), - reason="array api dispatch requires sklearn 1.2 version", + reason="Array API dispatch requires sklearn 1.2 version", ) @pytest.mark.parametrize( "dataframe,queue", @@ -96,7 +96,7 @@ def test_get_namespace_with_patching(dataframe, queue): @pytest.mark.skipif( not sklearn_check_version("1.2"), - reason="array api dispatch requires sklearn 1.2 version", + reason="Array API dispatch requires sklearn 1.2 version", ) @pytest.mark.parametrize( "dataframe,queue", @@ -127,7 +127,7 @@ def test_convert_to_numpy_with_patching(dataframe, queue): @pytest.mark.skipif( not sklearn_check_version("1.2"), - reason="array api dispatch requires sklearn 1.2 version", + reason="Array API dispatch requires sklearn 1.2 version", ) @pytest.mark.parametrize( "dataframe,queue", @@ -142,8 +142,8 @@ def test_convert_to_numpy_with_patching(dataframe, queue): pytest.param(np.float64, id=np.dtype(np.float64).name), ], ) -def test_check_array_with_patching(dataframe, queue, dtype): - """Test check_array TBD with `patch_sklearn`""" +def test_validate_data_with_patching(dataframe, queue, dtype): + """Test validate_data TBD with `patch_sklearn`""" pytest.importorskip("array_api_compat") from sklearnex import patch_sklearn @@ -151,14 +151,23 @@ 
def test_check_array_with_patching(dataframe, queue, dtype): patch_sklearn() from sklearn import config_context - from sklearn.utils import check_array + from sklearn.base import BaseEstimator + + if sklearn_check_version("1.6"): + from sklearn.utils.validation import validate_data + else: + validate_data = BaseEstimator._validate_data + from sklearn.utils._array_api import _convert_to_numpy, get_namespace X_np = np.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype) X_df = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) with config_context(array_api_dispatch=True): + est = BaseEstimator() xp, _ = get_namespace(X_df) - X_df_res = check_array(X_df, accept_sparse="csr", dtype=[xp.float64, xp.float32]) + X_df_res = validate_data( + est, X_df, accept_sparse="csr", dtype=[xp.float64, xp.float32] + ) assert type(X_df) == type(X_df_res) if dataframe != "numpy": # _convert_to_numpy not designed for numpy.ndarray inputs. From 169009decb73ede720aaf28e6839b89198b5be2c Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 14:48:33 -0700 Subject: [PATCH 18/38] minor refactoring --- sklearnex/utils/_array_api.py | 2 -- sklearnex/utils/tests/test_array_api.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index ba53b72d0b..90c29bbcf2 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -21,8 +21,6 @@ from daal4py.sklearn._utils import sklearn_check_version from onedal.utils._array_api import _asarray, _get_sycl_namespace -# TODO: -# check the version of skl. 
if sklearn_check_version("1.2"): from sklearn.utils._array_api import get_namespace as sklearn_get_namespace from sklearn.utils._array_api import _convert_to_numpy as _sklearn_convert_to_numpy diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py index c8915ac1b2..b2d5a29105 100644 --- a/sklearnex/utils/tests/test_array_api.py +++ b/sklearnex/utils/tests/test_array_api.py @@ -24,10 +24,6 @@ get_dataframes_and_queues, ) -array_api_dataframes_and_namespaces = { - "dpctl": "dpctl.tensor", -} - # TODO: # add test suit for dpctl.tensor, dpnp.ndarray, numpy.ndarray without config_context(array_api_dispatch=True)). From 9ca118c29af01caec7b0f85c3d4cb5a3b38c5233 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 15:07:06 -0700 Subject: [PATCH 19/38] addressed test_patch_map_match fail --- sklearnex/tests/test_patching.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearnex/tests/test_patching.py b/sklearnex/tests/test_patching.py index 897f19172d..0cb8297f27 100755 --- a/sklearnex/tests/test_patching.py +++ b/sklearnex/tests/test_patching.py @@ -307,10 +307,12 @@ def list_all_attr(string): module_map = {i: i for i in sklearnex__all__.intersection(sklearn__all__)} - # _assert_all_finite patches an internal sklearn function which isn't - # exposed via __all__ in sklearn. It is a special case where this rule - # is not applied (e.g. it is grandfathered in). + # _assert_all_finite, _convert_to_numpy, get_namespace patch an internal + # sklearn functions which aren't exposed via __all__ in sklearn. It is a special + # case where this rule is not applied (e.g. it is grandfathered in). 
del patched["_assert_all_finite"] + del patched["_convert_to_numpy"] + del patched["get_namespace"] # remove all scikit-learn-intelex-only estimators for i in patched.copy(): From 7ddcf408822bfe4955dee00352c3ff027695fa3b Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 15:41:24 -0700 Subject: [PATCH 20/38] Added docstrings for get_namespace --- sklearnex/utils/_array_api.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index 90c29bbcf2..077d710f46 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -55,13 +55,33 @@ def _convert_to_numpy(array, xp): def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): """Get namespace of arrays. - TBD + Extends stock scikit-learn's `get_namespace` primitive to support DPCTL usm_ndarrays + and DPNP ndarrays. + If no DPCTL usm_ndarray or DPNP ndarray inputs and backend scikit-learn version supports + Array API then :obj:`sklearn.utils._array_api.get_namespace` results are drawn. + Otherwise, numpy namespace will be returned. + + Designed to work for numpy.ndarray, DPCTL usm_ndarrays and DPNP ndarrays without + `array-api-compat` or backend scikit-learn Array API support. + + For full documentation refer to :obj:`sklearn.utils._array_api.get_namespace`. Parameters ---------- *arrays : array objects Array objects. + remove_none : bool, default=True + Whether to ignore None objects passed in arrays. + + remove_types : tuple or list, default=(str,) + Types to ignore in the arrays. + + xp : module, default=None + Precomputed array namespace module. When passed, typically from a caller + that has already performed inspection of its own inputs, skips array + namespace inspection. 
+ Returns ------- namespace : module @@ -89,7 +109,16 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): def get_namespace(*arrays): """Get namespace of arrays. - TBD + Extends stock scikit-learn's `get_namespace` primitive to support DPCTL usm_ndarrays + and DPNP ndarrays. + If no DPCTL usm_ndarray or DPNP ndarray inputs and backend scikit-learn version supports + Array API then :obj:`sklearn.utils._array_api.get_namespace(*arrays)` results are drawn. + Otherwise, numpy namespace will be returned. + + Designed to work for numpy.ndarray, DPCTL usm_ndarrays and DPNP ndarrays without + `array-api-compat` or backend scikit-learn Array API support. + + For full documentation refer to :obj:`sklearn.utils._array_api.get_namespace`. Parameters ---------- From ec90d43be11fa841aee148762b9039a7a123e052 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 15:51:18 -0700 Subject: [PATCH 21/38] docstrings for Array API tests --- sklearnex/utils/tests/test_array_api.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py index b2d5a29105..67db9e10b9 100644 --- a/sklearnex/utils/tests/test_array_api.py +++ b/sklearnex/utils/tests/test_array_api.py @@ -35,7 +35,7 @@ ), ) def test_get_namespace_with_config_context(dataframe, queue): - """Test get_namespace TBD""" + """Test `get_namespace` with `array_api_dispatch` enabled.""" from sklearnex import config_context from sklearnex.utils._array_api import get_namespace @@ -65,8 +65,8 @@ def test_get_namespace_with_config_context(dataframe, queue): ), ) def test_get_namespace_with_patching(dataframe, queue): - """Test get_namespace TBD - with `patch_sklearn` + """Test `get_namespace` with `array_api_dispatch` and + `patch_sklearn` enabled. 
""" array_api_compat = pytest.importorskip("array_api_compat") @@ -101,7 +101,9 @@ def test_get_namespace_with_patching(dataframe, queue): ), ) def test_convert_to_numpy_with_patching(dataframe, queue): - """Test _convert_to_numpy TBD with `patch_sklearn`""" + """Test `_convert_to_numpy` with `array_api_dispatch` and + `patch_sklearn` enabled. + """ pytest.importorskip("array_api_compat") from sklearnex import patch_sklearn @@ -139,7 +141,9 @@ def test_convert_to_numpy_with_patching(dataframe, queue): ], ) def test_validate_data_with_patching(dataframe, queue, dtype): - """Test validate_data TBD with `patch_sklearn`""" + """Test validate_data with `array_api_dispatch` and + `patch_sklearn` enabled. + """ pytest.importorskip("array_api_compat") from sklearnex import patch_sklearn From 6e7e547522f967cca89d8cb3aef5f89c7b302c7a Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 15:59:43 -0700 Subject: [PATCH 22/38] updated minimal scikit-learn version for Array API dispatching --- sklearnex/dispatcher.py | 6 +++--- sklearnex/tests/test_patching.py | 5 +++-- sklearnex/utils/tests/test_array_api.py | 12 ++++++------ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index e49b29eb80..9e3601ff14 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -128,7 +128,7 @@ def get_patch_map_core(preview=False): from ._config import get_config as get_config_sklearnex from ._config import set_config as set_config_sklearnex - if sklearn_check_version("1.2"): + if sklearn_check_version("1.4"): import sklearn.utils._array_api as _array_api_module if sklearn_check_version("1.2.1"): @@ -168,7 +168,7 @@ def get_patch_map_core(preview=False): from .svm import NuSVC as NuSVC_sklearnex from .svm import NuSVR as NuSVR_sklearnex - if sklearn_check_version("1.2"): + if sklearn_check_version("1.4"): from .utils._array_api import _convert_to_numpy as _convert_to_numpy_sklearnex from .utils._array_api 
import get_namespace as get_namespace_sklearnex @@ -447,7 +447,7 @@ def get_patch_map_core(preview=False): mapping["_funcwrapper"] = [ [(parallel_module, "_FuncWrapper", _FuncWrapper_sklearnex), None] ] - if sklearn_check_version("1.2"): + if sklearn_check_version("1.4"): # Necessary for array_api support mapping["get_namespace"] = [ [ diff --git a/sklearnex/tests/test_patching.py b/sklearnex/tests/test_patching.py index 0cb8297f27..c7ec3b1475 100755 --- a/sklearnex/tests/test_patching.py +++ b/sklearnex/tests/test_patching.py @@ -311,8 +311,9 @@ def list_all_attr(string): # sklearn functions which aren't exposed via __all__ in sklearn. It is a special # case where this rule is not applied (e.g. it is grandfathered in). del patched["_assert_all_finite"] - del patched["_convert_to_numpy"] - del patched["get_namespace"] + if sklearn_check_version("1.4"): + del patched["_convert_to_numpy"] + del patched["get_namespace"] # remove all scikit-learn-intelex-only estimators for i in patched.copy(): diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py index 67db9e10b9..ffde3997a2 100644 --- a/sklearnex/utils/tests/test_array_api.py +++ b/sklearnex/utils/tests/test_array_api.py @@ -55,8 +55,8 @@ def test_get_namespace_with_config_context(dataframe, queue): @pytest.mark.skipif( - not sklearn_check_version("1.2"), - reason="Array API dispatch requires sklearn 1.2 version", + not sklearn_check_version("1.4"), + reason="Array API dispatch requires sklearn 1.4 version", ) @pytest.mark.parametrize( "dataframe,queue", @@ -91,8 +91,8 @@ def test_get_namespace_with_patching(dataframe, queue): @pytest.mark.skipif( - not sklearn_check_version("1.2"), - reason="Array API dispatch requires sklearn 1.2 version", + not sklearn_check_version("1.4"), + reason="Array API dispatch requires sklearn 1.4 version", ) @pytest.mark.parametrize( "dataframe,queue", @@ -124,8 +124,8 @@ def test_convert_to_numpy_with_patching(dataframe, queue): 
@pytest.mark.skipif( - not sklearn_check_version("1.2"), - reason="Array API dispatch requires sklearn 1.2 version", + not sklearn_check_version("1.4"), + reason="Array API dispatch requires sklearn 1.4 version", ) @pytest.mark.parametrize( "dataframe,queue", From e5db839f2928877ed502d9efd1da7356b83ccde0 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 16:04:45 -0700 Subject: [PATCH 23/38] updated minimal scikit-learn version for Array API dispatching in _device_offload.py _array_api.py --- sklearnex/_device_offload.py | 2 +- sklearnex/utils/_array_api.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 2d1d197746..622e4ed032 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -75,7 +75,7 @@ def dispatch(obj, method_name, branches, *args, **kwargs): return branches[backend](obj, *hostargs, **hostkwargs, queue=q) if backend == "sklearn": if ( - sklearn_check_version("1.2") + sklearn_check_version("1.4") and get_config()["array_api_dispatch"] and "array_api_support" in obj._get_tags() and obj._get_tags()["array_api_support"] diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index 077d710f46..901e242851 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -21,7 +21,7 @@ from daal4py.sklearn._utils import sklearn_check_version from onedal.utils._array_api import _asarray, _get_sycl_namespace -if sklearn_check_version("1.2"): +if sklearn_check_version("1.4"): from sklearn.utils._array_api import get_namespace as sklearn_get_namespace from sklearn.utils._array_api import _convert_to_numpy as _sklearn_convert_to_numpy @@ -44,7 +44,7 @@ def _convert_to_numpy(array, xp): return dpt.to_numpy(array) elif dpnp_available and isinstance(array, dpnp.ndarray): return dpnp.asnumpy(array) - elif sklearn_check_version("1.2"): + elif sklearn_check_version("1.4"): return 
_sklearn_convert_to_numpy(array, xp) else: return _asarray(array, xp) @@ -97,7 +97,7 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): if usm_iface: return xp_sycl_namespace, is_array_api_compliant - elif sklearn_check_version("1.2"): + elif sklearn_check_version("1.4"): return sklearn_get_namespace( *arrays, remove_none=remove_none, remove_types=remove_types, xp=xp ) @@ -140,7 +140,7 @@ def get_namespace(*arrays): if usm_iface: return xp_sycl_namespace, is_array_api_compliant - elif sklearn_check_version("1.2"): + elif sklearn_check_version("1.4"): return sklearn_get_namespace(*arrays) else: return np, False From f99a92b64018065ff913697c0341322d60f94f57 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 16:28:34 -0700 Subject: [PATCH 24/38] fix test test_get_namespace_with_config_context --- sklearnex/utils/tests/test_array_api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py index ffde3997a2..bc4756ba84 100644 --- a/sklearnex/utils/tests/test_array_api.py +++ b/sklearnex/utils/tests/test_array_api.py @@ -26,8 +26,14 @@ # TODO: # add test suit for dpctl.tensor, dpnp.ndarray, numpy.ndarray without config_context(array_api_dispatch=True)). +# TODO: +# extend for DPNP inputs. 
+@pytest.mark.skipif( + not sklearn_check_version("1.4"), + reason="Array API dispatch requires sklearn 1.4 version", +) @pytest.mark.parametrize( "dataframe,queue", get_dataframes_and_queues( From 3771fc292d02b0481966d2910d841ba140501239 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 11 Oct 2024 14:48:06 -0700 Subject: [PATCH 25/38] refactor onedal/datatypes/_data_conversion.py --- onedal/datatypes/_data_conversion.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index d1dedba81c..46384c19be 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -32,10 +32,14 @@ dpctl_available = False -def _apply_and_pass(func, *args): +def _apply_and_pass(func, *args, **kwargs): if len(args) == 1: - return func(args[0]) - return tuple(map(func, args)) + return func(args[0], **kwargs) if len(kwargs) > 0 else func(args[0]) + return ( + tuple(func(arg, **kwargs) for arg in args) + if len(kwargs) > 0 + else tuple(func(arg) for arg in args) + ) def from_table(*args): @@ -59,7 +63,7 @@ def to_table(*args): if _is_dpc_backend: from ..common._policy import _HostInteropPolicy - def _convert_to_supported(policy, *data): + def _convert_to_supported(policy, *data, xp=np): def func(x): return x @@ -71,13 +75,13 @@ def func(x): device = policy._queue.sycl_device def convert_or_pass(x): - if (x is not None) and (x.dtype == np.float64): + if (x is not None) and (x.dtype == xp.float64): warnings.warn( "Data will be converted into float32 from " "float64 because device does not support it", RuntimeWarning, ) - return x.astype(np.float32) + return x.astype(xp.float32) else: return x @@ -88,7 +92,7 @@ def convert_or_pass(x): else: - def _convert_to_supported(policy, *data): + def _convert_to_supported(policy, *data, xp=np): def func(x): return x From acbca7a58a100fd0fec09280112141ed792ad65e Mon Sep 17 00:00:00 2001 From: Samir Nasibli 
Date: Sun, 13 Oct 2024 16:08:22 -0700 Subject: [PATCH 26/38] correction for array api --- onedal/datatypes/_data_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 46384c19be..a5a85ae3c8 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -81,7 +81,7 @@ def convert_or_pass(x): "float64 because device does not support it", RuntimeWarning, ) - return x.astype(xp.float32) + return xp.astype(x, dtype=xp.float32) else: return x From 57dfde3f1fe7b00fd8275e8e085e7afebf4b2413 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sat, 19 Oct 2024 23:36:29 +0200 Subject: [PATCH 27/38] Update conftest.py --- sklearnex/conftest.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sklearnex/conftest.py b/sklearnex/conftest.py index b9415b13ff..7006f7d2bf 100644 --- a/sklearnex/conftest.py +++ b/sklearnex/conftest.py @@ -67,10 +67,12 @@ def with_sklearnex(): unpatch_sklearn() -@pytest.fixture -def with_array_api(): - if sklearn_check_version("1.2"): - with config_context(array_api_dispatch=True): - yield - else: - yield +# TODO: +# check if required. 
+#@pytest.fixture +#def with_array_api(): +# if sklearn_check_version("1.2"): +# with config_context(array_api_dispatch=True): +# yield +# else: +# yield From 0976082efdbf4a1ec68e76443087ba1d077feb6a Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 13:09:58 +0100 Subject: [PATCH 28/38] introduce tags --- sklearnex/_device_offload.py | 4 +- sklearnex/_utils.py | 16 ------- sklearnex/base.py | 57 +++++++++++++++++++++++ sklearnex/conftest.py | 4 +- sklearnex/neighbors/knn_classification.py | 3 +- sklearnex/neighbors/knn_regression.py | 3 +- sklearnex/neighbors/knn_unsupervised.py | 3 +- sklearnex/svm/_common.py | 3 +- sklearnex/utils/__init__.py | 19 +++++++- 9 files changed, 87 insertions(+), 25 deletions(-) create mode 100644 sklearnex/base.py diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index df3e1885e9..fe439a9019 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -26,6 +26,7 @@ from onedal.utils._array_api import _convert_to_dpnp from ._config import get_config +from .utils import get_tags def _get_backend(obj, queue, method_name, *data): @@ -75,8 +76,7 @@ def dispatch(obj, method_name, branches, *args, **kwargs): if ( sklearn_check_version("1.4") and get_config()["array_api_dispatch"] - and "array_api_support" in obj._get_tags() - and obj._get_tags()["array_api_support"] + and get_tags(obj)["array_api_support"] and not has_usm_data ): # USM ndarrays are also excluded for the fallback Array API. Currently, DPNP.ndarray is diff --git a/sklearnex/_utils.py b/sklearnex/_utils.py index 651ac72654..4f91e0b4ef 100755 --- a/sklearnex/_utils.py +++ b/sklearnex/_utils.py @@ -125,19 +125,3 @@ def get_hyperparameters(self, op): return cls return decorator - - -# This abstract class is meant to generate a clickable doc link for classses -# in sklearnex that are not part of base scikit-learn. 
It should be inherited -# before inheriting from a scikit-learn estimator, otherwise will get overriden -# by the estimator's original. -class IntelEstimator(ABC): - @property - def _doc_link_module(self) -> str: - return "sklearnex" - - @property - def _doc_link_template(self) -> str: - module_path, _ = self.__class__.__module__.rsplit(".", 1) - class_name = self.__class__.__name__ - return f"https://intel.github.io/scikit-learn-intelex/latest/non-scikit-algorithms.html#{module_path}.{class_name}" diff --git a/sklearnex/base.py b/sklearnex/base.py new file mode 100644 index 0000000000..9d83f98f46 --- /dev/null +++ b/sklearnex/base.py @@ -0,0 +1,57 @@ +# =============================================================================== +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== + +from abc import ABC + +from daal4py.sklearn._utils import sklearn_check_version + + +class IntelEstimator(ABC): + + if sklearn_check_version("1.6"): + # Starting in sklearn 1.6, _more_tags is deprecated. An IntelEstimator + # is defined to handle the various versioning issues with the tags and + # with the ongoing rollout of sklearn's array_api support. This will make + # maintenance easier, and centralize tag changes to a single location. 
+ + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.onedal_array_api = False + return tags + + elif sklearn_check_version("1.3"): + + def _more_tags(self): + return {"onedal_array_api": False} + + else: + # array_api_support tag was added in sklearn 1.3 via scikit-learn/scikit-learn#26372 + def _more_tags(self): + return {"array_api_support": False, "onedal_array_api": False} + + if sklearn_check_version("1.4"): + + def _get_doc_link(self) -> str: + # This method is meant to generate a clickable doc link for classses + # in sklearnex that are not part of base scikit-learn. It should be + # inherited before inheriting from a scikit-learn estimator, otherwise + # will get overriden by the estimator's original. + url = super()._get_doc_link() + if not url: + module_path, _ = self.__class__.__module__.rsplit(".", 1) + class_name = self.__class__.__name__ + url = f"https://intel.github.io/scikit-learn-intelex/latest/non-scikit-algorithms.html#{module_path}.{class_name}" + return url diff --git a/sklearnex/conftest.py b/sklearnex/conftest.py index 4b126b14ba..78cf8771cb 100644 --- a/sklearnex/conftest.py +++ b/sklearnex/conftest.py @@ -69,8 +69,8 @@ def with_sklearnex(): # TODO: # check if required. 
-#@pytest.fixture -#def with_array_api(): +# @pytest.fixture +# def with_array_api(): # if sklearn_check_version("1.2"): # with config_context(array_api_dispatch=True): # yield diff --git a/sklearnex/neighbors/knn_classification.py b/sklearnex/neighbors/knn_classification.py index 3b9871b4cf..b661f01ab8 100755 --- a/sklearnex/neighbors/knn_classification.py +++ b/sklearnex/neighbors/knn_classification.py @@ -26,6 +26,7 @@ from onedal.neighbors import KNeighborsClassifier as onedal_KNeighborsClassifier from .._device_offload import dispatch, wrap_output_data +from ..utils import get_tags from .common import KNeighborsDispatchingBase if sklearn_check_version("1.6"): @@ -184,7 +185,7 @@ def _onedal_fit(self, X, y, queue=None): } try: - requires_y = self._get_tags()["requires_y"] + requires_y = self.get_tags()["requires_y"] except KeyError: requires_y = False diff --git a/sklearnex/neighbors/knn_regression.py b/sklearnex/neighbors/knn_regression.py index 5889687498..1112eb7f16 100755 --- a/sklearnex/neighbors/knn_regression.py +++ b/sklearnex/neighbors/knn_regression.py @@ -25,6 +25,7 @@ from onedal.neighbors import KNeighborsRegressor as onedal_KNeighborsRegressor from .._device_offload import dispatch, wrap_output_data +from ..utils import get_tags from .common import KNeighborsDispatchingBase if sklearn_check_version("1.6"): @@ -166,7 +167,7 @@ def _onedal_fit(self, X, y, queue=None): } try: - requires_y = self._get_tags()["requires_y"] + requires_y = get_tags(self)["requires_y"] except KeyError: requires_y = False diff --git a/sklearnex/neighbors/knn_unsupervised.py b/sklearnex/neighbors/knn_unsupervised.py index d76e220cae..fa32b9cb7f 100755 --- a/sklearnex/neighbors/knn_unsupervised.py +++ b/sklearnex/neighbors/knn_unsupervised.py @@ -22,6 +22,7 @@ from onedal.neighbors import NearestNeighbors as onedal_NearestNeighbors from .._device_offload import dispatch, wrap_output_data +from ..utils import get_tags from .common import KNeighborsDispatchingBase if 
sklearn_check_version("1.6"): @@ -140,7 +141,7 @@ def _onedal_fit(self, X, y=None, queue=None): } try: - requires_y = self._get_tags()["requires_y"] + requires_y = get_tags(self)["requires_y"] except KeyError: requires_y = False diff --git a/sklearnex/svm/_common.py b/sklearnex/svm/_common.py index 4b481314ae..416455e17c 100644 --- a/sklearnex/svm/_common.py +++ b/sklearnex/svm/_common.py @@ -30,6 +30,7 @@ from .._config import config_context, get_config from .._utils import PatchingConditionsChain +from ..utils import get_tags if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data @@ -156,7 +157,7 @@ def _onedal_fit_checks(self, X, y, sample_weight=None): ) if y is None: - if self._get_tags()["requires_y"]: + if get_tags(self)["requires_y"]: raise ValueError( f"This {self.__class__.__name__} estimator " f"requires y to be passed, but the target y is None." diff --git a/sklearnex/utils/__init__.py b/sklearnex/utils/__init__.py index 4c3fe21154..cf77ebc318 100755 --- a/sklearnex/utils/__init__.py +++ b/sklearnex/utils/__init__.py @@ -16,4 +16,21 @@ from .validation import _assert_all_finite -__all__ = ["_assert_all_finite"] +from daal4py.sklearn._utils import sklearn_check_version + +# Not an ideal solution, but this converts the outputs of newer sklearnex tags +# into dicts to match how tags had been used. Someone more clever than me will +# have to find a way of converting older tags into newer ones instead (with +# minimal impact on performance). 
+ +if sklearn_check_version("1.6"): + from sklearn.utils import get_tags as _sklearn_get_tags + + get_tags = lambda estimator: _sklearn_get_tags(estimator).__dict__ + +else: + from sklearn.base import BaseEstimator + + get_tags = BaseEstimator._get_tags + +__all__ = ["_assert_all_finite", "get_tags"] From c47daf397068e34c89cce414300127adf91bf381 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 13:10:56 +0100 Subject: [PATCH 29/38] fix imports --- sklearnex/utils/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/__init__.py b/sklearnex/utils/__init__.py index cf77ebc318..e771dd5ab2 100755 --- a/sklearnex/utils/__init__.py +++ b/sklearnex/utils/__init__.py @@ -14,10 +14,10 @@ # limitations under the License. # =============================================================================== -from .validation import _assert_all_finite - from daal4py.sklearn._utils import sklearn_check_version +from .validation import _assert_all_finite + # Not an ideal solution, but this converts the outputs of newer sklearnex tags # into dicts to match how tags had been used. 
Someone more clever than me will # have to find a way of converting older tags into newer ones instead (with From b64f9629ad6446fd6b21414132113263ec4d3645 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 13:15:56 +0100 Subject: [PATCH 30/38] see if this works --- sklearnex/basic_statistics/basic_statistics.py | 3 ++- sklearnex/basic_statistics/incremental_basic_statistics.py | 3 ++- sklearnex/covariance/incremental_covariance.py | 3 ++- sklearnex/linear_model/incremental_linear.py | 3 ++- sklearnex/linear_model/incremental_ridge.py | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/sklearnex/basic_statistics/basic_statistics.py b/sklearnex/basic_statistics/basic_statistics.py index da82e3bd82..0600aec23e 100644 --- a/sklearnex/basic_statistics/basic_statistics.py +++ b/sklearnex/basic_statistics/basic_statistics.py @@ -26,7 +26,8 @@ from onedal.basic_statistics import BasicStatistics as onedal_BasicStatistics from .._device_offload import dispatch -from .._utils import IntelEstimator, PatchingConditionsChain +from .._utils import PatchingConditionsChain +from ..base import IntelEstimator if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data diff --git a/sklearnex/basic_statistics/incremental_basic_statistics.py b/sklearnex/basic_statistics/incremental_basic_statistics.py index d1ddcd55dc..664d0fd811 100644 --- a/sklearnex/basic_statistics/incremental_basic_statistics.py +++ b/sklearnex/basic_statistics/incremental_basic_statistics.py @@ -26,7 +26,8 @@ ) from .._device_offload import dispatch -from .._utils import IntelEstimator, PatchingConditionsChain +from .._utils import PatchingConditionsChain +from ..base import IntelEstimator if sklearn_check_version("1.2"): from sklearn.utils._param_validation import Interval, StrOptions diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index 89ed92b601..4b9bf99563 100644 --- 
a/sklearnex/covariance/incremental_covariance.py +++ b/sklearnex/covariance/incremental_covariance.py @@ -33,7 +33,8 @@ from sklearnex import config_context from .._device_offload import dispatch, wrap_output_data -from .._utils import IntelEstimator, PatchingConditionsChain, register_hyperparameters +from .._utils import PatchingConditionsChain, register_hyperparameters +from ..base import IntelEstimator from ..metrics import pairwise_distances from ..utils._array_api import get_namespace diff --git a/sklearnex/linear_model/incremental_linear.py b/sklearnex/linear_model/incremental_linear.py index c52be49ca6..4dedac1435 100644 --- a/sklearnex/linear_model/incremental_linear.py +++ b/sklearnex/linear_model/incremental_linear.py @@ -40,7 +40,8 @@ from onedal.common.hyperparameters import get_hyperparameters from .._device_offload import dispatch, wrap_output_data -from .._utils import IntelEstimator, PatchingConditionsChain, register_hyperparameters +from .._utils import PatchingConditionsChain, register_hyperparameters +from ..base import IntelEstimator @register_hyperparameters( diff --git a/sklearnex/linear_model/incremental_ridge.py b/sklearnex/linear_model/incremental_ridge.py index e750491ef9..25136ec1e1 100644 --- a/sklearnex/linear_model/incremental_ridge.py +++ b/sklearnex/linear_model/incremental_ridge.py @@ -33,6 +33,7 @@ from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain +from ..base import IntelEstimator if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data @@ -43,7 +44,7 @@ @control_n_jobs( decorated_methods=["fit", "partial_fit", "predict", "score", "_onedal_finalize_fit"] ) -class IncrementalRidge(MultiOutputMixin, RegressorMixin, BaseEstimator): +class IncrementalRidge(IntelEstimator, MultiOutputMixin, RegressorMixin, BaseEstimator): """ Incremental estimator for Ridge Regression. Allows to train Ridge Regression if data is splitted into batches. 
From 9e8edd7f1661f1d061a7f3aacd157845eb460b57 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 13:38:47 +0100 Subject: [PATCH 31/38] really lazy logic introduction --- sklearnex/_device_offload.py | 58 ++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index fe439a9019..884ede1211 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -61,35 +61,47 @@ def _get_backend(obj, queue, method_name, *data): def dispatch(obj, method_name, branches, *args, **kwargs): q = _get_global_queue() - has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) - has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) - hostkwargs = dict(zip(kwargs.keys(), hostvalues)) - backend, q, patching_status = _get_backend(obj, q, method_name, *hostargs) - has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs + array_api_offload = ( + "array_api_dispatch" in get_config() and get_config()["array_api_dispatch"] + ) + # we only guarantee onedal_cpu_supported and onedal_gpu_supported are generalized to non-numpy inputs + # for zero copy estimators. this will eventually be deprecated when all estimators are zero-copy generalized + onedal_array_api = array_api_offload and get_tags(obj)["onedal_array_api"] + + # We need to avoid a copy to host here if zero_copy supported + if onedal_array_api: + backend, q, patching_status = _get_backend(obj, q, method_name, *args) + else: + has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) + has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) + hostkwargs = dict(zip(kwargs.keys(), hostvalues)) + backend, q, patching_status = _get_backend(obj, q, method_name, *hostargs) + has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs + if backend == "onedal": - # Host args only used before onedal backend call. 
- # Device will be offloaded when onedal backend will be called. - patching_status.write_log(queue=q, transferred_to_host=False) - return branches[backend](obj, *hostargs, **hostkwargs, queue=q) + if onedal_array_api: + # Host args only used before onedal backend call. + # Device will be offloaded when onedal backend will be called. + patching_status.write_log(queue=q, transferred_to_host=False) + return branches[backend](obj, *args, **kwargs, queue=q) + else: + patching_status.write_log(queue=q, transferred_to_host=False) + return branches[backend](obj, *hostargs, **hostkwargs, queue=q) if backend == "sklearn": - if ( - sklearn_check_version("1.4") - and get_config()["array_api_dispatch"] - and get_tags(obj)["array_api_support"] - and not has_usm_data - ): - # USM ndarrays are also excluded for the fallback Array API. Currently, DPNP.ndarray is - # not compliant with the Array API standard, and DPCTL usm_ndarray Array API is compliant, - # except for the linalg module. There is no guarantee that stock scikit-learn will - # work with such input data. The condition will be updated after DPNP.ndarray and - # DPCTL usm_ndarray enabling for conformance testing and these arrays supportance - # of the fallback cases. - # If `array_api_dispatch` enabled and array api is supported for the stock scikit-learn, - # then raw inputs are used for the fallback. + if array_api_offload and get_tags(obj)["array_api_support"]: + # dpnp fallback is not handled properly yet. 
patching_status.write_log(transferred_to_host=False) return branches[backend](obj, *args, **kwargs) else: + # This is ugly logic, but I need to get this off the ground + if onedal_array_api: + has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) + has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host( + q, *kwargs.values() + ) + hostkwargs = dict(zip(kwargs.keys(), hostvalues)) + patching_status.write_log() return branches[backend](obj, *hostargs, **hostkwargs) raise RuntimeError( From 6db82b24554d5911154d70f9e6f4d26013b4e516 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 13:49:55 +0100 Subject: [PATCH 32/38] introduce IntelEstimator --- sklearnex/cluster/dbscan.py | 5 +++-- sklearnex/cluster/k_means.py | 3 ++- sklearnex/decomposition/pca.py | 3 ++- sklearnex/ensemble/_forest.py | 7 ++++--- sklearnex/linear_model/linear.py | 3 ++- sklearnex/linear_model/logistic_regression.py | 5 +++-- sklearnex/linear_model/ridge.py | 3 ++- sklearnex/neighbors/common.py | 3 ++- sklearnex/preview/covariance/covariance.py | 3 ++- sklearnex/preview/decomposition/incremental_pca.py | 3 ++- sklearnex/svm/_common.py | 3 ++- sklearnex/svm/nusvc.py | 2 +- sklearnex/svm/nusvr.py | 2 +- sklearnex/svm/svc.py | 2 +- sklearnex/svm/svr.py | 2 +- 15 files changed, 30 insertions(+), 19 deletions(-) diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index ef5f6b78d9..0d81eb1bd6 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -27,6 +27,7 @@ from .._device_offload import dispatch from .._utils import PatchingConditionsChain +from ..base import IntelEstimator if sklearn_check_version("1.1") and not sklearn_check_version("1.2"): from sklearn.utils import check_scalar @@ -37,7 +38,7 @@ validate_data = _sklearn_DBSCAN._validate_data -class BaseDBSCAN(ABC): +class BaseDBSCAN(IntelEstimator): def _onedal_dbscan(self, **onedal_params): return onedal_DBSCAN(**onedal_params) @@ -51,7 +52,7 @@ def _save_attributes(self): 
@control_n_jobs(decorated_methods=["fit"]) -class DBSCAN(_sklearn_DBSCAN, BaseDBSCAN): +class DBSCAN(BaseDBSCAN, _sklearn_DBSCAN): __doc__ = _sklearn_DBSCAN.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py index 4ba75ca5b8..c092dae9c3 100644 --- a/sklearnex/cluster/k_means.py +++ b/sklearnex/cluster/k_means.py @@ -40,6 +40,7 @@ from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain + from ..base import IntelEstimator if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data @@ -47,7 +48,7 @@ validate_data = _sklearn_KMeans._validate_data @control_n_jobs(decorated_methods=["fit", "fit_transform", "predict", "score"]) - class KMeans(_sklearn_KMeans): + class KMeans(IntelEstimator, _sklearn_KMeans): __doc__ = _sklearn_KMeans.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/decomposition/pca.py b/sklearnex/decomposition/pca.py index 143587aa16..3e5616d7ae 100755 --- a/sklearnex/decomposition/pca.py +++ b/sklearnex/decomposition/pca.py @@ -32,6 +32,7 @@ from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain + from ..base import IntelEstimator from ..utils._array_api import get_namespace if sklearn_check_version("1.1") and not sklearn_check_version("1.2"): @@ -50,7 +51,7 @@ validate_data = _sklearn_PCA._validate_data @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"]) - class PCA(_sklearn_PCA): + class PCA(IntelEstimator, _sklearn_PCA): __doc__ = _sklearn_PCA.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 2a04962645..bd485726b4 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -62,6 +62,7 @@ from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain +from ..base import IntelEstimator from ..utils._array_api 
import get_namespace if sklearn_check_version("1.2"): @@ -75,7 +76,7 @@ validate_data = BaseEstimator._validate_data -class BaseForest(ABC): +class BaseForest(IntelEstimator): _onedal_factory = None def _onedal_fit(self, X, y, sample_weight=None, queue=None): @@ -402,7 +403,7 @@ def base_estimator(self, estimator): self.estimator = estimator -class ForestClassifier(_sklearn_ForestClassifier, BaseForest): +class ForestClassifier(BaseForest, _sklearn_ForestClassifier): # Surprisingly, even though scikit-learn warns against using # their ForestClassifier directly, it actually has a more stable # API than the user-facing objects (over time). If they change it @@ -851,7 +852,7 @@ def _onedal_score(self, X, y, sample_weight=None, queue=None): ) -class ForestRegressor(_sklearn_ForestRegressor, BaseForest): +class ForestRegressor(BaseForest, _sklearn_ForestRegressor): _err = "out_of_bag_error_r2|out_of_bag_error_prediction" _get_tree_state = staticmethod(get_tree_state_reg) diff --git a/sklearnex/linear_model/linear.py b/sklearnex/linear_model/linear.py index fb7eca8cf1..71f85d2a15 100644 --- a/sklearnex/linear_model/linear.py +++ b/sklearnex/linear_model/linear.py @@ -28,6 +28,7 @@ from .._config import get_config from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain, get_patch_message, register_hyperparameters +from ..base import IntelEstimator if sklearn_check_version("1.0") and not sklearn_check_version("1.2"): from sklearn.linear_model._base import _deprecate_normalize @@ -47,7 +48,7 @@ @register_hyperparameters({"fit": get_hyperparameters("linear_regression", "train")}) @control_n_jobs(decorated_methods=["fit", "predict", "score"]) -class LinearRegression(_sklearn_LinearRegression): +class LinearRegression(IntelEstimator, _sklearn_LinearRegression): __doc__ = _sklearn_LinearRegression.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/linear_model/logistic_regression.py 
b/sklearnex/linear_model/logistic_regression.py index 01e944c74f..be195aedb7 100644 --- a/sklearnex/linear_model/logistic_regression.py +++ b/sklearnex/linear_model/logistic_regression.py @@ -39,6 +39,7 @@ from .._config import get_config from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain, get_patch_message + from ..base import IntelEstimator if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data @@ -47,7 +48,7 @@ _sparsity_enabled = daal_check_version((2024, "P", 700)) - class BaseLogisticRegression(ABC): + class BaseLogisticRegression(IntelEstimator): def _onedal_gpu_save_attributes(self): assert hasattr(self, "_onedal_estimator") self.classes_ = self._onedal_estimator.classes_ @@ -65,7 +66,7 @@ def _onedal_gpu_save_attributes(self): "score", ] ) - class LogisticRegression(_sklearn_LogisticRegression, BaseLogisticRegression): + class LogisticRegression(BaseLogisticRegression, _sklearn_LogisticRegression): __doc__ = _sklearn_LogisticRegression.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index 85d6714905..74ff42cdbe 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -39,6 +39,7 @@ from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain + from ..base import IntelEstimator if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data @@ -46,7 +47,7 @@ validate_data = _sklearn_Ridge._validate_data @control_n_jobs(decorated_methods=["fit", "predict", "score"]) - class Ridge(_sklearn_Ridge): + class Ridge(IntelEstimator, _sklearn_Ridge): __doc__ = _sklearn_Ridge.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/neighbors/common.py b/sklearnex/neighbors/common.py index 0ad5a62dd1..d405b2c28f 100644 --- a/sklearnex/neighbors/common.py +++ b/sklearnex/neighbors/common.py @@ -28,10 +28,11 @@ from 
onedal.utils import _check_array, _num_features, _num_samples from .._utils import PatchingConditionsChain +from ..base import IntelEstimator from ..utils._array_api import get_namespace -class KNeighborsDispatchingBase: +class KNeighborsDispatchingBase(IntelEstimator): def _fit_validation(self, X, y=None): if sklearn_check_version("1.2"): self._validate_params() diff --git a/sklearnex/preview/covariance/covariance.py b/sklearnex/preview/covariance/covariance.py index 04bdc0be8d..668a6e617a 100644 --- a/sklearnex/preview/covariance/covariance.py +++ b/sklearnex/preview/covariance/covariance.py @@ -30,6 +30,7 @@ from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain, register_hyperparameters +from ...base import IntelEstimator if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data @@ -39,7 +40,7 @@ @register_hyperparameters({"fit": get_hyperparameters("covariance", "compute")}) @control_n_jobs(decorated_methods=["fit", "mahalanobis"]) -class EmpiricalCovariance(_sklearn_EmpiricalCovariance): +class EmpiricalCovariance(IntelEstimator, _sklearn_EmpiricalCovariance): __doc__ = _sklearn_EmpiricalCovariance.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/preview/decomposition/incremental_pca.py b/sklearnex/preview/decomposition/incremental_pca.py index fdf13e0817..51aeaa0c51 100644 --- a/sklearnex/preview/decomposition/incremental_pca.py +++ b/sklearnex/preview/decomposition/incremental_pca.py @@ -24,6 +24,7 @@ from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain +from ...base import IntelEstimator if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data @@ -34,7 +35,7 @@ @control_n_jobs( decorated_methods=["fit", "partial_fit", "transform", "_onedal_finalize_fit"] ) -class IncrementalPCA(_sklearn_IncrementalPCA): +class IncrementalPCA(IntelEstimator, _sklearn_IncrementalPCA): def __init__(self, 
n_components=None, *, whiten=False, copy=True, batch_size=None): super().__init__( diff --git a/sklearnex/svm/_common.py b/sklearnex/svm/_common.py index 416455e17c..0971b405d1 100644 --- a/sklearnex/svm/_common.py +++ b/sklearnex/svm/_common.py @@ -30,6 +30,7 @@ from .._config import config_context, get_config from .._utils import PatchingConditionsChain +from ..base import IntelEstimator from ..utils import get_tags if sklearn_check_version("1.6"): @@ -38,7 +39,7 @@ validate_data = BaseEstimator._validate_data -class BaseSVM(BaseEstimator, ABC): +class BaseSVM(IntelEstimator): @property def _dual_coef_(self): diff --git a/sklearnex/svm/nusvc.py b/sklearnex/svm/nusvc.py index 301d90ccc4..82dd8a776f 100644 --- a/sklearnex/svm/nusvc.py +++ b/sklearnex/svm/nusvc.py @@ -45,7 +45,7 @@ @control_n_jobs( decorated_methods=["fit", "predict", "_predict_proba", "decision_function", "score"] ) -class NuSVC(_sklearn_NuSVC, BaseSVC): +class NuSVC(BaseSVC, _sklearn_NuSVC): __doc__ = _sklearn_NuSVC.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/svm/nusvr.py b/sklearnex/svm/nusvr.py index 6c746174ac..25ef27d1c5 100644 --- a/sklearnex/svm/nusvr.py +++ b/sklearnex/svm/nusvr.py @@ -36,7 +36,7 @@ @control_n_jobs(decorated_methods=["fit", "predict", "score"]) -class NuSVR(_sklearn_NuSVR, BaseSVR): +class NuSVR(BaseSVR, _sklearn_NuSVR): __doc__ = _sklearn_NuSVR.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/svm/svc.py b/sklearnex/svm/svc.py index bf5e7f32fc..32339f6c9a 100644 --- a/sklearnex/svm/svc.py +++ b/sklearnex/svm/svc.py @@ -47,7 +47,7 @@ @control_n_jobs( decorated_methods=["fit", "predict", "_predict_proba", "decision_function", "score"] ) -class SVC(_sklearn_SVC, BaseSVC): +class SVC(BaseSVC, _sklearn_SVC): __doc__ = _sklearn_SVC.__doc__ if sklearn_check_version("1.2"): diff --git a/sklearnex/svm/svr.py b/sklearnex/svm/svr.py index ff2641bea0..72cbf6d25d 100644 --- a/sklearnex/svm/svr.py +++ b/sklearnex/svm/svr.py @@ -32,7 +32,7 @@ 
@control_n_jobs(decorated_methods=["fit", "predict", "score"]) -class SVR(_sklearn_SVR, BaseSVR): +class SVR(BaseSVR, _sklearn_SVR): __doc__ = _sklearn_SVR.__doc__ if sklearn_check_version("1.2"): From b26b0d1239d288210f860108c855f63629b0fffd Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 14:00:06 +0100 Subject: [PATCH 33/38] missing change in knn --- sklearnex/neighbors/knn_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/neighbors/knn_classification.py b/sklearnex/neighbors/knn_classification.py index b661f01ab8..2c7d6e5a79 100755 --- a/sklearnex/neighbors/knn_classification.py +++ b/sklearnex/neighbors/knn_classification.py @@ -185,7 +185,7 @@ def _onedal_fit(self, X, y, queue=None): } try: - requires_y = self.get_tags()["requires_y"] + requires_y = get_tags(self)["requires_y"] except KeyError: requires_y = False From 9718ad8701cabb73e72171ae3c6715cb91a11d9b Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 14:19:54 +0100 Subject: [PATCH 34/38] reconfigure logic --- sklearnex/_device_offload.py | 46 +++++++++++++++++------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 884ede1211..34893af44b 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -65,43 +65,41 @@ def dispatch(obj, method_name, branches, *args, **kwargs): array_api_offload = ( "array_api_dispatch" in get_config() and get_config()["array_api_dispatch"] ) - # we only guarantee onedal_cpu_supported and onedal_gpu_supported are generalized to non-numpy inputs - # for zero copy estimators. 
this will eventually be deprecated when all estimators are zero-copy generalized + onedal_array_api = array_api_offload and get_tags(obj)["onedal_array_api"] + sklearn_array_api = array_api_offload and get_tags(obj)["array_api_support"] # We need to avoid a copy to host here if zero_copy supported + backend = "" if onedal_array_api: backend, q, patching_status = _get_backend(obj, q, method_name, *args) - else: - has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) - has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) - hostkwargs = dict(zip(kwargs.keys(), hostvalues)) + if backend == "onedal": + patching_status.write_log(queue=q, transferred_to_host=False) + return branches[backend](obj, *args, **kwargs, queue=q) + if sklearn_array_api and backend == "sklearn": + patching_status.write_log(transferred_to_host=False) + return branches[backend](obj, *args, **kwargs) + + # move to host because it is necessary for checking + # we only guarantee onedal_cpu_supported and onedal_gpu_supported are generalized to non-numpy inputs + # for zero copy estimators. this will eventually be deprecated when all estimators are zero-copy generalized + has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) + has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) + hostkwargs = dict(zip(kwargs.keys(), hostvalues)) + has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs + + if not backend: backend, q, patching_status = _get_backend(obj, q, method_name, *hostargs) - has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs if backend == "onedal": - if onedal_array_api: - # Host args only used before onedal backend call. - # Device will be offloaded when onedal backend will be called. 
- patching_status.write_log(queue=q, transferred_to_host=False) - return branches[backend](obj, *args, **kwargs, queue=q) - else: - patching_status.write_log(queue=q, transferred_to_host=False) - return branches[backend](obj, *hostargs, **hostkwargs, queue=q) + patching_status.write_log(queue=q, transferred_to_host=False) + return branches[backend](obj, *hostargs, **hostkwargs, queue=q) if backend == "sklearn": - if array_api_offload and get_tags(obj)["array_api_support"]: + if sklearn_array_api and not has_usm_data: # dpnp fallback is not handled properly yet. patching_status.write_log(transferred_to_host=False) return branches[backend](obj, *args, **kwargs) else: - # This is ugly logic, but I need to get this off the ground - if onedal_array_api: - has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) - has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host( - q, *kwargs.values() - ) - hostkwargs = dict(zip(kwargs.keys(), hostvalues)) - patching_status.write_log() return branches[backend](obj, *hostargs, **hostkwargs) raise RuntimeError( From b03f3f9ee5255da9f7fbd643d801ab7ca66a2525 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 14:28:25 +0100 Subject: [PATCH 35/38] strip out dpnp/dpctl special code, will come back to it later --- sklearnex/conftest.py | 16 ++- sklearnex/dispatcher.py | 25 ----- sklearnex/tests/test_memory_usage.py | 2 - sklearnex/tests/test_patching.py | 9 +- sklearnex/utils/_array_api.py | 150 ++++++++------------------- 5 files changed, 53 insertions(+), 149 deletions(-) diff --git a/sklearnex/conftest.py b/sklearnex/conftest.py index 78cf8771cb..4ecad5383b 100644 --- a/sklearnex/conftest.py +++ b/sklearnex/conftest.py @@ -67,15 +67,13 @@ def with_sklearnex(): unpatch_sklearn() -# TODO: -# check if required. 
-# @pytest.fixture -# def with_array_api(): -# if sklearn_check_version("1.2"): -# with config_context(array_api_dispatch=True): -# yield -# else: -# yield +@pytest.fixture +def with_array_api(): + if sklearn_check_version("1.2"): + with config_context(array_api_dispatch=True): + yield + else: + yield @pytest.fixture diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index 068e992b77..c15459cd6e 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -119,9 +119,6 @@ def get_patch_map_core(preview=False): from ._config import get_config as get_config_sklearnex from ._config import set_config as set_config_sklearnex - if sklearn_check_version("1.4"): - import sklearn.utils._array_api as _array_api_module - if sklearn_check_version("1.2.1"): from .utils.parallel import _FuncWrapper as _FuncWrapper_sklearnex else: @@ -159,10 +156,6 @@ def get_patch_map_core(preview=False): from .svm import NuSVC as NuSVC_sklearnex from .svm import NuSVR as NuSVR_sklearnex - if sklearn_check_version("1.4"): - from .utils._array_api import _convert_to_numpy as _convert_to_numpy_sklearnex - from .utils._array_api import get_namespace as get_namespace_sklearnex - # DBSCAN mapping.pop("dbscan") mapping["dbscan"] = [[(cluster_module, "DBSCAN", DBSCAN_sklearnex), None]] @@ -438,24 +431,6 @@ def get_patch_map_core(preview=False): mapping["_funcwrapper"] = [ [(parallel_module, "_FuncWrapper", _FuncWrapper_sklearnex), None] ] - if sklearn_check_version("1.4"): - # Necessary for array_api support - mapping["get_namespace"] = [ - [ - ( - _array_api_module, - "get_namespace", - get_namespace_sklearnex, - ), - None, - ] - ] - mapping["_convert_to_numpy"] = [ - [ - (_array_api_module, "_convert_to_numpy", _convert_to_numpy_sklearnex), - None, - ] - ] return mapping diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 7cabd3871e..aa92df1d6a 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py 
@@ -52,7 +52,6 @@ CPU_SKIP_LIST = ( - "_convert_to_numpy", # additional memory allocation is expected proportional to the input data "TSNE", # too slow for using in testing on common data size "config_context", # does not malloc "get_config", # does not malloc @@ -67,7 +66,6 @@ ) GPU_SKIP_LIST = ( - "_convert_to_numpy", # additional memory allocation is expected proportional to the input data "TSNE", # too slow for using in testing on common data size "RandomForestRegressor", # too slow for using in testing on common data size "KMeans", # does not support GPU offloading diff --git a/sklearnex/tests/test_patching.py b/sklearnex/tests/test_patching.py index d1ef52e508..036ebf6412 100755 --- a/sklearnex/tests/test_patching.py +++ b/sklearnex/tests/test_patching.py @@ -299,13 +299,10 @@ def list_all_attr(string): module_map = {i: i for i in sklearnex__all__.intersection(sklearn__all__)} - # _assert_all_finite, _convert_to_numpy, get_namespace patch an internal - # sklearn functions which aren't exposed via __all__ in sklearn. It is a special - # case where this rule is not applied (e.g. it is grandfathered in). + # _assert_all_finite patches an internal sklearn function which isn't + # exposed via __all__ in sklearn. It is a special case where this rule + # is not applied (e.g. it is grandfathered in). 
del patched["_assert_all_finite"] - if sklearn_check_version("1.4"): - del patched["_convert_to_numpy"] - del patched["get_namespace"] # remove all scikit-learn-intelex-only estimators for i in patched.copy(): diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index 901e242851..bc30be5021 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -19,128 +19,64 @@ import numpy as np from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils._array_api import _asarray, _get_sycl_namespace +from onedal.utils._array_api import _get_sycl_namespace -if sklearn_check_version("1.4"): +if sklearn_check_version("1.2"): from sklearn.utils._array_api import get_namespace as sklearn_get_namespace - from sklearn.utils._array_api import _convert_to_numpy as _sklearn_convert_to_numpy -from onedal._device_offload import dpctl_available, dpnp_available -if dpctl_available: - import dpctl.tensor as dpt +def get_namespace(*arrays): + """Get namespace of arrays. -if dpnp_available: - import dpnp + Introspect `arrays` arguments and return their common Array API + compatible namespace object, if any. NumPy 1.22 and later can + construct such containers using the `numpy.array_api` namespace + for instance. + This function will return the namespace of SYCL-related arrays + which define the __sycl_usm_array_interface__ attribute + regardless of array_api support, the configuration of + array_api_dispatch, or scikit-learn version. 
-def _convert_to_numpy(array, xp): - """Convert X into a NumPy ndarray on the CPU.""" - xp_name = xp.__name__ + See: https://numpy.org/neps/nep-0047-array-api-standard.html - if dpctl_available and xp_name in { - "dpctl.tensor", - }: - return dpt.to_numpy(array) - elif dpnp_available and isinstance(array, dpnp.ndarray): - return dpnp.asnumpy(array) - elif sklearn_check_version("1.4"): - return _sklearn_convert_to_numpy(array, xp) - else: - return _asarray(array, xp) - - -if sklearn_check_version("1.5"): - - def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): - """Get namespace of arrays. - - Extends stock scikit-learn's `get_namespace` primitive to support DPCTL usm_ndarrays - and DPNP ndarrays. - If no DPCTL usm_ndarray or DPNP ndarray inputs and backend scikit-learn version supports - Array API then :obj:`sklearn.utils._array_api.get_namespace` results are drawn. - Otherwise, numpy namespace will be returned. - - Designed to work for numpy.ndarray, DPCTL usm_ndarrays and DPNP ndarrays without - `array-api-compat` or backend scikit-learn Array API support. - - For full documentation refer to :obj:`sklearn.utils._array_api.get_namespace`. - - Parameters - ---------- - *arrays : array objects - Array objects. + If `arrays` are regular numpy arrays, an instance of the + `_NumPyApiWrapper` compatibility wrapper is returned instead. - remove_none : bool, default=True - Whether to ignore None objects passed in arrays. + Namespace support is not enabled by default. To enabled it + call: - remove_types : tuple or list, default=(str,) - Types to ignore in the arrays. + sklearn.set_config(array_api_dispatch=True) - xp : module, default=None - Precomputed array namespace module. When passed, typically from a caller - that has already performed inspection of its own inputs, skips array - namespace inspection. + or: - Returns - ------- - namespace : module - Namespace shared by array objects. 
+ with sklearn.config_context(array_api_dispatch=True): + # your code here - is_array_api : bool - True of the arrays are containers that implement the Array API spec. - """ + Otherwise an instance of the `_NumPyApiWrapper` + compatibility wrapper is always returned irrespective of + the fact that arrays implement the `__array_namespace__` + protocol or not. - usm_iface, xp_sycl_namespace, is_array_api_compliant = _get_sycl_namespace( - *arrays - ) + Parameters + ---------- + *arrays : array objects + Array objects. - if usm_iface: - return xp_sycl_namespace, is_array_api_compliant - elif sklearn_check_version("1.4"): - return sklearn_get_namespace( - *arrays, remove_none=remove_none, remove_types=remove_types, xp=xp - ) - else: - return np, False + Returns + ------- + namespace : module + Namespace shared by array objects. -else: + is_array_api : bool + True of the arrays are containers that implement the Array API spec. + """ - def get_namespace(*arrays): - """Get namespace of arrays. + sycl_type, xp, is_array_api_compliant = _get_sycl_namespace(*arrays) - Extends stock scikit-learn's `get_namespace` primitive to support DPCTL usm_ndarrays - and DPNP ndarrays. - If no DPCTL usm_ndarray or DPNP ndarray inputs and backend scikit-learn version supports - Array API then :obj:`sklearn.utils._array_api.get_namespace(*arrays)` results are drawn. - Otherwise, numpy namespace will be returned. - - Designed to work for numpy.ndarray, DPCTL usm_ndarrays and DPNP ndarrays without - `array-api-compat` or backend scikit-learn Array API support. - - For full documentation refer to :obj:`sklearn.utils._array_api.get_namespace`. - - Parameters - ---------- - *arrays : array objects - Array objects. - - Returns - ------- - namespace : module - Namespace shared by array objects. - - is_array_api : bool - True of the arrays are containers that implement the Array API spec. 
- """ - - usm_iface, xp_sycl_namespace, is_array_api_compliant = _get_sycl_namespace( - *arrays - ) - - if usm_iface: - return xp_sycl_namespace, is_array_api_compliant - elif sklearn_check_version("1.4"): - return sklearn_get_namespace(*arrays) - else: - return np, False + if sycl_type: + return xp, is_array_api_compliant + elif sklearn_check_version("1.2"): + return sklearn_get_namespace(*arrays) + else: + return np, False From 4e94ff5a9ad7ace6e4962bacf0323e989c33f21f Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 6 Dec 2024 14:42:59 +0100 Subject: [PATCH 36/38] switchover --- sklearnex/svm/nusvc.py | 2 +- sklearnex/svm/nusvr.py | 2 +- sklearnex/svm/svc.py | 2 +- sklearnex/svm/svr.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearnex/svm/nusvc.py b/sklearnex/svm/nusvc.py index 82dd8a776f..fa9f77295d 100644 --- a/sklearnex/svm/nusvc.py +++ b/sklearnex/svm/nusvc.py @@ -39,7 +39,7 @@ if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data else: - validate_data = BaseSVC._validate_data + validate_data = _sklearn_NuSVC._validate_data @control_n_jobs( diff --git a/sklearnex/svm/nusvr.py b/sklearnex/svm/nusvr.py index 25ef27d1c5..4979fde0a3 100644 --- a/sklearnex/svm/nusvr.py +++ b/sklearnex/svm/nusvr.py @@ -32,7 +32,7 @@ if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data else: - validate_data = BaseSVR._validate_data + validate_data = _sklearn_NuSVR._validate_data @control_n_jobs(decorated_methods=["fit", "predict", "score"]) diff --git a/sklearnex/svm/svc.py b/sklearnex/svm/svc.py index 32339f6c9a..8e8ced9eee 100644 --- a/sklearnex/svm/svc.py +++ b/sklearnex/svm/svc.py @@ -41,7 +41,7 @@ if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data else: - validate_data = BaseSVC._validate_data + validate_data = _sklearn_SVC._validate_data @control_n_jobs( diff --git a/sklearnex/svm/svr.py b/sklearnex/svm/svr.py index 72cbf6d25d..fdf1d5d7c7 100644 --- 
a/sklearnex/svm/svr.py +++ b/sklearnex/svm/svr.py @@ -28,7 +28,7 @@ if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data else: - validate_data = BaseSVR._validate_data + validate_data = _sklearn_SVR._validate_data @control_n_jobs(decorated_methods=["fit", "predict", "score"]) From 4b2bf0d681cdbf666c3377f4d0d6cd2d6e2ab35a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 9 Feb 2025 22:26:20 +0100 Subject: [PATCH 37/38] Update __init__.py --- sklearnex/utils/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/__init__.py b/sklearnex/utils/__init__.py index e771dd5ab2..0d30f6074b 100755 --- a/sklearnex/utils/__init__.py +++ b/sklearnex/utils/__init__.py @@ -16,7 +16,7 @@ from daal4py.sklearn._utils import sklearn_check_version -from .validation import _assert_all_finite +from .validation import assert_all_finite # Not an ideal solution, but this converts the outputs of newer sklearnex tags # into dicts to match how tags had been used. Someone more clever than me will @@ -33,4 +33,4 @@ get_tags = BaseEstimator._get_tags -__all__ = ["_assert_all_finite", "get_tags"] +__all__ = ["assert_all_finite", "get_tags"] From f0aeae01df559122cf4a4aea49ee159efd819faf Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Mon, 10 Feb 2025 12:01:00 +0100 Subject: [PATCH 38/38] Update test_array_api.py --- sklearnex/utils/tests/test_array_api.py | 122 ------------------------ 1 file changed, 122 deletions(-) diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py index bc4756ba84..ed929c4c34 100644 --- a/sklearnex/utils/tests/test_array_api.py +++ b/sklearnex/utils/tests/test_array_api.py @@ -58,125 +58,3 @@ def test_get_namespace_with_config_context(dataframe, queue): # `get_namespace` has specific wrapper classes for `numpy.ndarray` # or `array-api-strict`. 
assert xp_out == array_api_compat.get_namespace(X) - - -@pytest.mark.skipif( - not sklearn_check_version("1.4"), - reason="Array API dispatch requires sklearn 1.4 version", -) -@pytest.mark.parametrize( - "dataframe,queue", - get_dataframes_and_queues( - dataframe_filter_="numpy,dpctl,array_api", device_filter_="cpu,gpu" - ), -) -def test_get_namespace_with_patching(dataframe, queue): - """Test `get_namespace` with `array_api_dispatch` and - `patch_sklearn` enabled. - """ - array_api_compat = pytest.importorskip("array_api_compat") - - from sklearnex import patch_sklearn - - patch_sklearn() - - from sklearn import config_context - from sklearn.utils._array_api import get_namespace - - X_np = np.asarray([[1, 2, 3]]) - X = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) - - with config_context(array_api_dispatch=True): - xp_out, is_array_api_compliant = get_namespace(X) - assert is_array_api_compliant - if not dataframe in "numpy,array_api": - # Rather than array_api_compat.get_namespace raw output - # `get_namespace` has specific wrapper classes for `numpy.ndarray` - # or `array-api-strict`. - assert xp_out == array_api_compat.get_namespace(X) - - -@pytest.mark.skipif( - not sklearn_check_version("1.4"), - reason="Array API dispatch requires sklearn 1.4 version", -) -@pytest.mark.parametrize( - "dataframe,queue", - get_dataframes_and_queues( - dataframe_filter_="dpctl,array_api", device_filter_="cpu,gpu" - ), -) -def test_convert_to_numpy_with_patching(dataframe, queue): - """Test `_convert_to_numpy` with `array_api_dispatch` and - `patch_sklearn` enabled. 
- """ - pytest.importorskip("array_api_compat") - - from sklearnex import patch_sklearn - - patch_sklearn() - - from sklearn import config_context - from sklearn.utils._array_api import _convert_to_numpy, get_namespace - - X_np = np.asarray([[1, 2, 3]]) - X = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) - - with config_context(array_api_dispatch=True): - xp, _ = get_namespace(X) - x_np = _convert_to_numpy(X, xp) - assert type(X_np) == type(x_np) - assert_allclose(X_np, x_np) - - -@pytest.mark.skipif( - not sklearn_check_version("1.4"), - reason="Array API dispatch requires sklearn 1.4 version", -) -@pytest.mark.parametrize( - "dataframe,queue", - get_dataframes_and_queues( - dataframe_filter_="numpy,dpctl,array_api", device_filter_="cpu,gpu" - ), -) -@pytest.mark.parametrize( - "dtype", - [ - pytest.param(np.float32, id=np.dtype(np.float32).name), - pytest.param(np.float64, id=np.dtype(np.float64).name), - ], -) -def test_validate_data_with_patching(dataframe, queue, dtype): - """Test validate_data with `array_api_dispatch` and - `patch_sklearn` enabled. - """ - pytest.importorskip("array_api_compat") - - from sklearnex import patch_sklearn - - patch_sklearn() - - from sklearn import config_context - from sklearn.base import BaseEstimator - - if sklearn_check_version("1.6"): - from sklearn.utils.validation import validate_data - else: - validate_data = BaseEstimator._validate_data - - from sklearn.utils._array_api import _convert_to_numpy, get_namespace - - X_np = np.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype) - X_df = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) - with config_context(array_api_dispatch=True): - est = BaseEstimator() - xp, _ = get_namespace(X_df) - X_df_res = validate_data( - est, X_df, accept_sparse="csr", dtype=[xp.float64, xp.float32] - ) - assert type(X_df) == type(X_df_res) - if dataframe != "numpy": - # _convert_to_numpy not designed for numpy.ndarray inputs. 
- assert_allclose(_convert_to_numpy(X_df, xp), _convert_to_numpy(X_df_res, xp)) - else: - assert_allclose(X_df, X_df_res)