diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 4d93c9d8..087a6d35 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -55,6 +55,6 @@ jobs: # to circumvent the currently slow performance of # JIT compile/link, which can otherwise cause issues # for hypothesis-driven test case generation - pytest $GITHUB_WORKSPACE/tools/pre_compile_ufuncs.py -s + # pytest $GITHUB_WORKSPACE/tools/pre_compile_ufuncs.py -s # only run a subset of the conformance tests to get started - pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_operators_and_elementwise_functions.py::test_log10 array_api_tests/test_operators_and_elementwise_functions.py::test_sqrt array_api_tests/test_operators_and_elementwise_functions.py::test_isfinite array_api_tests/test_operators_and_elementwise_functions.py::test_log2 array_api_tests/test_operators_and_elementwise_functions.py::test_log1p array_api_tests/test_operators_and_elementwise_functions.py::test_isinf array_api_tests/test_operators_and_elementwise_functions.py::test_log array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_sign array_api_tests/test_operators_and_elementwise_functions.py::test_square array_api_tests/test_operators_and_elementwise_functions.py::test_cos array_api_tests/test_operators_and_elementwise_functions.py::test_round array_api_tests/test_operators_and_elementwise_functions.py::test_trunc array_api_tests/test_operators_and_elementwise_functions.py::test_ceil array_api_tests/test_operators_and_elementwise_functions.py::test_floor array_api_tests/test_operators_and_elementwise_functions.py::test_exp array_api_tests/test_operators_and_elementwise_functions.py::test_sin array_api_tests/test_operators_and_elementwise_functions.py::test_tan array_api_tests/test_operators_and_elementwise_functions.py::test_tanh array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like array_api_tests/test_operators_and_elementwise_functions.py::test_positive array_api_tests/test_operators_and_elementwise_functions.py::test_isnan array_api_tests/test_operators_and_elementwise_functions.py::test_equal "array_api_tests/test_has_names.py::test_has_names[array_method-__pos__]" + pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like diff --git a/examples/LogisticRegression/LR.py b/examples/LogisticRegression/LR.py index 0222fd8c..e9cfa614 100644 --- a/examples/LogisticRegression/LR.py +++ b/examples/LogisticRegression/LR.py @@ -31,7 +31,6 @@ import numbers import numpy as np -import pykokkos as pk import warnings from joblib import Parallel, effective_n_jobs @@ -62,12 +61,8 @@ ) -def asarray(arr, dtype=pk.double): - arr = np.asarray(arr) - - view = pk.View(arr.shape, dtype) - view[:] = arr - return view +def asarray(arr, dtype=np.float64): + return np.asarray(arr, dtype=dtype) def _check_solver(solver, penalty, dual): @@ -267,13 +262,13 @@ def _logistic_regression_path( The "copy" parameter was removed. """ if isinstance(Cs, numbers.Integral): - Cs = pk.logspace(-4, 4, Cs) + Cs = np.logspace(-4, 4, Cs) solver = _check_solver(solver, penalty, dual) _, n_features = X.shape - classes = pk.unique(y) + classes = np.unique(y) random_state = check_random_state(random_state) @@ -301,9 +296,9 @@ def _logistic_regression_path( # multinomial case this is not necessary. if multi_class == "ovr": - w0 = pk.zeros(n_features + int(fit_intercept), dtype=X.dtype) + w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype) mask = y == pos_class - y_bin = pk.ones(y.shape, dtype=X.dtype) + y_bin = np.ones(y.shape, dtype=X.dtype) if solver in ["lbfgs", "newton-cg"]: # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead # of in [-1, 1]. @@ -333,11 +328,11 @@ def _logistic_regression_path( lbin = LabelBinarizer() Y_multi = asarray(lbin.fit_transform(y)) if Y_multi.shape[1] == 1: - Y_multi = pk.hstack( - pk.negative(pk.subtract(Y_multi, asarray([1]))), Y_multi + Y_multi = np.hstack( + (np.negative(np.subtract(Y_multi, asarray([1]))), Y_multi) ) - w0 = pk.zeros((classes.size, n_features + int(fit_intercept)), dtype=X.dtype) + w0 = np.zeros((classes.size, n_features + int(fit_intercept)), dtype=X.dtype) if coef is not None: # it must work both giving the bias term and not @@ -384,7 +379,7 @@ def _logistic_regression_path( # i.e. 1d-arrays. LinearModelLoss expects classes to be contiguous and # reconstructs the 2d-array via w0.reshape((n_classes, -1), order="F"). # As w0 is F-contiguous, ravel(order="F") also avoids a copy. - w0 = pk.ravel(w0, order="F") + w0 = np.ravel(w0, order="F") loss = LinearModelLoss( base_loss=HalfMultinomialLoss(n_classes=classes.size), @@ -397,7 +392,7 @@ def _logistic_regression_path( func = loss.loss grad = loss.gradient hess = loss.gradient_hessian_product # hess = [gradient, hessp] - warm_start_sag = {"coef": pk.transpose(w0)} + warm_start_sag = {"coef": np.transpose(np.array(w0))} else: target = y_bin if solver == "lbfgs": @@ -412,15 +407,15 @@ def _logistic_regression_path( func = loss.loss grad = loss.gradient hess = loss.gradient_hessian_product # hess = [gradient, hessp] - warm_start_sag = {"coef": pk.expand_dims(w0, axis=1)} + warm_start_sag = {"coef": np.expand_dims(w0, axis=1)} coefs = list() - n_iter = pk.zeros(len(Cs), dtype=pk.int32) + n_iter = np.zeros(len(Cs), dtype=np.int32) for i, C in enumerate(Cs): if solver == "lbfgs": l2_reg_strength = 1.0 / C - iprint = [-1, 50, 1, 100, 101][pk.searchsorted([0, 1, 2, 3], verbose)] + iprint = [-1, 50, 1, 100, 101][np.searchsorted([0, 1, 2, 3], verbose)] opt_res = optimize.minimize( func, np.asarray(w0), @@ -471,9 +466,9 @@ def _logistic_regression_path( ) coef_ = asarray(coef_) if fit_intercept: - w0 = pk.hstack(pk.ravel(coef_), intercept_) + w0 = np.hstack((np.ravel(coef_), intercept_)) else: - w0 = pk.ravel(coef_) + w0 = np.ravel(coef_) elif solver in ["sag", "saga"]: if multi_class == "multinomial": @@ -518,7 +513,7 @@ def _logistic_regression_path( if multi_class == "multinomial": n_classes = max(2, classes.size) if solver in ["lbfgs", "newton-cg"]: - multi_w0 = pk.reshape(w0, (n_classes, -1), order="F") + multi_w0 = np.reshape(w0, (n_classes, -1), order="F") else: multi_w0 = w0 coefs.append(asarray(multi_w0)) @@ -829,7 +824,7 @@ def fit(self, X, y, sample_weight=None): "Setting penalty='none' will ignore the C and l1_ratio parameters" ) # Note that check for l1_ratio is done right above - C_ = pk.inf + C_ = np.inf penalty = "l2" else: C_ = self.C @@ -862,7 +857,7 @@ def fit(self, X, y, sample_weight=None): X = asarray(X) y = asarray(y) - self.classes_ = pk.unique(y) + self.classes_ = np.unique(np.array(y)) multi_class = _check_multi_class(self.multi_class, solver, len(self.classes_)) @@ -969,7 +964,7 @@ def fit(self, X, y, sample_weight=None): ) fold_coefs_, _, n_iter_ = zip(*fold_coefs_) - self.n_iter_ = pk.col(asarray(n_iter_), 0) + self.n_iter_ = np.array(n_iter_) n_features = X.shape[1] if multi_class == "multinomial": @@ -984,7 +979,7 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = self.coef_[:, -1] self.coef_ = self.coef_[:, :-1] else: - self.intercept_ = pk.zeros(n_classes) + self.intercept_ = np.zeros(n_classes) return self @@ -1024,7 +1019,7 @@ def predict_proba(self, X): if decision.ndim == 1: # Workaround for multi_class="multinomial" and binary outcomes # which requires softmax prediction with only a 1D decision. - decision_2d = pk.hstack(pk.negative(decision), decision) + decision_2d = np.hstack((np.negative(decision), decision)) else: decision_2d = decision return softmax(decision_2d, copy=False) @@ -1045,7 +1040,7 @@ def predict_log_proba(self, X): Returns the log-probability of the sample for each class in the model, where classes are ordered as they are in ``self.classes_``. """ - return pk.log(self.predict_proba(X)) + return np.log(self.predict_proba(X)) def predict(self, X): """ @@ -1065,7 +1060,7 @@ def predict(self, X): else: indices = scores.argmax(axis=1) - return pk.index(self.classes_, asarray(indices, dtype=pk.int32)) + return self.classes_[np.array(indices, dtype=np.int32)] def main(): diff --git a/examples/NaiveBayes/GaussianNB.py b/examples/NaiveBayes/GaussianNB.py index 8c334f57..4d6a7488 100644 --- a/examples/NaiveBayes/GaussianNB.py +++ b/examples/NaiveBayes/GaussianNB.py @@ -36,17 +36,12 @@ from math import pi from typing import Sequence -import pykokkos as pk import numpy as np from sklearn.base import BaseEstimator def asarray(arr): - arr = np.asarray(arr) - - view = pk.View(arr.shape, pk.double) - view[:] = arr - return view + return np.asarray(arr, dtype=np.float64) def type_of_target(y, input_name=""): @@ -95,7 +90,7 @@ def type_of_target(y, input_name=""): else: suffix = "" # [1, 2, 3] or [[1], [2], [3]] - if (len(pk.unique(y)) > 2) or (len(y.shape) >= 2 and len(y[0]) > 1): + if (len(np.unique(y)) > 2) or (len(y.shape) >= 2 and len(y[0]) > 1): return "multiclass" + suffix # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]] else: return "binary" # [1, 2] or [["a"], ["b"]] @@ -103,7 +98,7 @@ def type_of_target(y, input_name=""): def _unique_multiclass(y): if hasattr(y, "__array__"): - return pk.unique(asarray(y)) + return np.unique(np.array(asarray(y))) else: return set(y) @@ -140,8 +135,7 @@ def unique_labels(*ys): raise ValueError("Mix of label input types (string and number)") sorted_label = sorted(ys_labels) - labels = pk.View([len(sorted_label)], pk.double) - labels[:] = sorted_label + labels = np.array(sorted_label, dtype=np.float64) return labels @@ -264,7 +258,7 @@ def predict(self, X): X = self._check_X(X) jll = self._joint_log_likelihood(X) - return pk.index(self.classes_, pk.argmax(jll, axis=1)) + return self.classes_[np.argmax(np.array(jll), axis=1)] def predict_log_proba(self, X): """ @@ -285,7 +279,7 @@ def predict_log_proba(self, X): jll = self._joint_log_likelihood(X) # normalize by P(x) = P(f_1, ..., f_n) # log_prob_x = logsumexp(jll, axis=1) - # return jll - pk.transpose(pk.atleast_2d()) + # return jll - np.transpose(pk.atleast_2d()) def predict_proba(self, X): """ @@ -301,7 +295,7 @@ def predict_proba(self, X): the model. The columns correspond to the classes in sorted order, as they appear in the attribute :term:`classes_`. """ - return pk.exp(self.predict_log_proba(X)) + return np.exp(self.predict_log_proba(X)) class GaussianNB(_BaseNB): @@ -366,7 +360,7 @@ class labels known to the classifier. >>> print(clf.predict([[-0.8, -1]])) [1] >>> clf_pf = GaussianNB() - >>> clf_pf.partial_fit(X, Y, pk.unique(Y)) + >>> clf_pf.partial_fit(X, Y, np.unique(Y)) GaussianNB() >>> print(clf_pf.predict([[-0.8, -1]])) [1] @@ -397,7 +391,7 @@ def fit(self, X, y, sample_weight=None): y = asarray(self._validate_data(y=y)) return self._partial_fit( - X, y, pk.unique(y), _refit=True, sample_weight=sample_weight + X, y, np.unique(y), _refit=True, sample_weight=sample_weight ) def _check_X(self, X): @@ -440,12 +434,14 @@ def _update_mean_variance(n_past, mu, var, X, sample_weight=None): # Compute (potentially weighted) mean and variance of new datapoints if sample_weight is not None: n_new = float(sample_weight.sum()) - new_mu = pk.average(X, axis=0, weights=sample_weight) - new_var = pk.average((X - new_mu) ** 2, axis=0, weights=sample_weight) + new_mu = np.average(np.array(X), axis=0, weights=sample_weight) + new_var = np.average( + np.array(X - new_mu) ** 2, axis=0, weights=sample_weight + ) else: n_new = X.shape[0] - new_var = pk.var(X, axis=0) - new_mu = pk.mean(X, axis=0) + new_var = np.var(np.array(X), axis=0) + new_mu = np.mean(np.array(X), axis=0) if n_past == 0: return new_mu, new_var @@ -534,17 +530,17 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): # will cause numerical errors. To address this, we artificially # boost the variance by epsilon, a small fraction of the standard # deviation of the largest dimension. - self.epsilon_ = self.var_smoothing * pk.find_max(pk.var(X, axis=0)) + self.epsilon_ = self.var_smoothing * np.max(np.var(np.array(X), axis=0)) if first_call: # This is the first call to partial_fit: # initialize various cumulative counters n_features = X.shape[1] n_classes = len(self.classes_) - self.theta_ = pk.zeros((n_classes, n_features)) - self.var_ = pk.zeros((n_classes, n_features)) + self.theta_ = np.zeros((n_classes, n_features), dtype=np.float64) + self.var_ = np.zeros((n_classes, n_features), dtype=np.float64) - self.class_count_ = pk.zeros(n_classes, dtype=pk.double) + self.class_count_ = np.zeros(n_classes, dtype=np.float64) # Initialise the class prior # Take into account the priors @@ -559,7 +555,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): self.class_prior_ = priors else: # Initialize the priors to zeros for each class - self.class_prior_ = pk.zeros(len(self.classes_), dtype=pk.double) + self.class_prior_ = np.zeros(len(self.classes_), dtype=np.float64) else: if X.shape[1] != self.theta_.shape[1]: msg = "Number of features %d does not match previous data %d." @@ -569,17 +565,17 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): classes = self.classes_ - unique_y = pk.unique(y) - unique_y_in_classes = pk.in1d(unique_y, classes) + unique_y = np.unique(y) + unique_y_in_classes = np.in1d(unique_y, classes) - if not pk.all(unique_y_in_classes): + if not np.all(unique_y_in_classes): raise ValueError( "The target label(s) %s in y do not exist in the initial classes %s" - % (unique_y[pk.logical_not(unique_y_in_classes)], classes) + % (unique_y[np.logical_not(unique_y_in_classes)], classes) ) for y_i in unique_y: - i = int(pk.searchsorted(classes, y_i)) # linear search + i = int(np.searchsorted(classes, y_i)) # linear search X_i = X[y == y_i, :] if sample_weight is not None: @@ -602,7 +598,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): # Update if only no priors is provided if self.priors is None: # Empirical prior, with sample_weight taken into account - self.class_prior_ = pk.divide(self.class_count_, pk.sum(self.class_count_)) + self.class_prior_ = np.divide(self.class_count_, np.sum(self.class_count_)) return self @@ -611,15 +607,15 @@ def _joint_log_likelihood(self, X): total_classes = reduce(lambda a, b: a * b, self.classes_.shape, 1) for i in range(total_classes): - jointi = pk.log(self.class_prior_[i]) - - n_ij = -0.5 * pk.sum(pk.log(pk.multiply(self.var_[i, :], 2.0 * pi))) - n_ij = pk.add( - pk.negative( - pk.multiply( - pk.sum( - pk.divide( - pk.power(pk.add(X, pk.negative(self.theta_[i, :])), 2), + jointi = np.log(self.class_prior_[i]) + + n_ij = -0.5 * np.sum(np.log(self.var_[i, :] * 2.0 * pi)) + n_ij = np.add( + np.negative( + np.multiply( + np.sum( + np.divide( + np.power(np.add(X, np.negative(self.theta_[i, :])), 2), self.var_[i, :], ), 1, @@ -630,9 +626,9 @@ def _joint_log_likelihood(self, X): n_ij, ) - joint_log_likelihood.append(pk.add(n_ij, jointi)) + joint_log_likelihood.append(np.add(n_ij, jointi)) - joint_log_likelihood = pk.transpose(asarray(joint_log_likelihood)) + joint_log_likelihood = np.transpose(asarray(joint_log_likelihood)) return joint_log_likelihood diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 9340945f..5ec25c86 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -18,61 +18,12 @@ ) from pykokkos.lib.ufuncs import ( - reciprocal, - log, - log2, - log10, - log1p, - sqrt, - sign, - add, - copyto, - subtract, - dot, - multiply, - matmul, - np_matmul, - divide, - negative, - positive, - power, - fmod, - square, - greater, - logaddexp, - true_divide, - logaddexp2, - floor_divide, - sin, - cos, - tan, - tanh, - logical_and, - logical_or, - logical_xor, - logical_not, - fmax, - fmin, - exp, - exp2, - argmax, - unique, - var, - in1d, - mean, - hstack, - transpose, - index, - isinf, - isnan, - equal, - isfinite, - round, - trunc, - ceil, - floor, - broadcast_view, + _isnan as isnan, + _isinf as isinf, + _isfinite as isfinite, + _equal as equal, ) + from pykokkos.lib.info import iinfo, finfo from pykokkos.lib.create import zeros, zeros_like, ones, ones_like, full, full_like from pykokkos.lib.manipulate import reshape, ravel, expand_dims diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index 62ed04b4..a87de4e7 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -504,7 +504,7 @@ def _get_type(self, dtype: Union[DataType, type]) -> Optional[DataType]: def __eq__(self, other): # avoid circular import with scoped import - from pykokkos.lib.ufuncs import equal + from pykokkos.lib.ufuncs import _equal if isinstance(other, float): new_other = pk.View((), dtype=pk.double) @@ -539,7 +539,7 @@ def __eq__(self, other): new_other = other else: raise ValueError("unexpected types!") - return equal(self, new_other) + return _equal(self, new_other) def __hash__(self): try: @@ -668,7 +668,7 @@ def _get_base_view(self, parent_view: Union[Subview, View]) -> View: def __eq__(self, other): # avoid circular import with scoped import - from pykokkos.lib.ufuncs import equal + from pykokkos.lib.ufuncs import _equal if isinstance(other, float): new_other = pk.View((), dtype=pk.double) @@ -703,7 +703,7 @@ def __eq__(self, other): new_other = other else: raise ValueError("unexpected types!") - return equal(self, new_other) + return _equal(self, new_other) def __add__(self, other): if isinstance(other, float): diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index fe741bd1..9a16a4eb 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -11,3481 +11,85 @@ kernel_dict = dict(getmembers(ufunc_workunits, isfunction)) -def _supported_types_check(dtype_str, supported_type_strings): - options = "" - for type_str in supported_type_strings: - options += f".*{type_str}.*|" - options = options[:-1] - prog = re.compile(f"({options})") - result = prog.match(dtype_str) - if result is None: - raise NotImplementedError - - -def _ufunc_kernel_dispatcher( - profiler_name: Optional[str], tid, dtype, ndims, op, sub_dispatcher, **kwargs -): - dtype_extractor = re.compile(r".*(?:dtype|data_types|DataType)\.(\w+)") - if ndims == 0: - ndims = 1 - res = dtype_extractor.match(str(dtype)) - dtype_str = res.group(1) - if dtype_str == "float32": - dtype_str = "float" - elif dtype_str == "float64": - dtype_str = "double" - function_name_str = f"{op}_impl_{ndims}d_{dtype_str}" - desired_workunit = kernel_dict[function_name_str] - # call the kernel - ret = sub_dispatcher(profiler_name, tid, desired_workunit, **kwargs) - return ret - - -def _broadcast_views(view1, view2): - # support broadcasting by using the same - # shape matching rules as NumPy - # TODO: determine if this can be done with - # more memory efficiency? - if view1.shape != view2.shape: - new_shape = np.broadcast_shapes(view1.shape, view2.shape) - view1_new = pk.View([*new_shape], dtype=view1.dtype) - view1_new[:] = view1 - view1 = view1_new - view2_new = pk.View([*new_shape], dtype=view2.dtype) - view2_new[:] = view2 - view2 = view2_new - return view1, view2 - - -def _typematch_views(view1, view2): - # very crude casting implementation - # for binary ufuncs - dtype1 = view1.dtype - dtype2 = view2.dtype - dtype_extractor = re.compile(r".*(?:data_types|DataType)\.(\w+)") - res1 = dtype_extractor.match(str(dtype1)) - res2 = dtype_extractor.match(str(dtype2)) - effective_dtype = dtype1 - if res1 is not None and res2 is not None: - res1_dtype_str = res1.group(1) - res2_dtype_str = res2.group(1) - if res1_dtype_str == "double": - res1_dtype_str = "float64" - elif res1_dtype_str == "float": - res1_dtype_str = "float32" - if res2_dtype_str == "double": - res2_dtype_str = "float64" - elif res2_dtype_str == "float": - res2_dtype_str = "float32" - if res1_dtype_str == "bool" or res2_dtype_str == "bool": - res1_dtype_str = "uint8" - dtype1 = pk.uint8 - res2_dtype_str = "uint8" - dtype2 = pk.uint8 - if ("int" in res1_dtype_str and "int" in res2_dtype_str) or ( - "float" in res1_dtype_str and "float" in res2_dtype_str - ): - dtype_1_width = int(res1_dtype_str.split("t")[1]) - dtype_2_width = int(res2_dtype_str.split("t")[1]) - if dtype_1_width >= dtype_2_width: - effective_dtype = dtype1 - view2_new = pk.View([*view2.shape], dtype=effective_dtype) - view2_new[:] = view2.data - view2 = view2_new - else: - effective_dtype = dtype2 - view1_new = pk.View([*view1.shape], dtype=effective_dtype) - view1_new[:] = view1.data - view1 = view1_new - return view1, view2, effective_dtype - - -def reciprocal(view, profiler_name: Optional[str] = None): - """ - Return the reciprocal of the argument, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - Notes - ----- - .. note:: - This function is not designed to work with integers. - - """ - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=view.shape[0], - dtype=view.dtype.value, - ndims=len(view.shape), - op="reciprocal", - sub_dispatcher=pk.parallel_for, - view=view, - ) - # NOTE: pretty awkward to both return the view - # and operate on it in place; the former is closer - # to NumPy semantics - return view - - -@pk.workunit -def log_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): - out[tid] = log(view[tid]) # type: ignore - - -@pk.workunit -def log_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log(view[tid][i]) # type: ignore - - -@pk.workunit -def log_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = log(view[tid]) # type: ignore - - -@pk.workunit -def log_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log(view[tid][i]) # type: ignore - - -def log(view, profiler_name: Optional[str] = None): - """ - Natural logarithm, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - """ - if not isinstance(view, pk.ViewType): - return math.log(view) - - if len(view.shape) > 2: - raise NotImplementedError("log() ufunc only supports up to 2D views") - - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(profiler_name, 1, log_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for( - profiler_name, view.shape[0], log_impl_1d_double, view=view, out=out - ) - elif len(view.shape) == 2: - pk.parallel_for( - profiler_name, view.shape[0], log_impl_2d_double, view=view, out=out - ) - elif "float" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(profiler_name, 1, log_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for( - profiler_name, view.shape[0], log_impl_1d_float, view=view, out=out - ) - elif len(view.shape) == 2: - pk.parallel_for( - profiler_name, view.shape[0], log_impl_2d_float, view=view, out=out - ) - return out - - -@pk.workunit -def sqrt_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = sqrt(view[tid]) # type: ignore - - -@pk.workunit -def sqrt_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = sqrt(view[tid][i]) # type: ignore - - -@pk.workunit -def sqrt_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = sqrt(view[tid]) # type: ignore - - -@pk.workunit -def sqrt_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = sqrt(view[tid][i]) # type: ignore - - -def sqrt(view): - """ - Return the non-negative square root of the argument, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - Notes - ----- - .. note:: - This function should exhibit the same branch cut behavior - as the equivalent NumPy ufunc. - """ - if isinstance(view, (np.integer, np.floating)): - return math.sqrt(view) - # TODO: support complex types when they - # are available in pykokkos? - if len(view.shape) > 2: - raise NotImplementedError( - "only up to 2D views currently supported for sqrt() ufunc." - ) - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, sqrt_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sqrt_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sqrt_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, sqrt_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sqrt_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sqrt_impl_2d_float, view=view, out=out) - return out - - -@pk.workunit -def log2_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = log2(view[tid]) # type: ignore - - -@pk.workunit -def log2_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log2(view[tid][i]) # type: ignore - - -@pk.workunit -def log2_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = log2(view[tid]) # type: ignore - - -@pk.workunit -def log2_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log2(view[tid][i]) # type: ignore - - -def log2(view): - """ - Base-2 logarithm, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - """ - if len(view.shape) > 2: - raise NotImplementedError("log2() ufunc only supports up to 2D views") - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(1, log2_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log2_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log2_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(1, log2_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log2_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log2_impl_2d_float, view=view, out=out) - return out - - -@pk.workunit -def log10_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = log10(view[tid]) # type: ignore - - -@pk.workunit -def log10_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log10(view[tid][i]) # type: ignore - - -@pk.workunit -def log10_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = log10(view[tid]) # type: ignore - - -@pk.workunit -def log10_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log10(view[tid][i]) # type: ignore - - -def log10(view): - """ - Base-10 logarithm, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - """ - if view.size == 0: - return view - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(1, log10_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log10_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log10_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(1, log10_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log10_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log10_impl_2d_float, view=view, out=out) - return out - - -@pk.workunit -def log1p_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = log1p(view[tid]) # type: ignore - - -@pk.workunit -def log1p_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = log1p(view[tid]) # type: ignore - - -@pk.workunit -def log1p_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log1p(view[tid][i]) # type: ignore - - -@pk.workunit -def log1p_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log1p(view[tid][i]) # type: ignore - - -def log1p(view): - """ - Return the natural logarithm of one plus the input array, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - """ - if view.size == 0: - return view - out = pk.View(view.shape, view.dtype) - if len(view.shape) > 2: - raise NotImplementedError("log1p() ufunc only supports up to 2D views") - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, log1p_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log1p_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log1p_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, log1p_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log1p_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log1p_impl_2d_float, view=view, out=out) - return out - - -@pk.workunit -def sign_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_1d_uint8(tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.int8]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.int8]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_2d_uint8(tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_uint16( - tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint16] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_uint16( - tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint16] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_uint32( - tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint32] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_uint32( - tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint32] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint64] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint64] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_int16(tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.int16]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_int16(tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.int16]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_int32(tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.int32]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_int32(tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.int32]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_int64(tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.int64]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_int64(tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.int64]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint64] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint64] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -def sign(view): - out = pk.View(view.shape, view.dtype) - if len(view.shape) > 2: - raise NotImplementedError( - "only up to 2D views currently supported for sign() ufunc." - ) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.double) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_double, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.float) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_float, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_float, view=view, out=out) - elif "uint32" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.uint32) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_uint32, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_uint32, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_uint32, view=view, out=out) - elif "uint16" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.uint16) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_uint16, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_uint16, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_uint16, view=view, out=out) - elif "int16" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.int16) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_int16, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_int16, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_int16, view=view, out=out) - elif "int32" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.int32) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_int32, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_int32, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_int32, view=view, out=out) - elif "uint64" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.uint64) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_uint64, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_uint64, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_uint64, view=view, out=out) - elif "int64" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.int64) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_int64, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_int64, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_int64, view=view, out=out) - elif "uint8" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.uint8) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_uint8, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_uint8, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_uint8, view=view, out=out) - elif "int8" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.int8) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_int8, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_int8, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_int8, view=view, out=out) - return out - - -@pk.workunit -def add_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] + viewB[tid % viewB.extent(0)] - - -@pk.workunit -def add_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] + viewB[tid % viewB.extent(0)] - - -@pk.workunit -def add_impl_2d_1d(tid, viewA, viewB, out): - for i in range(viewA.extent(1)): - out[tid][i] = viewA[tid][i] + viewB[i % viewB.extent(0)] - - -@pk.workunit -def add_impl_2d_2d(tid, viewA, viewB, out): - r_idx: int = tid / viewA.extent(1) - c_idx: int = tid - r_idx * viewA.extent(1) - out[r_idx][c_idx] = viewA[r_idx][c_idx] + viewB[r_idx][c_idx] - - -def add(viewA, viewB, profiler_name: Optional[str] = None): - """ - Sums positionally corresponding elements - of viewA with elements of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view or scalar - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - # viewA must always be a view of type float 64 or 32 - if not isinstance(viewA, pk.ViewType) and viewA.dtype.__name__ not in [ - "float32", - "float64", - ]: - raise RuntimeError( - "Incompatible first argument of type: {}, must be a float32 or float 64 Pykokkos view".format( - viewA.dtype - ) - ) - - # then, if viewB is a scalar conform it to viewA's type - if not isinstance(viewB, pk.ViewType): - view_temp = pk.View( - [1], pk.double if viewA.dtype.__name__ == "float64" else pk.float32 - ) - view_temp[0] = viewB - viewB = view_temp - - if len(viewA.shape) > 2 or len(viewB.shape) > 2: - raise NotImplementedError("only 2D views currently supported for add() ufunc.") - - if viewA.rank() == 2 and viewB.rank() == 2 and viewA.shape != viewB.shape: - raise RuntimeError( - "2D views must have the same shape for add ufunc. Mismatch: {} and {}".format( - viewA.shape, viewB.shape - ) - ) - - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - if viewA.rank() == 1 and viewB.rank() == 1: - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - add_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - elif viewA.rank() == 2 and viewB.rank() == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0] * viewA.shape[1], - add_impl_2d_2d, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - larger = viewA if len(viewA.shape) > len(viewB.shape) else viewB - smaller = viewB if len(viewA.shape) == len(larger.shape) else viewA - out = pk.View([larger.shape[0], larger.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - larger.shape[0], - add_impl_2d_1d, - viewA=larger, - viewB=smaller, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - if viewA.rank() == 1 and viewB.rank() == 1: - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - add_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out, - ) - elif viewB.rank() == 2 and viewB.rank() == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0] * viewA.shape[1], - add_impl_2d_2d, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - larger = viewA if len(viewA.shape) > len(viewB.shape) else viewB - smaller = viewB if len(viewA.shape) == len(larger.shape) else viewA - out = pk.View([larger.shape[0], larger.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - larger.shape[0], - add_impl_2d_1d, - viewA=larger, - viewB=smaller, - out=out, - ) - else: - raise RuntimeError("Incompatible Types {}, {}".format(viewA.dtype, viewB.dtype)) - return out - - -@pk.workunit -def multiply_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] * viewB[tid % viewB.extent(0)] - - -@pk.workunit -def multiply_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] * viewB[tid % viewB.extent(0)] - - -@pk.workunit -def multiply_impl_2d_with_1d(tid, viewA, viewB, out): - r_idx: int = tid / viewA.extent(1) - c_idx: int = tid - r_idx * viewA.extent(1) - out[r_idx][c_idx] = viewA[r_idx][c_idx] * viewB[r_idx % viewB.extent(0)] - - -@pk.workunit -def multiply_impl_2d_with_2d(tid, viewA, viewB, out): - r_idx: int = tid / viewA.extent(1) - c_idx: int = tid - r_idx * viewA.extent(1) - out[r_idx][c_idx] = viewA[r_idx][c_idx] * viewB[r_idx][c_idx] - - -def multiply(viewA, viewB, profiler_name: Optional[str] = None): - """ - Multiplies positionally corresponding elements - of viewA with elements of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view or scalar - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - # viewA must always be a view of type float 64 or 32 - if not isinstance(viewA, pk.ViewType) and viewA.dtype.__name__ not in [ - "float32", - "float64", - ]: - raise RuntimeError( - "Incompatible first argument of type: {}, must be a float32 or float 64 Pykokkos view".format( - viewA.dtype - ) - ) - - # then, if viewB is a scalar conform it to viewA's type - if not isinstance(viewB, pk.ViewType): - view_temp = pk.View( - [1], pk.double if viewA.dtype.__name__ == "float64" else pk.float32 - ) - view_temp[0] = viewB - viewB = view_temp - - if len(viewA.shape) > 2 or len(viewB.shape) > 2: - raise NotImplementedError( - "only 2D views currently supported for mulitply() ufunc." - ) - - if viewA.rank() == 2 and viewB.rank() == 2 and viewA.shape != viewB.shape: - raise RuntimeError( - "2D views must have the same shape for add ufunc. Mismatch: {} and {}".format( - viewA.shape, viewB.shape - ) - ) - - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - if len(viewA.shape) == 1 and len(viewB.shape) == 1: - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - multiply_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - elif len(viewA.shape) == 2 and len(viewB.shape) == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0] * viewA.shape[1], - multiply_impl_2d_with_2d, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - larger = viewA if len(viewA.shape) > len(viewB.shape) else viewB - smaller = viewB if len(viewA.shape) == len(larger.shape) else viewA - out = pk.View([larger.shape[0], larger.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - larger.shape[0] * larger.shape[1], - multiply_impl_2d_with_1d, - viewA=larger, - viewB=smaller, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - if len(viewA.shape) == 1 and len(viewB.shape) == 1: - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - multiply_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out, - ) - elif len(viewA.shape) == 2 and len(viewB.shape) == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0] * viewA.shape[1], - multiply_impl_2d_with_2d, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - larger = viewA if len(viewA.shape) > len(viewB.shape) else viewB - smaller = viewB if len(viewA.shape) == len(larger.shape) else viewA - out = pk.View([larger.shape[0], larger.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - larger.shape[0] * larger.shape[1], - multiply_impl_2d_with_1d, - viewA=larger, - viewB=smaller, - out=out, - ) - else: - raise RuntimeError("Incompatible Types {}, {}".format(viewA.dtype, viewB.dtype)) - return out - - -def check_broadcastable_impl(viewA, viewB): - """ - Check whether two views are broadcastable as defined here: - https://numpy.org/doc/stable/user/basics.broadcasting.html - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - _ : boolean - True if both views are compatible. - """ - - if viewA.shape == viewB.shape: - return False # cannot broadcast same dims - - v1_p = len(viewA.shape) - 1 - v2_p = len(viewB.shape) - 1 - - while v1_p > -1 and v2_p > -1: - if viewA.shape[v1_p] != viewB.shape[v2_p]: - if viewA.shape[v1_p] != 1 and viewB.shape[v2_p] != 1: - return False - - v1_p -= 1 - v2_p -= 1 - - return True - - -@pk.workunit -def stretch_fill_impl_scalar_into_1d(tid, scalar, viewOut): - viewOut[tid] = scalar - - -@pk.workunit -def stretch_fill_impl_scalar_into_2d(tid, cols, scalar, viewOut): - for i in range(cols): - viewOut[tid][i] = scalar - - -@pk.workunit -def stretch_fill_impl_1d_into_2d(tid, cols, viewIn, viewOut): - for i in range(cols): - viewOut[tid][i] = viewIn[i] - - -@pk.workunit -def stretch_fill_impl_2d(tid, inner_its, col_wise, viewIn, viewOut): - for i in range(inner_its): - if col_wise: - viewOut[i][tid] = viewIn[i][0] - else: - viewOut[tid][i] = viewIn[0][i] - - -def broadcast_view(val, viewB): - """ - Broadcasts val onto viewB, returns the "stretched" version of viewA - - Parameters - ---------- - val : pykokkos view or Scalar - View or scalar to be broadcasted (is shorter and compatible in dimensions). - viewB : pykokkos view - View to be broadcasted onto (is longer and compatible in dimensions). - - Returns - ------- - out : pykokkos view - Broadcasted version of viewA. - - """ - if len(viewB.shape) > 2: - raise NotImplementedError("Broadcasting is only supported upto 2D views") - - is_view = False - if isinstance(val, ViewType): - for dim in val.shape: - if dim != 1: - is_view = True - - if not is_view: - val = val[0] if len(val.shape) == 1 else val[0][0] - - if is_view: - is_first_small = len(val.shape) < len(viewB.shape) or ( - (len(val.shape) == len(viewB.shape)) and val.shape < viewB.shape - ) - if not check_broadcastable_impl(val, viewB) or not is_first_small: - raise ValueError("Incompatible broadcast") - if not val.dtype == viewB.dtype: - raise ValueError("Broadcastable views must have same dtypes") - - out = pk.View(viewB.shape, viewB.dtype) - - if is_view: - # if both 2D - if ( - len(val.shape) == 2 - ): # viewB must be 2 because of the val.shape < viewB.shape check - # figure which orientation is val (row or col) - col_wise = 1 if val.shape[1] == 1 else 0 - inner_its = viewB.shape[0] if col_wise else viewB.shape[1] - outer_its = viewB.shape[1] if col_wise else viewB.shape[0] - pk.parallel_for( - outer_its, - stretch_fill_impl_2d, - inner_its=inner_its, - col_wise=col_wise, - viewIn=val, - viewOut=out, - ) - else: # 1d to 2D - pk.parallel_for( - out.shape[0], - stretch_fill_impl_1d_into_2d, - cols=viewB.shape[1], - viewIn=val, - viewOut=out, - ) - - return out - - # scalar - - if len(viewB.shape) == 1: - out_1d = pk.View(viewB.shape) - pk.parallel_for( - viewB.shape[0], stretch_fill_impl_scalar_into_1d, scalar=val, viewOut=out_1d - ) - return out_1d - - # else 2d - pk.parallel_for( - out.shape[0], - stretch_fill_impl_scalar_into_2d, - cols=out.shape[1], - scalar=val, - viewOut=out, - ) - return out - - -@pk.workunit -def subtract_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] - viewB[tid] - - -@pk.workunit -def subtract_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] - viewB[tid] - - -@pk.workunit -def subtract_impl_2d(tid, cols, viewA, viewB, viewOut): - for i in range(cols): - viewOut[tid][i] = viewA[tid][i] - viewB[tid][i] - - -@pk.workunit -def subtract_impl_scalar_1d(tid, viewA, scalar, viewOut): - viewOut[tid] = viewA[tid] - scalar - - -@pk.workunit -def subtract_impl_scalar_2d(tid, cols, viewA, scalar, viewOut): - for i in range(cols): - viewOut[tid][i] = viewA[tid][i] - scalar - - -def subtract(viewA, valB, profiler_name: Optional[str] = None): - """ - Subtracts positionally corresponding elements - of viewA with elements of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - valB : pykokkos view or scalar - Input view or scalar value. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - is_scalar = True - if isinstance(valB, ViewType): - # if this is a single valued view1D or view2D just count that as a scalar - for dim in valB.shape: - if dim != 1: - is_scalar = False - - if is_scalar: - valB = valB[0] if len(valB.shape) == 1 else valB[0][0] - - if len(viewA.shape) > 2 or (not is_scalar and len(valB.shape) > 2): - raise NotImplementedError( - "only 1D and 2D views currently supported for subtract() ufunc." - ) - - if not is_scalar: - - if viewA.shape != valB.shape and not check_broadcastable_impl( - viewA, valB - ): # if shape is not same check compatibility - raise ValueError("Views must be broadcastable") - - # check if size is same otherwise broadcast and fix - if len(viewA.shape) < len(valB.shape) or ( - len(viewA.shape) == len(valB.shape) and viewA.shape < valB.shape - ): - viewA = broadcast_view(viewA, valB) - elif len(valB.shape) < len(viewA.shape) or ( - len(viewA.shape) == len(valB.shape) and valB.shape < viewA.shape - ): - valB = broadcast_view(valB, viewA) - - if viewA.dtype.__name__ == "float64" and valB.dtype.__name__ == "float64": - - if len(viewA.shape) == 1: - out = pk.View(viewA.shape, pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_1d_double, - viewA=viewA, - viewB=valB, - out=out, - ) - - if len(viewA.shape) == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_2d, - cols=viewA.shape[1], - viewA=viewA, - viewB=valB, - viewOut=out, - ) - - elif viewA.dtype.__name__ == "float32" and valB.dtype.__name__ == "float32": - - if len(viewA.shape) == 1: - out = pk.View(viewA.shape, pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_1d_float, - viewA=viewA, - viewB=valB, - out=out, - ) - - if len(viewA.shape) == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_2d, - cols=viewA.shape[1], - viewA=viewA, - viewB=valB, - viewOut=out, - ) - else: - raise RuntimeError("Incompatible Types") - - return out - - # is scalar subtract ----------------------- - if len(viewA.shape) == 1: # 1D - out = None - if viewA.dtype.__name__ == "float64": - out = pk.View(viewA.shape, pk.double) - if viewA.dtype.__name__ == "float32": - out = pk.View(viewA.shape, pk.float) - - if out is None: - raise RuntimeError("Incompatible Types") - - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_scalar_1d, - viewA=viewA, - scalar=valB, - viewOut=out, - ) - - if len(viewA.shape) == 2: # 2D - out = None - if viewA.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) - if viewA.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) - - if out is None: - raise RuntimeError("Incompatible Types") - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_scalar_2d, - cols=viewA.shape[1], - viewA=viewA, - scalar=valB, - viewOut=out, - ) - - return out - - -@pk.workunit -def copyto_impl_2d(tid, viewA, viewB): - r_idx: int = tid / viewA.extent(1) - c_idx: int = tid - r_idx * viewA.extent(1) - - viewA[r_idx][c_idx] = viewB[r_idx][c_idx] - - -@pk.workunit -def copyto_impl_1d(tid, viewA, viewB): - viewA[tid] = viewB[tid] - - -def copyto(viewA, viewB, profiler_name: Optional[str] = None): - """ - copies values of viewB into valueA for corresponding indicies - - Parameters - ---------- - viewA : pykokkos view - Input view. - valB : pykokkos view or scalar - Input view - - Returns - ------- - Void - """ - - if not isinstance(viewA, ViewType): - raise ValueError("copyto: Cannot copy to a non-view type") - if not isinstance(viewB, ViewType): - raise ValueError("copyto: Cannot copy from a non-view type") - if viewA.shape != viewB.shape: - if not check_broadcastable_impl( - viewA, viewB - ): # if shape is not same check compatibility - raise ValueError( - "copyto: Views must be broadcastable or of the same size. {} against {}".format( - viewA.shape, viewB.shape - ) - ) - # check if size is same otherwise broadcast and fix - viewA = broadcast_view(viewB, viewA) - - # implementation constraint, for now - if viewA.rank() > 2: - raise NotImplementedError( - "copyto: This version of Pykokkos only supports copyto upto 2D views" - ) - - if viewA.rank() == 1: - pk.parallel_for( - profiler_name, viewA.shape[0], copyto_impl_1d, viewA=viewA, viewB=viewB - ) - - else: - outRows = viewA.shape[0] - outCols = viewA.shape[1] - totalThreads = outRows * outCols - pk.parallel_for( - profiler_name, totalThreads, copyto_impl_2d, viewA=viewA, viewB=viewB - ) - - -@pk.workunit -def np_matmul_impl_2d_2d(tid, cols, vec_length, viewA, viewB, viewOut): - r_idx: int = tid / cols - c_idx: int = tid - r_idx * cols - - for i in range(vec_length): - viewOut[r_idx][c_idx] += viewA[r_idx][i] * viewB[i][c_idx] - - -@pk.workunit -def np_matmul_impl_1d_2d(tid, vec_length, view1D, viewB, viewOut): - for i in range(vec_length): - viewOut[tid] += view1D[i] * viewB[i][tid] - - -@pk.workunit -def np_matmul_impl_2d_1d(tid, vec_length, viewA, view1D, viewOut): - for i in range(vec_length): - viewOut[tid] += viewA[tid][i] * view1D[i] - - -def np_matmul(viewA, viewB, profiler_name: Optional[str] = None): - """ - Upto 2D Matrix Multiplication of compatible views according to numpy specification - - The behavior depends on the arguments in the following way: - [*] If both arguments are 2-D they are multiplied like conventional matrices. - - [X] Not implemented yet - If either argument is N-D, N > 2, it is treated as a - stack of matrices residing in the last two indexes and broadcast accordingly. - - [*] If the first argument is 1-D, it is promoted to a matrix by prepending a 1 - to its dimensions. After matrix multiplication the prepended 1 is removed. - - [*] If the second argument is 1-D, it is promoted to a matrix by appending a 1 - to its dimensions. After matrix multiplication the appended 1 is removed. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - Pykokkos view - Matmul result in a view or 0.0 in case views are empty - - - """ - - if len(viewA.shape) > 2 or len(viewB.shape) > 2: - raise NotImplementedError("Matmul only supports upto 2D views") - - viewAType = viewA.dtype.__name__ - viewBType = viewB.dtype.__name__ - - if viewAType != viewBType: - raise RuntimeError( - "Cannot multiply {} with {} dtype. Types must be same.".format( - viewAType, viewBType - ) - ) - - if not viewA.shape and not viewB.shape: - return 0.0 - - viewALast = viewA.shape[1] if len(viewA.shape) == 2 else viewA.shape[0] - viewBFirst = viewB.shape[0] if len(viewB.shape) == 2 else viewB.shape[0] - - if viewALast != viewBFirst: - print(viewALast, viewBFirst) - raise RuntimeError( - "Matrix dimensions are not compatible for multiplication: {} and {}".format( - viewA.shape, viewB.shape - ) - ) - - outRows = viewA.shape[0] if len(viewA.shape) == 2 else 1 - outCols = viewB.shape[1] if len(viewB.shape) == 2 else 1 - totalThreads = outRows * outCols - - out = None - if len(viewA.shape) == 1 or len(viewB.shape) == 1: - dim = max(outCols, outRows) - out = pk.View([dim], pk.float if viewBType == "float32" else pk.double) - else: - out = pk.View( - [outRows, outCols], pk.float if viewBType == "float32" else pk.double - ) - - # CASE 1 BOTH 2D - if len(viewA.shape) == len(viewB.shape) and len(viewA.shape) == 2: - pk.parallel_for( - profiler_name, - totalThreads, - np_matmul_impl_2d_2d, - cols=outCols, - vec_length=viewALast, - viewA=viewA, - viewB=viewB, - viewOut=out, - ) - - elif len(viewA.shape) == 1 and len(viewB.shape) == 1: - return dot(viewA, viewB) - - # CASE 2 Either is 1D - elif len(viewA.shape) == 1: - pk.parallel_for( - profiler_name, - totalThreads, - np_matmul_impl_1d_2d, - vec_length=viewA.shape[0], - view1D=viewA, - viewB=viewB, - viewOut=out, - ) - - elif len(viewB.shape) == 1: - pk.parallel_for( - profiler_name, - totalThreads, - np_matmul_impl_2d_1d, - vec_length=viewB.shape[0], - viewA=viewA, - view1D=viewB, - viewOut=out, - ) - - else: - raise RuntimeError( - "Unhandled case of matrix multiplication shapes: {} with {}".format( - viewA.shape, viewB.shape - ) - ) - - return out - - -def matmul(viewA, viewB, profiler_name: Optional[str] = None): - """ - 1D Matrix Multiplication of compatible views - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - Float/Double - 1D Matmul result - - """ - if len(viewA.shape) != 1 or viewA.shape[0] != viewB.shape[0]: - raise RuntimeError( - "Input operand 1 has a mismatch in its core dimension (Size {} is different from {})".format( - viewA.shape[0], viewB.shape[0] - ) - ) - - a_dtype_str = viewA.dtype.__name__ - b_dtype_str = viewB.dtype.__name__ - if not (a_dtype_str == "float64" and b_dtype_str == "float64"): - if not (a_dtype_str == "float32" and b_dtype_str == "float32"): - raise RuntimeError("Incompatible Types") - - return _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=viewA.shape[0], - dtype=viewA.dtype.value, - ndims=1, - op="matmul", - sub_dispatcher=pk.parallel_reduce, - viewA=viewA, - viewB=viewB, - ) - - -@pk.workunit -def dot_impl_1d_double( - tid: int, - acc: pk.Acc[pk.double], - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], -): - acc += viewA[tid] * viewB[tid] - - -@pk.workunit -def dot_impl_1d_float( - tid: int, - acc: pk.Acc[pk.float], - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], -): - acc += viewA[tid] * viewB[tid] - - -def dot(viewA, viewB): - """ - 1D Matrix Multiplication of compatible views - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - Float/Double - - """ - - if len(viewA.shape) == 0 and len(viewB.shape) == 0: - return 0 - - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("only 1D views supported for dot() ufunc.") - - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.parallel_reduce( - viewA.shape[0], dot_impl_1d_double, viewA=viewA, viewB=viewB - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.parallel_reduce( - viewA.shape[0], dot_impl_1d_float, viewA=viewA, viewB=viewB - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def divide_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] / viewB[tid % viewB.extent(0)] - - -@pk.workunit -def divide_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] / viewB[tid % viewB.extent(0)] - - -@pk.workunit -def divide_impl_2d_1d_double( - tid: int, - viewA: pk.View2D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View2D[pk.double], -): - for i in range(viewA.extent(1)): - out[tid][i] = viewA[tid][i] / viewB[i % viewB.extent(0)] - - -def divide(viewA, viewB, profiler_name: Optional[str] = None): - """ - Divides positionally corresponding elements - of viewA with elements of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - # viewA must always be a view of type float 64 or 32 - if not isinstance(viewA, pk.ViewType) and viewA.dtype.__name__ not in [ - "float32", - "float64", - ]: - raise RuntimeError( - "Incompatible first argument of type: {}, must be a float32 or float 64 Pykokkos view".format( - viewA.dtype - ) - ) - - # then, if viewB is a scalar conform it to viewA's type - if not isinstance(viewB, pk.ViewType): - view_temp = pk.View( - [1], pk.double if viewA.dtype.__name__ == "float64" else pk.float32 - ) - view_temp[0] = viewB - viewB = view_temp - - if viewA.rank() == 2: - out = pk.View(viewA.shape, pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - divide_impl_2d_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - divide_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - divide_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - raise RuntimeError("Incompatible Types {}, {}".format(viewA.dtype, viewB.dtype)) - return out - - -@pk.workunit -def negative_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = view[tid] * -1 - - -@pk.workunit -def negative_impl_1d_float( - tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float] -): - out[tid] = view[tid] * -1 - - -def negative(view, profiler_name: Optional[str] = None): - """ - Element-wise negative of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(view.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for negative() ufunc." - ) - if view.dtype.__name__ == "float64": - out = pk.View([view.shape[0]], pk.double) - pk.parallel_for( - profiler_name, view.shape[0], negative_impl_1d_double, view=view, out=out - ) - elif view.dtype.__name__ == "float32": - out = pk.View([view.shape[0]], pk.float) - pk.parallel_for( - profiler_name, view.shape[0], negative_impl_1d_float, view=view, out=out - ) - else: - raise NotImplementedError - return out - - -def positive(view): - """ - Element-wise positive of the view; - Essentially returns a copy of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if view.shape == (): - out = pk.View((), dtype=view.dtype) - else: - out = pk.View([*view.shape], dtype=view.dtype) - out[...] = view - return out - - -@pk.workunit -def power_impl_scalar_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = pow(viewA[0], viewB[tid]) - - -@pk.workunit -def power_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = pow(viewA[tid], viewB[tid]) - - -@pk.workunit -def power_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = pow(viewA[tid], viewB[tid]) - - -@pk.workunit -def power_impl_2d_double( - tid: int, - viewA: pk.View2D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View2D[pk.double], -): - for i in range(viewA.extent(1)): - out[tid][i] = pow(viewA[tid][i], viewB[i % viewB.extent(0)]) - - -def power(viewA, viewB): - """ - Returns a view with each val in viewA raised - to the positionally corresponding power in viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if not isinstance(viewB, pk.ViewType): - view_temp = pk.View([1], pk.double) - view_temp[0] = viewB - viewB = view_temp - - if isinstance(viewA, int): - view_temp = pk.View([1], pk.double) - view_temp[0] = viewA - viewA = view_temp - - out = pk.View([viewB.shape[0]], pk.double) - pk.parallel_for( - viewB.shape[0], power_impl_scalar_double, viewA=viewA, viewB=viewB, out=out - ) - elif viewA.rank() == 2: - out = pk.View(viewA.shape, pk.double) - pk.parallel_for( - viewA.shape[0], power_impl_2d_double, viewA=viewA, viewB=viewB, out=out - ) - elif viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], power_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], power_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def fmod_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = fmod(viewA[tid], viewB[tid]) - - -@pk.workunit -def fmod_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = fmod(viewA[tid], viewB[tid]) - - -def fmod(viewA, viewB): - """ - Element-wise remainder of division when element of viewA is - divided by positionally corresponding element of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("fmod() ufunc only supports 1D views") - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], fmod_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], fmod_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def square_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_uint8(tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_uint8(tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_uint16( - tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint16] -): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_uint16( - tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint16] -): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_uint32( - tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint32] -): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_uint32( - tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint32] -): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint64] -): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint64] -): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.int8]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.int8]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_int16(tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.int16]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_int16(tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.int16]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_int32(tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.int32]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_int32(tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.int32]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_int64(tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.int64]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_int64(tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.int64]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -def square(view): - """ - Squares argument element-wise - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(view.shape) > 2: - raise NotImplementedError( - "only up to 2D views currently supported for square() ufunc." - ) - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_float, view=view, out=out) - elif "uint8" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_uint8, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_uint8, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_uint8, view=view, out=out) - elif "uint16" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_uint16, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_uint16, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_uint16, view=view, out=out) - elif "uint32" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_uint32, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_uint32, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_uint32, view=view, out=out) - elif "uint64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_uint64, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_uint64, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_uint64, view=view, out=out) - elif "int8" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_int8, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_int8, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_int8, view=view, out=out) - elif "int16" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_int16, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_int16, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_int16, view=view, out=out) - elif "int32" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_int32, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_int32, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_int32, view=view, out=out) - elif "int64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_int64, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_int64, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_int64, view=view, out=out) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def greater_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] > viewB[tid] - - -@pk.workunit -def greater_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] > viewB[tid] - - -def greater(viewA, viewB): - """ - Return the truth value of viewA > viewB element-wise. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("greater() ufunc only supports 1D views") - out = pk.View([viewA.shape[0]], pk.uint8) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - pk.parallel_for( - viewA.shape[0], greater_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - pk.parallel_for( - viewA.shape[0], greater_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def logaddexp_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = log(exp(viewA[tid]) + exp(viewB[tid])) - - -@pk.workunit -def logaddexp_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = log(exp(viewA[tid]) + exp(viewB[tid])) - - -def logaddexp(viewA, viewB): - """ - Return a view with log(exp(a) + exp(b)) calculate for - positionally corresponding elements in viewA and viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logaddexp() ufunc." - ) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], logaddexp_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], logaddexp_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -def true_divide(viewA, viewB): - """ - true_divide is an alias of divide - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - return divide(viewA, viewB) - - -@pk.workunit -def logaddexp2_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = log2(pow(2, viewA[tid]) + pow(2, viewB[tid])) - - -@pk.workunit -def logaddexp2_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = log2(pow(2, viewA[tid]) + pow(2, viewB[tid])) - - -def logaddexp2(viewA, viewB): - """ - Return a view with log(pow(2, a) + pow(2, b)) calculated for - positionally corresponding elements in viewA and viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logaddexp2() ufunc." - ) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], logaddexp2_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], logaddexp2_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def floor_divide_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] // viewB[tid] - - -@pk.workunit -def floor_divide_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] // viewB[tid] - - -def floor_divide(viewA, viewB): - """ - Divides positionally corresponding elements - of viewA with elements of viewB and floors the result - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for floor_divide() ufunc." - ) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], - floor_divide_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], - floor_divide_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -def sin(view, profiler_name: Optional[str] = None): - """ - Element-wise trigonometric sine of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("sin() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=dtype) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="sin", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -@pk.workunit -def cos_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): - out[tid] = cos(view[tid]) - - -@pk.workunit -def cos_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): - for i in range(view.extent(1)): - out[tid][i] = cos(view[tid][i]) - - -@pk.workunit -def cos_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = cos(view[tid]) - - -@pk.workunit -def cos_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = cos(view[tid][i]) - - -def cos(view): - """ - Element-wise trigonometric cosine of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(view.shape) > 2: - raise NotImplementedError( - "only up to 2D views currently supported for cos() ufunc." - ) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - out = pk.View([*view.shape], dtype=pk.float64) - if len(view.shape) == 1: - pk.parallel_for(view.shape[0], cos_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], cos_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - out = pk.View([*view.shape], dtype=pk.float32) - if len(view.shape) == 1: - pk.parallel_for(view.shape[0], cos_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], cos_impl_2d_float, view=view, out=out) - else: - raise NotImplementedError - return out - - -def tan(view, profiler_name: Optional[str] = None): - """ - Element-wise tangent of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("tan() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=dtype) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="tan", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -@pk.workunit -def logical_and_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] and viewB[tid] - - -@pk.workunit -def logical_and_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] and viewB[tid] - - -def logical_and(viewA, viewB): - """ - Return the element-wise truth value of viewA AND viewB. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logical_and() ufunc." - ) - out = pk.View([viewA.shape[0]], pk.uint8) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - pk.parallel_for( - viewA.shape[0], - logical_and_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - pk.parallel_for( - viewA.shape[0], logical_and_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def logical_or_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] or viewB[tid] - - -@pk.workunit -def logical_or_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] or viewB[tid] - - -def logical_or(viewA, viewB): - """ - Return the element-wise truth value of viewA OR viewB. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logical_or() ufunc." - ) - out = pk.View([viewA.shape[0]], pk.uint8) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - pk.parallel_for( - viewA.shape[0], logical_or_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - pk.parallel_for( - viewA.shape[0], logical_or_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def logical_xor_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = bool(viewA[tid]) ^ bool(viewB[tid]) - - -@pk.workunit -def logical_xor_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = bool(viewA[tid]) ^ bool(viewB[tid]) - - -def logical_xor(viewA, viewB): - """ - Return the element-wise truth value of viewA XOR viewB. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logical_xor() ufunc." - ) - out = pk.View([viewA.shape[0]], pk.uint8) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - pk.parallel_for( - viewA.shape[0], - logical_xor_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - pk.parallel_for( - viewA.shape[0], logical_xor_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def logical_not_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8] -): - out[tid] = not view[tid] - - -@pk.workunit -def logical_not_impl_1d_float( - tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8] -): - out[tid] = not view[tid] - - -def logical_not(view): - """ - Element-wise logical_not of the view. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(view.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logical_not() ufunc." - ) - out = pk.View([view.shape[0]], pk.uint8) - if view.dtype.__name__ == "float64": - pk.parallel_for(view.shape[0], logical_not_impl_1d_double, view=view, out=out) - elif view.dtype.__name__ == "float32": - pk.parallel_for(view.shape[0], logical_not_impl_1d_float, view=view, out=out) - return out - - -@pk.workunit -def fmax_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = fmax(viewA[tid], viewB[tid]) - - -@pk.workunit -def fmax_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = fmax(viewA[tid], viewB[tid]) - - -def fmax(viewA, viewB): - """ - Return the element-wise fmax. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("fmax() ufunc only supports 1D views") - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], fmax_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], fmax_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def fmin_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = fmin(viewA[tid], viewB[tid]) - - -@pk.workunit -def fmin_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = fmin(viewA[tid], viewB[tid]) - - -def fmin(viewA, viewB): - """ - Return the element-wise fmin. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("fmax() ufunc only supports 1D views") - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], fmin_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], fmin_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -def exp(view, profiler_name: Optional[str] = None): - """ - Element-wise exp of the view. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("exp() ufunc only supports up to 2D views") - if view.size == 0: - return view - out = pk.View([*view.shape], dtype=dtype) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="exp", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -@pk.workunit -def exp2_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = pow(2, view[tid]) - - -@pk.workunit -def exp2_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = pow(2, view[tid]) - - -def exp2(view): - """ - Element-wise 2**x of the view. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(view.shape) > 1: - raise NotImplementedError("only 1D views currently supported for exp2() ufunc.") - if view.dtype.__name__ == "float64": - out = pk.View([view.shape[0]], pk.double) - pk.parallel_for(view.shape[0], exp2_impl_1d_double, view=view, out=out) - elif view.dtype.__name__ == "float32": - out = pk.View([view.shape[0]], pk.float) - pk.parallel_for(view.shape[0], exp2_impl_1d_float, view=view, out=out) - else: - raise NotImplementedError - return out - - -# TODO: Implement parallel max reduction with index -def argmax(view, axis=None): - if isinstance(axis, pk.ViewType): - raise NotImplementedError - - res = np.argmax(view, axis=axis) - view = pk.View(res.shape, pk.int32) - view[:] = res - - return view - - -# TODO: Implement parallel sorting + filtering -def unique(view): - res = np.unique(view) - view = pk.View(res.shape, pk.double) - view[:] = res - - return view - - -@pk.workunit -def var_impl_2d_axis0_double( - tid: int, - view: pk.View2D[pk.double], - view_mean: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = 0 - for i in range(view.extent(0)): - out[tid] += (pow(view[i][tid] - view_mean[tid], 2)) / view.extent(0) - - -@pk.workunit -def var_imple_2d_axis1_double( - tid: int, - view: pk.View2D[pk.double], - view_mean: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = 0 - for i in range(view.extent(1)): - out[tid] += (pow(view[tid][i] - view_mean[tid], 2)) / view.extent(1) - - -@pk.workunit -def var_impl_1d(tid, acc, view, mean): - acc += pow(view[tid] - mean, 2) / view.extent(0) - - -def var(view, axis=None, profiler_name: Optional[str] = None): # population - if isinstance(axis, pk.ViewType): - raise NotImplementedError - - if view.rank() > 2: - raise NotImplementedError( - "Current version of Pykokkos only supports variance for upto 2D views" - ) - - if view.rank() == 2: # legacy code - if view.dtype.__name__ == "float64": - if axis == 0: - view_mean = mean(view, 0, profiler_name) - out = pk.View([view.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - view.shape[1], - var_impl_2d_axis0_double, - view=view, - view_mean=view_mean, - out=out, - ) - return out - else: - view_mean = mean(view, 1, profiler_name) - out = pk.View([view.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - view.shape[0], - var_imple_2d_axis1_double, - view=view, - view_mean=view_mean, - out=out, - ) - return out - else: - raise RuntimeError("Incompatible Types") - elif view.rank() == 1: # newer impl - mean_val = mean(view, profiler_name) - return pk.parallel_reduce( - profiler_name, view.shape[0], var_impl_1d, view=view, mean=mean_val - ) - else: - raise RuntimeError("Unexpected view of shape {}".format(view.shape)) - - -@pk.workunit -def mean_impl_1d_axis0_double( - tid: int, view: pk.View2D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = 0 - for i in range(view.extent(0)): - out[tid] += view[i][tid] / view.extent(0) - - -@pk.workunit -def mean_impl_1d_axis1_double( - tid: int, view: pk.View2D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = 0 - for i in range(view.extent(1)): - out[tid] += view[tid][i] / view.extent(1) - - -@pk.workunit -def mean_impl_1d(tid, acc, view): - acc += view[tid] / view.extent(0) - - -def mean(view, axis=None, profiler_name: Optional[str] = None): - if isinstance(axis, pk.ViewType): - raise NotImplementedError - - if view.rank() > 2: - raise NotImplementedError( - "Current version of Pykokkos only supports variance for upto 2D views" - ) - - if view.rank() == 2: - if view.dtype.__name__ == "float64": # legacy - if axis == 0: - out = pk.View([view.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - view.shape[1], - mean_impl_1d_axis0_double, - view=view, - out=out, - ) - return out - else: - out = pk.View([view.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - view.shape[0], - mean_impl_1d_axis1_double, - view=view, - out=out, - ) - - return out - else: - raise RuntimeError("Incompatible Types") - - elif view.rank() == 1: - return pk.parallel_reduce(profiler_name, view.shape[0], mean_impl_1d, view=view) - else: - raise RuntimeError("Unexpected view of shape {}".format(view.shape)) - - -@pk.workunit -def in1d_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.int8], -): - out[tid] = 0 - for i in range(viewB.extent(0)): - if viewB[i] == viewA[tid]: - out[tid] = 1 - break - - -def in1d(viewA, viewB): - if viewA.dtype.__name__ == "float64": - out = pk.View(viewA.shape, pk.int8) - pk.parallel_for( - viewA.shape[0], in1d_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - - return out - - -@pk.workunit -def transpose_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[i][tid] = view[tid][i] - - -def transpose(view): - if view.rank() == 1: - return view - - if view.rank() == 2: - if view.dtype.__name__ == "float64": - out = pk.View(view.shape[::-1], pk.double) - pk.parallel_for(view.shape[0], transpose_impl_2d_double, view=view, out=out) - return out - - raise RuntimeError("Transpose supports 2D views only") - - -@pk.workunit -def hstack_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - if tid >= viewA.extent(0): - out[tid] = viewB[tid - viewA.extent(0)] - else: - out[tid] = viewA[tid] - - -@pk.workunit -def hstack_impl_2d_double( - tid: int, - viewA: pk.View2D[pk.double], - viewB: pk.View2D[pk.double], - out: pk.View2D[pk.double], -): - for i in range(out.extent(1)): - if i >= viewA.extent(1): - out[tid][i] = viewB[tid][i - viewA.extent(1)] - else: - out[tid][i] = viewA[tid][i] - - -def hstack(viewA, viewB): - if viewA.shape != viewB.shape: - raise RuntimeError( - "All the input view dimensions for the concatenation axis must match exactly" - ) - - if viewA.rank() == 2 and viewB.rank() == 2: - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0], viewA.shape[1] * 2], pk.double) - pk.parallel_for( - out.shape[0], hstack_impl_2d_double, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("hstack supports 2D views of type double only") - elif viewA.rank() == 1 and viewB.rank() == 1: - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0] + viewB.shape[0]], pk.double) - pk.parallel_for( - out.shape[0], hstack_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("hstack supports 1D views of type double only") - else: - raise RuntimeError("hstack supports views of same shape (1D and 2D) only") - - return out - - -@pk.workunit -def index_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.int32], - out: pk.View1D[pk.double], +def _ufunc_kernel_dispatcher( + profiler_name: Optional[str], tid, dtype, ndims, op, sub_dispatcher, **kwargs ): - out[tid] = viewA[viewB[tid]] - - -def index(viewA, viewB): - if viewB.dtype == pk.int32: - out = pk.View(viewB.shape, pk.double) - pk.parallel_for( - viewB.shape[0], index_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out + dtype_extractor = re.compile(r".*(?:dtype|data_types|DataType)\.(\w+)") + if ndims == 0: + ndims = 1 + res = dtype_extractor.match(str(dtype)) + dtype_str = res.group(1) + if dtype_str == "float32": + dtype_str = "float" + elif dtype_str == "float64": + dtype_str = "double" + function_name_str = f"{op}_impl_{ndims}d_{dtype_str}" + desired_workunit = kernel_dict[function_name_str] + # call the kernel + ret = sub_dispatcher(profiler_name, tid, desired_workunit, **kwargs) + return ret -def isnan(view, profiler_name: Optional[str] = None): - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("isnan() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=pk.bool) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - if view.ndim == 0: - new_view = pk.View([1], dtype=view.dtype) - new_view[0] = view - view = new_view - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="isnan", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out +def _broadcast_views(view1, view2): + # support broadcasting by using the same + # shape matching rules as NumPy + # TODO: determine if this can be done with + # more memory efficiency? + if view1.shape != view2.shape: + new_shape = np.broadcast_shapes(view1.shape, view2.shape) + view1_new = pk.View([*new_shape], dtype=view1.dtype) + view1_new[:] = view1 + view1 = view1_new + view2_new = pk.View([*new_shape], dtype=view2.dtype) + view2_new[:] = view2 + view2 = view2_new + return view1, view2 -def isinf(view, profiler_name: Optional[str] = None): - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("isinf() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=pk.bool) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="isinf", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out +def _typematch_views(view1, view2): + # very crude casting implementation + # for binary ufuncs + dtype1 = view1.dtype + dtype2 = view2.dtype + dtype_extractor = re.compile(r".*(?:data_types|DataType)\.(\w+)") + res1 = dtype_extractor.match(str(dtype1)) + res2 = dtype_extractor.match(str(dtype2)) + effective_dtype = dtype1 + if res1 is not None and res2 is not None: + res1_dtype_str = res1.group(1) + res2_dtype_str = res2.group(1) + if res1_dtype_str == "double": + res1_dtype_str = "float64" + elif res1_dtype_str == "float": + res1_dtype_str = "float32" + if res2_dtype_str == "double": + res2_dtype_str = "float64" + elif res2_dtype_str == "float": + res2_dtype_str = "float32" + if res1_dtype_str == "bool" or res2_dtype_str == "bool": + res1_dtype_str = "uint8" + dtype1 = pk.uint8 + res2_dtype_str = "uint8" + dtype2 = pk.uint8 + if ("int" in res1_dtype_str and "int" in res2_dtype_str) or ( + "float" in res1_dtype_str and "float" in res2_dtype_str + ): + dtype_1_width = int(res1_dtype_str.split("t")[1]) + dtype_2_width = int(res2_dtype_str.split("t")[1]) + if dtype_1_width >= dtype_2_width: + effective_dtype = dtype1 + view2_new = pk.View([*view2.shape], dtype=effective_dtype) + view2_new[:] = view2.data + view2 = view2_new + else: + effective_dtype = dtype2 + view1_new = pk.View([*view1.shape], dtype=effective_dtype) + view1_new[:] = view1.data + view1 = view1_new + return view1, view2, effective_dtype -def equal(view1, view2, profiler_name: Optional[str] = None): +def _equal(view1, view2, profiler_name: Optional[str] = None): """ Computes the truth value of ``view1_i`` == ``view2_i`` for each element ``x1_i`` of the input view ``view1`` with the respective element ``x2_i`` @@ -3543,186 +147,26 @@ def equal(view1, view2, profiler_name: Optional[str] = None): return out -def isfinite(view, profiler_name: Optional[str] = None): +def _isnan(view, profiler_name: Optional[str] = None): dtype = view.dtype ndims = len(view.shape) if ndims > 2: - raise NotImplementedError("isfinite() ufunc only supports up to 2D views") - if view.size == 0: - out = pk.View(view.shape, dtype=pk.bool) - return out + raise NotImplementedError("isnan() ufunc only supports up to 2D views") out = pk.View([*view.shape], dtype=pk.bool) - if view.shape == (): - new_view = pk.View([1], dtype=dtype) - new_view[:] = view - view = new_view - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="isfinite", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def round(view, profiler_name: Optional[str] = None): - """ - Rounds each element of the input view to the nearest integer-valued number. - - Parameters - ---------- - view : pykokkos view - Should have a numeric data type. - - Returns - ------- - out: pykokkos view - A view containing the rounded result for each element in - the input view. The returned view must have the same data - type as the input view. - - Notes - ----- - If view element ``i`` is already integer-valued, the result is ``i``. - - """ - dtype = view.dtype - ndims = len(view.shape) - dtype_str = str(dtype) - if "int" in dtype_str: - # special case defined in API std - return view - out = pk.View(view.shape, dtype=dtype) - if ndims > 3: - raise NotImplementedError( - "only up to 3D views currently supported for round() ufunc." - ) - - _supported_types_check(dtype_str, {"double", "float64", "float"}) - - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="round", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def trunc(view, profiler_name: Optional[str] = None): - """ - Rounds each element ``i`` of the input view to the integer-valued number - that is closest to but no greater than ``i``. - - Parameters - ---------- - view : pykokkos view - Should have a numeric data type. - - Returns - ------- - out: pykokkos view - A view containing the rounded result for each element in - the input view. The returned view must have the same data - type as the input view. - - Notes - ----- - If view element ``i`` is already integer-valued, the result is ``i``. - - """ - dtype = view.dtype - ndims = len(view.shape) - dtype_str = str(dtype) - if "int" in dtype_str: - # special case defined in API std - return view - out = pk.View(view.shape, dtype=dtype) - if ndims > 3: - raise NotImplementedError( - "only up to 3D views currently supported for trunc() ufunc." - ) - - _supported_types_check(dtype_str, {"double", "float64", "float"}) - - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="trunc", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def ceil(view, profiler_name: Optional[str] = None): - """ - Rounds each element of the input view to the smallest (i.e., closest to -infinity) - integer-valued number that is not less than a given element. - - Parameters - ---------- - view : pykokkos view - Should have a numeric data type. - - Returns - ------- - out: pykokkos view - A view containing the rounded result for each element in - the input view. The returned view must have the same data - type as the input view. - - Notes - ----- - If view element ``i`` is already integer-valued, the result is ``i``. - - """ - dtype = view.dtype - ndims = len(view.shape) - dtype_str = str(dtype) - if "int" in dtype_str: - # special case defined in API std - return view - out = pk.View(view.shape, dtype=dtype) - if ndims > 3: - raise NotImplementedError( - "only up to 3D views currently supported for ceil() ufunc." - ) - - _supported_types_check(dtype_str, {"double", "float64", "float"}) - if view.shape == (): tid = 1 else: tid = view.shape[0] + if view.ndim == 0: + new_view = pk.View([1], dtype=view.dtype) + new_view[0] = view + view = new_view _ufunc_kernel_dispatcher( profiler_name=profiler_name, tid=tid, dtype=dtype, ndims=ndims, - op="ceil", + op="isnan", sub_dispatcher=pk.parallel_for, out=out, view=view, @@ -3730,42 +174,12 @@ def ceil(view, profiler_name: Optional[str] = None): return out -def floor(view, profiler_name: Optional[str] = None): - """ - Rounds each element of the input view to the greatest (i.e., closest to +infinity) - integer-valued number that is not greater than a given element. - - Parameters - ---------- - view : pykokkos view - Should have a numeric data type. - - Returns - ------- - out: pykokkos view - A view containing the rounded result for each element in - the input view. The returned view must have the same data - type as the input view. - - Notes - ----- - If view element ``i`` is already integer-valued, the result is ``i``. - - """ +def _isinf(view, profiler_name: Optional[str] = None): dtype = view.dtype ndims = len(view.shape) - dtype_str = str(dtype) - if "int" in dtype_str: - # special case defined in API std - return view - out = pk.View(view.shape, dtype=dtype) - if ndims > 3: - raise NotImplementedError( - "only up to 3D views currently supported for floor() ufunc." - ) - - _supported_types_check(dtype_str, {"double", "float64", "float"}) - + if ndims > 2: + raise NotImplementedError("isinf() ufunc only supports up to 2D views") + out = pk.View([*view.shape], dtype=pk.bool) if view.shape == (): tid = 1 else: @@ -3775,7 +189,7 @@ def floor(view, profiler_name: Optional[str] = None): tid=tid, dtype=dtype, ndims=ndims, - op="floor", + op="isinf", sub_dispatcher=pk.parallel_for, out=out, view=view, @@ -3783,27 +197,19 @@ def floor(view, profiler_name: Optional[str] = None): return out -def tanh(view, profiler_name: Optional[str] = None): - """ - Calculates an approximation to the hyperbolic tangent for each element x_i of the input view. - - Parameters - ---------- - view : pykokkos view - Input view whose elements each represent a hyperbolic angle. Should have a floating-point data type. - - Returns - ------- - y : pykokkos view - A view containing the hyperbolic tangent of each element in the input view. The returned view must - have a floating-point data type determined by type promotion rules. - """ +def _isfinite(view, profiler_name: Optional[str] = None): dtype = view.dtype ndims = len(view.shape) if ndims > 2: - raise NotImplementedError("tanh() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=dtype) + raise NotImplementedError("isfinite() ufunc only supports up to 2D views") + if view.size == 0: + out = pk.View(view.shape, dtype=pk.bool) + return out + out = pk.View([*view.shape], dtype=pk.bool) if view.shape == (): + new_view = pk.View([1], dtype=dtype) + new_view[:] = view + view = new_view tid = 1 else: tid = view.shape[0] @@ -3812,7 +218,7 @@ def tanh(view, profiler_name: Optional[str] = None): tid=tid, dtype=dtype, ndims=ndims, - op="tanh", + op="isfinite", sub_dispatcher=pk.parallel_for, out=out, view=view, diff --git a/tests/test_ufuncs.py b/tests/test_ufuncs.py index fd97c5e3..e69de29b 100644 --- a/tests/test_ufuncs.py +++ b/tests/test_ufuncs.py @@ -1,750 +0,0 @@ -import pykokkos as pk - -import numpy as np -from numpy.random import default_rng -from numpy.testing import assert_allclose -import pytest - - -@pk.workunit -def sqrt_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = sqrt(view[tid]) - - -@pk.workunit -def exp_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = exp(view[tid]) - - -@pk.workunit -def exp2_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = exp2(view[tid]) - - -@pk.workunit -def positive_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = positive(view[tid]) - - -@pk.workunit -def negative_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = negative(view[tid]) - - -@pk.workunit -def absolute_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = abs(view[tid]) - - -@pk.workunit -def fabsolute_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = fabs(view[tid]) - - -@pk.workunit -def rint_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = rint(view[tid]) - - -@pk.workunit -def conjugate_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = conj(view[tid]) - - -@pk.workunit -def sign_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = sign(view[tid]) - - -@pk.workunit -def log_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = log(view[tid]) - - -@pk.workunit -def log2_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = log2(view[tid]) - - -@pk.workunit -def log10_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = log10(view[tid]) - - -@pk.workunit -def expm1_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = expm1(view[tid]) - - -@pk.workunit -def log1p_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = log1p(view[tid]) - - -@pk.workunit -def square_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = square(view[tid]) - - -@pk.workunit -def cbrt_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = cbrt(view[tid]) - - -@pk.workunit -def reciprocal_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = reciprocal(view[tid]) - - -@pytest.mark.parametrize( - "kokkos_workunit, numpy_ufunc", - [ - (sqrt_workunit, np.sqrt), - (exp_workunit, np.exp), - pytest.param( - exp2_workunit, np.exp2, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - positive_workunit, np.positive, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - negative_workunit, np.negative, marks=pytest.mark.xfail(reason="see gh-27") - ), - (absolute_workunit, np.absolute), - (fabsolute_workunit, np.fabs), - pytest.param( - rint_workunit, np.rint, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - conjugate_workunit, - np.conjugate, - marks=pytest.mark.xfail(reason="see gh-27"), - ), - pytest.param( - sign_workunit, np.sign, marks=pytest.mark.xfail(reason="see gh-27") - ), - (log_workunit, np.log), - (log2_workunit, np.log2), - (log10_workunit, np.log10), - (expm1_workunit, np.expm1), - (log1p_workunit, np.log1p), - pytest.param( - square_workunit, np.square, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - cbrt_workunit, np.cbrt, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - reciprocal_workunit, - np.reciprocal, - marks=pytest.mark.xfail(reason="see gh-27"), - ), - ], -) -def test_1d_unary_ufunc_vs_numpy(kokkos_workunit, numpy_ufunc): - # verify that we can easily recreate the functionality - # of most NumPy "unary" ufuncs on 1D views/arrays without much - # custom code - # NOTE: maybe we directly provide i.e., pk.sqrt(view) - # "pykokkos ufuncs" some day? - view: pk.View1d = pk.View([10], pk.double) - view[:] = np.arange(10, dtype=np.float64) - pk.parallel_for(10, kokkos_workunit, view=view) - actual = view - expected = numpy_ufunc(range(10)) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.reciprocal, np.reciprocal), - (pk.log, np.log), - (pk.log2, np.log2), - (pk.log10, np.log10), - (pk.log1p, np.log1p), - (pk.sqrt, np.sqrt), - (pk.sign, np.sign), - (pk.negative, np.negative), - (pk.positive, np.positive), - (pk.square, np.square), - (pk.sin, np.sin), - (pk.cos, np.cos), - (pk.tan, np.tan), - (pk.logical_not, np.logical_not), - (pk.exp, np.exp), - (pk.exp2, np.exp2), - (pk.mean, np.mean), - (pk.var, np.var), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_1d_exposed_ufuncs_vs_numpy(pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype): - # test the ufuncs we have exposed in the pk namespace - # vs. their NumPy equivalents - expected = numpy_ufunc(np.arange(10, dtype=numpy_dtype)) - - view: pk.View1d = pk.View([10], pk_dtype) - view[:] = np.arange(10, dtype=numpy_dtype) - actual = pk_ufunc(view=view) - # log10 single-precision needs relaxed tol - # for now - if numpy_ufunc in {np.log10, np.cos, np.tan} and numpy_dtype == np.float32: - assert_allclose(actual, expected, rtol=1.5e-7) - else: - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.add, np.add), - (pk.subtract, np.subtract), - (pk.multiply, np.multiply), - (pk.divide, np.divide), - (pk.np_matmul, np.matmul), - (pk.power, np.power), - (pk.fmod, np.fmod), - (pk.greater, np.greater), - (pk.logaddexp, np.logaddexp), - (pk.floor_divide, np.floor_divide), - (pk.true_divide, np.true_divide), - (pk.logaddexp2, np.logaddexp2), - (pk.logical_and, np.logical_and), - (pk.logical_or, np.logical_or), - (pk.logical_xor, np.logical_xor), - (pk.fmax, np.fmax), - (pk.fmin, np.fmin), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_multi_array_1d_exposed_ufuncs_vs_numpy( - pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype -): - - # test the multi array ufuncs we have exposed - # in the pk namespace vs. their NumPy equivalents - expected = numpy_ufunc( - np.arange(10, dtype=numpy_dtype), np.full(10, 5, dtype=numpy_dtype) - ) - - viewA: pk.View1d = pk.View([10], pk_dtype) - viewA[:] = np.arange(10, dtype=numpy_dtype) - viewB: pk.View1d = pk.View([10], pk_dtype) - viewB[:] = np.full(10, 5, dtype=numpy_dtype) - - actual = pk_ufunc(viewA, viewB) - - assert_allclose(actual, expected) - - -# TODO: There may be more funcs that support scalars -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [(pk.add, np.add), (pk.multiply, np.multiply), (pk.divide, np.divide)], -) -@pytest.mark.parametrize("numpy_dtype", [np.float64, np.float32]) -def test_scalar_operations_vs_numpy(pk_ufunc, numpy_ufunc, numpy_dtype): - data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - expected = numpy_ufunc(np.array(data, dtype=numpy_dtype), 1) - actual = pk_ufunc(pk.array(np.array(data, dtype=numpy_dtype)), 1) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.matmul, np.matmul), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_matmul_1d_exposed_ufuncs_vs_numpy( - pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype -): - expected = numpy_ufunc( - np.arange(10, dtype=numpy_dtype), np.full((10, 1), 2, dtype=numpy_dtype) - ) - - viewA = pk.View([10], pk_dtype) - viewB = pk.View([10, 1], pk_dtype) - viewA[:] = np.arange(10, dtype=numpy_dtype) - viewB[:] = np.full((10, 1), 2, dtype=numpy_dtype) - - with pytest.raises(RuntimeError) as e_info: - viewC = pk.View([11], pk_dtype) - viewC[:] = np.arange(11, dtype=numpy_dtype) - pk_ufunc(viewC, viewB) - - assert ( - e_info.value.args[0] - == "Input operand 1 has a mismatch in its core dimension (Size 11 is different from 10)" - ) - - actual = pk_ufunc(viewA, viewB) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "arr", - [ - np.array([4, -1, np.inf]), - np.array([-np.inf, np.nan, np.inf]), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_1d_sqrt_negative_values(arr, pk_dtype, numpy_dtype): - # verify sqrt behavior for negative reals, - # NaN and infinite values - expected = np.sqrt(arr, dtype=numpy_dtype) - view: pk.View1d = pk.View([arr.size], pk_dtype) - view[:] = arr - actual = pk.sqrt(view=view) - assert_allclose(actual, expected) - - -def test_caching(): - # regression test for gh-34 - expected = np.reciprocal(np.arange(10, dtype=np.float32)) - for i in range(300): - view: pk.View1d = pk.View([10], pk.float) - view[:] = np.arange(10, dtype=np.float32) - actual = pk.reciprocal(view=view) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.reciprocal, np.reciprocal), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_2d_exposed_ufuncs_vs_numpy(pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype): - rng = default_rng(123) - in_arr = rng.random((5, 5)).astype(numpy_dtype) - expected = numpy_ufunc(in_arr) - - view: pk.View2d = pk.View([5, 5], pk_dtype) - view[:] = in_arr - actual = pk_ufunc(view=view) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.np_matmul, np.matmul), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -@pytest.mark.parametrize( - "test_dim", [[4, 4, 4, 4], [4, 3, 3, 4], [1, 1, 1, 1], [2, 5, 5, 1]] -) -def test_np_matmul_2d_2d_vs_numpy( - pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype, test_dim -): - - N1 = test_dim[0] - M1 = test_dim[1] - N2 = test_dim[2] - M2 = test_dim[3] - rng = default_rng(123) - np1 = rng.random((N1, M1)).astype(numpy_dtype) - np2 = rng.random((N2, M2)).astype(numpy_dtype) - expected = numpy_ufunc(np1, np2) - - view1: pk.View2d = pk.View([N1, M1], pk_dtype) - view1[:] = np1 - view2: pk.View2d = pk.View([N2, M2], pk_dtype) - view2[:] = np2 - actual = pk_ufunc(view1, view2) - - assert_allclose(actual, expected, rtol=1.5e-7) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.np_matmul, np.matmul), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize("test_dim", [[4, 4, 4], [4, 3, 3], [1, 1, 1], [2, 5, 5]]) -def test_np_matmul_2d_1d_vs_numpy(pk_ufunc, numpy_ufunc, numpy_dtype, test_dim): - - N1 = test_dim[0] - M1 = test_dim[1] - N2 = test_dim[2] - rng = default_rng(123) - np1 = rng.random((N1, M1)).astype(numpy_dtype) - np2 = rng.random(N2).astype(numpy_dtype) - expected = numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) - actual = pk_ufunc(view1, view2) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.np_matmul, np.matmul), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize("test_dim", [[4, 4, 4], [3, 3, 6], [1, 1, 1], [5, 5, 1]]) -def test_np_matmul_1d_2d_vs_numpy(pk_ufunc, numpy_ufunc, numpy_dtype, test_dim): - - N1 = test_dim[0] - N2 = test_dim[1] - M2 = test_dim[2] - rng = default_rng(123) - np1 = rng.random(N1).astype(numpy_dtype) - np2 = rng.random((N2, M2)).astype(numpy_dtype) - expected = numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) - actual = pk_ufunc(view1, view2) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize( - "test_dim", [[4, 3, 3], [3, 1, 6], [1, 4, 2], [5, 6, 1], [4, 3, 2, 1], [2, 3, 2, 4]] -) -def test_np_matmul_fails(numpy_dtype, test_dim): - N1 = None - N2 = None - M1 = None - M2 = None - np1 = None - rng = default_rng(123) - - if len(test_dim) == 3: - N1 = test_dim[0] - N2 = test_dim[1] - M2 = test_dim[2] - np1 = rng.random(N1).astype(numpy_dtype) - - if len(test_dim) == 4: - N1 = test_dim[0] - M1 = test_dim[1] - N2 = test_dim[2] - M2 = test_dim[3] - np1 = rng.random((N1, M1)).astype(numpy_dtype) - - np2 = rng.random((N2, M2)).astype(numpy_dtype) - - with pytest.raises(RuntimeError) as e_info: - view1 = pk.array(np1) - view2 = pk.array(np2) - pk.np_matmul(view1, view2) # Should fail with 1d x 2d - - err_np_matmul = ( - "Matrix dimensions are not compatible for multiplication: {} and {}".format( - view1.shape, view2.shape - ) - ) - assert e_info.value.args[0] == err_np_matmul - - with pytest.raises(RuntimeError) as e_info: - pk.np_matmul(view2, view1) # should fail with 2d x 1 as well - - err_np_matmul = ( - "Matrix dimensions are not compatible for multiplication: {} and {}".format( - view2.shape, view1.shape - ) - ) - assert e_info.value.args[0] == err_np_matmul - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [(pk.subtract, np.subtract), (pk.add, np.add), (pk.multiply, np.multiply)], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -def test_multi_array_2d_exposed_ufuncs_vs_numpy(pk_ufunc, numpy_ufunc, numpy_dtype): - N = 4 - M = 7 - rng = default_rng(123) - np1 = rng.random((N, M)).astype(numpy_dtype) - np2 = rng.random((N, M)).astype(numpy_dtype) - expected = numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) - actual = pk_ufunc(view1, view2) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.subtract, np.subtract), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize( - "test_dim", - [[4, 3, 1, 1], [4, 3, 1, 3], [4, 3, 4, 1], [4, 3, 1], [4, 3, 3], [4, 3], [4]], -) -def test_broadcast_array_exposed_ufuncs_vs_numpy( - pk_ufunc, numpy_ufunc, numpy_dtype, test_dim -): - - np1 = None - np2 = None - rng = default_rng(123) - scalar = 3.0 - - if len(test_dim) == 4: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = rng.random((test_dim[2], test_dim[3])).astype(numpy_dtype) - elif len(test_dim) == 3: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = rng.random((test_dim[2])).astype(numpy_dtype) - elif len(test_dim) == 2: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = scalar # 2d with scalar - elif len(test_dim) == 1: - np1 = rng.random((test_dim[0])).astype(numpy_dtype) - np2 = scalar # 1d with scalar - else: - raise NotImplementedError( - "Invalid test conditions: Broadcasting operations are only supported uptil 2D" - ) - - assert ( - np1 is not None and np2 is not None - ), "Invalid test conditions: Are parameters uptil 2D?" - - expected = numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) if isinstance(np2, np.ndarray) else np2 - actual = pk_ufunc(view1, view2) - - assert_allclose(expected, actual) - - -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -@pytest.mark.parametrize( - "in_arr", - [ - np.array([-5, 4.5, np.nan]), - np.array([np.nan, np.nan, np.nan]), - ], -) -def test_sign_1d_special_cases(in_arr, pk_dtype, numpy_dtype): - in_arr = in_arr.astype(numpy_dtype) - view: pk.View1D = pk.View([in_arr.size], pk_dtype) - view[:] = in_arr - expected = np.sign(in_arr) - actual = pk.sign(view=view) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.copyto, np.copyto), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -def test_copyto_1d(pk_ufunc, numpy_ufunc, numpy_dtype): - N = 4 - M = 7 - rng = default_rng(123) - np1 = rng.random((N, M)).astype(numpy_dtype) - np2 = rng.random((N, M)).astype(numpy_dtype) - numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) - pk_ufunc(view1, view2) - - assert_allclose(np1, view1) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.subtract, np.subtract), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize( - "test_dim", - [ - [4, 3, 4, 3], - [4, 3, 1, 1], - [4, 3, 1, 3], - [4, 3, 4, 1], - [4, 3, 1], - [4, 3, 3], - [4, 3], - [4], - ], -) -def test_copyto_broadcast_2d(pk_ufunc, numpy_ufunc, numpy_dtype, test_dim): - np1 = None - np2 = None - rng = default_rng(123) - scalar = 3.0 - - if len(test_dim) == 4: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = rng.random((test_dim[2], test_dim[3])).astype(numpy_dtype) - elif len(test_dim) == 3: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = rng.random((test_dim[2])).astype(numpy_dtype) - elif len(test_dim) == 2: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = scalar # 2d with scalar - elif len(test_dim) == 1: - np1 = rng.random((test_dim[0])).astype(numpy_dtype) - np2 = scalar # 1d with scalar - else: - raise NotImplementedError( - "Invalid test conditions: Broadcasting operations are only supported uptil 2D" - ) - - assert ( - np1 is not None and np2 is not None - ), "Invalid test conditions: Are parameters uptil 2D?" - - numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) if isinstance(np2, np.ndarray) else np2 - pk_ufunc(view1, view2) - - assert_allclose(np1, view1) - - -@pytest.mark.parametrize( - "input_dtype", - [ - pk.double, - pk.float, - ], -) -@pytest.mark.parametrize( - "pk_ufunc", - [ - pk.floor, - pk.round, - pk.ceil, - pk.trunc, - ], -) -@pytest.mark.parametrize( - "shape", - [ - [1], - [1, 1], - [1, 1, 1], - ], -) -def test_rounding_dtype_preservation(input_dtype, pk_ufunc, shape): - # at the time of writing the array API standard - # conformance test suite doesn't appear to probe - # floating point data types for many of the rounding - # functions - - # for now, we simply test data type preservation - # of output vs. input so that we flush these codepaths - # a bit - view = pk.View(shape, input_dtype) - actual_dtype = pk_ufunc(view).dtype - assert actual_dtype.value == input_dtype.value