From 5395e73ba55208456140aae8afb5c655cd8868c0 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Thu, 22 Jan 2026 23:25:12 -0600 Subject: [PATCH 01/23] Convert examples from workload style to standalone style --- examples/kokkos/01_hello_world.py | 22 ++--- examples/kokkos/01_hello_world_lambda.py | 18 ++--- examples/kokkos/02_simple_reduce.py | 34 ++++---- examples/kokkos/02_simple_reduce_lambda.py | 30 +++---- examples/kokkos/03_simple_view.py | 51 +++++------- examples/kokkos/03_simple_view_lambda.py | 47 +++++------ examples/kokkos/04_simple_memoryspaces.py | 41 +++++----- .../kokkos/04_simple_memoryspaces_lambda.py | 43 +++++----- examples/kokkos/05_simple_atomics.py | 81 +++++++++---------- examples/kokkos/README.md | 2 + examples/kokkos/add1.py | 45 +++++------ examples/kokkos/add1_lambda.py | 41 +++++----- examples/kokkos/inclusive_scan_team.py | 70 +++++----------- examples/kokkos/math_functions.py | 53 ++++++------ examples/kokkos/matrix_sum.py | 64 +++++++-------- examples/kokkos/random_sum.py | 48 +++++------ examples/kokkos/scan_functor.py | 44 +++++----- examples/kokkos/scan_workload.py | 53 +++++------- 18 files changed, 338 insertions(+), 449 deletions(-) diff --git a/examples/kokkos/01_hello_world.py b/examples/kokkos/01_hello_world.py index 2822e817..a905df23 100644 --- a/examples/kokkos/01_hello_world.py +++ b/examples/kokkos/01_hello_world.py @@ -1,19 +1,13 @@ import pykokkos as pk +@pk.workunit +def hello(i: int): + pk.printf("Hello from i = %d\n", i) -@pk.workload -class HelloWorld: - def __init__(self, n): - self.N: int = n - - @pk.main - def run(self): - pk.parallel_for(self.N, self.hello) - - @pk.workunit - def hello(self, i: int): - pk.printf("Hello from i = %d\n", i) - +def main(): + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + pk.parallel_for(N, hello) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, HelloWorld(10)) + main() \ No newline at end of file diff --git a/examples/kokkos/01_hello_world_lambda.py b/examples/kokkos/01_hello_world_lambda.py index 05d5104a..a18cd1dc 100644 --- a/examples/kokkos/01_hello_world_lambda.py +++ b/examples/kokkos/01_hello_world_lambda.py @@ -1,15 +1,13 @@ import pykokkos as pk +@pk.workunit +def hello(i: int): + pk.printf("Hello from i = %i\n", i) -@pk.workload -class HelloWorld: - def __init__(self, n): - self.N: int = n - - @pk.main - def run(self): - pk.parallel_for(self.N, lambda i: pk.printf("Hello from i = %i\n", i)) - +def main(): + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + pk.parallel_for(N, hello) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, HelloWorld(10)) + main() \ No newline at end of file diff --git a/examples/kokkos/02_simple_reduce.py b/examples/kokkos/02_simple_reduce.py index 2dff5a29..a63dbf8f 100644 --- a/examples/kokkos/02_simple_reduce.py +++ b/examples/kokkos/02_simple_reduce.py @@ -1,24 +1,20 @@ +import numpy as np import pykokkos as pk +@pk.workunit +def squaresum(i: int, acc, values): + acc += values[i] -@pk.workload -class SquareSum: - def __init__(self, n): - self.N: int = n - self.total: pk.double = 0 - - @pk.main - def run(self): - self.total = pk.parallel_reduce(self.N, self.squaresum) - - @pk.callback - def results(self): - print("Sum:", self.total) - - @pk.workunit - def squaresum(self, i: int, acc: pk.Acc[pk.double]): - acc += i * i - +def main(): + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + # Create array with squares + values = np.array([i * i for i in range(N)], dtype=np.int32) + + total = pk.parallel_reduce(N, squaresum, values=values) + + print("Sum:", total) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, SquareSum(10)) + main() \ No newline at end of file diff --git a/examples/kokkos/02_simple_reduce_lambda.py b/examples/kokkos/02_simple_reduce_lambda.py index b54c92ea..a63dbf8f 100644 --- a/examples/kokkos/02_simple_reduce_lambda.py +++ b/examples/kokkos/02_simple_reduce_lambda.py @@ -1,20 +1,20 @@ +import numpy as np import pykokkos as pk +@pk.workunit +def squaresum(i: int, acc, values): + acc += values[i] -@pk.workload -class SquareSum: - def __init__(self, n): - self.N: int = n - self.total: int = 0 - - @pk.main - def run(self): - self.total = pk.parallel_reduce(self.N, lambda i, acc: acc + i * i) - - @pk.callback - def results(self): - print("Sum:", self.total) - +def main(): + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + # Create array with squares + values = np.array([i * i for i in range(N)], dtype=np.int32) + + total = pk.parallel_reduce(N, squaresum, values=values) + + print("Sum:", total) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, SquareSum(10)) + main() \ No newline at end of file diff --git a/examples/kokkos/03_simple_view.py b/examples/kokkos/03_simple_view.py index bef1e95e..d03f8772 100644 --- a/examples/kokkos/03_simple_view.py +++ b/examples/kokkos/03_simple_view.py @@ -1,33 +1,26 @@ import pykokkos as pk - -@pk.workload -class SimpleView: - def __init__(self, n): - self.N: int = n - self.total: int = 0 - self.a: pk.View2D[pk.int32] = pk.View([self.N, 3], pk.int32) - - @pk.callback - def results(self): - for row in self.a: - print(row) - print("\nResult is", self.total) - - @pk.main - def run(self): - pk.parallel_for(self.N, self.initialize_view) - self.total = pk.parallel_reduce(self.N, self.my_reduction) - - @pk.workunit - def initialize_view(self, i: int): - for j in range(3): - self.a[i][j] = (i + 1) ** (j + 1) - - @pk.workunit - def my_reduction(self, i: int, accumulator: pk.Acc[pk.double]): - accumulator += self.a[i][0] * self.a[i][1] / (self.a[i][2]) - +@pk.workunit +def initialize_view(i: int, a: pk.View2D[pk.int32]): + for j in range(3): + a[i][j] = (i + 1) ** (j + 1) + +@pk.workunit +def my_reduction(i: int, accumulator: pk.Acc[pk.double], a: pk.View2D[pk.int32]): + accumulator += a[i][0] * a[i][1] / (a[i][2]) + +def main(): + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + a: pk.View2D[pk.int32] = pk.View([N, 3], pk.int32) + + pk.parallel_for(N, initialize_view, a=a) + total: int = pk.parallel_reduce(N, my_reduction, a=a) + + for row in a: + print(row) + print("\nResult is", total) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, SimpleView(10)) + main() \ No newline at end of file diff --git a/examples/kokkos/03_simple_view_lambda.py b/examples/kokkos/03_simple_view_lambda.py index ea9dbe8e..d03f8772 100644 --- a/examples/kokkos/03_simple_view_lambda.py +++ b/examples/kokkos/03_simple_view_lambda.py @@ -1,33 +1,26 @@ import pykokkos as pk +@pk.workunit +def initialize_view(i: int, a: pk.View2D[pk.int32]): + for j in range(3): + a[i][j] = (i + 1) ** (j + 1) -@pk.workload -class SimpleView: - def __init__(self, n): - self.N: int = n - self.total: int = 0 - self.a: pk.View2D[pk.int32] = pk.View([self.N, 3], pk.int32) - - @pk.callback - def results(self): - for row in self.a: - print(row) - print("\nResult is", self.total) - - @pk.main - def run(self): - pk.parallel_for(self.N, self.initialize_view) - self.total = pk.parallel_reduce( - self.N, - lambda i, accumulator: accumulator - + self.a[i][0] * self.a[i][1] / (self.a[i][2]), - ) - - @pk.workunit - def initialize_view(self, i: int): - for j in range(3): - self.a[i][j] = (i + 1) ** (j + 1) +@pk.workunit +def my_reduction(i: int, accumulator: pk.Acc[pk.double], a: pk.View2D[pk.int32]): + accumulator += a[i][0] * a[i][1] / (a[i][2]) +def main(): + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + a: pk.View2D[pk.int32] = pk.View([N, 3], pk.int32) + + pk.parallel_for(N, initialize_view, a=a) + total: int = pk.parallel_reduce(N, my_reduction, a=a) + + for row in a: + print(row) + print("\nResult is", total) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, SimpleView(10)) + main() \ No newline at end of file diff --git a/examples/kokkos/04_simple_memoryspaces.py b/examples/kokkos/04_simple_memoryspaces.py index 4a6f58aa..2e0fed2f 100644 --- a/examples/kokkos/04_simple_memoryspaces.py +++ b/examples/kokkos/04_simple_memoryspaces.py @@ -1,28 +1,23 @@ import pykokkos as pk +@pk.workunit +def reduction(i: int, acc: pk.Acc[pk.double], a: pk.View2D[pk.int32]): + acc += a[i][0] - a[i][1] + a[i][2] -@pk.workload -class SimpleSpaces: - def __init__(self, n): - self.N: int = n - self.sum: int = 0 - self.a: pk.View2D[pk.int32] = pk.View([n, 3], pk.int32) - for i in range(n): - for j in range(3): - self.a[i][j] = i * n + j - - @pk.main - def run(self): - self.sum = pk.parallel_reduce(self.N, self.reduction) - - @pk.callback - def use_results(self): - print(self.sum) - - @pk.workunit - def reduction(self, i: int, acc: pk.Acc[pk.double]): - acc += self.a[i][0] - self.a[i][1] + self.a[i][2] - +def main(): + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + a: pk.View2D[pk.int32] = pk.View([N, 3], pk.int32) + + # Initialize the view + for i in range(N): + for j in range(3): + a[i][j] = i * N + j + + sum_result: int = pk.parallel_reduce(N, reduction, a=a) + + print(sum_result) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, SimpleSpaces(10)) + main() \ No newline at end of file diff --git a/examples/kokkos/04_simple_memoryspaces_lambda.py b/examples/kokkos/04_simple_memoryspaces_lambda.py index b301350e..8bd7adb0 100644 --- a/examples/kokkos/04_simple_memoryspaces_lambda.py +++ b/examples/kokkos/04_simple_memoryspaces_lambda.py @@ -1,30 +1,23 @@ import pykokkos as pk +@pk.workunit +def reduction(i: int, accumulator: pk.Acc[pk.double], a: pk.View2D[pk.int32]): + accumulator += a[i][0] - a[i][1] + a[i][2] -@pk.workload -class SimpleSpaces: - def __init__(self, n): - self.N: int = n - self.sum: int = 0 - self.a: pk.View2D[pk.int32] = pk.View([n, 3], pk.int32) - for i in range(n): - for j in range(3): - self.a[i][j] = i * n + j - - @pk.main - def run(self): - self.sum = pk.parallel_reduce( - self.N, - lambda i, accumulator: accumulator - + self.a[i][0] - - self.a[i][1] - + self.a[i][2], - ) - - @pk.callback - def use_results(self): - print(self.sum) - +def main(): + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + a: pk.View2D[pk.int32] = pk.View([N, 3], pk.int32) + + # Initialize the view + for i in range(N): + for j in range(3): + a[i][j] = i * N + j + + sum_result: int = pk.parallel_reduce(N, reduction, a=a) + + print(sum_result) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, SimpleSpaces(10)) + main() \ No newline at end of file diff --git a/examples/kokkos/05_simple_atomics.py b/examples/kokkos/05_simple_atomics.py index 93114624..ad2d4a73 100644 --- a/examples/kokkos/05_simple_atomics.py +++ b/examples/kokkos/05_simple_atomics.py @@ -1,47 +1,46 @@ import math import random - import pykokkos as pk - -@pk.workload(count=pk.ViewTypeInfo(trait=pk.Atomic)) -class SimpleAtomics: - def __init__(self, n): - self.N: int = n - - self.data: pk.View1D[pk.int32] = pk.View([n], pk.int32) - self.result: pk.View1D[pk.int32] = pk.View([n], pk.int32) - self.count: pk.View1D[pk.int32] = pk.View([1], pk.int32, trait=pk.Trait.Atomic) - - for i in range(n): - self.data[i] = random.randint(0, n) - - @pk.main - def run(self): - pk.parallel_for(self.N, self.findprimes) - - @pk.callback - def results(self): - for i in range(int(self.count[0])): - print(int(self.result[i]), end=", ") - print( - "\nFound", int(self.count[0]), "prime numbers in", self.N, "random numbers" - ) - - @pk.workunit - def findprimes(self, i: int): - number: int = self.data[i] - upper_bound: int = math.sqrt(number) + 1 - is_prime: bool = not (number % 2 == 0) - k: int = 3 - idx: int = 0 - while k < upper_bound and is_prime: - is_prime = not (number % k == 0) - k += 2 - if is_prime: - idx = self.count[0] = self.count[0] + 1 - self.result[idx - 1] = number - +@pk.workunit +def findprimes(i: int, data: pk.View1D[pk.int32], result: pk.View1D[pk.int32], count: pk.View1D[pk.int32]): + number: int = data[i] + upper_bound: int = int(math.sqrt(number)) + 1 + is_prime: bool = not (number % 2 == 0) + k: int = 3 + idx: int = 0 + + while k < upper_bound and is_prime: + is_prime = not (number % k == 0) + k += 2 + + if is_prime: + # Note: This atomic operation may have race conditions without proper atomic support + # For now, we remove the atomic trait as it's not supported + idx = count[0] = count[0] + 1 + result[idx - 1] = number + +def simple_atomics(): + N: int = 100 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + data: pk.View1D[pk.int32] = pk.View([N], pk.int32) + result: pk.View1D[pk.int32] = pk.View([N], pk.int32) + # FIXED: Removed trait=pk.Trait.Atomic as it's not supported + count: pk.View1D[pk.int32] = pk.View([1], pk.int32) + + # Initialize data with random numbers + for i in range(N): + data[i] = random.randint(0, N) + + pk.parallel_for(N, findprimes, data=data, result=result, count=count) + + # Print results + for i in range(int(count[0])): + print(int(result[i]), end=", ") + print( + "\nFound", int(count[0]), "prime numbers in", N, "random numbers" + ) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, SimpleAtomics(100)) + simple_atomics() \ No newline at end of file diff --git a/examples/kokkos/README.md b/examples/kokkos/README.md index 87545332..c706eeab 100644 --- a/examples/kokkos/README.md +++ b/examples/kokkos/README.md @@ -1,2 +1,4 @@ This directory contains examples translated from the main Kokkos repository: https://github.com/kokkos/kokkos/tree/develop/example/tutorial + +These examples have been converted from the deprecated workload/functor style to the standalone style using @pk.workunit decorators. \ No newline at end of file diff --git a/examples/kokkos/add1.py b/examples/kokkos/add1.py index 37b6b863..80bfbf08 100644 --- a/examples/kokkos/add1.py +++ b/examples/kokkos/add1.py @@ -1,29 +1,24 @@ import pykokkos as pk - -@pk.workload -class AddOne: - def __init__(self, n): - self.N: int = n - self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32) - - for i in range(self.N): - self.a[i] = 2 - print(f"Initialized view: [{self.a[0]}, ... repeats {n-1} times]") - - @pk.main - def run(self): - pk.parallel_for(self.N, self.add1) - - @pk.callback - def results(self): - print(f"Results: [{self.a[0]}, ... repeats {n-1} times]") - - @pk.workunit - def add1(self, i: int): - self.a[i] += 1 - +@pk.workunit +def add1(i: int, a: pk.View1D[pk.int32]): + a[i] += 1 + +def main(): + n: int = 100 * 1000 + N: int = n + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + a: pk.View1D[pk.int32] = pk.View([N], pk.int32) + + # Initialize the view + for i in range(N): + a[i] = 2 + print(f"Initialized view: [{a[0]}, ... repeats {n-1} times]") + + pk.parallel_for(N, add1, a=a) + + print(f"Results: [{a[0]}, ... repeats {n-1} times]") if __name__ == "__main__": - n = 100 * 1000 - pk.execute(pk.ExecutionSpace.OpenMP, AddOne(n)) + main() \ No newline at end of file diff --git a/examples/kokkos/add1_lambda.py b/examples/kokkos/add1_lambda.py index f422fa0e..9290eefc 100644 --- a/examples/kokkos/add1_lambda.py +++ b/examples/kokkos/add1_lambda.py @@ -1,26 +1,25 @@ import pykokkos as pk +@pk.workunit +def add_y(i: int, a: pk.View1D[pk.int32], y: int): + a[i] += y -@pk.workload -class AddOne: - def __init__(self, n): - self.N: int = n - self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32) - - for i in range(self.N): - self.a[i] = 2 - print(f"Initialized view: [{self.a[0]}, ... repeats {n-1} times]") - - @pk.main - def run(self): - y: int = 1 - pk.parallel_for(self.N, lambda i: self.a[i] + y, self.a) - - @pk.callback - def results(self): - print(f"Results: [{self.a[0]}, ... repeats {n-1} times]") - +def main(): + n: int = 100 * 1000 + N: int = n + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + a: pk.View1D[pk.int32] = pk.View([N], pk.int32) + + # Initialize the view + for i in range(N): + a[i] = 2 + print(f"Initialized view: [{a[0]}, ... repeats {n-1} times]") + + y: int = 1 + pk.parallel_for(N, add_y, a=a, y=y) + + print(f"Results: [{a[0]}, ... repeats {n-1} times]") if __name__ == "__main__": - n = 100 * 1000 - pk.execute(pk.ExecutionSpace.OpenMP, AddOne(n)) + main() \ No newline at end of file diff --git a/examples/kokkos/inclusive_scan_team.py b/examples/kokkos/inclusive_scan_team.py index 609e098c..36303d21 100644 --- a/examples/kokkos/inclusive_scan_team.py +++ b/examples/kokkos/inclusive_scan_team.py @@ -1,59 +1,27 @@ -import numpy as np -import pykokkos as pk - +import math -@pk.workunit -def init_data(i: int, view): - view[i] = i + 1 +import pykokkos as pk -# Test inclusive_scan with scratch memory @pk.workunit -def team_scan(team_member: pk.TeamMember, view): - team_size: int = team_member.team_size() - offset: int = team_member.league_rank() * team_size - localIdx: int = team_member.team_rank() - globalIdx: int = offset + localIdx - team_rank: int = team_member.team_rank() - - scratch: pk.ScratchView1D[int] = pk.ScratchView1D( - team_member.team_scratch(0), team_size +def my_calculation(i: int, a: pk.View1D[pk.int32], N: int): + pk.printf("Running index %d\n", i) + a[i] += ( + math.cos(a[i]) + 2**i - math.pi / math.fabs(a[(i + 1) % N]) ) - scratch[team_rank] = view[globalIdx] - team_member.team_barrier() - - pk.inclusive_scan(team_member, scratch) - team_member.team_barrier() - - view[globalIdx] = scratch[team_rank] - - -def main(): - N = 64 - team_size = 32 - num_teams = (N + team_size - 1) // team_size - - view = np.zeros([N], dtype=np.int32) - p_init = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, N) - pk.parallel_for(p_init, init_data, view=view) - - print(f"Total elements: {N}, Team size: {team_size}, Number of teams: {num_teams}") - - # Use TeamPolicy - team_policy = pk.TeamPolicy(pk.ExecutionSpace.OpenMP, num_teams, team_size) - - # for now these functions are useless, since they are not implemented corectly - # TODO: implement scratch size setting - # scratch_size = pk.ScratchView1D[int].shmem_size(team_size) - # team_policy.set_scratch_size(0, pk.PerTeam(scratch_size)) - - # Kernel call - just allocate and write to scratch - print("Running kernel...") - pk.parallel_for(team_policy, team_scan, view=view) - print(f"View, splitted by two groups of size = {team_size}") - print(view) - if __name__ == "__main__": - main() + n = 10 + N = n + a = pk.View([N], pk.int32) + + # Initialize view + for i in range(N): + a[i] = math.sqrt(math.tau) + + print("Initialized view:", a) + + pk.parallel_for(N, my_calculation, a=a, N=N) + + print("Results: ", a) \ No newline at end of file diff --git a/examples/kokkos/math_functions.py b/examples/kokkos/math_functions.py index 0ecce641..48b7574e 100644 --- a/examples/kokkos/math_functions.py +++ b/examples/kokkos/math_functions.py @@ -1,35 +1,28 @@ import math - import pykokkos as pk - -@pk.workload -class Math: - def __init__(self, n): - self.N: int = n - self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32) - - for i in range(self.N): - self.a[i] = math.sqrt(math.tau) - - print("Initialized view:", self.a) - - @pk.main - def run(self): - pk.parallel_for(self.N, self.my_calculation) - - @pk.callback - def results(self): - print("Results: ", self.a) - - @pk.workunit - def my_calculation(self, i: int): - pk.printf("Running index %d\n", i) - self.a[i] += ( - math.cos(self.a[i]) + 2**i - math.pi / math.fabs(self.a[(i + 1) % self.N]) - ) - +@pk.workunit +def my_calculation(i: int, a: pk.View1D[pk.int32], N: int): + pk.printf("Running index %d\n", i) + a[i] += ( + math.cos(a[i]) + 2**i - math.pi / math.fabs(a[(i + 1) % N]) + ) + +def main(): + n: int = 10 + N: int = n + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + a: pk.View1D[pk.int32] = pk.View([N], pk.int32) + + # Initialize the view + for i in range(N): + a[i] = math.sqrt(math.tau) + print("Initialized view:", a) + + pk.parallel_for(N, my_calculation, a=a, N=N) + + print("Results: ", a) if __name__ == "__main__": - n = 10 - pk.execute(pk.ExecutionSpace.OpenMP, Math(n)) + main() \ No newline at end of file diff --git a/examples/kokkos/matrix_sum.py b/examples/kokkos/matrix_sum.py index 3603a2bb..c28a4cf2 100644 --- a/examples/kokkos/matrix_sum.py +++ b/examples/kokkos/matrix_sum.py @@ -1,39 +1,33 @@ import pykokkos as pk - -@pk.workload -class MatrixSum: - def __init__(self, r, c): - self.r: int = r - self.c: int = c - self.total: int = 0 - self.mat: pk.View2D[pk.int32] = pk.View([r, c], pk.int32) - - for i in range(r): - self.mat[i] = list(range(c * i, c * (i + 1))) - - for row in self.mat: - print(row) - print(f"Initialized {r}x{c} array") - - @pk.main - def run(self): - pk.parallel_for(self.r, self.sum_row) - self.total = pk.parallel_reduce(self.r, self.final_sum) - - @pk.callback - def results(self): - print("Total =", self.total) - - @pk.workunit - def sum_row(self, i: int): - for j in range(1, self.c): - self.mat[i][0] += self.mat[i][j] - - @pk.workunit - def final_sum(self, i: int, accumulator: pk.Acc[pk.double]): - accumulator += self.mat[i][0] - +@pk.workunit +def sum_row(i: int, mat: pk.View2D[pk.int32], c: int): + for j in range(1, c): + mat[i][0] += mat[i][j] + +@pk.workunit +def final_sum(i: int, accumulator: pk.Acc[pk.double], mat: pk.View2D[pk.int32]): + accumulator += mat[i][0] + +def main(): + r: int = 5 + c: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + mat: pk.View2D[pk.int32] = pk.View([r, c], pk.int32) + + # Initialize the matrix + for i in range(r): + mat[i] = list(range(c * i, c * (i + 1))) + + for row in mat: + print(row) + print(f"Initialized {r}x{c} array") + + pk.parallel_for(r, sum_row, mat=mat, c=c) + total: int = pk.parallel_reduce(r, final_sum, mat=mat) + + print("Total =", total) if __name__ == "__main__": - pk.execute(pk.ExecutionSpace.OpenMP, MatrixSum(5, 10)) + main() \ No newline at end of file diff --git a/examples/kokkos/random_sum.py b/examples/kokkos/random_sum.py index 3da72b07..e8bdcb39 100644 --- a/examples/kokkos/random_sum.py +++ b/examples/kokkos/random_sum.py @@ -1,33 +1,25 @@ import random - import pykokkos as pk - -@pk.workload -class RandomSum: - def __init__(self, n): - self.N: int = n - self.total: int = 0 - self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32) - - for i in range(self.N): - self.a[i] = random.randint(0, 10) - - print("Initialized view:", self.a) - - @pk.main - def run(self): - self.total = pk.parallel_reduce(self.N, self.my_reduction) - - @pk.callback - def results(self): - print("Sum:", self.total) - - @pk.workunit - def my_reduction(self, i: int, accumulator: pk.Acc[pk.int32]): - accumulator += self.a[i] - +@pk.workunit +def my_reduction(i: int, accumulator: pk.Acc[pk.int32], a: pk.View1D[pk.int32]): + accumulator += a[i] + +def main(): + n: int = 10 + N: int = n + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + a: pk.View1D[pk.int32] = pk.View([N], pk.int32) + + # Initialize the view with random values + for i in range(N): + a[i] = random.randint(0, 10) + print("Initialized view:", a) + + total: int = pk.parallel_reduce(N, my_reduction, a=a) + + print("Sum:", total) if __name__ == "__main__": - n = 10 - pk.execute(pk.ExecutionSpace.OpenMP, RandomSum(n)) + main() \ No newline at end of file diff --git a/examples/kokkos/scan_functor.py b/examples/kokkos/scan_functor.py index 7f4b812b..663f4395 100644 --- a/examples/kokkos/scan_functor.py +++ b/examples/kokkos/scan_functor.py @@ -1,34 +1,30 @@ import pykokkos as pk +@pk.workunit +def init(i: int, A: pk.View1D[pk.int32]): + A[i] = i -@pk.functor -class Workload: - def __init__(self, N: int): - self.A: pk.View1D[pk.int32] = pk.View([N], pk.int32) - - @pk.workunit - def init(self, i: int): - self.A[i] = i - - @pk.workunit - def scan(self, i: int, acc: pk.Acc[pk.double], last_pass: bool): - acc += self.A[i] - if last_pass: - self.A[i] = acc - +@pk.workunit +def scan(i: int, acc: pk.Acc[pk.double], last_pass: bool, A: pk.View1D[pk.int32]): + acc += A[i] + if last_pass: + A[i] = acc def run() -> None: - N = 10 - w = Workload(N) + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + A: pk.View1D[pk.int32] = pk.View([N], pk.int32) + p = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, N) - pk.parallel_for(p, w.init) - + + pk.parallel_for(p, init, A=A) + timer = pk.Timer() - result = pk.parallel_scan(p, w.scan) + result = pk.parallel_scan(p, scan, A=A) timer_result = timer.seconds() - - print(f"{w.A} total={result} time({timer_result})") - + + print(f"{A} total={result} time({timer_result})") if __name__ == "__main__": - run() + run() \ No newline at end of file diff --git a/examples/kokkos/scan_workload.py b/examples/kokkos/scan_workload.py index fd79f6a0..c6a37cf1 100644 --- a/examples/kokkos/scan_workload.py +++ b/examples/kokkos/scan_workload.py @@ -1,39 +1,28 @@ import pykokkos as pk +@pk.workunit +def init(i: int, A: pk.View1D[pk.int32]): + A[i] = i -@pk.workload -class Workload: - def __init__(self, N: int): - self.N: int = N - self.A: pk.View1D[pk.int32] = pk.View([N], pk.int32) - - self.result: int = 0 - self.timer_result: float = 0 - - @pk.main - def run(self): - pk.parallel_for(self.N, lambda i: i, self.A) - - timer = pk.Timer() - - self.result = pk.parallel_scan(self.N, self.scan) - - self.timer_result = timer.seconds() - - @pk.callback - def results(self): - print(f"{self.A} total={self.result} time({self.timer_result})") - - @pk.workunit - def scan(self, i: int, acc: pk.Acc[pk.double], last_pass: bool): - acc += self.A[i] - if last_pass: - self.A[i] = acc - +@pk.workunit +def scan(i: int, acc: pk.Acc[pk.double], last_pass: bool, A: pk.View1D[pk.int32]): + acc += A[i] + if last_pass: + A[i] = acc def run() -> None: - pk.execute(pk.ExecutionSpace.OpenMP, Workload(10)) - + N: int = 10 + pk.set_default_space(pk.ExecutionSpace.OpenMP) + + A: pk.View1D[pk.int32] = pk.View([N], pk.int32) + + pk.parallel_for(N, init, A=A) + + timer = pk.Timer() + result: int = pk.parallel_scan(N, scan, A=A) + timer_result: float = timer.seconds() + + print(f"{A} total={result} time({timer_result})") if __name__ == "__main__": - run() + run() \ No newline at end of file From cd6811c0ffe796b861032b40a1cac38439ab3b63 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 26 Jan 2026 12:22:47 -0600 Subject: [PATCH 02/23] Addressed comments (Reversed READ.ME, inclusive_scan_team.py, and all lambda files) --- examples/kokkos/01_hello_world.py | 2 +- examples/kokkos/01_hello_world_lambda.py | 18 ++--- examples/kokkos/02_simple_reduce.py | 2 +- examples/kokkos/02_simple_reduce_lambda.py | 30 ++++---- examples/kokkos/03_simple_view.py | 2 +- examples/kokkos/03_simple_view_lambda.py | 47 +++++++------ examples/kokkos/04_simple_memoryspaces.py | 2 +- .../kokkos/04_simple_memoryspaces_lambda.py | 43 +++++++----- examples/kokkos/05_simple_atomics.py | 2 +- examples/kokkos/README.md | 2 - examples/kokkos/add1.py | 2 +- examples/kokkos/add1_lambda.py | 41 +++++------ examples/kokkos/inclusive_scan_team.py | 70 ++++++++++++++----- examples/kokkos/math_functions.py | 2 +- examples/kokkos/matrix_sum.py | 2 +- examples/kokkos/random_sum.py | 2 +- examples/kokkos/scan_functor.py | 2 +- examples/kokkos/scan_workload.py | 2 +- 18 files changed, 160 insertions(+), 113 deletions(-) diff --git a/examples/kokkos/01_hello_world.py b/examples/kokkos/01_hello_world.py index a905df23..6745707d 100644 --- a/examples/kokkos/01_hello_world.py +++ b/examples/kokkos/01_hello_world.py @@ -10,4 +10,4 @@ def main(): pk.parallel_for(N, hello) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/kokkos/01_hello_world_lambda.py b/examples/kokkos/01_hello_world_lambda.py index a18cd1dc..05d5104a 100644 --- a/examples/kokkos/01_hello_world_lambda.py +++ b/examples/kokkos/01_hello_world_lambda.py @@ -1,13 +1,15 @@ import pykokkos as pk -@pk.workunit -def hello(i: int): - pk.printf("Hello from i = %i\n", i) -def main(): - N: int = 10 - pk.set_default_space(pk.ExecutionSpace.OpenMP) - pk.parallel_for(N, hello) +@pk.workload +class HelloWorld: + def __init__(self, n): + self.N: int = n + + @pk.main + def run(self): + pk.parallel_for(self.N, lambda i: pk.printf("Hello from i = %i\n", i)) + if __name__ == "__main__": - main() \ No newline at end of file + pk.execute(pk.ExecutionSpace.OpenMP, HelloWorld(10)) diff --git a/examples/kokkos/02_simple_reduce.py b/examples/kokkos/02_simple_reduce.py index a63dbf8f..cfbb288c 100644 --- a/examples/kokkos/02_simple_reduce.py +++ b/examples/kokkos/02_simple_reduce.py @@ -17,4 +17,4 @@ def main(): print("Sum:", total) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/kokkos/02_simple_reduce_lambda.py b/examples/kokkos/02_simple_reduce_lambda.py index a63dbf8f..b54c92ea 100644 --- a/examples/kokkos/02_simple_reduce_lambda.py +++ b/examples/kokkos/02_simple_reduce_lambda.py @@ -1,20 +1,20 @@ -import numpy as np import pykokkos as pk -@pk.workunit -def squaresum(i: int, acc, values): - acc += values[i] -def main(): - N: int = 10 - pk.set_default_space(pk.ExecutionSpace.OpenMP) - - # Create array with squares - values = np.array([i * i for i in range(N)], dtype=np.int32) - - total = pk.parallel_reduce(N, squaresum, values=values) - - print("Sum:", total) +@pk.workload +class SquareSum: + def __init__(self, n): + self.N: int = n + self.total: int = 0 + + @pk.main + def run(self): + self.total = pk.parallel_reduce(self.N, lambda i, acc: acc + i * i) + + @pk.callback + def results(self): + print("Sum:", self.total) + if __name__ == "__main__": - main() \ No newline at end of file + pk.execute(pk.ExecutionSpace.OpenMP, SquareSum(10)) diff --git a/examples/kokkos/03_simple_view.py b/examples/kokkos/03_simple_view.py index d03f8772..d2bb53f8 100644 --- a/examples/kokkos/03_simple_view.py +++ b/examples/kokkos/03_simple_view.py @@ -23,4 +23,4 @@ def main(): print("\nResult is", total) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/kokkos/03_simple_view_lambda.py b/examples/kokkos/03_simple_view_lambda.py index d03f8772..ea9dbe8e 100644 --- a/examples/kokkos/03_simple_view_lambda.py +++ b/examples/kokkos/03_simple_view_lambda.py @@ -1,26 +1,33 @@ import pykokkos as pk -@pk.workunit -def initialize_view(i: int, a: pk.View2D[pk.int32]): - for j in range(3): - a[i][j] = (i + 1) ** (j + 1) -@pk.workunit -def my_reduction(i: int, accumulator: pk.Acc[pk.double], a: pk.View2D[pk.int32]): - accumulator += a[i][0] * a[i][1] / (a[i][2]) +@pk.workload +class SimpleView: + def __init__(self, n): + self.N: int = n + self.total: int = 0 + self.a: pk.View2D[pk.int32] = pk.View([self.N, 3], pk.int32) + + @pk.callback + def results(self): + for row in self.a: + print(row) + print("\nResult is", self.total) + + @pk.main + def run(self): + pk.parallel_for(self.N, self.initialize_view) + self.total = pk.parallel_reduce( + self.N, + lambda i, accumulator: accumulator + + self.a[i][0] * self.a[i][1] / (self.a[i][2]), + ) + + @pk.workunit + def initialize_view(self, i: int): + for j in range(3): + self.a[i][j] = (i + 1) ** (j + 1) -def main(): - N: int = 10 - pk.set_default_space(pk.ExecutionSpace.OpenMP) - - a: pk.View2D[pk.int32] = pk.View([N, 3], pk.int32) - - pk.parallel_for(N, initialize_view, a=a) - total: int = pk.parallel_reduce(N, my_reduction, a=a) - - for row in a: - print(row) - print("\nResult is", total) if __name__ == "__main__": - main() \ No newline at end of file + pk.execute(pk.ExecutionSpace.OpenMP, SimpleView(10)) diff --git a/examples/kokkos/04_simple_memoryspaces.py b/examples/kokkos/04_simple_memoryspaces.py index 2e0fed2f..690ec95d 100644 --- a/examples/kokkos/04_simple_memoryspaces.py +++ b/examples/kokkos/04_simple_memoryspaces.py @@ -20,4 +20,4 @@ def main(): print(sum_result) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/kokkos/04_simple_memoryspaces_lambda.py b/examples/kokkos/04_simple_memoryspaces_lambda.py index 8bd7adb0..b301350e 100644 --- a/examples/kokkos/04_simple_memoryspaces_lambda.py +++ b/examples/kokkos/04_simple_memoryspaces_lambda.py @@ -1,23 +1,30 @@ import pykokkos as pk -@pk.workunit -def reduction(i: int, accumulator: pk.Acc[pk.double], a: pk.View2D[pk.int32]): - accumulator += a[i][0] - a[i][1] + a[i][2] -def main(): - N: int = 10 - pk.set_default_space(pk.ExecutionSpace.OpenMP) - - a: pk.View2D[pk.int32] = pk.View([N, 3], pk.int32) - - # Initialize the view - for i in range(N): - for j in range(3): - a[i][j] = i * N + j - - sum_result: int = pk.parallel_reduce(N, reduction, a=a) - - print(sum_result) +@pk.workload +class SimpleSpaces: + def __init__(self, n): + self.N: int = n + self.sum: int = 0 + self.a: pk.View2D[pk.int32] = pk.View([n, 3], pk.int32) + for i in range(n): + for j in range(3): + self.a[i][j] = i * n + j + + @pk.main + def run(self): + self.sum = pk.parallel_reduce( + self.N, + lambda i, accumulator: accumulator + + self.a[i][0] + - self.a[i][1] + + self.a[i][2], + ) + + @pk.callback + def use_results(self): + print(self.sum) + if __name__ == "__main__": - main() \ No newline at end of file + pk.execute(pk.ExecutionSpace.OpenMP, SimpleSpaces(10)) diff --git a/examples/kokkos/05_simple_atomics.py b/examples/kokkos/05_simple_atomics.py index ad2d4a73..4f31171f 100644 --- a/examples/kokkos/05_simple_atomics.py +++ b/examples/kokkos/05_simple_atomics.py @@ -43,4 +43,4 @@ def simple_atomics(): ) if __name__ == "__main__": - simple_atomics() \ No newline at end of file + simple_atomics() diff --git a/examples/kokkos/README.md b/examples/kokkos/README.md index c706eeab..87545332 100644 --- a/examples/kokkos/README.md +++ b/examples/kokkos/README.md @@ -1,4 +1,2 @@ This directory contains examples translated from the main Kokkos repository: https://github.com/kokkos/kokkos/tree/develop/example/tutorial - -These examples have been converted from the deprecated workload/functor style to the standalone style using @pk.workunit decorators. \ No newline at end of file diff --git a/examples/kokkos/add1.py b/examples/kokkos/add1.py index 80bfbf08..3179e31d 100644 --- a/examples/kokkos/add1.py +++ b/examples/kokkos/add1.py @@ -21,4 +21,4 @@ def main(): print(f"Results: [{a[0]}, ... repeats {n-1} times]") if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/kokkos/add1_lambda.py b/examples/kokkos/add1_lambda.py index 9290eefc..f422fa0e 100644 --- a/examples/kokkos/add1_lambda.py +++ b/examples/kokkos/add1_lambda.py @@ -1,25 +1,26 @@ import pykokkos as pk -@pk.workunit -def add_y(i: int, a: pk.View1D[pk.int32], y: int): - a[i] += y -def main(): - n: int = 100 * 1000 - N: int = n - pk.set_default_space(pk.ExecutionSpace.OpenMP) - - a: pk.View1D[pk.int32] = pk.View([N], pk.int32) - - # Initialize the view - for i in range(N): - a[i] = 2 - print(f"Initialized view: [{a[0]}, ... repeats {n-1} times]") - - y: int = 1 - pk.parallel_for(N, add_y, a=a, y=y) - - print(f"Results: [{a[0]}, ... repeats {n-1} times]") +@pk.workload +class AddOne: + def __init__(self, n): + self.N: int = n + self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32) + + for i in range(self.N): + self.a[i] = 2 + print(f"Initialized view: [{self.a[0]}, ... repeats {n-1} times]") + + @pk.main + def run(self): + y: int = 1 + pk.parallel_for(self.N, lambda i: self.a[i] + y, self.a) + + @pk.callback + def results(self): + print(f"Results: [{self.a[0]}, ... repeats {n-1} times]") + if __name__ == "__main__": - main() \ No newline at end of file + n = 100 * 1000 + pk.execute(pk.ExecutionSpace.OpenMP, AddOne(n)) diff --git a/examples/kokkos/inclusive_scan_team.py b/examples/kokkos/inclusive_scan_team.py index 36303d21..609e098c 100644 --- a/examples/kokkos/inclusive_scan_team.py +++ b/examples/kokkos/inclusive_scan_team.py @@ -1,27 +1,59 @@ -import math - +import numpy as np import pykokkos as pk @pk.workunit -def my_calculation(i: int, a: pk.View1D[pk.int32], N: int): - pk.printf("Running index %d\n", i) - a[i] += ( - math.cos(a[i]) + 2**i - math.pi / math.fabs(a[(i + 1) % N]) +def init_data(i: int, view): + view[i] = i + 1 + + +# Test inclusive_scan with scratch memory +@pk.workunit +def team_scan(team_member: pk.TeamMember, view): + team_size: int = team_member.team_size() + offset: int = team_member.league_rank() * team_size + localIdx: int = team_member.team_rank() + globalIdx: int = offset + localIdx + team_rank: int = team_member.team_rank() + + scratch: pk.ScratchView1D[int] = pk.ScratchView1D( + team_member.team_scratch(0), team_size ) + scratch[team_rank] = view[globalIdx] + team_member.team_barrier() + + pk.inclusive_scan(team_member, scratch) + team_member.team_barrier() + + view[globalIdx] = scratch[team_rank] + + +def main(): + N = 64 + team_size = 32 + num_teams = (N + team_size - 1) // team_size + + view = np.zeros([N], dtype=np.int32) + p_init = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, N) + pk.parallel_for(p_init, init_data, view=view) + + print(f"Total elements: {N}, Team size: {team_size}, Number of teams: {num_teams}") + + # Use TeamPolicy + team_policy = pk.TeamPolicy(pk.ExecutionSpace.OpenMP, num_teams, team_size) + + # for now these functions are useless, since they are not implemented corectly + # TODO: implement scratch size setting + # scratch_size = pk.ScratchView1D[int].shmem_size(team_size) + # team_policy.set_scratch_size(0, pk.PerTeam(scratch_size)) + + # Kernel call - just allocate and write to scratch + print("Running kernel...") + pk.parallel_for(team_policy, team_scan, view=view) + print(f"View, splitted by two groups of size = {team_size}") + print(view) + if __name__ == "__main__": - n = 10 - N = n - a = pk.View([N], pk.int32) - - # Initialize view - for i in range(N): - a[i] = math.sqrt(math.tau) - - print("Initialized view:", a) - - pk.parallel_for(N, my_calculation, a=a, N=N) - - print("Results: ", a) \ No newline at end of file + main() diff --git a/examples/kokkos/math_functions.py b/examples/kokkos/math_functions.py index 48b7574e..a6a38eab 100644 --- a/examples/kokkos/math_functions.py +++ b/examples/kokkos/math_functions.py @@ -25,4 +25,4 @@ def main(): print("Results: ", a) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/kokkos/matrix_sum.py b/examples/kokkos/matrix_sum.py index c28a4cf2..a8d913a7 100644 --- a/examples/kokkos/matrix_sum.py +++ b/examples/kokkos/matrix_sum.py @@ -30,4 +30,4 @@ def main(): print("Total =", total) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/kokkos/random_sum.py b/examples/kokkos/random_sum.py index e8bdcb39..61f74123 100644 --- a/examples/kokkos/random_sum.py +++ b/examples/kokkos/random_sum.py @@ -22,4 +22,4 @@ def main(): print("Sum:", total) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/kokkos/scan_functor.py b/examples/kokkos/scan_functor.py index 663f4395..0c7a98a4 100644 --- a/examples/kokkos/scan_functor.py +++ b/examples/kokkos/scan_functor.py @@ -27,4 +27,4 @@ def run() -> None: print(f"{A} total={result} time({timer_result})") if __name__ == "__main__": - run() \ No newline at end of file + run() diff --git a/examples/kokkos/scan_workload.py b/examples/kokkos/scan_workload.py index c6a37cf1..6196b280 100644 --- a/examples/kokkos/scan_workload.py +++ b/examples/kokkos/scan_workload.py @@ -25,4 +25,4 @@ def run() -> None: print(f"{A} total={result} time({timer_result})") if __name__ == "__main__": - run() \ No newline at end of file + run() From 42217197d7b202ef6d3b96736804b6cce9987ff2 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Thu, 29 Jan 2026 15:23:22 -0600 Subject: [PATCH 03/23] Format examples with Black --- examples/kokkos/01_hello_world.py | 3 +++ examples/kokkos/02_simple_reduce.py | 9 +++++--- examples/kokkos/03_simple_view.py | 10 ++++++--- examples/kokkos/04_simple_memoryspaces.py | 11 ++++++---- examples/kokkos/05_simple_atomics.py | 26 ++++++++++++++--------- examples/kokkos/add1.py | 11 ++++++---- examples/kokkos/math_functions.py | 15 +++++++------ examples/kokkos/matrix_sum.py | 14 +++++++----- examples/kokkos/random_sum.py | 11 ++++++---- examples/kokkos/scan_functor.py | 14 +++++++----- examples/kokkos/scan_workload.py | 12 +++++++---- 11 files changed, 87 insertions(+), 49 deletions(-) diff --git a/examples/kokkos/01_hello_world.py b/examples/kokkos/01_hello_world.py index 6745707d..7c9ee228 100644 --- a/examples/kokkos/01_hello_world.py +++ b/examples/kokkos/01_hello_world.py @@ -1,13 +1,16 @@ import pykokkos as pk + @pk.workunit def hello(i: int): pk.printf("Hello from i = %d\n", i) + def main(): N: int = 10 pk.set_default_space(pk.ExecutionSpace.OpenMP) pk.parallel_for(N, hello) + if __name__ == "__main__": main() diff --git a/examples/kokkos/02_simple_reduce.py b/examples/kokkos/02_simple_reduce.py index cfbb288c..cf14bccc 100644 --- a/examples/kokkos/02_simple_reduce.py +++ b/examples/kokkos/02_simple_reduce.py @@ -1,20 +1,23 @@ import numpy as np import pykokkos as pk + @pk.workunit def squaresum(i: int, acc, values): acc += values[i] + def main(): N: int = 10 pk.set_default_space(pk.ExecutionSpace.OpenMP) - + # Create array with squares values = np.array([i * i for i in range(N)], dtype=np.int32) - + total = pk.parallel_reduce(N, squaresum, values=values) - + print("Sum:", total) + if __name__ == "__main__": main() diff --git a/examples/kokkos/03_simple_view.py b/examples/kokkos/03_simple_view.py index d2bb53f8..5e9aa22a 100644 --- a/examples/kokkos/03_simple_view.py +++ b/examples/kokkos/03_simple_view.py @@ -1,26 +1,30 @@ import pykokkos as pk + @pk.workunit def initialize_view(i: int, a: pk.View2D[pk.int32]): for j in range(3): a[i][j] = (i + 1) ** (j + 1) + @pk.workunit def my_reduction(i: int, accumulator: pk.Acc[pk.double], a: pk.View2D[pk.int32]): accumulator += a[i][0] * a[i][1] / (a[i][2]) + def main(): N: int = 10 pk.set_default_space(pk.ExecutionSpace.OpenMP) - + a: pk.View2D[pk.int32] = pk.View([N, 3], pk.int32) - + pk.parallel_for(N, initialize_view, a=a) total: int = pk.parallel_reduce(N, my_reduction, a=a) - + for row in a: print(row) print("\nResult is", total) + if __name__ == "__main__": main() diff --git a/examples/kokkos/04_simple_memoryspaces.py b/examples/kokkos/04_simple_memoryspaces.py index 690ec95d..7ffb61f9 100644 --- a/examples/kokkos/04_simple_memoryspaces.py +++ b/examples/kokkos/04_simple_memoryspaces.py @@ -1,23 +1,26 @@ import pykokkos as pk + @pk.workunit def reduction(i: int, acc: pk.Acc[pk.double], a: pk.View2D[pk.int32]): acc += a[i][0] - a[i][1] + a[i][2] + def main(): N: int = 10 pk.set_default_space(pk.ExecutionSpace.OpenMP) - + a: pk.View2D[pk.int32] = pk.View([N, 3], pk.int32) - + # Initialize the view for i in range(N): for j in range(3): a[i][j] = i * N + j - + sum_result: int = pk.parallel_reduce(N, reduction, a=a) - + print(sum_result) + if __name__ == "__main__": main() diff --git a/examples/kokkos/05_simple_atomics.py b/examples/kokkos/05_simple_atomics.py index 4f31171f..9bb833b1 100644 --- a/examples/kokkos/05_simple_atomics.py +++ b/examples/kokkos/05_simple_atomics.py @@ -2,45 +2,51 @@ import random import pykokkos as pk + @pk.workunit -def findprimes(i: int, data: pk.View1D[pk.int32], result: pk.View1D[pk.int32], count: pk.View1D[pk.int32]): +def findprimes( + i: int, + data: pk.View1D[pk.int32], + result: pk.View1D[pk.int32], + count: pk.View1D[pk.int32], +): number: int = data[i] upper_bound: int = int(math.sqrt(number)) + 1 is_prime: bool = not (number % 2 == 0) k: int = 3 idx: int = 0 - + while k < upper_bound and is_prime: is_prime = not (number % k == 0) k += 2 - + if is_prime: # Note: This atomic operation may have race conditions without proper atomic support # For now, we remove the atomic trait as it's not supported idx = count[0] = count[0] + 1 result[idx - 1] = number + def simple_atomics(): N: int = 100 pk.set_default_space(pk.ExecutionSpace.OpenMP) - + data: pk.View1D[pk.int32] = pk.View([N], pk.int32) result: pk.View1D[pk.int32] = pk.View([N], pk.int32) # FIXED: Removed trait=pk.Trait.Atomic as it's not supported count: pk.View1D[pk.int32] = pk.View([1], pk.int32) - + # Initialize data with random numbers for i in range(N): data[i] = random.randint(0, N) - + pk.parallel_for(N, findprimes, data=data, result=result, count=count) - + # Print results for i in range(int(count[0])): print(int(result[i]), end=", ") - print( - "\nFound", int(count[0]), "prime numbers in", N, "random numbers" - ) + print("\nFound", int(count[0]), "prime numbers in", N, "random numbers") + if __name__ == "__main__": simple_atomics() diff --git a/examples/kokkos/add1.py b/examples/kokkos/add1.py index 3179e31d..78dbb158 100644 --- a/examples/kokkos/add1.py +++ b/examples/kokkos/add1.py @@ -1,24 +1,27 @@ import pykokkos as pk + @pk.workunit def add1(i: int, a: pk.View1D[pk.int32]): a[i] += 1 + def main(): n: int = 100 * 1000 N: int = n pk.set_default_space(pk.ExecutionSpace.OpenMP) - + a: pk.View1D[pk.int32] = pk.View([N], pk.int32) - + # Initialize the view for i in range(N): a[i] = 2 print(f"Initialized view: [{a[0]}, ... repeats {n-1} times]") - + pk.parallel_for(N, add1, a=a) - + print(f"Results: [{a[0]}, ... repeats {n-1} times]") + if __name__ == "__main__": main() diff --git a/examples/kokkos/math_functions.py b/examples/kokkos/math_functions.py index a6a38eab..92118a6a 100644 --- a/examples/kokkos/math_functions.py +++ b/examples/kokkos/math_functions.py @@ -1,28 +1,29 @@ import math import pykokkos as pk + @pk.workunit def my_calculation(i: int, a: pk.View1D[pk.int32], N: int): pk.printf("Running index %d\n", i) - a[i] += ( - math.cos(a[i]) + 2**i - math.pi / math.fabs(a[(i + 1) % N]) - ) + a[i] += math.cos(a[i]) + 2**i - math.pi / math.fabs(a[(i + 1) % N]) + def main(): n: int = 10 N: int = n pk.set_default_space(pk.ExecutionSpace.OpenMP) - + a: pk.View1D[pk.int32] = pk.View([N], pk.int32) - + # Initialize the view for i in range(N): a[i] = math.sqrt(math.tau) print("Initialized view:", a) - + pk.parallel_for(N, my_calculation, a=a, N=N) - + print("Results: ", a) + if __name__ == "__main__": main() diff --git a/examples/kokkos/matrix_sum.py b/examples/kokkos/matrix_sum.py index a8d913a7..61a7362b 100644 --- a/examples/kokkos/matrix_sum.py +++ b/examples/kokkos/matrix_sum.py @@ -1,33 +1,37 @@ import pykokkos as pk + @pk.workunit def sum_row(i: int, mat: pk.View2D[pk.int32], c: int): for j in range(1, c): mat[i][0] += mat[i][j] + @pk.workunit def final_sum(i: int, accumulator: pk.Acc[pk.double], mat: pk.View2D[pk.int32]): accumulator += mat[i][0] + def main(): r: int = 5 c: int = 10 pk.set_default_space(pk.ExecutionSpace.OpenMP) - + mat: pk.View2D[pk.int32] = pk.View([r, c], pk.int32) - + # Initialize the matrix for i in range(r): mat[i] = list(range(c * i, c * (i + 1))) - + for row in mat: print(row) print(f"Initialized {r}x{c} array") - + pk.parallel_for(r, sum_row, mat=mat, c=c) total: int = pk.parallel_reduce(r, final_sum, mat=mat) - + print("Total =", total) + if __name__ == "__main__": main() diff --git a/examples/kokkos/random_sum.py b/examples/kokkos/random_sum.py index 61f74123..893860e3 100644 --- a/examples/kokkos/random_sum.py +++ b/examples/kokkos/random_sum.py @@ -1,25 +1,28 @@ import random import pykokkos as pk + @pk.workunit def my_reduction(i: int, accumulator: pk.Acc[pk.int32], a: pk.View1D[pk.int32]): accumulator += a[i] + def main(): n: int = 10 N: int = n pk.set_default_space(pk.ExecutionSpace.OpenMP) - + a: pk.View1D[pk.int32] = pk.View([N], pk.int32) - + # Initialize the view with random values for i in range(N): a[i] = random.randint(0, 10) print("Initialized view:", a) - + total: int = pk.parallel_reduce(N, my_reduction, a=a) - + print("Sum:", total) + if __name__ == "__main__": main() diff --git a/examples/kokkos/scan_functor.py b/examples/kokkos/scan_functor.py index 0c7a98a4..f85e09ba 100644 --- a/examples/kokkos/scan_functor.py +++ b/examples/kokkos/scan_functor.py @@ -1,30 +1,34 @@ import pykokkos as pk + @pk.workunit def init(i: int, A: pk.View1D[pk.int32]): A[i] = i + @pk.workunit def scan(i: int, acc: pk.Acc[pk.double], last_pass: bool, A: pk.View1D[pk.int32]): acc += A[i] if last_pass: A[i] = acc + def run() -> None: N: int = 10 pk.set_default_space(pk.ExecutionSpace.OpenMP) - + A: pk.View1D[pk.int32] = pk.View([N], pk.int32) - + p = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, N) - + pk.parallel_for(p, init, A=A) - + timer = pk.Timer() result = pk.parallel_scan(p, scan, A=A) timer_result = timer.seconds() - + print(f"{A} total={result} time({timer_result})") + if __name__ == "__main__": run() diff --git a/examples/kokkos/scan_workload.py b/examples/kokkos/scan_workload.py index 6196b280..e51235ad 100644 --- a/examples/kokkos/scan_workload.py +++ b/examples/kokkos/scan_workload.py @@ -1,28 +1,32 @@ import pykokkos as pk + @pk.workunit def init(i: int, A: pk.View1D[pk.int32]): A[i] = i + @pk.workunit def scan(i: int, acc: pk.Acc[pk.double], last_pass: bool, A: pk.View1D[pk.int32]): acc += A[i] if last_pass: A[i] = acc + def run() -> None: N: int = 10 pk.set_default_space(pk.ExecutionSpace.OpenMP) - + A: pk.View1D[pk.int32] = pk.View([N], pk.int32) - + pk.parallel_for(N, init, A=A) - + timer = pk.Timer() result: int = pk.parallel_scan(N, scan, A=A) timer_result: float = timer.seconds() - + print(f"{A} total={result} time({timer_result})") + if __name__ == "__main__": run() From c6a0215b0d4ad09956cf554df6a7076f5e60b656 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 11 Feb 2026 22:11:59 -0600 Subject: [PATCH 04/23] Deleted ufunc implementations and tests. Replaced ufunc usage in examples with NumPy. Scalar math in kernels remains. Adjusted __init__.py and views.py accordingly. Array API CI workflow kept but precompile ufuncs test is commented out. Addresses issue #361 --- .github/workflows/array_api.yml | 2 +- examples/LogisticRegression/LR.py | 18 +- examples/NaiveBayes/GaussianNB.py | 52 +- pykokkos/__init__.py | 60 - pykokkos/interface/views.py | 14 +- pykokkos/lib/ufunc_workunits.py | 1393 ----------- pykokkos/lib/ufuncs.py | 3777 ----------------------------- tests/test_ufuncs.py | 737 ------ 8 files changed, 45 insertions(+), 6008 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 2cb6a855..26ea341e 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -54,6 +54,6 @@ jobs: # to circumvent the currently slow performance of # JIT compile/link, which can otherwise cause issues # for hypothesis-driven test case generation - pytest $GITHUB_WORKSPACE/tools/pre_compile_ufuncs.py -s + # pytest $GITHUB_WORKSPACE/tools/pre_compile_ufuncs.py -s # only run a subset of the conformance tests to get started pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_operators_and_elementwise_functions.py::test_log10 array_api_tests/test_operators_and_elementwise_functions.py::test_sqrt array_api_tests/test_operators_and_elementwise_functions.py::test_isfinite array_api_tests/test_operators_and_elementwise_functions.py::test_log2 array_api_tests/test_operators_and_elementwise_functions.py::test_log1p array_api_tests/test_operators_and_elementwise_functions.py::test_isinf array_api_tests/test_operators_and_elementwise_functions.py::test_log array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_sign array_api_tests/test_operators_and_elementwise_functions.py::test_square array_api_tests/test_operators_and_elementwise_functions.py::test_cos array_api_tests/test_operators_and_elementwise_functions.py::test_round array_api_tests/test_operators_and_elementwise_functions.py::test_trunc array_api_tests/test_operators_and_elementwise_functions.py::test_ceil array_api_tests/test_operators_and_elementwise_functions.py::test_floor array_api_tests/test_operators_and_elementwise_functions.py::test_exp array_api_tests/test_operators_and_elementwise_functions.py::test_sin array_api_tests/test_operators_and_elementwise_functions.py::test_tan array_api_tests/test_operators_and_elementwise_functions.py::test_tanh array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like array_api_tests/test_operators_and_elementwise_functions.py::test_positive array_api_tests/test_operators_and_elementwise_functions.py::test_isnan array_api_tests/test_operators_and_elementwise_functions.py::test_equal "array_api_tests/test_has_names.py::test_has_names[array_method-__pos__]" diff --git a/examples/LogisticRegression/LR.py b/examples/LogisticRegression/LR.py index 0222fd8c..3fea34a1 100644 --- a/examples/LogisticRegression/LR.py +++ b/examples/LogisticRegression/LR.py @@ -273,7 +273,7 @@ def _logistic_regression_path( _, n_features = X.shape - classes = pk.unique(y) + classes = np.unique(y) random_state = check_random_state(random_state) @@ -333,8 +333,8 @@ def _logistic_regression_path( lbin = LabelBinarizer() Y_multi = asarray(lbin.fit_transform(y)) if Y_multi.shape[1] == 1: - Y_multi = pk.hstack( - pk.negative(pk.subtract(Y_multi, asarray([1]))), Y_multi + Y_multi = np.hstack( + np.negative(np.subtract(Y_multi, asarray([1]))), Y_multi ) w0 = pk.zeros((classes.size, n_features + int(fit_intercept)), dtype=X.dtype) @@ -397,7 +397,7 @@ def _logistic_regression_path( func = loss.loss grad = loss.gradient hess = loss.gradient_hessian_product # hess = [gradient, hessp] - warm_start_sag = {"coef": pk.transpose(w0)} + warm_start_sag = {"coef": np.transpose(np.array(w0))} else: target = y_bin if solver == "lbfgs": @@ -471,7 +471,7 @@ def _logistic_regression_path( ) coef_ = asarray(coef_) if fit_intercept: - w0 = pk.hstack(pk.ravel(coef_), intercept_) + w0 = np.hstack(pk.ravel(coef_), intercept_) else: w0 = pk.ravel(coef_) @@ -862,7 +862,7 @@ def fit(self, X, y, sample_weight=None): X = asarray(X) y = asarray(y) - self.classes_ = pk.unique(y) + self.classes_ = np.unique(np.array(y)) multi_class = _check_multi_class(self.multi_class, solver, len(self.classes_)) @@ -1024,7 +1024,7 @@ def predict_proba(self, X): if decision.ndim == 1: # Workaround for multi_class="multinomial" and binary outcomes # which requires softmax prediction with only a 1D decision. - decision_2d = pk.hstack(pk.negative(decision), decision) + decision_2d = np.hstack((np.negative(decision), decision)) else: decision_2d = decision return softmax(decision_2d, copy=False) @@ -1045,7 +1045,7 @@ def predict_log_proba(self, X): Returns the log-probability of the sample for each class in the model, where classes are ordered as they are in ``self.classes_``. """ - return pk.log(self.predict_proba(X)) + return np.log(self.predict_proba(X)) def predict(self, X): """ @@ -1065,7 +1065,7 @@ def predict(self, X): else: indices = scores.argmax(axis=1) - return pk.index(self.classes_, asarray(indices, dtype=pk.int32)) + return self.classes_[np.array(indices, dtype=np.int32)] def main(): diff --git a/examples/NaiveBayes/GaussianNB.py b/examples/NaiveBayes/GaussianNB.py index 8c334f57..e1dff18f 100644 --- a/examples/NaiveBayes/GaussianNB.py +++ b/examples/NaiveBayes/GaussianNB.py @@ -95,7 +95,7 @@ def type_of_target(y, input_name=""): else: suffix = "" # [1, 2, 3] or [[1], [2], [3]] - if (len(pk.unique(y)) > 2) or (len(y.shape) >= 2 and len(y[0]) > 1): + if (len(np.unique(y)) > 2) or (len(y.shape) >= 2 and len(y[0]) > 1): return "multiclass" + suffix # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]] else: return "binary" # [1, 2] or [["a"], ["b"]] @@ -103,7 +103,7 @@ def type_of_target(y, input_name=""): def _unique_multiclass(y): if hasattr(y, "__array__"): - return pk.unique(asarray(y)) + return np.unique(asarray(y)) else: return set(y) @@ -264,7 +264,7 @@ def predict(self, X): X = self._check_X(X) jll = self._joint_log_likelihood(X) - return pk.index(self.classes_, pk.argmax(jll, axis=1)) + return self.classes_[np.argmax(np.array(jll), axis=1)] def predict_log_proba(self, X): """ @@ -285,7 +285,7 @@ def predict_log_proba(self, X): jll = self._joint_log_likelihood(X) # normalize by P(x) = P(f_1, ..., f_n) # log_prob_x = logsumexp(jll, axis=1) - # return jll - pk.transpose(pk.atleast_2d()) + # return jll - np.transpose(pk.atleast_2d()) def predict_proba(self, X): """ @@ -301,7 +301,7 @@ def predict_proba(self, X): the model. The columns correspond to the classes in sorted order, as they appear in the attribute :term:`classes_`. """ - return pk.exp(self.predict_log_proba(X)) + return np.exp(self.predict_log_proba(X)) class GaussianNB(_BaseNB): @@ -366,7 +366,7 @@ class labels known to the classifier. >>> print(clf.predict([[-0.8, -1]])) [1] >>> clf_pf = GaussianNB() - >>> clf_pf.partial_fit(X, Y, pk.unique(Y)) + >>> clf_pf.partial_fit(X, Y, np.unique(Y)) GaussianNB() >>> print(clf_pf.predict([[-0.8, -1]])) [1] @@ -397,7 +397,7 @@ def fit(self, X, y, sample_weight=None): y = asarray(self._validate_data(y=y)) return self._partial_fit( - X, y, pk.unique(y), _refit=True, sample_weight=sample_weight + X, y, np.unique(y), _refit=True, sample_weight=sample_weight ) def _check_X(self, X): @@ -440,12 +440,14 @@ def _update_mean_variance(n_past, mu, var, X, sample_weight=None): # Compute (potentially weighted) mean and variance of new datapoints if sample_weight is not None: n_new = float(sample_weight.sum()) - new_mu = pk.average(X, axis=0, weights=sample_weight) - new_var = pk.average((X - new_mu) ** 2, axis=0, weights=sample_weight) + new_mu = np.average(np.array(X), axis=0, weights=sample_weight) + new_var = np.average( + np.array(X - new_mu) ** 2, axis=0, weights=sample_weight + ) else: n_new = X.shape[0] - new_var = pk.var(X, axis=0) - new_mu = pk.mean(X, axis=0) + new_var = np.var(np.array(X), axis=0) + new_mu = np.mean(np.array(X), axis=0) if n_past == 0: return new_mu, new_var @@ -534,7 +536,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): # will cause numerical errors. To address this, we artificially # boost the variance by epsilon, a small fraction of the standard # deviation of the largest dimension. - self.epsilon_ = self.var_smoothing * pk.find_max(pk.var(X, axis=0)) + self.epsilon_ = self.var_smoothing * pk.find_max(np.var(np.array(X), axis=0)) if first_call: # This is the first call to partial_fit: @@ -569,13 +571,13 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): classes = self.classes_ - unique_y = pk.unique(y) - unique_y_in_classes = pk.in1d(unique_y, classes) + unique_y = np.unique(y) + unique_y_in_classes = np.in1d(unique_y, classes) if not pk.all(unique_y_in_classes): raise ValueError( "The target label(s) %s in y do not exist in the initial classes %s" - % (unique_y[pk.logical_not(unique_y_in_classes)], classes) + % (unique_y[np.logical_not(unique_y_in_classes)], classes) ) for y_i in unique_y: @@ -602,7 +604,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): # Update if only no priors is provided if self.priors is None: # Empirical prior, with sample_weight taken into account - self.class_prior_ = pk.divide(self.class_count_, pk.sum(self.class_count_)) + self.class_prior_ = np.divide(self.class_count_, pk.sum(self.class_count_)) return self @@ -611,15 +613,15 @@ def _joint_log_likelihood(self, X): total_classes = reduce(lambda a, b: a * b, self.classes_.shape, 1) for i in range(total_classes): - jointi = pk.log(self.class_prior_[i]) + jointi = np.log(self.class_prior_[i]) - n_ij = -0.5 * pk.sum(pk.log(pk.multiply(self.var_[i, :], 2.0 * pi))) - n_ij = pk.add( - pk.negative( - pk.multiply( + n_ij = -0.5 * pk.sum(np.log(np.multiply(self.var_[i, :], 2.0 * pi))) + n_ij = np.add( + np.negative( + np.multiply( pk.sum( - pk.divide( - pk.power(pk.add(X, pk.negative(self.theta_[i, :])), 2), + np.divide( + np.power(np.add(X, np.negative(self.theta_[i, :])), 2), self.var_[i, :], ), 1, @@ -630,9 +632,9 @@ def _joint_log_likelihood(self, X): n_ij, ) - joint_log_likelihood.append(pk.add(n_ij, jointi)) + joint_log_likelihood.append(np.add(n_ij, jointi)) - joint_log_likelihood = pk.transpose(asarray(joint_log_likelihood)) + joint_log_likelihood = np.transpose(asarray(joint_log_likelihood)) return joint_log_likelihood diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 9340945f..11ce5135 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -17,62 +17,6 @@ set_device_id, ) -from pykokkos.lib.ufuncs import ( - reciprocal, - log, - log2, - log10, - log1p, - sqrt, - sign, - add, - copyto, - subtract, - dot, - multiply, - matmul, - np_matmul, - divide, - negative, - positive, - power, - fmod, - square, - greater, - logaddexp, - true_divide, - logaddexp2, - floor_divide, - sin, - cos, - tan, - tanh, - logical_and, - logical_or, - logical_xor, - logical_not, - fmax, - fmin, - exp, - exp2, - argmax, - unique, - var, - in1d, - mean, - hstack, - transpose, - index, - isinf, - isnan, - equal, - isfinite, - round, - trunc, - ceil, - floor, - broadcast_view, -) from pykokkos.lib.info import iinfo, finfo from pykokkos.lib.create import zeros, zeros_like, ones, ones_like, full, full_like from pykokkos.lib.manipulate import reshape, ravel, expand_dims @@ -89,10 +33,6 @@ from pykokkos.lib.constants import e, pi, inf, nan from pykokkos.interface.views import astype -__array_api_version__ = "2021.12" - -__all__ = ["__array_api_version__"] - runtime_singleton.runtime = Runtime() import weakref diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index 62ed04b4..ada65dfd 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -503,8 +503,6 @@ def _get_type(self, dtype: Union[DataType, type]) -> Optional[DataType]: return None def __eq__(self, other): - # avoid circular import with scoped import - from pykokkos.lib.ufuncs import equal if isinstance(other, float): new_other = pk.View((), dtype=pk.double) @@ -539,7 +537,10 @@ def __eq__(self, other): new_other = other else: raise ValueError("unexpected types!") - return equal(self, new_other) + result_np = np.equal(np.array(self), np.array(new_other)) + result = pk.View(result_np.shape, dtype=pk.bool) + result[:] = result_np + return result def __hash__(self): try: @@ -667,8 +668,6 @@ def _get_base_view(self, parent_view: Union[Subview, View]) -> View: return base_view def __eq__(self, other): - # avoid circular import with scoped import - from pykokkos.lib.ufuncs import equal if isinstance(other, float): new_other = pk.View((), dtype=pk.double) @@ -703,7 +702,10 @@ def __eq__(self, other): new_other = other else: raise ValueError("unexpected types!") - return equal(self, new_other) + result_np = np.equal(np.array(self), np.array(new_other)) + result = pk.View(result_np.shape, dtype=pk.bool) + result[:] = result_np + return result def __add__(self, other): if isinstance(other, float): diff --git a/pykokkos/lib/ufunc_workunits.py b/pykokkos/lib/ufunc_workunits.py index 04c5606f..e69de29b 100644 --- a/pykokkos/lib/ufunc_workunits.py +++ b/pykokkos/lib/ufunc_workunits.py @@ -1,1393 +0,0 @@ -import pykokkos as pk - - -@pk.workunit -def exp_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): - out[tid] = exp(view[tid]) - - -@pk.workunit -def exp_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): - for i in range(view.extent(1)): - out[tid][i] = exp(view[tid][i]) - - -@pk.workunit -def exp_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = exp(view[tid]) - - -@pk.workunit -def exp_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = exp(view[tid][i]) - - -@pk.workunit -def sin_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): - out[tid] = sin(view[tid]) - - -@pk.workunit -def sin_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): - for i in range(view.extent(1)): - out[tid][i] = sin(view[tid][i]) - - -@pk.workunit -def sin_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = sin(view[tid]) - - -@pk.workunit -def sin_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = sin(view[tid][i]) - - -@pk.workunit -def tan_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): - out[tid] = tan(view[tid]) - - -@pk.workunit -def tan_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): - for i in range(view.extent(1)): - out[tid][i] = tan(view[tid][i]) - - -@pk.workunit -def tan_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = tan(view[tid]) - - -@pk.workunit -def tan_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = tan(view[tid][i]) - - -@pk.workunit -def tanh_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = tanh(view[tid]) - - -@pk.workunit -def tanh_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[tid][i] = tanh(view[tid][i]) - - -@pk.workunit -def tanh_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = tanh(view[tid]) - - -@pk.workunit -def tanh_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = tanh(view[tid][i]) - - -@pk.workunit -def equal_impl_5d_int8( - tid: int, - view1: pk.View5D[pk.int8], - view2: pk.View5D[pk.int8], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_float( - tid: int, - view1: pk.View5D[pk.float], - view2: pk.View5D[pk.float], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_double( - tid: int, - view1: pk.View5D[pk.double], - view2: pk.View5D[pk.double], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_int16( - tid: int, - view1: pk.View5D[pk.int16], - view2: pk.View5D[pk.int16], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_int32( - tid: int, - view1: pk.View5D[pk.int32], - view2: pk.View5D[pk.int32], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_int64( - tid: int, - view1: pk.View5D[pk.int64], - view2: pk.View5D[pk.int64], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_uint8( - tid: int, - view1: pk.View5D[pk.uint8], - view2: pk.View5D[pk.uint8], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_bool( - tid: int, - view1: pk.View5D[pk.uint8], - view2: pk.View5D[pk.uint8], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_uint16( - tid: int, - view1: pk.View5D[pk.uint16], - view2: pk.View5D[pk.uint16], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_uint32( - tid: int, - view1: pk.View5D[pk.uint32], - view2: pk.View5D[pk.uint32], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_5d_uint64( - tid: int, - view1: pk.View5D[pk.uint64], - view2: pk.View5D[pk.uint64], - out: pk.View5D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - for l in range(view1.extent(4)): - out[tid][i][j][k][l] = ( - view1[tid][i][j][k][l] == view2[tid][i][j][k][l] - ) - - -@pk.workunit -def equal_impl_4d_uint8( - tid: int, - view1: pk.View4D[pk.uint8], - view2: pk.View4D[pk.uint8], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_bool( - tid: int, - view1: pk.View4D[pk.uint8], - view2: pk.View4D[pk.uint8], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_float( - tid: int, - view1: pk.View4D[pk.float], - view2: pk.View4D[pk.float], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_double( - tid: int, - view1: pk.View4D[pk.double], - view2: pk.View4D[pk.double], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_uint16( - tid: int, - view1: pk.View4D[pk.uint16], - view2: pk.View4D[pk.uint16], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_uint32( - tid: int, - view1: pk.View4D[pk.uint32], - view2: pk.View4D[pk.uint32], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_uint64( - tid: int, - view1: pk.View4D[pk.uint64], - view2: pk.View4D[pk.uint64], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_3d_uint8( - tid: int, - view1: pk.View3D[pk.uint8], - view2: pk.View3D[pk.uint8], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_bool( - tid: int, - view1: pk.View3D[pk.uint8], - view2: pk.View3D[pk.uint8], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_uint16( - tid: int, - view1: pk.View3D[pk.uint16], - view2: pk.View3D[pk.uint16], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_uint32( - tid: int, - view1: pk.View3D[pk.uint32], - view2: pk.View3D[pk.uint32], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_uint64( - tid: int, - view1: pk.View3D[pk.uint64], - view2: pk.View3D[pk.uint64], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_float( - tid: int, - view1: pk.View3D[pk.float], - view2: pk.View3D[pk.float], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_double( - tid: int, - view1: pk.View3D[pk.double], - view2: pk.View3D[pk.double], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_2d_uint8( - tid: int, - view1: pk.View2D[pk.uint8], - view2: pk.View2D[pk.uint8], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_uint16( - tid: int, - view1: pk.View2D[pk.uint16], - view2: pk.View2D[pk.uint16], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_uint32( - tid: int, - view1: pk.View2D[pk.uint32], - view2: pk.View2D[pk.uint32], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_uint64( - tid: int, - view1: pk.View2D[pk.uint64], - view2: pk.View2D[pk.uint64], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_float( - tid: int, - view1: pk.View2D[pk.float], - view2: pk.View2D[pk.float], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_double( - tid: int, - view1: pk.View2D[pk.double], - view2: pk.View2D[pk.double], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_1d_uint8( - tid: int, - view1: pk.View1D[pk.uint8], - view2: pk.View1D[pk.uint8], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_bool( - tid: int, - view1: pk.View1D[pk.uint8], - view2: pk.View1D[pk.uint8], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_float( - tid: int, - view1: pk.View1D[pk.float], - view2: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_double( - tid: int, - view1: pk.View1D[pk.double], - view2: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_int8( - tid: int, - view1: pk.View1D[pk.int8], - view2: pk.View1D[pk.int8], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_int16( - tid: int, - view1: pk.View1D[pk.int16], - view2: pk.View1D[pk.int16], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_int32( - tid: int, - view1: pk.View1D[pk.int32], - view2: pk.View1D[pk.int32], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_int64( - tid: int, - view1: pk.View1D[pk.int64], - view2: pk.View1D[pk.int64], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_uint16( - tid: int, - view1: pk.View1D[pk.uint16], - view2: pk.View1D[pk.uint16], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_uint32( - tid: int, - view1: pk.View1D[pk.uint32], - view2: pk.View1D[pk.uint32], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_uint64( - tid: int, - view1: pk.View1D[pk.uint64], - view2: pk.View1D[pk.uint64], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_1d_int64( - tid: int, - view1: pk.View1D[pk.int64], - view2: pk.View1D[pk.int64], - out: pk.View1D[pk.uint8], -): - out[tid] = view1[tid] == view2[tid] - - -@pk.workunit -def equal_impl_2d_int8( - tid: int, - view1: pk.View2D[pk.int8], - view2: pk.View2D[pk.int8], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_bool( - tid: int, - view1: pk.View2D[pk.uint8], - view2: pk.View2D[pk.uint8], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_int16( - tid: int, - view1: pk.View2D[pk.int16], - view2: pk.View2D[pk.int16], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_int32( - tid: int, - view1: pk.View2D[pk.int32], - view2: pk.View2D[pk.int32], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_2d_int64( - tid: int, - view1: pk.View2D[pk.int64], - view2: pk.View2D[pk.int64], - out: pk.View2D[pk.uint8], -): - for i in range(view1.extent(1)): - out[tid][i] = view1[tid][i] == view2[tid][i] - - -@pk.workunit -def equal_impl_3d_int8( - tid: int, - view1: pk.View3D[pk.int8], - view2: pk.View3D[pk.int8], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_int16( - tid: int, - view1: pk.View3D[pk.int16], - view2: pk.View3D[pk.int16], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_int32( - tid: int, - view1: pk.View3D[pk.int32], - view2: pk.View3D[pk.int32], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_3d_int64( - tid: int, - view1: pk.View3D[pk.int64], - view2: pk.View3D[pk.int64], - out: pk.View3D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] - - -@pk.workunit -def equal_impl_4d_int8( - tid: int, - view1: pk.View4D[pk.int8], - view2: pk.View4D[pk.int8], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_int16( - tid: int, - view1: pk.View4D[pk.int16], - view2: pk.View4D[pk.int16], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_int32( - tid: int, - view1: pk.View4D[pk.int32], - view2: pk.View4D[pk.int32], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def equal_impl_4d_int64( - tid: int, - view1: pk.View4D[pk.int64], - view2: pk.View4D[pk.int64], - out: pk.View4D[pk.uint8], -): - for i in range(view1.extent(1)): - for j in range(view1.extent(2)): - for k in range(view1.extent(3)): - out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] - - -@pk.workunit -def floor_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = floor(view[tid]) - - -@pk.workunit -def floor_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[tid][i] = floor(view[tid][i]) - - -@pk.workunit -def floor_impl_3d_double( - tid: int, view: pk.View3D[pk.double], out: pk.View3D[pk.double] -): - for i in range(view.extent(1)): - for j in range(view.extent(2)): - out[tid][i][j] = floor(view[tid][i][j]) - - -@pk.workunit -def floor_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = floor(view[tid]) - - -@pk.workunit -def floor_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = floor(view[tid][i]) - - -@pk.workunit -def floor_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.float]): - for i in range(view.extent(1)): - for j in range(view.extent(2)): - out[tid][i][j] = floor(view[tid][i][j]) - - -@pk.workunit -def ceil_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = ceil(view[tid]) - - -@pk.workunit -def ceil_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[tid][i] = ceil(view[tid][i]) - - -@pk.workunit -def ceil_impl_3d_double( - tid: int, view: pk.View3D[pk.double], out: pk.View3D[pk.double] -): - for i in range(view.extent(1)): - for j in range(view.extent(2)): - out[tid][i][j] = ceil(view[tid][i][j]) - - -@pk.workunit -def ceil_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = ceil(view[tid]) - - -@pk.workunit -def ceil_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = ceil(view[tid][i]) - - -@pk.workunit -def ceil_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.float]): - for i in range(view.extent(1)): - for j in range(view.extent(2)): - out[tid][i][j] = ceil(view[tid][i][j]) - - -@pk.workunit -def trunc_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = trunc(view[tid]) - - -@pk.workunit -def trunc_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[tid][i] = trunc(view[tid][i]) - - -@pk.workunit -def trunc_impl_3d_double( - tid: int, view: pk.View3D[pk.double], out: pk.View3D[pk.double] -): - for i in range(view.extent(1)): - for j in range(view.extent(2)): - out[tid][i][j] = trunc(view[tid][i][j]) - - -@pk.workunit -def trunc_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = trunc(view[tid]) - - -@pk.workunit -def trunc_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = trunc(view[tid][i]) - - -@pk.workunit -def trunc_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.float]): - for i in range(view.extent(1)): - for j in range(view.extent(2)): - out[tid][i][j] = trunc(view[tid][i][j]) - - -@pk.workunit -def round_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = round(view[tid]) - - -@pk.workunit -def round_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[tid][i] = round(view[tid][i]) - - -@pk.workunit -def round_impl_3d_double( - tid: int, view: pk.View3D[pk.double], out: pk.View3D[pk.double] -): - for i in range(view.extent(1)): - for j in range(view.extent(2)): - out[tid][i][j] = round(view[tid][i][j]) - - -@pk.workunit -def round_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = round(view[tid]) - - -@pk.workunit -def round_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = round(view[tid][i]) - - -@pk.workunit -def round_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.float]): - for i in range(view.extent(1)): - for j in range(view.extent(2)): - out[tid][i][j] = round(view[tid][i][j]) - - -@pk.workunit -def isnan_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_uint8(tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_uint16( - tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_uint32( - tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_int16(tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_int32(tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_2d_int64(tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): - out[tid][i] = isnan(view[tid][i]) - - -@pk.workunit -def isnan_impl_1d_uint8(tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8]): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_uint16( - tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint8] -): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_uint32( - tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint8] -): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint8] -): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.uint8]): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_int16(tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.uint8]): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_int32(tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.uint8]): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_int64(tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.uint8]): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8] -): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8]): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isfinite_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_float( - tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_float( - tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_uint8( - tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_uint8( - tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.uint8]): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_int16( - tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_int16( - tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_uint16( - tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_uint16( - tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_int32( - tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_int32( - tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_uint32( - tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_uint32( - tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_int64( - tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_int64( - tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isfinite_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint8] -): - out[tid] = isfinite(view[tid]) - - -@pk.workunit -def isfinite_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isfinite(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8] -): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8]): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.uint8]): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_1d_int64(tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.uint8]): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_1d_int32(tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.uint8]): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_1d_uint8(tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8]): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_2d_uint8(tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_2d_int64(tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_1d_uint16( - tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint8] -): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_1d_int16(tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.uint8]): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_2d_int16(tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_2d_int32(tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_2d_uint16( - tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_1d_uint32( - tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint8] -): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_2d_uint32( - tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def isinf_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint8] -): - out[tid] = isinf(view[tid]) - - -@pk.workunit -def isinf_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint8] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = isinf(view[tid][i]) # type: ignore - - -@pk.workunit -def matmul_impl_1d_double( - tid: int, - acc: pk.Acc[pk.double], - viewA: pk.View1D[pk.double], - viewB: pk.View2D[pk.double], -): - acc += viewA[tid] * viewB[0][tid] - - -@pk.workunit -def matmul_impl_1d_float( - tid: int, - acc: pk.Acc[pk.float], - viewA: pk.View1D[pk.float], - viewB: pk.View2D[pk.float], -): - acc += viewA[tid] * viewB[0][tid] - - -@pk.workunit -def reciprocal_impl_1d_double(tid: int, view: pk.View1D[pk.double]): - view[tid] = 1 / view[tid] # type: ignore - - -@pk.workunit -def reciprocal_impl_1d_float(tid: int, view: pk.View1D[pk.float]): - view[tid] = 1 / view[tid] # type: ignore - - -@pk.workunit -def reciprocal_impl_2d_double(tid: int, view: pk.View2D[pk.double]): - for i in range(view.extent(1)): # type: ignore - view[tid][i] = 1 / view[tid][i] # type: ignore - - -@pk.workunit -def reciprocal_impl_2d_float(tid: int, view: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - view[tid][i] = 1 / view[tid][i] # type: ignore diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 1a095942..e69de29b 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -1,3777 +0,0 @@ -import re -import math -from inspect import getmembers, isfunction -from typing import Optional - -import numpy as np -import pykokkos as pk -from pykokkos.lib import ufunc_workunits -from pykokkos.interface import ViewType - -kernel_dict = dict(getmembers(ufunc_workunits, isfunction)) - - -def _supported_types_check(dtype_str, supported_type_strings): - options = "" - for type_str in supported_type_strings: - options += f".*{type_str}.*|" - options = options[:-1] - prog = re.compile(f"({options})") - result = prog.match(dtype_str) - if result is None: - raise NotImplementedError - - -def _ufunc_kernel_dispatcher( - profiler_name: Optional[str], tid, dtype, ndims, op, sub_dispatcher, **kwargs -): - dtype_extractor = re.compile(r".*(?:dtype|data_types|DataType)\.(\w+)") - if ndims == 0: - ndims = 1 - res = dtype_extractor.match(str(dtype)) - dtype_str = res.group(1) - if dtype_str == "float32": - dtype_str = "float" - elif dtype_str == "float64": - dtype_str = "double" - function_name_str = f"{op}_impl_{ndims}d_{dtype_str}" - desired_workunit = kernel_dict[function_name_str] - # call the kernel - ret = sub_dispatcher(profiler_name, tid, desired_workunit, **kwargs) - return ret - - -def _broadcast_views(view1, view2): - # support broadcasting by using the same - # shape matching rules as NumPy - # TODO: determine if this can be done with - # more memory efficiency? - if view1.shape != view2.shape: - new_shape = np.broadcast_shapes(view1.shape, view2.shape) - view1_new = pk.View([*new_shape], dtype=view1.dtype) - view1_new[:] = view1 - view1 = view1_new - view2_new = pk.View([*new_shape], dtype=view2.dtype) - view2_new[:] = view2 - view2 = view2_new - return view1, view2 - - -def _typematch_views(view1, view2): - # very crude casting implementation - # for binary ufuncs - dtype1 = view1.dtype - dtype2 = view2.dtype - dtype_extractor = re.compile(r".*(?:data_types|DataType)\.(\w+)") - res1 = dtype_extractor.match(str(dtype1)) - res2 = dtype_extractor.match(str(dtype2)) - effective_dtype = dtype1 - if res1 is not None and res2 is not None: - res1_dtype_str = res1.group(1) - res2_dtype_str = res2.group(1) - if res1_dtype_str == "double": - res1_dtype_str = "float64" - elif res1_dtype_str == "float": - res1_dtype_str = "float32" - if res2_dtype_str == "double": - res2_dtype_str = "float64" - elif res2_dtype_str == "float": - res2_dtype_str = "float32" - if res1_dtype_str == "bool" or res2_dtype_str == "bool": - res1_dtype_str = "uint8" - dtype1 = pk.uint8 - res2_dtype_str = "uint8" - dtype2 = pk.uint8 - if ("int" in res1_dtype_str and "int" in res2_dtype_str) or ( - "float" in res1_dtype_str and "float" in res2_dtype_str - ): - dtype_1_width = int(res1_dtype_str.split("t")[1]) - dtype_2_width = int(res2_dtype_str.split("t")[1]) - if dtype_1_width >= dtype_2_width: - effective_dtype = dtype1 - view2_new = pk.View([*view2.shape], dtype=effective_dtype) - view2_new[:] = view2.data - view2 = view2_new - else: - effective_dtype = dtype2 - view1_new = pk.View([*view1.shape], dtype=effective_dtype) - view1_new[:] = view1.data - view1 = view1_new - return view1, view2, effective_dtype - - -def reciprocal(view, profiler_name: Optional[str] = None): - """ - Return the reciprocal of the argument, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - Notes - ----- - .. note:: - This function is not designed to work with integers. - - """ - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=view.shape[0], - dtype=view.dtype.value, - ndims=len(view.shape), - op="reciprocal", - sub_dispatcher=pk.parallel_for, - view=view, - ) - # NOTE: pretty awkward to both return the view - # and operate on it in place; the former is closer - # to NumPy semantics - return view - - -@pk.workunit -def log_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): - out[tid] = log(view[tid]) # type: ignore - - -@pk.workunit -def log_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log(view[tid][i]) # type: ignore - - -@pk.workunit -def log_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = log(view[tid]) # type: ignore - - -@pk.workunit -def log_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log(view[tid][i]) # type: ignore - - -def log(view, profiler_name: Optional[str] = None): - """ - Natural logarithm, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - """ - if not isinstance(view, pk.ViewType): - return math.log(view) - - if len(view.shape) > 2: - raise NotImplementedError("log() ufunc only supports up to 2D views") - - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(profiler_name, 1, log_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for( - profiler_name, view.shape[0], log_impl_1d_double, view=view, out=out - ) - elif len(view.shape) == 2: - pk.parallel_for( - profiler_name, view.shape[0], log_impl_2d_double, view=view, out=out - ) - elif "float" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(profiler_name, 1, log_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for( - profiler_name, view.shape[0], log_impl_1d_float, view=view, out=out - ) - elif len(view.shape) == 2: - pk.parallel_for( - profiler_name, view.shape[0], log_impl_2d_float, view=view, out=out - ) - return out - - -@pk.workunit -def sqrt_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = sqrt(view[tid]) # type: ignore - - -@pk.workunit -def sqrt_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = sqrt(view[tid][i]) # type: ignore - - -@pk.workunit -def sqrt_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = sqrt(view[tid]) # type: ignore - - -@pk.workunit -def sqrt_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = sqrt(view[tid][i]) # type: ignore - - -def sqrt(view): - """ - Return the non-negative square root of the argument, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - Notes - ----- - .. note:: - This function should exhibit the same branch cut behavior - as the equivalent NumPy ufunc. - """ - if isinstance(view, (np.integer, np.floating)): - return math.sqrt(view) - # TODO: support complex types when they - # are available in pykokkos? - if len(view.shape) > 2: - raise NotImplementedError( - "only up to 2D views currently supported for sqrt() ufunc." - ) - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, sqrt_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sqrt_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sqrt_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, sqrt_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sqrt_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sqrt_impl_2d_float, view=view, out=out) - return out - - -@pk.workunit -def log2_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = log2(view[tid]) # type: ignore - - -@pk.workunit -def log2_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log2(view[tid][i]) # type: ignore - - -@pk.workunit -def log2_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = log2(view[tid]) # type: ignore - - -@pk.workunit -def log2_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log2(view[tid][i]) # type: ignore - - -def log2(view): - """ - Base-2 logarithm, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - """ - if len(view.shape) > 2: - raise NotImplementedError("log2() ufunc only supports up to 2D views") - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(1, log2_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log2_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log2_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(1, log2_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log2_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log2_impl_2d_float, view=view, out=out) - return out - - -@pk.workunit -def log10_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = log10(view[tid]) # type: ignore - - -@pk.workunit -def log10_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log10(view[tid][i]) # type: ignore - - -@pk.workunit -def log10_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = log10(view[tid]) # type: ignore - - -@pk.workunit -def log10_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log10(view[tid][i]) # type: ignore - - -def log10(view): - """ - Base-10 logarithm, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - """ - if view.size == 0: - return view - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(1, log10_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log10_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log10_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - # NOTE: is this really worth sending to a kernel? - pk.parallel_for(1, log10_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log10_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log10_impl_2d_float, view=view, out=out) - return out - - -@pk.workunit -def log1p_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = log1p(view[tid]) # type: ignore - - -@pk.workunit -def log1p_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = log1p(view[tid]) # type: ignore - - -@pk.workunit -def log1p_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log1p(view[tid][i]) # type: ignore - - -@pk.workunit -def log1p_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - out[tid][i] = log1p(view[tid][i]) # type: ignore - - -def log1p(view): - """ - Return the natural logarithm of one plus the input array, element-wise. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - y : pykokkos view - Output view. - - """ - if view.size == 0: - return view - out = pk.View(view.shape, view.dtype) - if len(view.shape) > 2: - raise NotImplementedError("log1p() ufunc only supports up to 2D views") - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, log1p_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log1p_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log1p_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, log1p_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], log1p_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], log1p_impl_2d_float, view=view, out=out) - return out - - -@pk.workunit -def sign_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_1d_uint8(tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.int8]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.int8]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_2d_uint8(tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_uint16( - tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint16] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_uint16( - tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint16] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_uint32( - tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint32] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_uint32( - tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint32] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint64] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint64] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_int16(tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.int16]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_int16(tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.int16]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_int32(tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.int32]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_int32(tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.int32]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_int64(tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.int64]): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_int64(tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.int64]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint64] -): - if view[tid] > 0: - out[tid] = 1 - elif view[tid] == 0: - out[tid] = 0 - elif view[tid] < 0: - out[tid] = -1 - else: - out[tid] = nan("") - - -@pk.workunit -def sign_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint64] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -@pk.workunit -def sign_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): # type: ignore - if view[tid][i] > 0: - out[tid][i] = 1 - elif view[tid][i] == 0: - out[tid][i] = 0 - elif view[tid][i] < 0: - out[tid][i] = -1 - else: - out[tid][i] = nan("") - - -def sign(view): - out = pk.View(view.shape, view.dtype) - if len(view.shape) > 2: - raise NotImplementedError( - "only up to 2D views currently supported for sign() ufunc." - ) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.double) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_double, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.float) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_float, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_float, view=view, out=out) - elif "uint32" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.uint32) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_uint32, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_uint32, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_uint32, view=view, out=out) - elif "uint16" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.uint16) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_uint16, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_uint16, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_uint16, view=view, out=out) - elif "int16" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.int16) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_int16, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_int16, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_int16, view=view, out=out) - elif "int32" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.int32) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_int32, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_int32, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_int32, view=view, out=out) - elif "uint64" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.uint64) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_uint64, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_uint64, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_uint64, view=view, out=out) - elif "int64" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.int64) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_int64, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_int64, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_int64, view=view, out=out) - elif "uint8" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.uint8) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_uint8, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_uint8, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_uint8, view=view, out=out) - elif "int8" in view.dtype.__name__: - if view.shape == (): - new_view = pk.View([1], dtype=pk.int8) - new_view[:] = view - pk.parallel_for(1, sign_impl_1d_int8, view=new_view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], sign_impl_1d_int8, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], sign_impl_2d_int8, view=view, out=out) - return out - - -@pk.workunit -def add_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] + viewB[tid % viewB.extent(0)] - - -@pk.workunit -def add_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] + viewB[tid] - - -@pk.workunit -def add_impl_2d_1d(tid, viewA, viewB, out): - for i in range(viewA.extent(1)): - out[tid][i] = viewA[tid][i] + viewB[i % viewB.extent(0)] - - -@pk.workunit -def add_impl_2d_2d(tid, viewA, viewB, out): - r_idx: int = tid / viewA.extent(1) - c_idx: int = tid - r_idx * viewA.extent(1) - out[r_idx][c_idx] = viewA[r_idx][c_idx] + viewB[r_idx][c_idx] - - -def add(viewA, viewB, profiler_name: Optional[str] = None): - """ - Sums positionally corresponding elements - of viewA with elements of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view or scalar - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if not isinstance(viewB, pk.ViewType): - view_temp = pk.View([1], pk.double) - view_temp[0] = viewB - viewB = view_temp - - if len(viewA.shape) > 2 or len(viewB.shape) > 2: - raise NotImplementedError("only 2D views currently supported for add() ufunc.") - - if viewA.rank() == 2 and viewB.rank() == 2 and viewA.shape != viewB.shape: - raise RuntimeError( - "2D views must have the same shape for add ufunc. Mismatch: {} and {}".format( - viewA.shape, viewB.shape - ) - ) - - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - if viewA.rank() == 1 and viewB.rank() == 1: - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - add_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - elif viewA.rank() == 2 and viewB.rank() == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0] * viewA.shape[1], - add_impl_2d_2d, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - larger = viewA if len(viewA.shape) > len(viewB.shape) else viewB - smaller = viewB if len(viewA.shape) == len(larger.shape) else viewA - out = pk.View([larger.shape[0], larger.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - larger.shape[0], - add_impl_2d_1d, - viewA=larger, - viewB=smaller, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - if viewA.rank() == 1 and viewB.rank() == 1: - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - add_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out, - ) - elif viewB.rank() == 2 and viewB.rank() == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0] * viewA.shape[1], - add_impl_2d_2d, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - larger = viewA if len(viewA.shape) > len(viewB.shape) else viewB - smaller = viewB if len(viewA.shape) == len(larger.shape) else viewA - out = pk.View([larger.shape[0], larger.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - larger.shape[0], - add_impl_2d_1d, - viewA=larger, - viewB=smaller, - out=out, - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def multiply_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] * viewB[tid % viewB.extent(0)] - - -@pk.workunit -def multiply_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] * viewB[tid] - - -@pk.workunit -def multiply_impl_2d_with_1d(tid, viewA, viewB, out): - r_idx: int = tid / viewA.extent(1) - c_idx: int = tid - r_idx * viewA.extent(1) - out[r_idx][c_idx] = viewA[r_idx][c_idx] * viewB[r_idx % viewB.extent(0)] - - -@pk.workunit -def multiply_impl_2d_with_2d(tid, viewA, viewB, out): - r_idx: int = tid / viewA.extent(1) - c_idx: int = tid - r_idx * viewA.extent(1) - out[r_idx][c_idx] = viewA[r_idx][c_idx] * viewB[r_idx][c_idx] - - -def multiply(viewA, viewB, profiler_name: Optional[str] = None): - """ - Multiplies positionally corresponding elements - of viewA with elements of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view or scalar - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - if not isinstance(viewB, pk.ViewType): - view_temp = pk.View([1], pk.double) - view_temp[0] = viewB - viewB = view_temp - - if len(viewA.shape) > 2 or len(viewB.shape) > 2: - raise NotImplementedError( - "only 2D views currently supported for mulitply() ufunc." - ) - - if viewA.rank() == 2 and viewB.rank() == 2 and viewA.shape != viewB.shape: - raise RuntimeError( - "2D views must have the same shape for add ufunc. Mismatch: {} and {}".format( - viewA.shape, viewB.shape - ) - ) - - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - if len(viewA.shape) == 1 and len(viewB.shape) == 1: - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - multiply_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - elif len(viewA.shape) == 2 and len(viewB.shape) == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0] * viewA.shape[1], - multiply_impl_2d_with_2d, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - larger = viewA if len(viewA.shape) > len(viewB.shape) else viewB - smaller = viewB if len(viewA.shape) == len(larger.shape) else viewA - out = pk.View([larger.shape[0], larger.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - larger.shape[0] * larger.shape[1], - multiply_impl_2d_with_1d, - viewA=larger, - viewB=smaller, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - if len(viewA.shape) == 1 and len(viewB.shape) == 1: - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - multiply_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out, - ) - elif len(viewA.shape) == 2 and len(viewB.shape) == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0] * viewA.shape[1], - multiply_impl_2d_with_2d, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - larger = viewA if len(viewA.shape) > len(viewB.shape) else viewB - smaller = viewB if len(viewA.shape) == len(larger.shape) else viewA - out = pk.View([larger.shape[0], larger.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - larger.shape[0] * larger.shape[1], - multiply_impl_2d_with_1d, - viewA=larger, - viewB=smaller, - out=out, - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -def check_broadcastable_impl(viewA, viewB): - """ - Check whether two views are broadcastable as defined here: - https://numpy.org/doc/stable/user/basics.broadcasting.html - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - _ : boolean - True if both views are compatible. - """ - - if viewA.shape == viewB.shape: - return False # cannot broadcast same dims - - v1_p = len(viewA.shape) - 1 - v2_p = len(viewB.shape) - 1 - - while v1_p > -1 and v2_p > -1: - if viewA.shape[v1_p] != viewB.shape[v2_p]: - if viewA.shape[v1_p] != 1 and viewB.shape[v2_p] != 1: - return False - - v1_p -= 1 - v2_p -= 1 - - return True - - -@pk.workunit -def stretch_fill_impl_scalar_into_1d(tid, scalar, viewOut): - viewOut[tid] = scalar - - -@pk.workunit -def stretch_fill_impl_scalar_into_2d(tid, cols, scalar, viewOut): - for i in range(cols): - viewOut[tid][i] = scalar - - -@pk.workunit -def stretch_fill_impl_1d_into_2d(tid, cols, viewIn, viewOut): - for i in range(cols): - viewOut[tid][i] = viewIn[i] - - -@pk.workunit -def stretch_fill_impl_2d(tid, inner_its, col_wise, viewIn, viewOut): - for i in range(inner_its): - if col_wise: - viewOut[i][tid] = viewIn[i][0] - else: - viewOut[tid][i] = viewIn[0][i] - - -def broadcast_view(val, viewB): - """ - Broadcasts val onto viewB, returns the "stretched" version of viewA - - Parameters - ---------- - val : pykokkos view or Scalar - View or scalar to be broadcasted (is shorter and compatible in dimensions). - viewB : pykokkos view - View to be broadcasted onto (is longer and compatible in dimensions). - - Returns - ------- - out : pykokkos view - Broadcasted version of viewA. - - """ - if len(viewB.shape) > 2: - raise NotImplementedError("Broadcasting is only supported upto 2D views") - - is_view = False - if isinstance(val, ViewType): - for dim in val.shape: - if dim != 1: - is_view = True - - if not is_view: - val = val[0] if len(val.shape) == 1 else val[0][0] - - if is_view: - is_first_small = len(val.shape) < len(viewB.shape) or ( - (len(val.shape) == len(viewB.shape)) and val.shape < viewB.shape - ) - if not check_broadcastable_impl(val, viewB) or not is_first_small: - raise ValueError("Incompatible broadcast") - if not val.dtype == viewB.dtype: - raise ValueError("Broadcastable views must have same dtypes") - - out = pk.View(viewB.shape, viewB.dtype) - - if is_view: - # if both 2D - if ( - len(val.shape) == 2 - ): # viewB must be 2 because of the val.shape < viewB.shape check - # figure which orientation is val (row or col) - col_wise = 1 if val.shape[1] == 1 else 0 - inner_its = viewB.shape[0] if col_wise else viewB.shape[1] - outer_its = viewB.shape[1] if col_wise else viewB.shape[0] - pk.parallel_for( - outer_its, - stretch_fill_impl_2d, - inner_its=inner_its, - col_wise=col_wise, - viewIn=val, - viewOut=out, - ) - else: # 1d to 2D - pk.parallel_for( - out.shape[0], - stretch_fill_impl_1d_into_2d, - cols=viewB.shape[1], - viewIn=val, - viewOut=out, - ) - - return out - - # scalar - - if len(viewB.shape) == 1: - out_1d = pk.View(viewB.shape) - pk.parallel_for( - viewB.shape[0], stretch_fill_impl_scalar_into_1d, scalar=val, viewOut=out_1d - ) - return out_1d - - # else 2d - pk.parallel_for( - out.shape[0], - stretch_fill_impl_scalar_into_2d, - cols=out.shape[1], - scalar=val, - viewOut=out, - ) - return out - - -@pk.workunit -def subtract_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] - viewB[tid] - - -@pk.workunit -def subtract_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] - viewB[tid] - - -@pk.workunit -def subtract_impl_2d(tid, cols, viewA, viewB, viewOut): - for i in range(cols): - viewOut[tid][i] = viewA[tid][i] - viewB[tid][i] - - -@pk.workunit -def subtract_impl_scalar_1d(tid, viewA, scalar, viewOut): - viewOut[tid] = viewA[tid] - scalar - - -@pk.workunit -def subtract_impl_scalar_2d(tid, cols, viewA, scalar, viewOut): - for i in range(cols): - viewOut[tid][i] = viewA[tid][i] - scalar - - -def subtract(viewA, valB, profiler_name: Optional[str] = None): - """ - Subtracts positionally corresponding elements - of viewA with elements of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - valB : pykokkos view or scalar - Input view or scalar value. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - is_scalar = True - if isinstance(valB, ViewType): - # if this is a single valued view1D or view2D just count that as a scalar - for dim in valB.shape: - if dim != 1: - is_scalar = False - - if is_scalar: - valB = valB[0] if len(valB.shape) == 1 else valB[0][0] - - if len(viewA.shape) > 2 or (not is_scalar and len(valB.shape) > 2): - raise NotImplementedError( - "only 1D and 2D views currently supported for subtract() ufunc." - ) - - if not is_scalar: - - if viewA.shape != valB.shape and not check_broadcastable_impl( - viewA, valB - ): # if shape is not same check compatibility - raise ValueError("Views must be broadcastable") - - # check if size is same otherwise broadcast and fix - if len(viewA.shape) < len(valB.shape) or ( - len(viewA.shape) == len(valB.shape) and viewA.shape < valB.shape - ): - viewA = broadcast_view(viewA, valB) - elif len(valB.shape) < len(viewA.shape) or ( - len(viewA.shape) == len(valB.shape) and valB.shape < viewA.shape - ): - valB = broadcast_view(valB, viewA) - - if viewA.dtype.__name__ == "float64" and valB.dtype.__name__ == "float64": - - if len(viewA.shape) == 1: - out = pk.View(viewA.shape, pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_1d_double, - viewA=viewA, - viewB=valB, - out=out, - ) - - if len(viewA.shape) == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_2d, - cols=viewA.shape[1], - viewA=viewA, - viewB=valB, - viewOut=out, - ) - - elif viewA.dtype.__name__ == "float32" and valB.dtype.__name__ == "float32": - - if len(viewA.shape) == 1: - out = pk.View(viewA.shape, pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_1d_float, - viewA=viewA, - viewB=valB, - out=out, - ) - - if len(viewA.shape) == 2: - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_2d, - cols=viewA.shape[1], - viewA=viewA, - viewB=valB, - viewOut=out, - ) - else: - raise RuntimeError("Incompatible Types") - - return out - - # is scalar subtract ----------------------- - if len(viewA.shape) == 1: # 1D - out = None - if viewA.dtype.__name__ == "float64": - out = pk.View(viewA.shape, pk.double) - if viewA.dtype.__name__ == "float32": - out = pk.View(viewA.shape, pk.float) - - if out is None: - raise RuntimeError("Incompatible Types") - - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_scalar_1d, - viewA=viewA, - scalar=valB, - viewOut=out, - ) - - if len(viewA.shape) == 2: # 2D - out = None - if viewA.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) - if viewA.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) - - if out is None: - raise RuntimeError("Incompatible Types") - pk.parallel_for( - profiler_name, - viewA.shape[0], - subtract_impl_scalar_2d, - cols=viewA.shape[1], - viewA=viewA, - scalar=valB, - viewOut=out, - ) - - return out - - -@pk.workunit -def copyto_impl_2d(tid, viewA, viewB): - r_idx: int = tid / viewA.extent(1) - c_idx: int = tid - r_idx * viewA.extent(1) - - viewA[r_idx][c_idx] = viewB[r_idx][c_idx] - - -@pk.workunit -def copyto_impl_1d(tid, viewA, viewB): - viewA[tid] = viewB[tid] - - -def copyto(viewA, viewB, profiler_name: Optional[str] = None): - """ - copies values of viewB into valueA for corresponding indicies - - Parameters - ---------- - viewA : pykokkos view - Input view. - valB : pykokkos view or scalar - Input view - - Returns - ------- - Void - """ - - if not isinstance(viewA, ViewType): - raise ValueError("copyto: Cannot copy to a non-view type") - if not isinstance(viewB, ViewType): - raise ValueError("copyto: Cannot copy from a non-view type") - if viewA.shape != viewB.shape: - if not check_broadcastable_impl( - viewA, viewB - ): # if shape is not same check compatibility - raise ValueError( - "copyto: Views must be broadcastable or of the same size. {} against {}".format( - viewA.shape, viewB.shape - ) - ) - # check if size is same otherwise broadcast and fix - viewA = broadcast_view(viewB, viewA) - - # implementation constraint, for now - if viewA.rank() > 2: - raise NotImplementedError( - "copyto: This version of Pykokkos only supports copyto upto 2D views" - ) - - if viewA.rank() == 1: - pk.parallel_for( - profiler_name, viewA.shape[0], copyto_impl_1d, viewA=viewA, viewB=viewB - ) - - else: - outRows = viewA.shape[0] - outCols = viewA.shape[1] - totalThreads = outRows * outCols - pk.parallel_for( - profiler_name, totalThreads, copyto_impl_2d, viewA=viewA, viewB=viewB - ) - - -@pk.workunit -def np_matmul_impl_2d_2d(tid, cols, vec_length, viewA, viewB, viewOut): - r_idx: int = tid / cols - c_idx: int = tid - r_idx * cols - - for i in range(vec_length): - viewOut[r_idx][c_idx] += viewA[r_idx][i] * viewB[i][c_idx] - - -@pk.workunit -def np_matmul_impl_1d_2d(tid, vec_length, view1D, viewB, viewOut): - for i in range(vec_length): - viewOut[tid] += view1D[i] * viewB[i][tid] - - -@pk.workunit -def np_matmul_impl_2d_1d(tid, vec_length, viewA, view1D, viewOut): - for i in range(vec_length): - viewOut[tid] += viewA[tid][i] * view1D[i] - - -def np_matmul(viewA, viewB, profiler_name: Optional[str] = None): - """ - Upto 2D Matrix Multiplication of compatible views according to numpy specification - - The behavior depends on the arguments in the following way: - [*] If both arguments are 2-D they are multiplied like conventional matrices. - - [X] Not implemented yet - If either argument is N-D, N > 2, it is treated as a - stack of matrices residing in the last two indexes and broadcast accordingly. - - [*] If the first argument is 1-D, it is promoted to a matrix by prepending a 1 - to its dimensions. After matrix multiplication the prepended 1 is removed. - - [*] If the second argument is 1-D, it is promoted to a matrix by appending a 1 - to its dimensions. After matrix multiplication the appended 1 is removed. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - Pykokkos view - Matmul result in a view or 0.0 in case views are empty - - - """ - - if len(viewA.shape) > 2 or len(viewB.shape) > 2: - raise NotImplementedError("Matmul only supports upto 2D views") - - viewAType = viewA.dtype.__name__ - viewBType = viewB.dtype.__name__ - - if viewAType != viewBType: - raise RuntimeError( - "Cannot multiply {} with {} dtype. Types must be same.".format( - viewAType, viewBType - ) - ) - - if not viewA.shape and not viewB.shape: - return 0.0 - - viewALast = viewA.shape[1] if len(viewA.shape) == 2 else viewA.shape[0] - viewBFirst = viewB.shape[0] if len(viewB.shape) == 2 else viewB.shape[0] - - if viewALast != viewBFirst: - print(viewALast, viewBFirst) - raise RuntimeError( - "Matrix dimensions are not compatible for multiplication: {} and {}".format( - viewA.shape, viewB.shape - ) - ) - - outRows = viewA.shape[0] if len(viewA.shape) == 2 else 1 - outCols = viewB.shape[1] if len(viewB.shape) == 2 else 1 - totalThreads = outRows * outCols - - out = None - if len(viewA.shape) == 1 or len(viewB.shape) == 1: - dim = max(outCols, outRows) - out = pk.View([dim], pk.float if viewBType == "float32" else pk.double) - else: - out = pk.View( - [outRows, outCols], pk.float if viewBType == "float32" else pk.double - ) - - # CASE 1 BOTH 2D - if len(viewA.shape) == len(viewB.shape) and len(viewA.shape) == 2: - pk.parallel_for( - profiler_name, - totalThreads, - np_matmul_impl_2d_2d, - cols=outCols, - vec_length=viewALast, - viewA=viewA, - viewB=viewB, - viewOut=out, - ) - - elif len(viewA.shape) == 1 and len(viewB.shape) == 1: - return dot(viewA, viewB) - - # CASE 2 Either is 1D - elif len(viewA.shape) == 1: - pk.parallel_for( - profiler_name, - totalThreads, - np_matmul_impl_1d_2d, - vec_length=viewA.shape[0], - view1D=viewA, - viewB=viewB, - viewOut=out, - ) - - elif len(viewB.shape) == 1: - pk.parallel_for( - profiler_name, - totalThreads, - np_matmul_impl_2d_1d, - vec_length=viewB.shape[0], - viewA=viewA, - view1D=viewB, - viewOut=out, - ) - - else: - raise RuntimeError( - "Unhandled case of matrix multiplication shapes: {} with {}".format( - viewA.shape, viewB.shape - ) - ) - - return out - - -def matmul(viewA, viewB, profiler_name: Optional[str] = None): - """ - 1D Matrix Multiplication of compatible views - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - Float/Double - 1D Matmul result - - """ - if len(viewA.shape) != 1 or viewA.shape[0] != viewB.shape[0]: - raise RuntimeError( - "Input operand 1 has a mismatch in its core dimension (Size {} is different from {})".format( - viewA.shape[0], viewB.shape[0] - ) - ) - - a_dtype_str = viewA.dtype.__name__ - b_dtype_str = viewB.dtype.__name__ - if not (a_dtype_str == "float64" and b_dtype_str == "float64"): - if not (a_dtype_str == "float32" and b_dtype_str == "float32"): - raise RuntimeError("Incompatible Types") - - return _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=viewA.shape[0], - dtype=viewA.dtype.value, - ndims=1, - op="matmul", - sub_dispatcher=pk.parallel_reduce, - viewA=viewA, - viewB=viewB, - ) - - -@pk.workunit -def dot_impl_1d_double( - tid: int, - acc: pk.Acc[pk.double], - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], -): - acc += viewA[tid] * viewB[tid] - - -@pk.workunit -def dot_impl_1d_float( - tid: int, - acc: pk.Acc[pk.float], - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], -): - acc += viewA[tid] * viewB[tid] - - -def dot(viewA, viewB): - """ - 1D Matrix Multiplication of compatible views - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - Float/Double - - """ - - if len(viewA.shape) == 0 and len(viewB.shape) == 0: - return 0 - - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("only 1D views supported for dot() ufunc.") - - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.parallel_reduce( - viewA.shape[0], dot_impl_1d_double, viewA=viewA, viewB=viewB - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.parallel_reduce( - viewA.shape[0], dot_impl_1d_float, viewA=viewA, viewB=viewB - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def divide_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] / viewB[tid % viewB.extent(0)] - - -@pk.workunit -def divide_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] / viewB[tid] - - -@pk.workunit -def divide_impl_2d_1d_double( - tid: int, - viewA: pk.View2D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View2D[pk.double], -): - for i in range(viewA.extent(1)): - out[tid][i] = viewA[tid][i] / viewB[i % viewB.extent(0)] - - -def divide(viewA, viewB, profiler_name: Optional[str] = None): - """ - Divides positionally corresponding elements - of viewA with elements of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if not isinstance(viewB, pk.ViewType) and not isinstance(viewB, pk.ViewType): - view_temp = pk.View([1], pk.double) - view_temp[0] = viewB - viewB = view_temp - - if viewA.rank() == 2: - out = pk.View(viewA.shape, pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - divide_impl_2d_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - viewA.shape[0], - divide_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - profiler_name, - viewA.shape[0], - divide_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def negative_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = view[tid] * -1 - - -@pk.workunit -def negative_impl_1d_float( - tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float] -): - out[tid] = view[tid] * -1 - - -def negative(view, profiler_name: Optional[str] = None): - """ - Element-wise negative of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(view.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for negative() ufunc." - ) - if view.dtype.__name__ == "float64": - out = pk.View([view.shape[0]], pk.double) - pk.parallel_for( - profiler_name, view.shape[0], negative_impl_1d_double, view=view, out=out - ) - elif view.dtype.__name__ == "float32": - out = pk.View([view.shape[0]], pk.float) - pk.parallel_for( - profiler_name, view.shape[0], negative_impl_1d_float, view=view, out=out - ) - else: - raise NotImplementedError - return out - - -def positive(view): - """ - Element-wise positive of the view; - Essentially returns a copy of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if view.shape == (): - out = pk.View((), dtype=view.dtype) - else: - out = pk.View([*view.shape], dtype=view.dtype) - out[...] = view - return out - - -@pk.workunit -def power_impl_scalar_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = pow(viewA[0], viewB[tid]) - - -@pk.workunit -def power_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = pow(viewA[tid], viewB[tid]) - - -@pk.workunit -def power_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = pow(viewA[tid], viewB[tid]) - - -@pk.workunit -def power_impl_2d_double( - tid: int, - viewA: pk.View2D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View2D[pk.double], -): - for i in range(viewA.extent(1)): - out[tid][i] = pow(viewA[tid][i], viewB[i % viewB.extent(0)]) - - -def power(viewA, viewB): - """ - Returns a view with each val in viewA raised - to the positionally corresponding power in viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if not isinstance(viewB, pk.ViewType): - view_temp = pk.View([1], pk.double) - view_temp[0] = viewB - viewB = view_temp - - if isinstance(viewA, int): - view_temp = pk.View([1], pk.double) - view_temp[0] = viewA - viewA = view_temp - - out = pk.View([viewB.shape[0]], pk.double) - pk.parallel_for( - viewB.shape[0], power_impl_scalar_double, viewA=viewA, viewB=viewB, out=out - ) - elif viewA.rank() == 2: - out = pk.View(viewA.shape, pk.double) - pk.parallel_for( - viewA.shape[0], power_impl_2d_double, viewA=viewA, viewB=viewB, out=out - ) - elif viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], power_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], power_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def fmod_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = fmod(viewA[tid], viewB[tid]) - - -@pk.workunit -def fmod_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = fmod(viewA[tid], viewB[tid]) - - -def fmod(viewA, viewB): - """ - Element-wise remainder of division when element of viewA is - divided by positionally corresponding element of viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("fmod() ufunc only supports 1D views") - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], fmod_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], fmod_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def square_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_uint8(tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_uint8(tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_uint16( - tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint16] -): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_uint16( - tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint16] -): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_uint32( - tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint32] -): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_uint32( - tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint32] -): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_uint64( - tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint64] -): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_uint64( - tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint64] -): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.int8]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.int8]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_int16(tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.int16]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_int16(tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.int16]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_int32(tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.int32]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_int32(tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.int32]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -@pk.workunit -def square_impl_1d_int64(tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.int64]): - out[tid] = view[tid] * view[tid] - - -@pk.workunit -def square_impl_2d_int64(tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.int64]): - for i in range(view.extent(1)): - out[tid][i] = view[tid][i] * view[tid][i] - - -def square(view): - """ - Squares argument element-wise - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(view.shape) > 2: - raise NotImplementedError( - "only up to 2D views currently supported for square() ufunc." - ) - out = pk.View(view.shape, view.dtype) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_double, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_float, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_float, view=view, out=out) - elif "uint8" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_uint8, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_uint8, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_uint8, view=view, out=out) - elif "uint16" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_uint16, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_uint16, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_uint16, view=view, out=out) - elif "uint32" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_uint32, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_uint32, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_uint32, view=view, out=out) - elif "uint64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_uint64, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_uint64, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_uint64, view=view, out=out) - elif "int8" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_int8, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_int8, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_int8, view=view, out=out) - elif "int16" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_int16, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_int16, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_int16, view=view, out=out) - elif "int32" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_int32, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_int32, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_int32, view=view, out=out) - elif "int64" in view.dtype.__name__: - if view.shape == (): - pk.parallel_for(1, square_impl_1d_int64, view=view, out=out) - elif len(view.shape) == 1: - pk.parallel_for(view.shape[0], square_impl_1d_int64, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], square_impl_2d_int64, view=view, out=out) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def greater_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] > viewB[tid] - - -@pk.workunit -def greater_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] > viewB[tid] - - -def greater(viewA, viewB): - """ - Return the truth value of viewA > viewB element-wise. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("greater() ufunc only supports 1D views") - out = pk.View([viewA.shape[0]], pk.uint8) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - pk.parallel_for( - viewA.shape[0], greater_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - pk.parallel_for( - viewA.shape[0], greater_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def logaddexp_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = log(exp(viewA[tid]) + exp(viewB[tid])) - - -@pk.workunit -def logaddexp_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = log(exp(viewA[tid]) + exp(viewB[tid])) - - -def logaddexp(viewA, viewB): - """ - Return a view with log(exp(a) + exp(b)) calculate for - positionally corresponding elements in viewA and viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logaddexp() ufunc." - ) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], logaddexp_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], logaddexp_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -def true_divide(viewA, viewB): - """ - true_divide is an alias of divide - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - - return divide(viewA, viewB) - - -@pk.workunit -def logaddexp2_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = log2(pow(2, viewA[tid]) + pow(2, viewB[tid])) - - -@pk.workunit -def logaddexp2_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = log2(pow(2, viewA[tid]) + pow(2, viewB[tid])) - - -def logaddexp2(viewA, viewB): - """ - Return a view with log(pow(2, a) + pow(2, b)) calculated for - positionally corresponding elements in viewA and viewB - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logaddexp2() ufunc." - ) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], logaddexp2_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], logaddexp2_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def floor_divide_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = viewA[tid] // viewB[tid] - - -@pk.workunit -def floor_divide_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = viewA[tid] // viewB[tid] - - -def floor_divide(viewA, viewB): - """ - Divides positionally corresponding elements - of viewA with elements of viewB and floors the result - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for floor_divide() ufunc." - ) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], - floor_divide_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], - floor_divide_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out, - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -def sin(view, profiler_name: Optional[str] = None): - """ - Element-wise trigonometric sine of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("sin() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=dtype) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="sin", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -@pk.workunit -def cos_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): - out[tid] = cos(view[tid]) - - -@pk.workunit -def cos_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): - for i in range(view.extent(1)): - out[tid][i] = cos(view[tid][i]) - - -@pk.workunit -def cos_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = cos(view[tid]) - - -@pk.workunit -def cos_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): - for i in range(view.extent(1)): - out[tid][i] = cos(view[tid][i]) - - -def cos(view): - """ - Element-wise trigonometric cosine of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(view.shape) > 2: - raise NotImplementedError( - "only up to 2D views currently supported for cos() ufunc." - ) - if "double" in view.dtype.__name__ or "float64" in view.dtype.__name__: - out = pk.View([*view.shape], dtype=pk.float64) - if len(view.shape) == 1: - pk.parallel_for(view.shape[0], cos_impl_1d_double, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], cos_impl_2d_double, view=view, out=out) - elif "float" in view.dtype.__name__: - out = pk.View([*view.shape], dtype=pk.float32) - if len(view.shape) == 1: - pk.parallel_for(view.shape[0], cos_impl_1d_float, view=view, out=out) - elif len(view.shape) == 2: - pk.parallel_for(view.shape[0], cos_impl_2d_float, view=view, out=out) - else: - raise NotImplementedError - return out - - -def tan(view, profiler_name: Optional[str] = None): - """ - Element-wise tangent of the view - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("tan() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=dtype) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="tan", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -@pk.workunit -def logical_and_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] and viewB[tid] - - -@pk.workunit -def logical_and_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] and viewB[tid] - - -def logical_and(viewA, viewB): - """ - Return the element-wise truth value of viewA AND viewB. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logical_and() ufunc." - ) - out = pk.View([viewA.shape[0]], pk.uint8) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - pk.parallel_for( - viewA.shape[0], - logical_and_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - pk.parallel_for( - viewA.shape[0], logical_and_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def logical_or_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] or viewB[tid] - - -@pk.workunit -def logical_or_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = viewA[tid] or viewB[tid] - - -def logical_or(viewA, viewB): - """ - Return the element-wise truth value of viewA OR viewB. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logical_or() ufunc." - ) - out = pk.View([viewA.shape[0]], pk.uint8) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - pk.parallel_for( - viewA.shape[0], logical_or_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - pk.parallel_for( - viewA.shape[0], logical_or_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def logical_xor_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.uint8], -): - out[tid] = bool(viewA[tid]) ^ bool(viewB[tid]) - - -@pk.workunit -def logical_xor_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.uint8], -): - out[tid] = bool(viewA[tid]) ^ bool(viewB[tid]) - - -def logical_xor(viewA, viewB): - """ - Return the element-wise truth value of viewA XOR viewB. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logical_xor() ufunc." - ) - out = pk.View([viewA.shape[0]], pk.uint8) - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - pk.parallel_for( - viewA.shape[0], - logical_xor_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out, - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - pk.parallel_for( - viewA.shape[0], logical_xor_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def logical_not_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8] -): - out[tid] = not view[tid] - - -@pk.workunit -def logical_not_impl_1d_float( - tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8] -): - out[tid] = not view[tid] - - -def logical_not(view): - """ - Element-wise logical_not of the view. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view (uint8) - Output view. - - """ - if len(view.shape) > 1: - raise NotImplementedError( - "only 1D views currently supported for logical_not() ufunc." - ) - out = pk.View([view.shape[0]], pk.uint8) - if view.dtype.__name__ == "float64": - pk.parallel_for(view.shape[0], logical_not_impl_1d_double, view=view, out=out) - elif view.dtype.__name__ == "float32": - pk.parallel_for(view.shape[0], logical_not_impl_1d_float, view=view, out=out) - return out - - -@pk.workunit -def fmax_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = fmax(viewA[tid], viewB[tid]) - - -@pk.workunit -def fmax_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = fmax(viewA[tid], viewB[tid]) - - -def fmax(viewA, viewB): - """ - Return the element-wise fmax. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("fmax() ufunc only supports 1D views") - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], fmax_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], fmax_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -@pk.workunit -def fmin_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = fmin(viewA[tid], viewB[tid]) - - -@pk.workunit -def fmin_impl_1d_float( - tid: int, - viewA: pk.View1D[pk.float], - viewB: pk.View1D[pk.float], - out: pk.View1D[pk.float], -): - out[tid] = fmin(viewA[tid], viewB[tid]) - - -def fmin(viewA, viewB): - """ - Return the element-wise fmin. - - Parameters - ---------- - viewA : pykokkos view - Input view. - viewB : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("fmax() ufunc only supports 1D views") - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], fmin_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], fmin_impl_1d_float, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -def exp(view, profiler_name: Optional[str] = None): - """ - Element-wise exp of the view. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("exp() ufunc only supports up to 2D views") - if view.size == 0: - return view - out = pk.View([*view.shape], dtype=dtype) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="exp", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -@pk.workunit -def exp2_impl_1d_double( - tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = pow(2, view[tid]) - - -@pk.workunit -def exp2_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = pow(2, view[tid]) - - -def exp2(view): - """ - Element-wise 2**x of the view. - - Parameters - ---------- - view : pykokkos view - Input view. - - Returns - ------- - out : pykokkos view - Output view. - - """ - if len(view.shape) > 1: - raise NotImplementedError("only 1D views currently supported for exp2() ufunc.") - if view.dtype.__name__ == "float64": - out = pk.View([view.shape[0]], pk.double) - pk.parallel_for(view.shape[0], exp2_impl_1d_double, view=view, out=out) - elif view.dtype.__name__ == "float32": - out = pk.View([view.shape[0]], pk.float) - pk.parallel_for(view.shape[0], exp2_impl_1d_float, view=view, out=out) - else: - raise NotImplementedError - return out - - -# TODO: Implement parallel max reduction with index -def argmax(view, axis=None): - if isinstance(axis, pk.ViewType): - raise NotImplementedError - - res = np.argmax(view, axis=axis) - view = pk.View(res.shape, pk.int32) - view[:] = res - - return view - - -# TODO: Implement parallel sorting + filtering -def unique(view): - res = np.unique(view) - view = pk.View(res.shape, pk.double) - view[:] = res - - return view - - -@pk.workunit -def var_impl_2d_axis0_double( - tid: int, - view: pk.View2D[pk.double], - view_mean: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = 0 - for i in range(view.extent(0)): - out[tid] += (pow(view[i][tid] - view_mean[tid], 2)) / view.extent(0) - - -@pk.workunit -def var_imple_2d_axis1_double( - tid: int, - view: pk.View2D[pk.double], - view_mean: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - out[tid] = 0 - for i in range(view.extent(1)): - out[tid] += (pow(view[tid][i] - view_mean[tid], 2)) / view.extent(1) - - -@pk.workunit -def var_impl_1d(tid, acc, view, mean): - acc += pow(view[tid] - mean, 2) / view.extent(0) - - -def var(view, axis=None, profiler_name: Optional[str] = None): # population - if isinstance(axis, pk.ViewType): - raise NotImplementedError - - if view.rank() > 2: - raise NotImplementedError( - "Current version of Pykokkos only supports variance for upto 2D views" - ) - - if view.rank() == 2: # legacy code - if view.dtype.__name__ == "float64": - if axis == 0: - view_mean = mean(view, 0, profiler_name) - out = pk.View([view.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - view.shape[1], - var_impl_2d_axis0_double, - view=view, - view_mean=view_mean, - out=out, - ) - return out - else: - view_mean = mean(view, 1, profiler_name) - out = pk.View([view.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - view.shape[0], - var_imple_2d_axis1_double, - view=view, - view_mean=view_mean, - out=out, - ) - return out - else: - raise RuntimeError("Incompatible Types") - elif view.rank() == 1: # newer impl - mean_val = mean(view, profiler_name) - return pk.parallel_reduce( - profiler_name, view.shape[0], var_impl_1d, view=view, mean=mean_val - ) - else: - raise RuntimeError("Unexpected view of shape {}".format(view.shape)) - - -@pk.workunit -def mean_impl_1d_axis0_double( - tid: int, view: pk.View2D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = 0 - for i in range(view.extent(0)): - out[tid] += view[i][tid] / view.extent(0) - - -@pk.workunit -def mean_impl_1d_axis1_double( - tid: int, view: pk.View2D[pk.double], out: pk.View1D[pk.double] -): - out[tid] = 0 - for i in range(view.extent(1)): - out[tid] += view[tid][i] / view.extent(1) - - -@pk.workunit -def mean_impl_1d(tid, acc, view): - acc += view[tid] / view.extent(0) - - -def mean(view, axis=None, profiler_name: Optional[str] = None): - if isinstance(axis, pk.ViewType): - raise NotImplementedError - - if view.rank() > 2: - raise NotImplementedError( - "Current version of Pykokkos only supports variance for upto 2D views" - ) - - if view.rank() == 2: - if view.dtype.__name__ == "float64": # legacy - if axis == 0: - out = pk.View([view.shape[1]], pk.double) - pk.parallel_for( - profiler_name, - view.shape[1], - mean_impl_1d_axis0_double, - view=view, - out=out, - ) - return out - else: - out = pk.View([view.shape[0]], pk.double) - pk.parallel_for( - profiler_name, - view.shape[0], - mean_impl_1d_axis1_double, - view=view, - out=out, - ) - - return out - else: - raise RuntimeError("Incompatible Types") - - elif view.rank() == 1: - return pk.parallel_reduce(profiler_name, view.shape[0], mean_impl_1d, view=view) - else: - raise RuntimeError("Unexpected view of shape {}".format(view.shape)) - - -@pk.workunit -def in1d_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.int8], -): - out[tid] = 0 - for i in range(viewB.extent(0)): - if viewB[i] == viewA[tid]: - out[tid] = 1 - break - - -def in1d(viewA, viewB): - if viewA.dtype.__name__ == "float64": - out = pk.View(viewA.shape, pk.int8) - pk.parallel_for( - viewA.shape[0], in1d_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - - return out - - -@pk.workunit -def transpose_impl_2d_double( - tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] -): - for i in range(view.extent(1)): - out[i][tid] = view[tid][i] - - -def transpose(view): - if view.rank() == 1: - return view - - if view.rank() == 2: - if view.dtype.__name__ == "float64": - out = pk.View(view.shape[::-1], pk.double) - pk.parallel_for(view.shape[0], transpose_impl_2d_double, view=view, out=out) - return out - - raise RuntimeError("Transpose supports 2D views only") - - -@pk.workunit -def hstack_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.double], - out: pk.View1D[pk.double], -): - if tid >= viewA.extent(0): - out[tid] = viewB[tid - viewA.extent(0)] - else: - out[tid] = viewA[tid] - - -@pk.workunit -def hstack_impl_2d_double( - tid: int, - viewA: pk.View2D[pk.double], - viewB: pk.View2D[pk.double], - out: pk.View2D[pk.double], -): - for i in range(out.extent(1)): - if i >= viewA.extent(1): - out[tid][i] = viewB[tid][i - viewA.extent(1)] - else: - out[tid][i] = viewA[tid][i] - - -def hstack(viewA, viewB): - if viewA.shape != viewB.shape: - raise RuntimeError( - "All the input view dimensions for the concatenation axis must match exactly" - ) - - if viewA.rank() == 2 and viewB.rank() == 2: - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0], viewA.shape[1] * 2], pk.double) - pk.parallel_for( - out.shape[0], hstack_impl_2d_double, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("hstack supports 2D views of type double only") - elif viewA.rank() == 1 and viewB.rank() == 1: - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0] + viewB.shape[0]], pk.double) - pk.parallel_for( - out.shape[0], hstack_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("hstack supports 1D views of type double only") - else: - raise RuntimeError("hstack supports views of same shape (1D and 2D) only") - - return out - - -@pk.workunit -def index_impl_1d_double( - tid: int, - viewA: pk.View1D[pk.double], - viewB: pk.View1D[pk.int32], - out: pk.View1D[pk.double], -): - out[tid] = viewA[viewB[tid]] - - -def index(viewA, viewB): - if viewB.dtype == pk.int32: - out = pk.View(viewB.shape, pk.double) - pk.parallel_for( - viewB.shape[0], index_impl_1d_double, viewA=viewA, viewB=viewB, out=out - ) - else: - raise RuntimeError("Incompatible Types") - return out - - -def isnan(view, profiler_name: Optional[str] = None): - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("isnan() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=pk.bool) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - if view.ndim == 0: - new_view = pk.View([1], dtype=view.dtype) - new_view[0] = view - view = new_view - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="isnan", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def isinf(view, profiler_name: Optional[str] = None): - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("isinf() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=pk.bool) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="isinf", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def equal(view1, view2, profiler_name: Optional[str] = None): - """ - Computes the truth value of ``view1_i`` == ``view2_i`` for each element - ``x1_i`` of the input view ``view1`` with the respective element ``x2_i`` - of the input view ``view2``. - - - Parameters - ---------- - view1 : pykokkos view - Input view. May have any data type. - view2 : pykokkos view - Input view. May have any data type, but must be shape-compatible - with ``view1`` via broadcasting. - - Returns - ------- - out : pykokkos view (bool) - Output view. - """ - if view1.size == 0 and view2.size == 0: - ret = pk.View((), dtype=pk.bool) - ret[...] = 1 - return ret - view1, view2 = _broadcast_views(view1, view2) - dtype1 = view1.dtype - dtype2 = view2.dtype - view1, view2, effective_dtype = _typematch_views(view1, view2) - ndims = len(view1.shape) - if ndims > 5: - raise NotImplementedError("equal() ufunc only supports up to 5D views") - out = pk.View([*view1.shape], dtype=pk.bool) - if view1.shape == (): - tid = 1 - else: - tid = view1.shape[0] - if isinstance(view1, pk.Subview): - new_view = pk.View((), dtype=view1.dtype) - new_view[:] = view1.data - view1 = new_view - if isinstance(view2, pk.Subview): - new_view = pk.View((), dtype=view2.dtype) - new_view[:] = view2.data - view2 = new_view - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=effective_dtype, - ndims=ndims, - op="equal", - sub_dispatcher=pk.parallel_for, - out=out, - view1=view1, - view2=view2, - ) - return out - - -def isfinite(view, profiler_name: Optional[str] = None): - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("isfinite() ufunc only supports up to 2D views") - if view.size == 0: - out = pk.View(view.shape, dtype=pk.bool) - return out - out = pk.View([*view.shape], dtype=pk.bool) - if view.shape == (): - new_view = pk.View([1], dtype=dtype) - new_view[:] = view - view = new_view - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="isfinite", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def round(view, profiler_name: Optional[str] = None): - """ - Rounds each element of the input view to the nearest integer-valued number. - - Parameters - ---------- - view : pykokkos view - Should have a numeric data type. - - Returns - ------- - out: pykokkos view - A view containing the rounded result for each element in - the input view. The returned view must have the same data - type as the input view. - - Notes - ----- - If view element ``i`` is already integer-valued, the result is ``i``. - - """ - dtype = view.dtype - ndims = len(view.shape) - dtype_str = str(dtype) - if "int" in dtype_str: - # special case defined in API std - return view - out = pk.View(view.shape, dtype=dtype) - if ndims > 3: - raise NotImplementedError( - "only up to 3D views currently supported for round() ufunc." - ) - - _supported_types_check(dtype_str, {"double", "float64", "float"}) - - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="round", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def trunc(view, profiler_name: Optional[str] = None): - """ - Rounds each element ``i`` of the input view to the integer-valued number - that is closest to but no greater than ``i``. - - Parameters - ---------- - view : pykokkos view - Should have a numeric data type. - - Returns - ------- - out: pykokkos view - A view containing the rounded result for each element in - the input view. The returned view must have the same data - type as the input view. - - Notes - ----- - If view element ``i`` is already integer-valued, the result is ``i``. - - """ - dtype = view.dtype - ndims = len(view.shape) - dtype_str = str(dtype) - if "int" in dtype_str: - # special case defined in API std - return view - out = pk.View(view.shape, dtype=dtype) - if ndims > 3: - raise NotImplementedError( - "only up to 3D views currently supported for trunc() ufunc." - ) - - _supported_types_check(dtype_str, {"double", "float64", "float"}) - - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="trunc", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def ceil(view, profiler_name: Optional[str] = None): - """ - Rounds each element of the input view to the smallest (i.e., closest to -infinity) - integer-valued number that is not less than a given element. - - Parameters - ---------- - view : pykokkos view - Should have a numeric data type. - - Returns - ------- - out: pykokkos view - A view containing the rounded result for each element in - the input view. The returned view must have the same data - type as the input view. - - Notes - ----- - If view element ``i`` is already integer-valued, the result is ``i``. - - """ - dtype = view.dtype - ndims = len(view.shape) - dtype_str = str(dtype) - if "int" in dtype_str: - # special case defined in API std - return view - out = pk.View(view.shape, dtype=dtype) - if ndims > 3: - raise NotImplementedError( - "only up to 3D views currently supported for ceil() ufunc." - ) - - _supported_types_check(dtype_str, {"double", "float64", "float"}) - - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="ceil", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def floor(view, profiler_name: Optional[str] = None): - """ - Rounds each element of the input view to the greatest (i.e., closest to +infinity) - integer-valued number that is not greater than a given element. - - Parameters - ---------- - view : pykokkos view - Should have a numeric data type. - - Returns - ------- - out: pykokkos view - A view containing the rounded result for each element in - the input view. The returned view must have the same data - type as the input view. - - Notes - ----- - If view element ``i`` is already integer-valued, the result is ``i``. - - """ - dtype = view.dtype - ndims = len(view.shape) - dtype_str = str(dtype) - if "int" in dtype_str: - # special case defined in API std - return view - out = pk.View(view.shape, dtype=dtype) - if ndims > 3: - raise NotImplementedError( - "only up to 3D views currently supported for floor() ufunc." - ) - - _supported_types_check(dtype_str, {"double", "float64", "float"}) - - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="floor", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out - - -def tanh(view, profiler_name: Optional[str] = None): - """ - Calculates an approximation to the hyperbolic tangent for each element x_i of the input view. - - Parameters - ---------- - view : pykokkos view - Input view whose elements each represent a hyperbolic angle. Should have a floating-point data type. - - Returns - ------- - y : pykokkos view - A view containing the hyperbolic tangent of each element in the input view. The returned view must - have a floating-point data type determined by type promotion rules. - """ - dtype = view.dtype - ndims = len(view.shape) - if ndims > 2: - raise NotImplementedError("tanh() ufunc only supports up to 2D views") - out = pk.View([*view.shape], dtype=dtype) - if view.shape == (): - tid = 1 - else: - tid = view.shape[0] - _ufunc_kernel_dispatcher( - profiler_name=profiler_name, - tid=tid, - dtype=dtype, - ndims=ndims, - op="tanh", - sub_dispatcher=pk.parallel_for, - out=out, - view=view, - ) - return out diff --git a/tests/test_ufuncs.py b/tests/test_ufuncs.py index c8049b3f..e69de29b 100644 --- a/tests/test_ufuncs.py +++ b/tests/test_ufuncs.py @@ -1,737 +0,0 @@ -import pykokkos as pk - -import numpy as np -from numpy.random import default_rng -from numpy.testing import assert_allclose -import pytest - - -@pk.workunit -def sqrt_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = sqrt(view[tid]) - - -@pk.workunit -def exp_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = exp(view[tid]) - - -@pk.workunit -def exp2_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = exp2(view[tid]) - - -@pk.workunit -def positive_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = positive(view[tid]) - - -@pk.workunit -def negative_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = negative(view[tid]) - - -@pk.workunit -def absolute_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = abs(view[tid]) - - -@pk.workunit -def fabsolute_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = fabs(view[tid]) - - -@pk.workunit -def rint_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = rint(view[tid]) - - -@pk.workunit -def conjugate_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = conj(view[tid]) - - -@pk.workunit -def sign_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = sign(view[tid]) - - -@pk.workunit -def log_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = log(view[tid]) - - -@pk.workunit -def log2_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = log2(view[tid]) - - -@pk.workunit -def log10_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = log10(view[tid]) - - -@pk.workunit -def expm1_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = expm1(view[tid]) - - -@pk.workunit -def log1p_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = log1p(view[tid]) - - -@pk.workunit -def square_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = square(view[tid]) - - -@pk.workunit -def cbrt_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = cbrt(view[tid]) - - -@pk.workunit -def reciprocal_workunit(tid: int, view: pk.View1D[pk.double]) -> None: - view[tid] = reciprocal(view[tid]) - - -@pytest.mark.parametrize( - "kokkos_workunit, numpy_ufunc", - [ - (sqrt_workunit, np.sqrt), - (exp_workunit, np.exp), - pytest.param( - exp2_workunit, np.exp2, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - positive_workunit, np.positive, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - negative_workunit, np.negative, marks=pytest.mark.xfail(reason="see gh-27") - ), - (absolute_workunit, np.absolute), - (fabsolute_workunit, np.fabs), - pytest.param( - rint_workunit, np.rint, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - conjugate_workunit, - np.conjugate, - marks=pytest.mark.xfail(reason="see gh-27"), - ), - pytest.param( - sign_workunit, np.sign, marks=pytest.mark.xfail(reason="see gh-27") - ), - (log_workunit, np.log), - (log2_workunit, np.log2), - (log10_workunit, np.log10), - (expm1_workunit, np.expm1), - (log1p_workunit, np.log1p), - pytest.param( - square_workunit, np.square, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - cbrt_workunit, np.cbrt, marks=pytest.mark.xfail(reason="see gh-27") - ), - pytest.param( - reciprocal_workunit, - np.reciprocal, - marks=pytest.mark.xfail(reason="see gh-27"), - ), - ], -) -def test_1d_unary_ufunc_vs_numpy(kokkos_workunit, numpy_ufunc): - # verify that we can easily recreate the functionality - # of most NumPy "unary" ufuncs on 1D views/arrays without much - # custom code - # NOTE: maybe we directly provide i.e., pk.sqrt(view) - # "pykokkos ufuncs" some day? - view: pk.View1d = pk.View([10], pk.double) - view[:] = np.arange(10, dtype=np.float64) - pk.parallel_for(10, kokkos_workunit, view=view) - actual = view - expected = numpy_ufunc(range(10)) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.reciprocal, np.reciprocal), - (pk.log, np.log), - (pk.log2, np.log2), - (pk.log10, np.log10), - (pk.log1p, np.log1p), - (pk.sqrt, np.sqrt), - (pk.sign, np.sign), - (pk.negative, np.negative), - (pk.positive, np.positive), - (pk.square, np.square), - (pk.sin, np.sin), - (pk.cos, np.cos), - (pk.tan, np.tan), - (pk.logical_not, np.logical_not), - (pk.exp, np.exp), - (pk.exp2, np.exp2), - (pk.mean, np.mean), - (pk.var, np.var), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_1d_exposed_ufuncs_vs_numpy(pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype): - # test the ufuncs we have exposed in the pk namespace - # vs. their NumPy equivalents - expected = numpy_ufunc(np.arange(10, dtype=numpy_dtype)) - - view: pk.View1d = pk.View([10], pk_dtype) - view[:] = np.arange(10, dtype=numpy_dtype) - actual = pk_ufunc(view=view) - # log10 single-precision needs relaxed tol - # for now - if numpy_ufunc in {np.log10, np.cos, np.tan} and numpy_dtype == np.float32: - assert_allclose(actual, expected, rtol=1.5e-7) - else: - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.add, np.add), - (pk.subtract, np.subtract), - (pk.multiply, np.multiply), - (pk.divide, np.divide), - (pk.np_matmul, np.matmul), - (pk.power, np.power), - (pk.fmod, np.fmod), - (pk.greater, np.greater), - (pk.logaddexp, np.logaddexp), - (pk.floor_divide, np.floor_divide), - (pk.true_divide, np.true_divide), - (pk.logaddexp2, np.logaddexp2), - (pk.logical_and, np.logical_and), - (pk.logical_or, np.logical_or), - (pk.logical_xor, np.logical_xor), - (pk.fmax, np.fmax), - (pk.fmin, np.fmin), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_multi_array_1d_exposed_ufuncs_vs_numpy( - pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype -): - - # test the multi array ufuncs we have exposed - # in the pk namespace vs. their NumPy equivalents - expected = numpy_ufunc( - np.arange(10, dtype=numpy_dtype), np.full(10, 5, dtype=numpy_dtype) - ) - - viewA: pk.View1d = pk.View([10], pk_dtype) - viewA[:] = np.arange(10, dtype=numpy_dtype) - viewB: pk.View1d = pk.View([10], pk_dtype) - viewB[:] = np.full(10, 5, dtype=numpy_dtype) - - actual = pk_ufunc(viewA, viewB) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.matmul, np.matmul), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_matmul_1d_exposed_ufuncs_vs_numpy( - pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype -): - expected = numpy_ufunc( - np.arange(10, dtype=numpy_dtype), np.full((10, 1), 2, dtype=numpy_dtype) - ) - - viewA = pk.View([10], pk_dtype) - viewB = pk.View([10, 1], pk_dtype) - viewA[:] = np.arange(10, dtype=numpy_dtype) - viewB[:] = np.full((10, 1), 2, dtype=numpy_dtype) - - with pytest.raises(RuntimeError) as e_info: - viewC = pk.View([11], pk_dtype) - viewC[:] = np.arange(11, dtype=numpy_dtype) - pk_ufunc(viewC, viewB) - - assert ( - e_info.value.args[0] - == "Input operand 1 has a mismatch in its core dimension (Size 11 is different from 10)" - ) - - actual = pk_ufunc(viewA, viewB) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "arr", - [ - np.array([4, -1, np.inf]), - np.array([-np.inf, np.nan, np.inf]), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_1d_sqrt_negative_values(arr, pk_dtype, numpy_dtype): - # verify sqrt behavior for negative reals, - # NaN and infinite values - expected = np.sqrt(arr, dtype=numpy_dtype) - view: pk.View1d = pk.View([arr.size], pk_dtype) - view[:] = arr - actual = pk.sqrt(view=view) - assert_allclose(actual, expected) - - -def test_caching(): - # regression test for gh-34 - expected = np.reciprocal(np.arange(10, dtype=np.float32)) - for i in range(300): - view: pk.View1d = pk.View([10], pk.float) - view[:] = np.arange(10, dtype=np.float32) - actual = pk.reciprocal(view=view) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.reciprocal, np.reciprocal), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -def test_2d_exposed_ufuncs_vs_numpy(pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype): - rng = default_rng(123) - in_arr = rng.random((5, 5)).astype(numpy_dtype) - expected = numpy_ufunc(in_arr) - - view: pk.View2d = pk.View([5, 5], pk_dtype) - view[:] = in_arr - actual = pk_ufunc(view=view) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.np_matmul, np.matmul), - ], -) -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -@pytest.mark.parametrize( - "test_dim", [[4, 4, 4, 4], [4, 3, 3, 4], [1, 1, 1, 1], [2, 5, 5, 1]] -) -def test_np_matmul_2d_2d_vs_numpy( - pk_ufunc, numpy_ufunc, pk_dtype, numpy_dtype, test_dim -): - - N1 = test_dim[0] - M1 = test_dim[1] - N2 = test_dim[2] - M2 = test_dim[3] - rng = default_rng(123) - np1 = rng.random((N1, M1)).astype(numpy_dtype) - np2 = rng.random((N2, M2)).astype(numpy_dtype) - expected = numpy_ufunc(np1, np2) - - view1: pk.View2d = pk.View([N1, M1], pk_dtype) - view1[:] = np1 - view2: pk.View2d = pk.View([N2, M2], pk_dtype) - view2[:] = np2 - actual = pk_ufunc(view1, view2) - - assert_allclose(actual, expected, rtol=1.5e-7) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.np_matmul, np.matmul), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize("test_dim", [[4, 4, 4], [4, 3, 3], [1, 1, 1], [2, 5, 5]]) -def test_np_matmul_2d_1d_vs_numpy(pk_ufunc, numpy_ufunc, numpy_dtype, test_dim): - - N1 = test_dim[0] - M1 = test_dim[1] - N2 = test_dim[2] - rng = default_rng(123) - np1 = rng.random((N1, M1)).astype(numpy_dtype) - np2 = rng.random(N2).astype(numpy_dtype) - expected = numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) - actual = pk_ufunc(view1, view2) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.np_matmul, np.matmul), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize("test_dim", [[4, 4, 4], [3, 3, 6], [1, 1, 1], [5, 5, 1]]) -def test_np_matmul_1d_2d_vs_numpy(pk_ufunc, numpy_ufunc, numpy_dtype, test_dim): - - N1 = test_dim[0] - N2 = test_dim[1] - M2 = test_dim[2] - rng = default_rng(123) - np1 = rng.random(N1).astype(numpy_dtype) - np2 = rng.random((N2, M2)).astype(numpy_dtype) - expected = numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) - actual = pk_ufunc(view1, view2) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize( - "test_dim", [[4, 3, 3], [3, 1, 6], [1, 4, 2], [5, 6, 1], [4, 3, 2, 1], [2, 3, 2, 4]] -) -def test_np_matmul_fails(numpy_dtype, test_dim): - N1 = None - N2 = None - M1 = None - M2 = None - np1 = None - rng = default_rng(123) - - if len(test_dim) == 3: - N1 = test_dim[0] - N2 = test_dim[1] - M2 = test_dim[2] - np1 = rng.random(N1).astype(numpy_dtype) - - if len(test_dim) == 4: - N1 = test_dim[0] - M1 = test_dim[1] - N2 = test_dim[2] - M2 = test_dim[3] - np1 = rng.random((N1, M1)).astype(numpy_dtype) - - np2 = rng.random((N2, M2)).astype(numpy_dtype) - - with pytest.raises(RuntimeError) as e_info: - view1 = pk.array(np1) - view2 = pk.array(np2) - pk.np_matmul(view1, view2) # Should fail with 1d x 2d - - err_np_matmul = ( - "Matrix dimensions are not compatible for multiplication: {} and {}".format( - view1.shape, view2.shape - ) - ) - assert e_info.value.args[0] == err_np_matmul - - with pytest.raises(RuntimeError) as e_info: - pk.np_matmul(view2, view1) # should fail with 2d x 1 as well - - err_np_matmul = ( - "Matrix dimensions are not compatible for multiplication: {} and {}".format( - view2.shape, view1.shape - ) - ) - assert e_info.value.args[0] == err_np_matmul - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [(pk.subtract, np.subtract), (pk.add, np.add), (pk.multiply, np.multiply)], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -def test_multi_array_2d_exposed_ufuncs_vs_numpy(pk_ufunc, numpy_ufunc, numpy_dtype): - N = 4 - M = 7 - rng = default_rng(123) - np1 = rng.random((N, M)).astype(numpy_dtype) - np2 = rng.random((N, M)).astype(numpy_dtype) - expected = numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) - actual = pk_ufunc(view1, view2) - - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.subtract, np.subtract), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize( - "test_dim", - [[4, 3, 1, 1], [4, 3, 1, 3], [4, 3, 4, 1], [4, 3, 1], [4, 3, 3], [4, 3], [4]], -) -def test_broadcast_array_exposed_ufuncs_vs_numpy( - pk_ufunc, numpy_ufunc, numpy_dtype, test_dim -): - - np1 = None - np2 = None - rng = default_rng(123) - scalar = 3.0 - - if len(test_dim) == 4: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = rng.random((test_dim[2], test_dim[3])).astype(numpy_dtype) - elif len(test_dim) == 3: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = rng.random((test_dim[2])).astype(numpy_dtype) - elif len(test_dim) == 2: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = scalar # 2d with scalar - elif len(test_dim) == 1: - np1 = rng.random((test_dim[0])).astype(numpy_dtype) - np2 = scalar # 1d with scalar - else: - raise NotImplementedError( - "Invalid test conditions: Broadcasting operations are only supported uptil 2D" - ) - - assert ( - np1 is not None and np2 is not None - ), "Invalid test conditions: Are parameters uptil 2D?" - - expected = numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) if isinstance(np2, np.ndarray) else np2 - actual = pk_ufunc(view1, view2) - - assert_allclose(expected, actual) - - -@pytest.mark.parametrize( - "pk_dtype, numpy_dtype", - [ - (pk.double, np.float64), - (pk.float, np.float32), - ], -) -@pytest.mark.parametrize( - "in_arr", - [ - np.array([-5, 4.5, np.nan]), - np.array([np.nan, np.nan, np.nan]), - ], -) -def test_sign_1d_special_cases(in_arr, pk_dtype, numpy_dtype): - in_arr = in_arr.astype(numpy_dtype) - view: pk.View1D = pk.View([in_arr.size], pk_dtype) - view[:] = in_arr - expected = np.sign(in_arr) - actual = pk.sign(view=view) - assert_allclose(actual, expected) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.copyto, np.copyto), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -def test_copyto_1d(pk_ufunc, numpy_ufunc, numpy_dtype): - N = 4 - M = 7 - rng = default_rng(123) - np1 = rng.random((N, M)).astype(numpy_dtype) - np2 = rng.random((N, M)).astype(numpy_dtype) - numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) - pk_ufunc(view1, view2) - - assert_allclose(np1, view1) - - -@pytest.mark.parametrize( - "pk_ufunc, numpy_ufunc", - [ - (pk.subtract, np.subtract), - ], -) -@pytest.mark.parametrize( - "numpy_dtype", - [ - (np.float64), - (np.float32), - ], -) -@pytest.mark.parametrize( - "test_dim", - [ - [4, 3, 4, 3], - [4, 3, 1, 1], - [4, 3, 1, 3], - [4, 3, 4, 1], - [4, 3, 1], - [4, 3, 3], - [4, 3], - [4], - ], -) -def test_copyto_broadcast_2d(pk_ufunc, numpy_ufunc, numpy_dtype, test_dim): - np1 = None - np2 = None - rng = default_rng(123) - scalar = 3.0 - - if len(test_dim) == 4: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = rng.random((test_dim[2], test_dim[3])).astype(numpy_dtype) - elif len(test_dim) == 3: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = rng.random((test_dim[2])).astype(numpy_dtype) - elif len(test_dim) == 2: - np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) - np2 = scalar # 2d with scalar - elif len(test_dim) == 1: - np1 = rng.random((test_dim[0])).astype(numpy_dtype) - np2 = scalar # 1d with scalar - else: - raise NotImplementedError( - "Invalid test conditions: Broadcasting operations are only supported uptil 2D" - ) - - assert ( - np1 is not None and np2 is not None - ), "Invalid test conditions: Are parameters uptil 2D?" - - numpy_ufunc(np1, np2) - - view1 = pk.array(np1) - view2 = pk.array(np2) if isinstance(np2, np.ndarray) else np2 - pk_ufunc(view1, view2) - - assert_allclose(np1, view1) - - -@pytest.mark.parametrize( - "input_dtype", - [ - pk.double, - pk.float, - ], -) -@pytest.mark.parametrize( - "pk_ufunc", - [ - pk.floor, - pk.round, - pk.ceil, - pk.trunc, - ], -) -@pytest.mark.parametrize( - "shape", - [ - [1], - [1, 1], - [1, 1, 1], - ], -) -def test_rounding_dtype_preservation(input_dtype, pk_ufunc, shape): - # at the time of writing the array API standard - # conformance test suite doesn't appear to probe - # floating point data types for many of the rounding - # functions - - # for now, we simply test data type preservation - # of output vs. input so that we flush these codepaths - # a bit - view = pk.View(shape, input_dtype) - actual_dtype = pk_ufunc(view).dtype - assert actual_dtype.value == input_dtype.value From 275c89d21b33c35bba051c5653c8465ef26fa21b Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Sat, 21 Feb 2026 21:25:46 -0600 Subject: [PATCH 05/23] fix array api tests and further removed pk usage --- .github/workflows/array_api.yml | 2 +- examples/LogisticRegression/LR.py | 39 ++++++++++++++----------------- examples/NaiveBayes/GaussianNB.py | 32 +++++++++++-------------- 3 files changed, 31 insertions(+), 42 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index b2c727dc..3ed00af5 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -57,4 +57,4 @@ jobs: # for hypothesis-driven test case generation # pytest $GITHUB_WORKSPACE/tools/pre_compile_ufuncs.py -s # only run a subset of the conformance tests to get started - pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_operators_and_elementwise_functions.py::test_log10 array_api_tests/test_operators_and_elementwise_functions.py::test_sqrt array_api_tests/test_operators_and_elementwise_functions.py::test_isfinite array_api_tests/test_operators_and_elementwise_functions.py::test_log2 array_api_tests/test_operators_and_elementwise_functions.py::test_log1p array_api_tests/test_operators_and_elementwise_functions.py::test_isinf array_api_tests/test_operators_and_elementwise_functions.py::test_log array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_sign array_api_tests/test_operators_and_elementwise_functions.py::test_square array_api_tests/test_operators_and_elementwise_functions.py::test_cos array_api_tests/test_operators_and_elementwise_functions.py::test_round array_api_tests/test_operators_and_elementwise_functions.py::test_trunc array_api_tests/test_operators_and_elementwise_functions.py::test_ceil array_api_tests/test_operators_and_elementwise_functions.py::test_floor array_api_tests/test_operators_and_elementwise_functions.py::test_exp array_api_tests/test_operators_and_elementwise_functions.py::test_sin array_api_tests/test_operators_and_elementwise_functions.py::test_tan array_api_tests/test_operators_and_elementwise_functions.py::test_tanh array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like array_api_tests/test_operators_and_elementwise_functions.py::test_positive array_api_tests/test_operators_and_elementwise_functions.py::test_isnan array_api_tests/test_operators_and_elementwise_functions.py::test_equal "array_api_tests/test_has_names.py::test_has_names[array_method-__pos__]" + pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_sign array_api_tests/test_operators_and_elementwise_functions.py::test_round array_api_tests/test_operators_and_elementwise_functions.py::test_trunc array_api_tests/test_operators_and_elementwise_functions.py::test_ceil array_api_tests/test_operators_and_elementwise_functions.py::test_floor array_api_tests/test_operators_and_elementwise_functions.py::test_isnan array_api_tests/test_operators_and_elementwise_functions.py::test_equal array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like diff --git a/examples/LogisticRegression/LR.py b/examples/LogisticRegression/LR.py index 3fea34a1..e9cfa614 100644 --- a/examples/LogisticRegression/LR.py +++ b/examples/LogisticRegression/LR.py @@ -31,7 +31,6 @@ import numbers import numpy as np -import pykokkos as pk import warnings from joblib import Parallel, effective_n_jobs @@ -62,12 +61,8 @@ ) -def asarray(arr, dtype=pk.double): - arr = np.asarray(arr) - - view = pk.View(arr.shape, dtype) - view[:] = arr - return view +def asarray(arr, dtype=np.float64): + return np.asarray(arr, dtype=dtype) def _check_solver(solver, penalty, dual): @@ -267,7 +262,7 @@ def _logistic_regression_path( The "copy" parameter was removed. """ if isinstance(Cs, numbers.Integral): - Cs = pk.logspace(-4, 4, Cs) + Cs = np.logspace(-4, 4, Cs) solver = _check_solver(solver, penalty, dual) @@ -301,9 +296,9 @@ def _logistic_regression_path( # multinomial case this is not necessary. if multi_class == "ovr": - w0 = pk.zeros(n_features + int(fit_intercept), dtype=X.dtype) + w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype) mask = y == pos_class - y_bin = pk.ones(y.shape, dtype=X.dtype) + y_bin = np.ones(y.shape, dtype=X.dtype) if solver in ["lbfgs", "newton-cg"]: # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead # of in [-1, 1]. @@ -334,10 +329,10 @@ def _logistic_regression_path( Y_multi = asarray(lbin.fit_transform(y)) if Y_multi.shape[1] == 1: Y_multi = np.hstack( - np.negative(np.subtract(Y_multi, asarray([1]))), Y_multi + (np.negative(np.subtract(Y_multi, asarray([1]))), Y_multi) ) - w0 = pk.zeros((classes.size, n_features + int(fit_intercept)), dtype=X.dtype) + w0 = np.zeros((classes.size, n_features + int(fit_intercept)), dtype=X.dtype) if coef is not None: # it must work both giving the bias term and not @@ -384,7 +379,7 @@ def _logistic_regression_path( # i.e. 1d-arrays. LinearModelLoss expects classes to be contiguous and # reconstructs the 2d-array via w0.reshape((n_classes, -1), order="F"). # As w0 is F-contiguous, ravel(order="F") also avoids a copy. - w0 = pk.ravel(w0, order="F") + w0 = np.ravel(w0, order="F") loss = LinearModelLoss( base_loss=HalfMultinomialLoss(n_classes=classes.size), @@ -412,15 +407,15 @@ def _logistic_regression_path( func = loss.loss grad = loss.gradient hess = loss.gradient_hessian_product # hess = [gradient, hessp] - warm_start_sag = {"coef": pk.expand_dims(w0, axis=1)} + warm_start_sag = {"coef": np.expand_dims(w0, axis=1)} coefs = list() - n_iter = pk.zeros(len(Cs), dtype=pk.int32) + n_iter = np.zeros(len(Cs), dtype=np.int32) for i, C in enumerate(Cs): if solver == "lbfgs": l2_reg_strength = 1.0 / C - iprint = [-1, 50, 1, 100, 101][pk.searchsorted([0, 1, 2, 3], verbose)] + iprint = [-1, 50, 1, 100, 101][np.searchsorted([0, 1, 2, 3], verbose)] opt_res = optimize.minimize( func, np.asarray(w0), @@ -471,9 +466,9 @@ def _logistic_regression_path( ) coef_ = asarray(coef_) if fit_intercept: - w0 = np.hstack(pk.ravel(coef_), intercept_) + w0 = np.hstack((np.ravel(coef_), intercept_)) else: - w0 = pk.ravel(coef_) + w0 = np.ravel(coef_) elif solver in ["sag", "saga"]: if multi_class == "multinomial": @@ -518,7 +513,7 @@ def _logistic_regression_path( if multi_class == "multinomial": n_classes = max(2, classes.size) if solver in ["lbfgs", "newton-cg"]: - multi_w0 = pk.reshape(w0, (n_classes, -1), order="F") + multi_w0 = np.reshape(w0, (n_classes, -1), order="F") else: multi_w0 = w0 coefs.append(asarray(multi_w0)) @@ -829,7 +824,7 @@ def fit(self, X, y, sample_weight=None): "Setting penalty='none' will ignore the C and l1_ratio parameters" ) # Note that check for l1_ratio is done right above - C_ = pk.inf + C_ = np.inf penalty = "l2" else: C_ = self.C @@ -969,7 +964,7 @@ def fit(self, X, y, sample_weight=None): ) fold_coefs_, _, n_iter_ = zip(*fold_coefs_) - self.n_iter_ = pk.col(asarray(n_iter_), 0) + self.n_iter_ = np.array(n_iter_) n_features = X.shape[1] if multi_class == "multinomial": @@ -984,7 +979,7 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = self.coef_[:, -1] self.coef_ = self.coef_[:, :-1] else: - self.intercept_ = pk.zeros(n_classes) + self.intercept_ = np.zeros(n_classes) return self diff --git a/examples/NaiveBayes/GaussianNB.py b/examples/NaiveBayes/GaussianNB.py index e1dff18f..4d6a7488 100644 --- a/examples/NaiveBayes/GaussianNB.py +++ b/examples/NaiveBayes/GaussianNB.py @@ -36,17 +36,12 @@ from math import pi from typing import Sequence -import pykokkos as pk import numpy as np from sklearn.base import BaseEstimator def asarray(arr): - arr = np.asarray(arr) - - view = pk.View(arr.shape, pk.double) - view[:] = arr - return view + return np.asarray(arr, dtype=np.float64) def type_of_target(y, input_name=""): @@ -103,7 +98,7 @@ def type_of_target(y, input_name=""): def _unique_multiclass(y): if hasattr(y, "__array__"): - return np.unique(asarray(y)) + return np.unique(np.array(asarray(y))) else: return set(y) @@ -140,8 +135,7 @@ def unique_labels(*ys): raise ValueError("Mix of label input types (string and number)") sorted_label = sorted(ys_labels) - labels = pk.View([len(sorted_label)], pk.double) - labels[:] = sorted_label + labels = np.array(sorted_label, dtype=np.float64) return labels @@ -536,17 +530,17 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): # will cause numerical errors. To address this, we artificially # boost the variance by epsilon, a small fraction of the standard # deviation of the largest dimension. - self.epsilon_ = self.var_smoothing * pk.find_max(np.var(np.array(X), axis=0)) + self.epsilon_ = self.var_smoothing * np.max(np.var(np.array(X), axis=0)) if first_call: # This is the first call to partial_fit: # initialize various cumulative counters n_features = X.shape[1] n_classes = len(self.classes_) - self.theta_ = pk.zeros((n_classes, n_features)) - self.var_ = pk.zeros((n_classes, n_features)) + self.theta_ = np.zeros((n_classes, n_features), dtype=np.float64) + self.var_ = np.zeros((n_classes, n_features), dtype=np.float64) - self.class_count_ = pk.zeros(n_classes, dtype=pk.double) + self.class_count_ = np.zeros(n_classes, dtype=np.float64) # Initialise the class prior # Take into account the priors @@ -561,7 +555,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): self.class_prior_ = priors else: # Initialize the priors to zeros for each class - self.class_prior_ = pk.zeros(len(self.classes_), dtype=pk.double) + self.class_prior_ = np.zeros(len(self.classes_), dtype=np.float64) else: if X.shape[1] != self.theta_.shape[1]: msg = "Number of features %d does not match previous data %d." @@ -574,14 +568,14 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): unique_y = np.unique(y) unique_y_in_classes = np.in1d(unique_y, classes) - if not pk.all(unique_y_in_classes): + if not np.all(unique_y_in_classes): raise ValueError( "The target label(s) %s in y do not exist in the initial classes %s" % (unique_y[np.logical_not(unique_y_in_classes)], classes) ) for y_i in unique_y: - i = int(pk.searchsorted(classes, y_i)) # linear search + i = int(np.searchsorted(classes, y_i)) # linear search X_i = X[y == y_i, :] if sample_weight is not None: @@ -604,7 +598,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): # Update if only no priors is provided if self.priors is None: # Empirical prior, with sample_weight taken into account - self.class_prior_ = np.divide(self.class_count_, pk.sum(self.class_count_)) + self.class_prior_ = np.divide(self.class_count_, np.sum(self.class_count_)) return self @@ -615,11 +609,11 @@ def _joint_log_likelihood(self, X): for i in range(total_classes): jointi = np.log(self.class_prior_[i]) - n_ij = -0.5 * pk.sum(np.log(np.multiply(self.var_[i, :], 2.0 * pi))) + n_ij = -0.5 * np.sum(np.log(self.var_[i, :] * 2.0 * pi)) n_ij = np.add( np.negative( np.multiply( - pk.sum( + np.sum( np.divide( np.power(np.add(X, np.negative(self.theta_[i, :])), 2), self.var_[i, :], From e1bd7ef6cc0d0809e4e61b35202143f3d0ee5e7a Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Sat, 21 Feb 2026 21:53:50 -0600 Subject: [PATCH 06/23] fix array api test --- pykokkos/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 11ce5135..fbfeb0c7 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -33,6 +33,14 @@ from pykokkos.lib.constants import e, pi, inf, nan from pykokkos.interface.views import astype +import numpy as np + +isnan = np.isnan +isinf = np.isinf +isfinite = np.isfinite +equal = np.equal + + runtime_singleton.runtime = Runtime() import weakref From f056ad9b66a3736e7cc1be6fd0f824569414091a Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 25 Feb 2026 15:56:03 -0600 Subject: [PATCH 07/23] pass tests --- pykokkos/__init__.py | 5 +++++ pykokkos/interface/data_types.py | 12 ++++++++++++ pykokkos/interface/views.py | 8 ++------ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index fbfeb0c7..4a1c88ba 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -39,6 +39,11 @@ isinf = np.isinf isfinite = np.isfinite equal = np.equal +sign = np.sign +round = np.round +trunc = np.trunc +ceil = np.ceil +floor = np.floor runtime_singleton.runtime = Runtime() diff --git a/pykokkos/interface/data_types.py b/pykokkos/interface/data_types.py index bdf1e94e..0df80c35 100644 --- a/pykokkos/interface/data_types.py +++ b/pykokkos/interface/data_types.py @@ -36,6 +36,18 @@ class DataType(Enum): class DataTypeClass: pass +class DataTypeMeta(type): + def __eq__(cls, other): + if isinstance(other, np.dtype): + return hasattr(cls, 'np_equiv') and cls.np_equiv == other.type + return super().__eq__(other) + + def __hash__(cls): + return super().__hash__() + + +class DataTypeClass(metaclass=DataTypeMeta): + pass class uint8(DataTypeClass): value = kokkos.uint8 diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index ada65dfd..f9acf6df 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -538,9 +538,7 @@ def __eq__(self, other): else: raise ValueError("unexpected types!") result_np = np.equal(np.array(self), np.array(new_other)) - result = pk.View(result_np.shape, dtype=pk.bool) - result[:] = result_np - return result + return result_np def __hash__(self): try: @@ -703,9 +701,7 @@ def __eq__(self, other): else: raise ValueError("unexpected types!") result_np = np.equal(np.array(self), np.array(new_other)) - result = pk.View(result_np.shape, dtype=pk.bool) - result[:] = result_np - return result + return result_np def __add__(self, other): if isinstance(other, float): From d2962f07ec4a7ebadec2561a47ced5cd8b311e22 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 25 Feb 2026 15:57:16 -0600 Subject: [PATCH 08/23] format --- pykokkos/__init__.py | 10 +++++----- pykokkos/interface/data_types.py | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 4a1c88ba..66d95534 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -39,11 +39,11 @@ isinf = np.isinf isfinite = np.isfinite equal = np.equal -sign = np.sign -round = np.round -trunc = np.trunc -ceil = np.ceil -floor = np.floor +sign = np.sign +round = np.round +trunc = np.trunc +ceil = np.ceil +floor = np.floor runtime_singleton.runtime = Runtime() diff --git a/pykokkos/interface/data_types.py b/pykokkos/interface/data_types.py index 0df80c35..baaf32c9 100644 --- a/pykokkos/interface/data_types.py +++ b/pykokkos/interface/data_types.py @@ -36,10 +36,11 @@ class DataType(Enum): class DataTypeClass: pass + class DataTypeMeta(type): def __eq__(cls, other): if isinstance(other, np.dtype): - return hasattr(cls, 'np_equiv') and cls.np_equiv == other.type + return hasattr(cls, "np_equiv") and cls.np_equiv == other.type return super().__eq__(other) def __hash__(cls): @@ -49,6 +50,7 @@ def __hash__(cls): class DataTypeClass(metaclass=DataTypeMeta): pass + class uint8(DataTypeClass): value = kokkos.uint8 np_equiv = np.uint8 From 5ea2991b34dec6677fd7d718ae140ff53aa19ca8 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 25 Feb 2026 16:08:26 -0600 Subject: [PATCH 09/23] remove duplicate --- pykokkos/interface/data_types.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pykokkos/interface/data_types.py b/pykokkos/interface/data_types.py index baaf32c9..5d1bf19b 100644 --- a/pykokkos/interface/data_types.py +++ b/pykokkos/interface/data_types.py @@ -33,10 +33,6 @@ class DataType(Enum): complex128 = kokkos.complex_float64_dtype -class DataTypeClass: - pass - - class DataTypeMeta(type): def __eq__(cls, other): if isinstance(other, np.dtype): From 97be73f9e7fdb3bd26d46ede28704960ad3859db Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 25 Feb 2026 16:22:44 -0600 Subject: [PATCH 10/23] revert data_type.py --- pykokkos/__init__.py | 45 +++++++++++++++++++++++++------- pykokkos/interface/data_types.py | 12 +-------- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 66d95534..9b7334b8 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -35,15 +35,42 @@ import numpy as np -isnan = np.isnan -isinf = np.isinf -isfinite = np.isfinite -equal = np.equal -sign = np.sign -round = np.round -trunc = np.trunc -ceil = np.ceil -floor = np.floor +class PKArray(np.ndarray): + def __new__(cls, array): + return np.asarray(array).view(cls) + + @property + def dtype(self): + from pykokkos.interface import data_types as dt + mapping = { + np.dtype('bool'): dt.bool, + np.dtype('int8'): dt.int8, + np.dtype('int16'): dt.int16, + np.dtype('int32'): dt.int32, + np.dtype('int64'): dt.int64, + np.dtype('uint8'): dt.uint8, + np.dtype('uint16'): dt.uint16, + np.dtype('uint32'): dt.uint32, + np.dtype('uint64'): dt.uint64, + np.dtype('float32'): dt.float32, + np.dtype('float64'): dt.float64, + } + return mapping.get(super().dtype, super().dtype) + +def _pk_func(np_func): + def wrapper(*args, **kwargs): + return PKArray(np_func(*args, **kwargs)) + return wrapper + +isnan = _pk_func(np.isnan) +isinf = _pk_func(np.isinf) +isfinite = _pk_func(np.isfinite) +equal = _pk_func(np.equal) +sign = _pk_func(np.sign) +round = _pk_func(np.round) +trunc = _pk_func(np.trunc) +ceil = _pk_func(np.ceil) +floor = _pk_func(np.floor) runtime_singleton.runtime = Runtime() diff --git a/pykokkos/interface/data_types.py b/pykokkos/interface/data_types.py index 5d1bf19b..bdf1e94e 100644 --- a/pykokkos/interface/data_types.py +++ b/pykokkos/interface/data_types.py @@ -33,17 +33,7 @@ class DataType(Enum): complex128 = kokkos.complex_float64_dtype -class DataTypeMeta(type): - def __eq__(cls, other): - if isinstance(other, np.dtype): - return hasattr(cls, "np_equiv") and cls.np_equiv == other.type - return super().__eq__(other) - - def __hash__(cls): - return super().__hash__() - - -class DataTypeClass(metaclass=DataTypeMeta): +class DataTypeClass: pass From 4e42b48b8d13ff9961bfec917477cc54f045f278 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 25 Feb 2026 16:23:48 -0600 Subject: [PATCH 11/23] format --- pykokkos/__init__.py | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 9b7334b8..d9ed9ab9 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -35,6 +35,7 @@ import numpy as np + class PKArray(np.ndarray): def __new__(cls, array): return np.asarray(array).view(cls) @@ -42,35 +43,39 @@ def __new__(cls, array): @property def dtype(self): from pykokkos.interface import data_types as dt + mapping = { - np.dtype('bool'): dt.bool, - np.dtype('int8'): dt.int8, - np.dtype('int16'): dt.int16, - np.dtype('int32'): dt.int32, - np.dtype('int64'): dt.int64, - np.dtype('uint8'): dt.uint8, - np.dtype('uint16'): dt.uint16, - np.dtype('uint32'): dt.uint32, - np.dtype('uint64'): dt.uint64, - np.dtype('float32'): dt.float32, - np.dtype('float64'): dt.float64, + np.dtype("bool"): dt.bool, + np.dtype("int8"): dt.int8, + np.dtype("int16"): dt.int16, + np.dtype("int32"): dt.int32, + np.dtype("int64"): dt.int64, + np.dtype("uint8"): dt.uint8, + np.dtype("uint16"): dt.uint16, + np.dtype("uint32"): dt.uint32, + np.dtype("uint64"): dt.uint64, + np.dtype("float32"): dt.float32, + np.dtype("float64"): dt.float64, } return mapping.get(super().dtype, super().dtype) + def _pk_func(np_func): def wrapper(*args, **kwargs): return PKArray(np_func(*args, **kwargs)) + return wrapper -isnan = _pk_func(np.isnan) -isinf = _pk_func(np.isinf) + +isnan = _pk_func(np.isnan) +isinf = _pk_func(np.isinf) isfinite = _pk_func(np.isfinite) -equal = _pk_func(np.equal) -sign = _pk_func(np.sign) -round = _pk_func(np.round) -trunc = _pk_func(np.trunc) -ceil = _pk_func(np.ceil) -floor = _pk_func(np.floor) +equal = _pk_func(np.equal) +sign = _pk_func(np.sign) +round = _pk_func(np.round) +trunc = _pk_func(np.trunc) +ceil = _pk_func(np.ceil) +floor = _pk_func(np.floor) runtime_singleton.runtime = Runtime() From 3254455c316f7a631b8b8e725937de66f72cb0b8 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 25 Feb 2026 16:43:23 -0600 Subject: [PATCH 12/23] add pk_array helper --- pykokkos/lib/pk_array.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 pykokkos/lib/pk_array.py diff --git a/pykokkos/lib/pk_array.py b/pykokkos/lib/pk_array.py new file mode 100644 index 00000000..54e628c7 --- /dev/null +++ b/pykokkos/lib/pk_array.py @@ -0,0 +1,23 @@ +import numpy as np + +class PKArray(np.ndarray): + def __new__(cls, array): + return np.asarray(array).view(cls) + + @property + def dtype(self): + from pykokkos.interface import data_types as dt + mapping = { + np.dtype('bool'): dt.bool, + np.dtype('int8'): dt.int8, + np.dtype('int16'): dt.int16, + np.dtype('int32'): dt.int32, + np.dtype('int64'): dt.int64, + np.dtype('uint8'): dt.uint8, + np.dtype('uint16'): dt.uint16, + np.dtype('uint32'): dt.uint32, + np.dtype('uint64'): dt.uint64, + np.dtype('float32'): dt.float32, + np.dtype('float64'): dt.float64, + } + return mapping.get(super().dtype, super().dtype) \ No newline at end of file From 06fe71518b2304a2863613b6d790a2892348d852 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 25 Feb 2026 16:44:53 -0600 Subject: [PATCH 13/23] add pk_array lib --- pykokkos/__init__.py | 26 +------------------------- pykokkos/interface/views.py | 11 +++++++---- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index d9ed9ab9..512eafaf 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -34,31 +34,7 @@ from pykokkos.interface.views import astype import numpy as np - - -class PKArray(np.ndarray): - def __new__(cls, array): - return np.asarray(array).view(cls) - - @property - def dtype(self): - from pykokkos.interface import data_types as dt - - mapping = { - np.dtype("bool"): dt.bool, - np.dtype("int8"): dt.int8, - np.dtype("int16"): dt.int16, - np.dtype("int32"): dt.int32, - np.dtype("int64"): dt.int64, - np.dtype("uint8"): dt.uint8, - np.dtype("uint16"): dt.uint16, - np.dtype("uint32"): dt.uint32, - np.dtype("uint64"): dt.uint64, - np.dtype("float32"): dt.float32, - np.dtype("float64"): dt.float64, - } - return mapping.get(super().dtype, super().dtype) - +from pykokkos.lib.pk_array import PKArray def _pk_func(np_func): def wrapper(*args, **kwargs): diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index f9acf6df..c173390b 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -9,6 +9,7 @@ from typing import Dict, Generic, Iterator, List, Optional, Tuple, TypeVar, Union import numpy as np +from pykokkos.lib.pk_array import PKArray import pykokkos as pk from pykokkos.bindings import kokkos @@ -538,7 +539,7 @@ def __eq__(self, other): else: raise ValueError("unexpected types!") result_np = np.equal(np.array(self), np.array(new_other)) - return result_np + return PKArray(result_np) def __hash__(self): try: @@ -550,7 +551,9 @@ def __hash__(self): def __index__(self) -> int: return int(self.data[0]) - def __array__(self, dtype=None): + def __array__(self, dtype=None, copy=None): + if self.shape == (): + return np.squeeze(self.data) return self.data def __pos__(self): @@ -701,7 +704,7 @@ def __eq__(self, other): else: raise ValueError("unexpected types!") result_np = np.equal(np.array(self), np.array(new_other)) - return result_np + return PKArray(result_np) def __add__(self, other): if isinstance(other, float): @@ -739,7 +742,7 @@ def from_numpy( """ dtype: DataTypeClass - np_dtype = array.dtype.type + np_dtype = array.dtype.type if isinstance(array.dtype, np.dtype) else array.dtype.np_equiv if np_dtype is np.void and cp_array is not None: # This means that this is a cupy array passed through From 8f6dea9b342ad73e9f0a51dccbd6eff83aba2071 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Wed, 25 Feb 2026 16:45:21 -0600 Subject: [PATCH 14/23] format --- pykokkos/__init__.py | 1 + pykokkos/interface/views.py | 4 +++- pykokkos/lib/pk_array.py | 26 ++++++++++++++------------ 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 512eafaf..89964cae 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -36,6 +36,7 @@ import numpy as np from pykokkos.lib.pk_array import PKArray + def _pk_func(np_func): def wrapper(*args, **kwargs): return PKArray(np_func(*args, **kwargs)) diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index c173390b..a2a5b506 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -742,7 +742,9 @@ def from_numpy( """ dtype: DataTypeClass - np_dtype = array.dtype.type if isinstance(array.dtype, np.dtype) else array.dtype.np_equiv + np_dtype = ( + array.dtype.type if isinstance(array.dtype, np.dtype) else array.dtype.np_equiv + ) if np_dtype is np.void and cp_array is not None: # This means that this is a cupy array passed through diff --git a/pykokkos/lib/pk_array.py b/pykokkos/lib/pk_array.py index 54e628c7..26fc15b0 100644 --- a/pykokkos/lib/pk_array.py +++ b/pykokkos/lib/pk_array.py @@ -1,5 +1,6 @@ import numpy as np + class PKArray(np.ndarray): def __new__(cls, array): return np.asarray(array).view(cls) @@ -7,17 +8,18 @@ def __new__(cls, array): @property def dtype(self): from pykokkos.interface import data_types as dt + mapping = { - np.dtype('bool'): dt.bool, - np.dtype('int8'): dt.int8, - np.dtype('int16'): dt.int16, - np.dtype('int32'): dt.int32, - np.dtype('int64'): dt.int64, - np.dtype('uint8'): dt.uint8, - np.dtype('uint16'): dt.uint16, - np.dtype('uint32'): dt.uint32, - np.dtype('uint64'): dt.uint64, - np.dtype('float32'): dt.float32, - np.dtype('float64'): dt.float64, + np.dtype("bool"): dt.bool, + np.dtype("int8"): dt.int8, + np.dtype("int16"): dt.int16, + np.dtype("int32"): dt.int32, + np.dtype("int64"): dt.int64, + np.dtype("uint8"): dt.uint8, + np.dtype("uint16"): dt.uint16, + np.dtype("uint32"): dt.uint32, + np.dtype("uint64"): dt.uint64, + np.dtype("float32"): dt.float32, + np.dtype("float64"): dt.float64, } - return mapping.get(super().dtype, super().dtype) \ No newline at end of file + return mapping.get(super().dtype, super().dtype) From 58715ec05e18a84a0b16d96642ba072ca10744cd Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 2 Mar 2026 14:08:21 -0600 Subject: [PATCH 15/23] revert abstraction --- .github/workflows/array_api.yml | 2 +- pykokkos/__init__.py | 22 ++++------------------ 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 3ed00af5..f0cbcb18 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -57,4 +57,4 @@ jobs: # for hypothesis-driven test case generation # pytest $GITHUB_WORKSPACE/tools/pre_compile_ufuncs.py -s # only run a subset of the conformance tests to get started - pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_sign array_api_tests/test_operators_and_elementwise_functions.py::test_round array_api_tests/test_operators_and_elementwise_functions.py::test_trunc array_api_tests/test_operators_and_elementwise_functions.py::test_ceil array_api_tests/test_operators_and_elementwise_functions.py::test_floor array_api_tests/test_operators_and_elementwise_functions.py::test_isnan array_api_tests/test_operators_and_elementwise_functions.py::test_equal array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like + pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_isnan array_api_tests/test_operators_and_elementwise_functions.py::test_equal array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 89964cae..ac92b668 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -34,26 +34,12 @@ from pykokkos.interface.views import astype import numpy as np -from pykokkos.lib.pk_array import PKArray -def _pk_func(np_func): - def wrapper(*args, **kwargs): - return PKArray(np_func(*args, **kwargs)) - - return wrapper - - -isnan = _pk_func(np.isnan) -isinf = _pk_func(np.isinf) -isfinite = _pk_func(np.isfinite) -equal = _pk_func(np.equal) -sign = _pk_func(np.sign) -round = _pk_func(np.round) -trunc = _pk_func(np.trunc) -ceil = _pk_func(np.ceil) -floor = _pk_func(np.floor) - +isnan = np.isnan +isinf = np.isinf +isfinite = np.isfinite +equal = np.equal runtime_singleton.runtime = Runtime() From 9ec995ce48ea0118316398a7fbfb4e42a50faae8 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 2 Mar 2026 21:37:14 -0600 Subject: [PATCH 16/23] remove unnessesary test --- .github/workflows/array_api.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index f0cbcb18..087a6d35 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -57,4 +57,4 @@ jobs: # for hypothesis-driven test case generation # pytest $GITHUB_WORKSPACE/tools/pre_compile_ufuncs.py -s # only run a subset of the conformance tests to get started - pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_isnan array_api_tests/test_operators_and_elementwise_functions.py::test_equal array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like + pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_creation_functions.py::test_zeros array_api_tests/test_creation_functions.py::test_zeros_like array_api_tests/test_creation_functions.py::test_full_like From bd1c1e924c7eac65c23aa510d7910b31a567a123 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 2 Mar 2026 21:54:51 -0600 Subject: [PATCH 17/23] remove all PKArray usage --- pykokkos/interface/views.py | 5 ++--- pykokkos/lib/pk_array.py | 25 ------------------------- 2 files changed, 2 insertions(+), 28 deletions(-) delete mode 100644 pykokkos/lib/pk_array.py diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index a2a5b506..2f04fd13 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -9,7 +9,6 @@ from typing import Dict, Generic, Iterator, List, Optional, Tuple, TypeVar, Union import numpy as np -from pykokkos.lib.pk_array import PKArray import pykokkos as pk from pykokkos.bindings import kokkos @@ -539,7 +538,7 @@ def __eq__(self, other): else: raise ValueError("unexpected types!") result_np = np.equal(np.array(self), np.array(new_other)) - return PKArray(result_np) + return result_np def __hash__(self): try: @@ -704,7 +703,7 @@ def __eq__(self, other): else: raise ValueError("unexpected types!") result_np = np.equal(np.array(self), np.array(new_other)) - return PKArray(result_np) + return result_np def __add__(self, other): if isinstance(other, float): diff --git a/pykokkos/lib/pk_array.py b/pykokkos/lib/pk_array.py deleted file mode 100644 index 26fc15b0..00000000 --- a/pykokkos/lib/pk_array.py +++ /dev/null @@ -1,25 +0,0 @@ -import numpy as np - - -class PKArray(np.ndarray): - def __new__(cls, array): - return np.asarray(array).view(cls) - - @property - def dtype(self): - from pykokkos.interface import data_types as dt - - mapping = { - np.dtype("bool"): dt.bool, - np.dtype("int8"): dt.int8, - np.dtype("int16"): dt.int16, - np.dtype("int32"): dt.int32, - np.dtype("int64"): dt.int64, - np.dtype("uint8"): dt.uint8, - np.dtype("uint16"): dt.uint16, - np.dtype("uint32"): dt.uint32, - np.dtype("uint64"): dt.uint64, - np.dtype("float32"): dt.float32, - np.dtype("float64"): dt.float64, - } - return mapping.get(super().dtype, super().dtype) From 4159771b7a8bc6db6d5310695abffeebbfb8e2f3 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 9 Mar 2026 16:23:30 -0500 Subject: [PATCH 18/23] fix for other execution spaces --- pykokkos/__init__.py | 8 +- pykokkos/interface/views.py | 18 ++--- pykokkos/lib/ufuncs.py | 147 ++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 16 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index ac92b668..e0aeb0ba 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -33,13 +33,9 @@ from pykokkos.lib.constants import e, pi, inf, nan from pykokkos.interface.views import astype -import numpy as np +__array_api_version__ = "2021.12" - -isnan = np.isnan -isinf = np.isinf -isfinite = np.isfinite -equal = np.equal +__all__ = ["__array_api_version__"] runtime_singleton.runtime = Runtime() diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index 2f04fd13..a87de4e7 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -503,6 +503,8 @@ def _get_type(self, dtype: Union[DataType, type]) -> Optional[DataType]: return None def __eq__(self, other): + # avoid circular import with scoped import + from pykokkos.lib.ufuncs import _equal if isinstance(other, float): new_other = pk.View((), dtype=pk.double) @@ -537,8 +539,7 @@ def __eq__(self, other): new_other = other else: raise ValueError("unexpected types!") - result_np = np.equal(np.array(self), np.array(new_other)) - return result_np + return _equal(self, new_other) def __hash__(self): try: @@ -550,9 +551,7 @@ def __hash__(self): def __index__(self) -> int: return int(self.data[0]) - def __array__(self, dtype=None, copy=None): - if self.shape == (): - return np.squeeze(self.data) + def __array__(self, dtype=None): return self.data def __pos__(self): @@ -668,6 +667,8 @@ def _get_base_view(self, parent_view: Union[Subview, View]) -> View: return base_view def __eq__(self, other): + # avoid circular import with scoped import + from pykokkos.lib.ufuncs import _equal if isinstance(other, float): new_other = pk.View((), dtype=pk.double) @@ -702,8 +703,7 @@ def __eq__(self, other): new_other = other else: raise ValueError("unexpected types!") - result_np = np.equal(np.array(self), np.array(new_other)) - return result_np + return _equal(self, new_other) def __add__(self, other): if isinstance(other, float): @@ -741,9 +741,7 @@ def from_numpy( """ dtype: DataTypeClass - np_dtype = ( - array.dtype.type if isinstance(array.dtype, np.dtype) else array.dtype.np_equiv - ) + np_dtype = array.dtype.type if np_dtype is np.void and cp_array is not None: # This means that this is a cupy array passed through diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index e69de29b..f52c43a8 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -0,0 +1,147 @@ +import re +import math +from inspect import getmembers, isfunction +from typing import Optional + +import numpy as np +import pykokkos as pk +from pykokkos.lib import ufunc_workunits +from pykokkos.interface import ViewType + +kernel_dict = dict(getmembers(ufunc_workunits, isfunction)) + + +def _ufunc_kernel_dispatcher( + profiler_name: Optional[str], tid, dtype, ndims, op, sub_dispatcher, **kwargs +): + dtype_extractor = re.compile(r".*(?:dtype|data_types|DataType)\.(\w+)") + if ndims == 0: + ndims = 1 + res = dtype_extractor.match(str(dtype)) + dtype_str = res.group(1) + if dtype_str == "float32": + dtype_str = "float" + elif dtype_str == "float64": + dtype_str = "double" + function_name_str = f"{op}_impl_{ndims}d_{dtype_str}" + desired_workunit = kernel_dict[function_name_str] + # call the kernel + ret = sub_dispatcher(profiler_name, tid, desired_workunit, **kwargs) + return ret + + +def _broadcast_views(view1, view2): + # support broadcasting by using the same + # shape matching rules as NumPy + # TODO: determine if this can be done with + # more memory efficiency? + if view1.shape != view2.shape: + new_shape = np.broadcast_shapes(view1.shape, view2.shape) + view1_new = pk.View([*new_shape], dtype=view1.dtype) + view1_new[:] = view1 + view1 = view1_new + view2_new = pk.View([*new_shape], dtype=view2.dtype) + view2_new[:] = view2 + view2 = view2_new + return view1, view2 + + +def _typematch_views(view1, view2): + # very crude casting implementation + # for binary ufuncs + dtype1 = view1.dtype + dtype2 = view2.dtype + dtype_extractor = re.compile(r".*(?:data_types|DataType)\.(\w+)") + res1 = dtype_extractor.match(str(dtype1)) + res2 = dtype_extractor.match(str(dtype2)) + effective_dtype = dtype1 + if res1 is not None and res2 is not None: + res1_dtype_str = res1.group(1) + res2_dtype_str = res2.group(1) + if res1_dtype_str == "double": + res1_dtype_str = "float64" + elif res1_dtype_str == "float": + res1_dtype_str = "float32" + if res2_dtype_str == "double": + res2_dtype_str = "float64" + elif res2_dtype_str == "float": + res2_dtype_str = "float32" + if res1_dtype_str == "bool" or res2_dtype_str == "bool": + res1_dtype_str = "uint8" + dtype1 = pk.uint8 + res2_dtype_str = "uint8" + dtype2 = pk.uint8 + if ("int" in res1_dtype_str and "int" in res2_dtype_str) or ( + "float" in res1_dtype_str and "float" in res2_dtype_str + ): + dtype_1_width = int(res1_dtype_str.split("t")[1]) + dtype_2_width = int(res2_dtype_str.split("t")[1]) + if dtype_1_width >= dtype_2_width: + effective_dtype = dtype1 + view2_new = pk.View([*view2.shape], dtype=effective_dtype) + view2_new[:] = view2.data + view2 = view2_new + else: + effective_dtype = dtype2 + view1_new = pk.View([*view1.shape], dtype=effective_dtype) + view1_new[:] = view1.data + view1 = view1_new + return view1, view2, effective_dtype + + +def _equal(view1, view2, profiler_name: Optional[str] = None): + """ + Computes the truth value of ``view1_i`` == ``view2_i`` for each element + ``x1_i`` of the input view ``view1`` with the respective element ``x2_i`` + of the input view ``view2``. + + + Parameters + ---------- + view1 : pykokkos view + Input view. May have any data type. + view2 : pykokkos view + Input view. May have any data type, but must be shape-compatible + with ``view1`` via broadcasting. + + Returns + ------- + out : pykokkos view (bool) + Output view. + """ + if view1.size == 0 and view2.size == 0: + ret = pk.View((), dtype=pk.bool) + ret[...] = 1 + return ret + view1, view2 = _broadcast_views(view1, view2) + dtype1 = view1.dtype + dtype2 = view2.dtype + view1, view2, effective_dtype = _typematch_views(view1, view2) + ndims = len(view1.shape) + if ndims > 5: + raise NotImplementedError("equal() ufunc only supports up to 5D views") + out = pk.View([*view1.shape], dtype=pk.bool) + if view1.shape == (): + tid = 1 + else: + tid = view1.shape[0] + if isinstance(view1, pk.Subview): + new_view = pk.View((), dtype=view1.dtype) + new_view[:] = view1.data + view1 = new_view + if isinstance(view2, pk.Subview): + new_view = pk.View((), dtype=view2.dtype) + new_view[:] = view2.data + view2 = new_view + _ufunc_kernel_dispatcher( + profiler_name=profiler_name, + tid=tid, + dtype=effective_dtype, + ndims=ndims, + op="equal", + sub_dispatcher=pk.parallel_for, + out=out, + view1=view1, + view2=view2, + ) + return out From 5d61e304cb13714753f6720d15e96ce94fd8ba5c Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 9 Mar 2026 16:41:55 -0500 Subject: [PATCH 19/23] fix isnan and isfinite errors --- pykokkos/__init__.py | 2 ++ pykokkos/lib/ufuncs.py | 54 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index e0aeb0ba..3fadfa69 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -17,6 +17,8 @@ set_device_id, ) +from pykokkos.lib.ufuncs import _isnan as isnan, _isfinite as isfinite + from pykokkos.lib.info import iinfo, finfo from pykokkos.lib.create import zeros, zeros_like, ones, ones_like, full, full_like from pykokkos.lib.manipulate import reshape, ravel, expand_dims diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index f52c43a8..2a890ff9 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -145,3 +145,57 @@ def _equal(view1, view2, profiler_name: Optional[str] = None): view2=view2, ) return out + +def _isnan(view, profiler_name: Optional[str] = None): + dtype = view.dtype + ndims = len(view.shape) + if ndims > 2: + raise NotImplementedError("isnan() ufunc only supports up to 2D views") + out = pk.View([*view.shape], dtype=pk.bool) + if view.shape == (): + tid = 1 + else: + tid = view.shape[0] + if view.ndim == 0: + new_view = pk.View([1], dtype=view.dtype) + new_view[0] = view + view = new_view + _ufunc_kernel_dispatcher( + profiler_name=profiler_name, + tid=tid, + dtype=dtype, + ndims=ndims, + op="isnan", + sub_dispatcher=pk.parallel_for, + out=out, + view=view, + ) + return out + +def _isfinite(view, profiler_name: Optional[str] = None): + dtype = view.dtype + ndims = len(view.shape) + if ndims > 2: + raise NotImplementedError("isfinite() ufunc only supports up to 2D views") + if view.size == 0: + out = pk.View(view.shape, dtype=pk.bool) + return out + out = pk.View([*view.shape], dtype=pk.bool) + if view.shape == (): + new_view = pk.View([1], dtype=dtype) + new_view[:] = view + view = new_view + tid = 1 + else: + tid = view.shape[0] + _ufunc_kernel_dispatcher( + profiler_name=profiler_name, + tid=tid, + dtype=dtype, + ndims=ndims, + op="isfinite", + sub_dispatcher=pk.parallel_for, + out=out, + view=view, + ) + return out From 299b46fb5bd33394aa72fc520ae3d0fb6caebd2e Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 9 Mar 2026 16:53:15 -0500 Subject: [PATCH 20/23] add isinf function --- pykokkos/__init__.py | 2 +- pykokkos/lib/ufuncs.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 3fadfa69..12b0905d 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -17,7 +17,7 @@ set_device_id, ) -from pykokkos.lib.ufuncs import _isnan as isnan, _isfinite as isfinite +from pykokkos.lib.ufuncs import _isnan as isnan, _isinf as isinf, _isfinite as isfinite from pykokkos.lib.info import iinfo, finfo from pykokkos.lib.create import zeros, zeros_like, ones, ones_like, full, full_like diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 2a890ff9..9a16a4eb 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -146,6 +146,7 @@ def _equal(view1, view2, profiler_name: Optional[str] = None): ) return out + def _isnan(view, profiler_name: Optional[str] = None): dtype = view.dtype ndims = len(view.shape) @@ -172,6 +173,30 @@ def _isnan(view, profiler_name: Optional[str] = None): ) return out + +def _isinf(view, profiler_name: Optional[str] = None): + dtype = view.dtype + ndims = len(view.shape) + if ndims > 2: + raise NotImplementedError("isinf() ufunc only supports up to 2D views") + out = pk.View([*view.shape], dtype=pk.bool) + if view.shape == (): + tid = 1 + else: + tid = view.shape[0] + _ufunc_kernel_dispatcher( + profiler_name=profiler_name, + tid=tid, + dtype=dtype, + ndims=ndims, + op="isinf", + sub_dispatcher=pk.parallel_for, + out=out, + view=view, + ) + return out + + def _isfinite(view, profiler_name: Optional[str] = None): dtype = view.dtype ndims = len(view.shape) From 6b1d18537b2b7798ccb7bc28c983c09df6cf247b Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 9 Mar 2026 17:02:42 -0500 Subject: [PATCH 21/23] restore workunits --- pykokkos/lib/ufunc_workunits.py | 1393 +++++++++++++++++++++++++++++++ 1 file changed, 1393 insertions(+) diff --git a/pykokkos/lib/ufunc_workunits.py b/pykokkos/lib/ufunc_workunits.py index e69de29b..04c5606f 100644 --- a/pykokkos/lib/ufunc_workunits.py +++ b/pykokkos/lib/ufunc_workunits.py @@ -0,0 +1,1393 @@ +import pykokkos as pk + + +@pk.workunit +def exp_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): + out[tid] = exp(view[tid]) + + +@pk.workunit +def exp_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): + for i in range(view.extent(1)): + out[tid][i] = exp(view[tid][i]) + + +@pk.workunit +def exp_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): + out[tid] = exp(view[tid]) + + +@pk.workunit +def exp_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): + for i in range(view.extent(1)): + out[tid][i] = exp(view[tid][i]) + + +@pk.workunit +def sin_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): + out[tid] = sin(view[tid]) + + +@pk.workunit +def sin_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): + for i in range(view.extent(1)): + out[tid][i] = sin(view[tid][i]) + + +@pk.workunit +def sin_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): + out[tid] = sin(view[tid]) + + +@pk.workunit +def sin_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): + for i in range(view.extent(1)): + out[tid][i] = sin(view[tid][i]) + + +@pk.workunit +def tan_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): + out[tid] = tan(view[tid]) + + +@pk.workunit +def tan_impl_2d_double(tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double]): + for i in range(view.extent(1)): + out[tid][i] = tan(view[tid][i]) + + +@pk.workunit +def tan_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): + out[tid] = tan(view[tid]) + + +@pk.workunit +def tan_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): + for i in range(view.extent(1)): + out[tid][i] = tan(view[tid][i]) + + +@pk.workunit +def tanh_impl_1d_double( + tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] +): + out[tid] = tanh(view[tid]) + + +@pk.workunit +def tanh_impl_2d_double( + tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] +): + for i in range(view.extent(1)): + out[tid][i] = tanh(view[tid][i]) + + +@pk.workunit +def tanh_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): + out[tid] = tanh(view[tid]) + + +@pk.workunit +def tanh_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): + for i in range(view.extent(1)): + out[tid][i] = tanh(view[tid][i]) + + +@pk.workunit +def equal_impl_5d_int8( + tid: int, + view1: pk.View5D[pk.int8], + view2: pk.View5D[pk.int8], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_float( + tid: int, + view1: pk.View5D[pk.float], + view2: pk.View5D[pk.float], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_double( + tid: int, + view1: pk.View5D[pk.double], + view2: pk.View5D[pk.double], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_int16( + tid: int, + view1: pk.View5D[pk.int16], + view2: pk.View5D[pk.int16], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_int32( + tid: int, + view1: pk.View5D[pk.int32], + view2: pk.View5D[pk.int32], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_int64( + tid: int, + view1: pk.View5D[pk.int64], + view2: pk.View5D[pk.int64], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_uint8( + tid: int, + view1: pk.View5D[pk.uint8], + view2: pk.View5D[pk.uint8], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_bool( + tid: int, + view1: pk.View5D[pk.uint8], + view2: pk.View5D[pk.uint8], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_uint16( + tid: int, + view1: pk.View5D[pk.uint16], + view2: pk.View5D[pk.uint16], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_uint32( + tid: int, + view1: pk.View5D[pk.uint32], + view2: pk.View5D[pk.uint32], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_5d_uint64( + tid: int, + view1: pk.View5D[pk.uint64], + view2: pk.View5D[pk.uint64], + out: pk.View5D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = ( + view1[tid][i][j][k][l] == view2[tid][i][j][k][l] + ) + + +@pk.workunit +def equal_impl_4d_uint8( + tid: int, + view1: pk.View4D[pk.uint8], + view2: pk.View4D[pk.uint8], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_bool( + tid: int, + view1: pk.View4D[pk.uint8], + view2: pk.View4D[pk.uint8], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_float( + tid: int, + view1: pk.View4D[pk.float], + view2: pk.View4D[pk.float], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_double( + tid: int, + view1: pk.View4D[pk.double], + view2: pk.View4D[pk.double], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_uint16( + tid: int, + view1: pk.View4D[pk.uint16], + view2: pk.View4D[pk.uint16], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_uint32( + tid: int, + view1: pk.View4D[pk.uint32], + view2: pk.View4D[pk.uint32], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_uint64( + tid: int, + view1: pk.View4D[pk.uint64], + view2: pk.View4D[pk.uint64], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_3d_uint8( + tid: int, + view1: pk.View3D[pk.uint8], + view2: pk.View3D[pk.uint8], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_bool( + tid: int, + view1: pk.View3D[pk.uint8], + view2: pk.View3D[pk.uint8], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_uint16( + tid: int, + view1: pk.View3D[pk.uint16], + view2: pk.View3D[pk.uint16], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_uint32( + tid: int, + view1: pk.View3D[pk.uint32], + view2: pk.View3D[pk.uint32], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_uint64( + tid: int, + view1: pk.View3D[pk.uint64], + view2: pk.View3D[pk.uint64], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_float( + tid: int, + view1: pk.View3D[pk.float], + view2: pk.View3D[pk.float], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_double( + tid: int, + view1: pk.View3D[pk.double], + view2: pk.View3D[pk.double], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_2d_uint8( + tid: int, + view1: pk.View2D[pk.uint8], + view2: pk.View2D[pk.uint8], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_uint16( + tid: int, + view1: pk.View2D[pk.uint16], + view2: pk.View2D[pk.uint16], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_uint32( + tid: int, + view1: pk.View2D[pk.uint32], + view2: pk.View2D[pk.uint32], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_uint64( + tid: int, + view1: pk.View2D[pk.uint64], + view2: pk.View2D[pk.uint64], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_float( + tid: int, + view1: pk.View2D[pk.float], + view2: pk.View2D[pk.float], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_double( + tid: int, + view1: pk.View2D[pk.double], + view2: pk.View2D[pk.double], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_1d_uint8( + tid: int, + view1: pk.View1D[pk.uint8], + view2: pk.View1D[pk.uint8], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_bool( + tid: int, + view1: pk.View1D[pk.uint8], + view2: pk.View1D[pk.uint8], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_float( + tid: int, + view1: pk.View1D[pk.float], + view2: pk.View1D[pk.float], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_double( + tid: int, + view1: pk.View1D[pk.double], + view2: pk.View1D[pk.double], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_int8( + tid: int, + view1: pk.View1D[pk.int8], + view2: pk.View1D[pk.int8], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_int16( + tid: int, + view1: pk.View1D[pk.int16], + view2: pk.View1D[pk.int16], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_int32( + tid: int, + view1: pk.View1D[pk.int32], + view2: pk.View1D[pk.int32], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_int64( + tid: int, + view1: pk.View1D[pk.int64], + view2: pk.View1D[pk.int64], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_uint16( + tid: int, + view1: pk.View1D[pk.uint16], + view2: pk.View1D[pk.uint16], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_uint32( + tid: int, + view1: pk.View1D[pk.uint32], + view2: pk.View1D[pk.uint32], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_uint64( + tid: int, + view1: pk.View1D[pk.uint64], + view2: pk.View1D[pk.uint64], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_1d_int64( + tid: int, + view1: pk.View1D[pk.int64], + view2: pk.View1D[pk.int64], + out: pk.View1D[pk.uint8], +): + out[tid] = view1[tid] == view2[tid] + + +@pk.workunit +def equal_impl_2d_int8( + tid: int, + view1: pk.View2D[pk.int8], + view2: pk.View2D[pk.int8], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_bool( + tid: int, + view1: pk.View2D[pk.uint8], + view2: pk.View2D[pk.uint8], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_int16( + tid: int, + view1: pk.View2D[pk.int16], + view2: pk.View2D[pk.int16], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_int32( + tid: int, + view1: pk.View2D[pk.int32], + view2: pk.View2D[pk.int32], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_2d_int64( + tid: int, + view1: pk.View2D[pk.int64], + view2: pk.View2D[pk.int64], + out: pk.View2D[pk.uint8], +): + for i in range(view1.extent(1)): + out[tid][i] = view1[tid][i] == view2[tid][i] + + +@pk.workunit +def equal_impl_3d_int8( + tid: int, + view1: pk.View3D[pk.int8], + view2: pk.View3D[pk.int8], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_int16( + tid: int, + view1: pk.View3D[pk.int16], + view2: pk.View3D[pk.int16], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_int32( + tid: int, + view1: pk.View3D[pk.int32], + view2: pk.View3D[pk.int32], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_3d_int64( + tid: int, + view1: pk.View3D[pk.int64], + view2: pk.View3D[pk.int64], + out: pk.View3D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = view1[tid][i][j] == view2[tid][i][j] + + +@pk.workunit +def equal_impl_4d_int8( + tid: int, + view1: pk.View4D[pk.int8], + view2: pk.View4D[pk.int8], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_int16( + tid: int, + view1: pk.View4D[pk.int16], + view2: pk.View4D[pk.int16], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_int32( + tid: int, + view1: pk.View4D[pk.int32], + view2: pk.View4D[pk.int32], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def equal_impl_4d_int64( + tid: int, + view1: pk.View4D[pk.int64], + view2: pk.View4D[pk.int64], + out: pk.View4D[pk.uint8], +): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = view1[tid][i][j][k] == view2[tid][i][j][k] + + +@pk.workunit +def floor_impl_1d_double( + tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] +): + out[tid] = floor(view[tid]) + + +@pk.workunit +def floor_impl_2d_double( + tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] +): + for i in range(view.extent(1)): + out[tid][i] = floor(view[tid][i]) + + +@pk.workunit +def floor_impl_3d_double( + tid: int, view: pk.View3D[pk.double], out: pk.View3D[pk.double] +): + for i in range(view.extent(1)): + for j in range(view.extent(2)): + out[tid][i][j] = floor(view[tid][i][j]) + + +@pk.workunit +def floor_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): + out[tid] = floor(view[tid]) + + +@pk.workunit +def floor_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): + for i in range(view.extent(1)): + out[tid][i] = floor(view[tid][i]) + + +@pk.workunit +def floor_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.float]): + for i in range(view.extent(1)): + for j in range(view.extent(2)): + out[tid][i][j] = floor(view[tid][i][j]) + + +@pk.workunit +def ceil_impl_1d_double( + tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] +): + out[tid] = ceil(view[tid]) + + +@pk.workunit +def ceil_impl_2d_double( + tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] +): + for i in range(view.extent(1)): + out[tid][i] = ceil(view[tid][i]) + + +@pk.workunit +def ceil_impl_3d_double( + tid: int, view: pk.View3D[pk.double], out: pk.View3D[pk.double] +): + for i in range(view.extent(1)): + for j in range(view.extent(2)): + out[tid][i][j] = ceil(view[tid][i][j]) + + +@pk.workunit +def ceil_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): + out[tid] = ceil(view[tid]) + + +@pk.workunit +def ceil_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): + for i in range(view.extent(1)): + out[tid][i] = ceil(view[tid][i]) + + +@pk.workunit +def ceil_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.float]): + for i in range(view.extent(1)): + for j in range(view.extent(2)): + out[tid][i][j] = ceil(view[tid][i][j]) + + +@pk.workunit +def trunc_impl_1d_double( + tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] +): + out[tid] = trunc(view[tid]) + + +@pk.workunit +def trunc_impl_2d_double( + tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] +): + for i in range(view.extent(1)): + out[tid][i] = trunc(view[tid][i]) + + +@pk.workunit +def trunc_impl_3d_double( + tid: int, view: pk.View3D[pk.double], out: pk.View3D[pk.double] +): + for i in range(view.extent(1)): + for j in range(view.extent(2)): + out[tid][i][j] = trunc(view[tid][i][j]) + + +@pk.workunit +def trunc_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): + out[tid] = trunc(view[tid]) + + +@pk.workunit +def trunc_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): + for i in range(view.extent(1)): + out[tid][i] = trunc(view[tid][i]) + + +@pk.workunit +def trunc_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.float]): + for i in range(view.extent(1)): + for j in range(view.extent(2)): + out[tid][i][j] = trunc(view[tid][i][j]) + + +@pk.workunit +def round_impl_1d_double( + tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double] +): + out[tid] = round(view[tid]) + + +@pk.workunit +def round_impl_2d_double( + tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.double] +): + for i in range(view.extent(1)): + out[tid][i] = round(view[tid][i]) + + +@pk.workunit +def round_impl_3d_double( + tid: int, view: pk.View3D[pk.double], out: pk.View3D[pk.double] +): + for i in range(view.extent(1)): + for j in range(view.extent(2)): + out[tid][i][j] = round(view[tid][i][j]) + + +@pk.workunit +def round_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.float]): + out[tid] = round(view[tid]) + + +@pk.workunit +def round_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.float]): + for i in range(view.extent(1)): + out[tid][i] = round(view[tid][i]) + + +@pk.workunit +def round_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.float]): + for i in range(view.extent(1)): + for j in range(view.extent(2)): + out[tid][i][j] = round(view[tid][i][j]) + + +@pk.workunit +def isnan_impl_2d_double( + tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_uint8(tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_uint16( + tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_uint32( + tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_uint64( + tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_int16(tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_int32(tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_2d_int64(tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): + out[tid][i] = isnan(view[tid][i]) + + +@pk.workunit +def isnan_impl_1d_uint8(tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8]): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_uint16( + tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint8] +): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_uint32( + tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint8] +): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_uint64( + tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint8] +): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.uint8]): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_int16(tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.uint8]): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_int32(tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.uint8]): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_int64(tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.uint8]): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_double( + tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8] +): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8]): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isfinite_impl_1d_double( + tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_double( + tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_float( + tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_float( + tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_uint8( + tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_uint8( + tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.uint8]): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_int16( + tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_int16( + tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_uint16( + tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_uint16( + tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_int32( + tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_int32( + tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_uint32( + tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_uint32( + tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_int64( + tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_int64( + tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isfinite_impl_1d_uint64( + tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint8] +): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_2d_uint64( + tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isfinite(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_1d_double( + tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8] +): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8]): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_1d_int8(tid: int, view: pk.View1D[pk.int8], out: pk.View1D[pk.uint8]): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_1d_int64(tid: int, view: pk.View1D[pk.int64], out: pk.View1D[pk.uint8]): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_1d_int32(tid: int, view: pk.View1D[pk.int32], out: pk.View1D[pk.uint8]): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_1d_uint8(tid: int, view: pk.View1D[pk.uint8], out: pk.View1D[pk.uint8]): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_2d_uint8(tid: int, view: pk.View2D[pk.uint8], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_2d_float(tid: int, view: pk.View2D[pk.float], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_2d_double( + tid: int, view: pk.View2D[pk.double], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_2d_int8(tid: int, view: pk.View2D[pk.int8], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_2d_int64(tid: int, view: pk.View2D[pk.int64], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_1d_uint16( + tid: int, view: pk.View1D[pk.uint16], out: pk.View1D[pk.uint8] +): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_1d_int16(tid: int, view: pk.View1D[pk.int16], out: pk.View1D[pk.uint8]): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_2d_int16(tid: int, view: pk.View2D[pk.int16], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_2d_int32(tid: int, view: pk.View2D[pk.int32], out: pk.View2D[pk.uint8]): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_2d_uint16( + tid: int, view: pk.View2D[pk.uint16], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_1d_uint32( + tid: int, view: pk.View1D[pk.uint32], out: pk.View1D[pk.uint8] +): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_2d_uint32( + tid: int, view: pk.View2D[pk.uint32], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def isinf_impl_1d_uint64( + tid: int, view: pk.View1D[pk.uint64], out: pk.View1D[pk.uint8] +): + out[tid] = isinf(view[tid]) + + +@pk.workunit +def isinf_impl_2d_uint64( + tid: int, view: pk.View2D[pk.uint64], out: pk.View2D[pk.uint8] +): + for i in range(view.extent(1)): # type: ignore + out[tid][i] = isinf(view[tid][i]) # type: ignore + + +@pk.workunit +def matmul_impl_1d_double( + tid: int, + acc: pk.Acc[pk.double], + viewA: pk.View1D[pk.double], + viewB: pk.View2D[pk.double], +): + acc += viewA[tid] * viewB[0][tid] + + +@pk.workunit +def matmul_impl_1d_float( + tid: int, + acc: pk.Acc[pk.float], + viewA: pk.View1D[pk.float], + viewB: pk.View2D[pk.float], +): + acc += viewA[tid] * viewB[0][tid] + + +@pk.workunit +def reciprocal_impl_1d_double(tid: int, view: pk.View1D[pk.double]): + view[tid] = 1 / view[tid] # type: ignore + + +@pk.workunit +def reciprocal_impl_1d_float(tid: int, view: pk.View1D[pk.float]): + view[tid] = 1 / view[tid] # type: ignore + + +@pk.workunit +def reciprocal_impl_2d_double(tid: int, view: pk.View2D[pk.double]): + for i in range(view.extent(1)): # type: ignore + view[tid][i] = 1 / view[tid][i] # type: ignore + + +@pk.workunit +def reciprocal_impl_2d_float(tid: int, view: pk.View2D[pk.float]): + for i in range(view.extent(1)): # type: ignore + view[tid][i] = 1 / view[tid][i] # type: ignore From 0cd79dca92706ed6e484c0d7d4cb0c980b0544dd Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 9 Mar 2026 17:13:31 -0500 Subject: [PATCH 22/23] fix equal --- pykokkos/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 12b0905d..c5f277a5 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -17,7 +17,7 @@ set_device_id, ) -from pykokkos.lib.ufuncs import _isnan as isnan, _isinf as isinf, _isfinite as isfinite +from pykokkos.lib.ufuncs import _isnan as isnan, _isinf as isinf, _isfinite as isfinite, _equal as equal from pykokkos.lib.info import iinfo, finfo from pykokkos.lib.create import zeros, zeros_like, ones, ones_like, full, full_like From c97a5bc0a15faeb1ade4ff9700475b86d1c49de6 Mon Sep 17 00:00:00 2001 From: nayyirahsan Date: Mon, 9 Mar 2026 17:23:39 -0500 Subject: [PATCH 23/23] format --- pykokkos/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index c5f277a5..5ec25c86 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -17,7 +17,12 @@ set_device_id, ) -from pykokkos.lib.ufuncs import _isnan as isnan, _isinf as isinf, _isfinite as isfinite, _equal as equal +from pykokkos.lib.ufuncs import ( + _isnan as isnan, + _isinf as isinf, + _isfinite as isfinite, + _equal as equal, +) from pykokkos.lib.info import iinfo, finfo from pykokkos.lib.create import zeros, zeros_like, ones, ones_like, full, full_like