From 5001d8ce912f793220a30ee563268dfdc34f4967 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 26 Nov 2025 14:25:25 -0600 Subject: [PATCH 01/12] Update configure --- configure | 8 ++++---- configure.ac | 8 ++++---- src/Makevars | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/configure b/configure index d374245..457d354 100755 --- a/configure +++ b/configure @@ -5230,12 +5230,12 @@ fi ## Determine Default Backend ## ============================================================================= -if test "x${HAVE_OPENCL}" = x1; then - DEFAULT_BACKEND="CL_BACKEND" - BANDICOOT_CXXFLAGS="${BANDICOOT_CXXFLAGS} -DCOOT_DEFAULT_BACKEND=CL_BACKEND" -elif test "x${HAVE_CUDA}" = x1; then +if test "x${HAVE_CUDA}" = x1; then DEFAULT_BACKEND="CUDA_BACKEND" BANDICOOT_CXXFLAGS="${BANDICOOT_CXXFLAGS} -DCOOT_DEFAULT_BACKEND=CUDA_BACKEND" +elif test "x${HAVE_OPENCL}" = x1; then + DEFAULT_BACKEND="CL_BACKEND" + BANDICOOT_CXXFLAGS="${BANDICOOT_CXXFLAGS} -DCOOT_DEFAULT_BACKEND=CL_BACKEND" else as_fn_error $? " ================================================================================ diff --git a/configure.ac b/configure.ac index 81d6ab9..da65986 100644 --- a/configure.ac +++ b/configure.ac @@ -359,12 +359,12 @@ fi ## Determine Default Backend ## ============================================================================= -if test "x${HAVE_OPENCL}" = x1; then - DEFAULT_BACKEND="CL_BACKEND" - BANDICOOT_CXXFLAGS="${BANDICOOT_CXXFLAGS} -DCOOT_DEFAULT_BACKEND=CL_BACKEND" -elif test "x${HAVE_CUDA}" = x1; then +if test "x${HAVE_CUDA}" = x1; then DEFAULT_BACKEND="CUDA_BACKEND" BANDICOOT_CXXFLAGS="${BANDICOOT_CXXFLAGS} -DCOOT_DEFAULT_BACKEND=CUDA_BACKEND" +elif test "x${HAVE_OPENCL}" = x1; then + DEFAULT_BACKEND="CL_BACKEND" + BANDICOOT_CXXFLAGS="${BANDICOOT_CXXFLAGS} -DCOOT_DEFAULT_BACKEND=CL_BACKEND" else AC_MSG_ERROR([ ================================================================================ diff --git a/src/Makevars b/src/Makevars index 9155b2b..c758e45 100644 --- a/src/Makevars +++ b/src/Makevars @@ -6,7 +6,7 @@ PKG_CPPFLAGS = -I../inst/include ## Compiler flags from configure -PKG_CXXFLAGS = -DCOOT_USE_OPENCL -F/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks -DCOOT_USE_CLBLAST -I/opt/homebrew/opt/clblast/include -DCOOT_USE_CLBLAS -I/opt/homebrew/opt/clblas/include -DCOOT_DONT_USE_CUDA -DCOOT_DEFAULT_BACKEND=CL_BACKEND +PKG_CXXFLAGS = -DCOOT_USE_OPENCL -DCOOT_USE_CLBLAST -DCOOT_USE_CLBLAS -DCOOT_USE_CUDA -DCOOT_CUDA_INCLUDE_PATH=/usr/include/ -DCOOT_DEFAULT_BACKEND=CUDA_BACKEND $(SHLIB_OPENMP_CXXFLAGS) $(SHLIB_OPENMP_CXXFLAGS) ## Linker flags from configure PKG_LIBS = -L/opt/homebrew/opt/clblas/lib -lclBLAS -L/opt/homebrew/opt/clblast/lib -lclblast -framework OpenCL -L/Library/Frameworks/R.framework/Resources/lib -lRlapack -L/Library/Frameworks/R.framework/Resources/lib -lRblas -L/opt/gfortran/lib/gcc/aarch64-apple-darwin20.0/14.2.0 -L/opt/gfortran/lib -lemutls_w -lheapt_w -lgfortran -lquadmath From db973ecafd22d0559dfa8bc66c9daea0a19f0346 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Sat, 29 Nov 2025 07:08:09 -0600 Subject: [PATCH 02/12] Local snapshot of package --- R/RcppExports.R | 4 ++++ R/rcppbandicoot-package.R | 24 +++++++++++++++++++----- src/Makevars | 4 ++-- src/Makevars.in | 5 +++-- src/RcppExports.cpp | 12 ++++++++++++ src/coot.cpp | 6 ++++++ 6 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 src/coot.cpp diff --git a/R/RcppExports.R b/R/RcppExports.R index 38bea8b..270878c 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,6 +1,10 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +gpu_initialize <- function(type = "opencl", print_info = TRUE) { + invisible(.Call(`_RcppBandicoot_gpu_initialize`, type, print_info)) +} + #' Get Bandicoot version #' #' Prints the current version of the Bandicoot library. diff --git a/R/rcppbandicoot-package.R b/R/rcppbandicoot-package.R index bf71f21..107d7d6 100644 --- a/R/rcppbandicoot-package.R +++ b/R/rcppbandicoot-package.R @@ -9,10 +9,24 @@ NULL .onLoad <- function(libname, pkgname) { # Set the kernel path for Bandicoot OpenCL kernels - kernel_path <- system.file("include/bandicoot_bits/opencl/kernels", - package = "RcppBandicoot") + #kernel_path <- system.file("include/bandicoot_bits/opencl/kernels", + # package = "RcppBandicoot") - if (nzchar(kernel_path)) { - Sys.setenv(COOT_CL_KERNEL_PATH = kernel_path) - } + #if (nzchar(kernel_path)) { + # Sys.setenv(COOT_CL_KERNEL_PATH = kernel_path) + #} + + #cuda_path <- system.file("include/bandicoot_bits/cuda/kernels", + # package = "RcppBandicoot") + #if (nzchar(cuda_path)) { + # Sys.setenv(COOT_CUDA_KERNEL_PATH = kernel_path) + #} + + #kernel_path <- system.file("include/bandicoot_bits/", package = "RcppBandicoot") + #cat("kernel_path: ", kernel_path, "\n") + #if (nzchar(kernel_path)) { + # Sys.setenv(COOT_KERNEL_SOURCE_DIR = kernel_path) + #} + + #gpu_initialize("cuda", TRUE) } diff --git a/src/Makevars b/src/Makevars index c758e45..ffe832c 100644 --- a/src/Makevars +++ b/src/Makevars @@ -3,10 +3,10 @@ ## This file is processed by configure to generate Makevars ## It includes GPU backend configuration (OpenCL, CUDA) and other settings -PKG_CPPFLAGS = -I../inst/include +PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 ## Compiler flags from configure -PKG_CXXFLAGS = -DCOOT_USE_OPENCL -DCOOT_USE_CLBLAST -DCOOT_USE_CLBLAS -DCOOT_USE_CUDA -DCOOT_CUDA_INCLUDE_PATH=/usr/include/ -DCOOT_DEFAULT_BACKEND=CUDA_BACKEND $(SHLIB_OPENMP_CXXFLAGS) $(SHLIB_OPENMP_CXXFLAGS) +PKG_CXXFLAGS = -DCOOT_USE_OPENCL -DCOOT_USE_CLBLAST -DCOOT_USE_CLBLAS -DCOOT_USE_CUDA -DCOOT_CUDA_INCLUDE_PATH=/usr/include/ -DCOOT_DEFAULT_BACKEND=CUDA_BACKEND $(SHLIB_OPENMP_CXXFLAGS) ## Linker flags from configure PKG_LIBS = -L/opt/homebrew/opt/clblas/lib -lclBLAS -L/opt/homebrew/opt/clblast/lib -lclblast -framework OpenCL -L/Library/Frameworks/R.framework/Resources/lib -lRlapack -L/Library/Frameworks/R.framework/Resources/lib -lRblas -L/opt/gfortran/lib/gcc/aarch64-apple-darwin20.0/14.2.0 -L/opt/gfortran/lib -lemutls_w -lheapt_w -lgfortran -lquadmath diff --git a/src/Makevars.in b/src/Makevars.in index dde61e5..67f5e62 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -3,10 +3,11 @@ ## This file is processed by configure to generate Makevars ## It includes GPU backend configuration (OpenCL, CUDA) and other settings -PKG_CPPFLAGS = -I../inst/include -DCL_TARGET_OPENCL_VERSION=@OPENCL_TARGET_VERSION@ +PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 +#-DCOOT_KERNEL_SOURCE_DIR="/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/" ## Compiler flags from configure -PKG_CXXFLAGS = @BANDICOOT_CXXFLAGS@ @OPENMP_CXXFLAGS@ +PKG_CXXFLAGS = @BANDICOOT_CXXFLAGS@ ## Linker flags from configure PKG_LIBS = @OPENMP_CXXFLAGS@ @BANDICOOT_LIBS@ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 1be7b32..a2c5d12 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -11,6 +11,17 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif +// gpu_initialize +void gpu_initialize(std::string type, bool print_info); +RcppExport SEXP _RcppBandicoot_gpu_initialize(SEXP typeSEXP, SEXP print_infoSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::string >::type type(typeSEXP); + Rcpp::traits::input_parameter< bool >::type print_info(print_infoSEXP); + gpu_initialize(type, print_info); + return R_NilValue; +END_RCPP +} // bandicoot_version void bandicoot_version(); RcppExport SEXP _RcppBandicoot_bandicoot_version() { @@ -113,6 +124,7 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { + {"_RcppBandicoot_gpu_initialize", (DL_FUNC) &_RcppBandicoot_gpu_initialize, 2}, {"_RcppBandicoot_bandicoot_version", (DL_FUNC) &_RcppBandicoot_bandicoot_version, 0}, {"_RcppBandicoot_gpu_matrix_multiply", (DL_FUNC) &_RcppBandicoot_gpu_matrix_multiply, 2}, {"_RcppBandicoot_gpu_transpose", (DL_FUNC) &_RcppBandicoot_gpu_transpose, 1}, diff --git a/src/coot.cpp b/src/coot.cpp new file mode 100644 index 0000000..4861cb1 --- /dev/null +++ b/src/coot.cpp @@ -0,0 +1,6 @@ +#include + +// [[Rcpp::export]] +void gpu_initialize(std::string type = "opencl", bool print_info = true) { + coot::coot_init(type.c_str(), print_info); +} From 624c7d71c463bb65034f658e5c776f78ce1f0bfd Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 16:52:43 -0600 Subject: [PATCH 03/12] Makevars update following 3.1.0 --- src/Makevars | 4 ++-- src/Makevars.in | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Makevars b/src/Makevars index ffe832c..c4a4f1e 100644 --- a/src/Makevars +++ b/src/Makevars @@ -3,10 +3,10 @@ ## This file is processed by configure to generate Makevars ## It includes GPU backend configuration (OpenCL, CUDA) and other settings -PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 +PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/"' ## Compiler flags from configure PKG_CXXFLAGS = -DCOOT_USE_OPENCL -DCOOT_USE_CLBLAST -DCOOT_USE_CLBLAS -DCOOT_USE_CUDA -DCOOT_CUDA_INCLUDE_PATH=/usr/include/ -DCOOT_DEFAULT_BACKEND=CUDA_BACKEND $(SHLIB_OPENMP_CXXFLAGS) ## Linker flags from configure -PKG_LIBS = -L/opt/homebrew/opt/clblas/lib -lclBLAS -L/opt/homebrew/opt/clblast/lib -lclblast -framework OpenCL -L/Library/Frameworks/R.framework/Resources/lib -lRlapack -L/Library/Frameworks/R.framework/Resources/lib -lRblas -L/opt/gfortran/lib/gcc/aarch64-apple-darwin20.0/14.2.0 -L/opt/gfortran/lib -lemutls_w -lheapt_w -lgfortran -lquadmath +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) -lclBLAS -lclblast -lOpenCL -L/usr/lib64 -L/usr/lib -lcuda -lcudart -lcublas -lcusolver -lcurand -lnvrtc -llapack -lblas -lgfortran -lm -lquadmath diff --git a/src/Makevars.in b/src/Makevars.in index 67f5e62..ae9e60d 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -3,8 +3,7 @@ ## This file is processed by configure to generate Makevars ## It includes GPU backend configuration (OpenCL, CUDA) and other settings -PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -#-DCOOT_KERNEL_SOURCE_DIR="/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/" +PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/"' ## Compiler flags from configure PKG_CXXFLAGS = @BANDICOOT_CXXFLAGS@ From d330738908c28e7b56fad623496693467ce503ed Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 16:54:47 -0600 Subject: [PATCH 04/12] Expand Authors@R, remove spurious linebreaks, remove C++14 standard --- DESCRIPTION | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6175a09..90fe43e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,11 @@ Package: RcppBandicoot Title: 'Rcpp' Integration for the `Bandicoot` Header-Only C++ GPU Accelerated Linear Algebra Library Version: 0.1.11.1.9000 -Authors@R: - c( - person("James Joseph", "Balamuta", email = "balamut2@illinois.edu", - role = c("aut", "cre", "cph"), - comment = c(ORCID = "0000-0003-2826-8458")) - ) +Authors@R: c(person("James Joseph", "Balamuta", email = "balamut2@illinois.edu", + role = c("aut", "cre", "cph"), + comment = c(ORCID = "0000-0003-2826-8458")), + person("Dirk", "Eddelbuettel", role = c("aut"), email = "edd@debian.org", + comment = c(ORCID = "0000-0001-6419-907X"))) Description: 'Bandicoot' is a templated C++ GPU Linear Algebra library (by the 'Armadillo' team) that provides a simple set of abstractions for writing high performant code for graphics processing units. @@ -17,13 +16,10 @@ Description: 'Bandicoot' is a templated C++ GPU Linear Algebra library the GNU GPL version 2 or later. Note that 'Bandicoot' requires a compiler that supports 'C++14' and 'OpenCL' (>= 1.2) or 'CUDA' (>= 9.8). License: GPL (>= 2) -Depends: - R (>= 4.4) -LinkingTo: - Rcpp (>= 1.1.0) -Imports: - Rcpp (>= 1.1.0) -SystemRequirements: C++14, OpenCL (>= 1.2) or CUDA (>= 9.8), GPU device with appropriate drivers. For OpenCL: CLBlast (recommended) or clBLAS for BLAS operations. +Depends: R (>= 4.4) +LinkingTo: Rcpp (>= 1.1.0) +Imports: Rcpp (>= 1.1.0) +SystemRequirements: OpenCL (>= 1.2) or CUDA (>= 9.8), GPU device with appropriate drivers. For OpenCL: CLBlast (recommended) or clBLAS for BLAS operations. Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.3 From 67369f42577fcaed8da1e6a8f1f4692b03c6d9a1 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 18:23:30 -0600 Subject: [PATCH 05/12] Correct COOT_KERNEL_SOURCE_DIR to now append 'kernels/' --- src/Makevars | 2 +- src/Makevars.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Makevars b/src/Makevars index c4a4f1e..a4565dc 100644 --- a/src/Makevars +++ b/src/Makevars @@ -3,7 +3,7 @@ ## This file is processed by configure to generate Makevars ## It includes GPU backend configuration (OpenCL, CUDA) and other settings -PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/"' +PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/kernels/"' ## Compiler flags from configure PKG_CXXFLAGS = -DCOOT_USE_OPENCL -DCOOT_USE_CLBLAST -DCOOT_USE_CLBLAS -DCOOT_USE_CUDA -DCOOT_CUDA_INCLUDE_PATH=/usr/include/ -DCOOT_DEFAULT_BACKEND=CUDA_BACKEND $(SHLIB_OPENMP_CXXFLAGS) diff --git a/src/Makevars.in b/src/Makevars.in index ae9e60d..c38f0f7 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -3,7 +3,7 @@ ## This file is processed by configure to generate Makevars ## It includes GPU backend configuration (OpenCL, CUDA) and other settings -PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/"' +PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/kernels/"' ## Compiler flags from configure PKG_CXXFLAGS = @BANDICOOT_CXXFLAGS@ From c7ad24121c60efade87cc34c9dd808bff2f83907 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 18:24:00 -0600 Subject: [PATCH 06/12] Correction to opencl file computation (also MR 193 upstream) --- inst/include/bandicoot_bits/opencl/kernel_src.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/include/bandicoot_bits/opencl/kernel_src.hpp b/inst/include/bandicoot_bits/opencl/kernel_src.hpp index 465e7de..8c703eb 100644 --- a/inst/include/bandicoot_bits/opencl/kernel_src.hpp +++ b/inst/include/bandicoot_bits/opencl/kernel_src.hpp @@ -54,7 +54,7 @@ read_file(const std::string& filename) { #if defined(COOT_KERNEL_SOURCE_DIR) const char* source_dir = COOT_KERNEL_SOURCE_DIR; - const std::string full_filename = std::string(source_dir) + "opencl/" + full_filename; + const std::string full_filename = std::string(source_dir) + "opencl/" + filename; #else const std::string this_file = __FILE__; From ed97f5d5eb361a93c33e0467bcc1d350fc0b8420 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 18:39:50 -0600 Subject: [PATCH 07/12] Installing kernels in 'ks/' avoids nag on > 100 chars --- .../bandicoot_bits/{kernels => ks}/cuda/defs/cuda_prelims.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/d_defs.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/f_defs.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/h_defs.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/defs/s16_defs.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/defs/s32_defs.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/defs/s64_defs.cu | 0 .../include/bandicoot_bits/{kernels => ks}/cuda/defs/s8_defs.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/defs/u16_defs.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/defs/u32_defs.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/defs/u64_defs.cu | 0 .../include/bandicoot_bits/{kernels => ks}/cuda/defs/u8_defs.cu | 0 .../{kernels => ks}/cuda/deps/accu_subgroup_reduce.cu | 0 .../{kernels => ks}/cuda/deps/and_subgroup_reduce_u32.cu | 0 .../{kernels => ks}/cuda/deps/max_subgroup_reduce.cu | 0 .../{kernels => ks}/cuda/deps/min_subgroup_reduce.cu | 0 .../{kernels => ks}/cuda/deps/or_subgroup_reduce_u32.cu | 0 .../{kernels => ks}/cuda/deps/prod_subgroup_reduce.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/deps/var_philox.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/accu.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/accu_simple.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/accu_small.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/approx_equal.cu | 0 .../{kernels => ks}/cuda/oneway/approx_equal_cube.cu | 0 .../{kernels => ks}/cuda/oneway/approx_equal_cube_small.cu | 0 .../{kernels => ks}/cuda/oneway/approx_equal_small.cu | 0 .../{kernels => ks}/cuda/oneway/count_nonzeros.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/fill.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/fill_sve1.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/fill_sve2.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/find.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/find_first.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/find_last.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/index_max.cu | 0 .../{kernels => ks}/cuda/oneway/index_max_colwise.cu | 0 .../{kernels => ks}/cuda/oneway/index_max_cube_col.cu | 0 .../{kernels => ks}/cuda/oneway/index_max_rowwise.cu | 0 .../{kernels => ks}/cuda/oneway/index_max_small.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/index_min.cu | 0 .../{kernels => ks}/cuda/oneway/index_min_colwise.cu | 0 .../{kernels => ks}/cuda/oneway/index_min_cube_col.cu | 0 .../{kernels => ks}/cuda/oneway/index_min_rowwise.cu | 0 .../{kernels => ks}/cuda/oneway/index_min_small.cu | 0 .../{kernels => ks}/cuda/oneway/inplace_philox_randn.cu | 0 .../{kernels => ks}/cuda/oneway/inplace_set_eye.cu | 0 .../{kernels => ks}/cuda/oneway/inplace_xorwow32_randi.cu | 0 .../{kernels => ks}/cuda/oneway/inplace_xorwow32_randu.cu | 0 .../{kernels => ks}/cuda/oneway/inplace_xorwow64_randi.cu | 0 .../{kernels => ks}/cuda/oneway/inplace_xorwow64_randu.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/linspace.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/logspace.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/ltri_set_zero.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/max.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/max_abs.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/max_abs_small.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/max_small.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/min.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/min_small.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/mul_colwise.cu | 0 .../{kernels => ks}/cuda/oneway/mul_colwise_trans.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/mul_rowwise.cu | 0 .../{kernels => ks}/cuda/oneway/mul_rowwise_trans.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/prod.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/prod_small.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_asc.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_colwise_asc.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_colwise_desc.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_desc.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_index_asc.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_index_desc.cu | 0 .../cuda/oneway/radix_sort_index_multi_wg_shuffle.cu | 0 .../cuda/oneway/radix_sort_multi_wg_bit_count.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_multi_wg_shuffle.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_rowwise_asc.cu | 0 .../{kernels => ks}/cuda/oneway/radix_sort_rowwise_desc.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/regspace_desc.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/reorder_cols.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/rotate_180.cu | 0 .../cuda/oneway/shifted_prefix_sum_add_offset.cu | 0 .../{kernels => ks}/cuda/oneway/shifted_prefix_sum_small.cu | 0 .../{kernels => ks}/cuda/oneway/shifted_prefix_sum_subgroups.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/shuffle.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/shuffle_large.cu | 0 .../{kernels => ks}/cuda/oneway/stable_radix_sort_index_asc.cu | 0 .../{kernels => ks}/cuda/oneway/stable_radix_sort_index_desc.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/submat_var.cu | 0 .../{kernels => ks}/cuda/oneway/submat_var_small.cu | 0 .../{kernels => ks}/cuda/oneway/symmatl_inplace.cu | 0 .../{kernels => ks}/cuda/oneway/symmatu_inplace.cu | 0 .../include/bandicoot_bits/{kernels => ks}/cuda/oneway/trace.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/var.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/var_colwise.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/var_rowwise.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/oneway/var_small.cu | 0 .../{kernels => ks}/cuda/oneway_integral/and_reduce.cu | 0 .../{kernels => ks}/cuda/oneway_integral/and_reduce_small.cu | 0 .../{kernels => ks}/cuda/oneway_integral/ipiv_det.cu | 0 .../{kernels => ks}/cuda/oneway_integral/ipiv_det_small.cu | 0 .../{kernels => ks}/cuda/oneway_integral/or_reduce.cu | 0 .../{kernels => ks}/cuda/oneway_integral/or_reduce_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/diag_prod.cu | 0 .../{kernels => ks}/cuda/oneway_real/diag_prod_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/extract_cx.cu | 0 .../{kernels => ks}/cuda/oneway_real/lu_extract_l.cu | 0 .../{kernels => ks}/cuda/oneway_real/lu_extract_p.cu | 0 .../{kernels => ks}/cuda/oneway_real/lu_extract_pivoted_l.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_any_inf.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_any_inf_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_any_nan.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_any_nan_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_any_nonfinite.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_any_nonfinite_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_isfinite.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_isnan.cu | 0 .../{kernels => ks}/cuda/oneway_real/rel_isnonfinite.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_1.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_1_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_2.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_2_robust.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_2_robust_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_2_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_k.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_k_small.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_min.cu | 0 .../{kernels => ks}/cuda/oneway_real/vec_norm_min_small.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_atan2.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_div_array.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_div_array_cube.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_hypot.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_max_array.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_min_array.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_minus_array.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_minus_array_cube.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_mul_array.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_mul_array_cube.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_plus_array.cu | 0 .../{kernels => ks}/cuda/threeway/equ_array_plus_array_cube.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_div_post.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_div_pre.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_minus_post.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_minus_pre.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_plus.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_schur.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_set.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_subset_div_post.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_subset_div_pre.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_subset_minus_post.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_subset_minus_pre.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_subset_plus.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_subset_schur.cu | 0 .../{kernels => ks}/cuda/twoway/broadcast_subset_set.cu | 0 .../include/bandicoot_bits/{kernels => ks}/cuda/twoway/clamp.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/convert_type.cu | 0 .../{kernels => ks}/cuda/twoway/convert_type_cube.cu | 0 .../include/bandicoot_bits/{kernels => ks}/cuda/twoway/cross.cu | 0 inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/dot.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/dot_small.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_abs.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_acos_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_acos_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_acosh_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_acosh_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_asin_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_asin_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_asinh_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_asinh_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_atan_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_atan_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_atanh_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_atanh_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_ceil_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_ceil_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_cos_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_cos_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_cosh_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_cosh_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_div_scalar_post.cu | 0 .../cuda/twoway/equ_array_div_scalar_post_sve1.cu | 0 .../cuda/twoway/equ_array_div_scalar_post_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_div_scalar_pre.cu | 0 .../cuda/twoway/equ_array_div_scalar_pre_sve1.cu | 0 .../cuda/twoway/equ_array_div_scalar_pre_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_erf_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_erf_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_erfc_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_erfc_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_exp10_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_exp10_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_exp2_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_exp2_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_exp_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_exp_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_floor_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_floor_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_lgamma_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_lgamma_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_log10_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_log10_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_log2_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_log2_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_log_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_log_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_max_array_cube.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_min_array_cube.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_minus_scalar_post.cu | 0 .../cuda/twoway/equ_array_minus_scalar_post_sve1.cu | 0 .../cuda/twoway/equ_array_minus_scalar_post_sve2.cu | 0 .../cuda/twoway/equ_array_minus_scalar_pre_post.cu | 0 .../cuda/twoway/equ_array_minus_scalar_pre_post_sve1.cu | 0 .../cuda/twoway/equ_array_minus_scalar_pre_post_sve2.cu | 0 .../cuda/twoway/equ_array_minus_scalar_pre_pre.cu | 0 .../cuda/twoway/equ_array_minus_scalar_pre_pre_sve1.cu | 0 .../cuda/twoway/equ_array_minus_scalar_pre_pre_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_mod_scalar.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_mul_scalar.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_mul_scalar_sve1.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_mul_scalar_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_neg_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_neg_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_plus_scalar.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_plus_scalar_sve1.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_plus_scalar_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_pow_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_pow_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_round_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_round_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sign_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sign_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sin_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sin_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sinc_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sinc_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sinh_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sinh_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sqrt_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_sqrt_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_square_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_square_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_tan_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_tan_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_tanh_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_tanh_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_trunc_exp_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_trunc_exp_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_trunc_log_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_trunc_log_pre.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_trunc_post.cu | 0 .../{kernels => ks}/cuda/twoway/equ_array_trunc_pre.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/extract_sve1.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/extract_sve2.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/htrans.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_div_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_div_sve1.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_eq_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_eq_sve1.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_minus_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_minus_sve1.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_mul_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_mul_sve1.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_plus_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve1_plus_sve1.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_div_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_div_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_eq_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_eq_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_minus_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_minus_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_mul_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_mul_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_plus_array.cu | 0 .../{kernels => ks}/cuda/twoway/inplace_sve2_plus_sve2.cu | 0 .../{kernels => ks}/cuda/twoway/max_colwise_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/max_colwise_conv_pre.cu | 0 .../{kernels => ks}/cuda/twoway/max_cube_col_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/max_cube_col_conv_pre.cu | 0 .../{kernels => ks}/cuda/twoway/max_rowwise_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/max_rowwise_conv_pre.cu | 0 .../{kernels => ks}/cuda/twoway/mean_colwise_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/mean_colwise_conv_pre.cu | 0 .../{kernels => ks}/cuda/twoway/mean_rowwise_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/mean_rowwise_conv_pre.cu | 0 .../{kernels => ks}/cuda/twoway/min_colwise_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/min_colwise_conv_pre.cu | 0 .../{kernels => ks}/cuda/twoway/min_cube_col_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/min_cube_col_conv_pre.cu | 0 .../{kernels => ks}/cuda/twoway/min_rowwise_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/min_rowwise_conv_pre.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_all_neq.cu | 0 .../{kernels => ks}/cuda/twoway/rel_all_neq_colwise.cu | 0 .../{kernels => ks}/cuda/twoway/rel_all_neq_rowwise.cu | 0 .../{kernels => ks}/cuda/twoway/rel_all_neq_small.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_and_array.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_any_neq.cu | 0 .../{kernels => ks}/cuda/twoway/rel_any_neq_colwise.cu | 0 .../{kernels => ks}/cuda/twoway/rel_any_neq_rowwise.cu | 0 .../{kernels => ks}/cuda/twoway/rel_any_neq_small.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_eq_array.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_eq_scalar.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_gt_array.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_gt_scalar.cu | 0 .../{kernels => ks}/cuda/twoway/rel_gteq_array.cu | 0 .../{kernels => ks}/cuda/twoway/rel_gteq_scalar.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_lt_array.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_lt_scalar.cu | 0 .../{kernels => ks}/cuda/twoway/rel_lteq_array.cu | 0 .../{kernels => ks}/cuda/twoway/rel_lteq_scalar.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_neq_array.cu | 0 .../{kernels => ks}/cuda/twoway/rel_neq_scalar.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/rel_or_array.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/replace.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/strans.cu | 0 .../{kernels => ks}/cuda/twoway/sum_colwise_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/sum_colwise_conv_pre.cu | 0 .../{kernels => ks}/cuda/twoway/sum_rowwise_conv_post.cu | 0 .../{kernels => ks}/cuda/twoway/sum_rowwise_conv_pre.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/symmatl.cu | 0 .../bandicoot_bits/{kernels => ks}/cuda/twoway/symmatu.cu | 0 .../{kernels => ks}/cuda/zeroway/shuffle_large_compute_locs.cu | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/d_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/f_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/h_defs.cl | 0 .../{kernels => ks}/opencl/defs/opencl_prelims.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/s16_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/s32_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/s64_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/s8_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/u16_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/u32_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/u64_defs.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/defs/u8_defs.cl | 0 .../{kernels => ks}/opencl/deps/accu_subgroup_reduce.cl | 0 .../{kernels => ks}/opencl/deps/and_subgroup_reduce_u32.cl | 0 .../{kernels => ks}/opencl/deps/max_subgroup_reduce.cl | 0 .../{kernels => ks}/opencl/deps/min_subgroup_reduce.cl | 0 .../{kernels => ks}/opencl/deps/or_subgroup_reduce_u32.cl | 0 .../{kernels => ks}/opencl/deps/prod_subgroup_reduce.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/deps/var_philox.cl | 0 .../{kernels => ks}/opencl/magma_real/lansy_inf_lower.cl | 0 .../{kernels => ks}/opencl/magma_real/lansy_inf_upper.cl | 0 .../{kernels => ks}/opencl/magma_real/lansy_max_lower.cl | 0 .../{kernels => ks}/opencl/magma_real/lansy_max_upper.cl | 0 .../{kernels => ks}/opencl/magma_real/lascl_full.cl | 0 .../{kernels => ks}/opencl/magma_real/lascl_lower.cl | 0 .../{kernels => ks}/opencl/magma_real/lascl_upper.cl | 0 .../{kernels => ks}/opencl/magma_real/laset_band_lower.cl | 0 .../{kernels => ks}/opencl/magma_real/laset_band_upper.cl | 0 .../{kernels => ks}/opencl/magma_real/laset_full.cl | 0 .../{kernels => ks}/opencl/magma_real/laset_lower.cl | 0 .../{kernels => ks}/opencl/magma_real/laset_upper.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/magma_real/laswp.cl | 0 .../opencl/magma_real/transpose_inplace_even_magma.cl | 0 .../opencl/magma_real/transpose_inplace_odd_magma.cl | 0 .../{kernels => ks}/opencl/magma_real/transpose_magma.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/accu.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/accu_simple.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/accu_small.cl | 0 .../{kernels => ks}/opencl/oneway/approx_equal.cl | 0 .../{kernels => ks}/opencl/oneway/approx_equal_cube.cl | 0 .../{kernels => ks}/opencl/oneway/approx_equal_cube_small.cl | 0 .../{kernels => ks}/opencl/oneway/approx_equal_small.cl | 0 .../{kernels => ks}/opencl/oneway/count_nonzeros.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/fill.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/fill_sve1.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/fill_sve2.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/find.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/find_first.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/find_last.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/index_max.cl | 0 .../{kernels => ks}/opencl/oneway/index_max_colwise.cl | 0 .../{kernels => ks}/opencl/oneway/index_max_cube_col.cl | 0 .../{kernels => ks}/opencl/oneway/index_max_rowwise.cl | 0 .../{kernels => ks}/opencl/oneway/index_max_small.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/index_min.cl | 0 .../{kernels => ks}/opencl/oneway/index_min_colwise.cl | 0 .../{kernels => ks}/opencl/oneway/index_min_cube_col.cl | 0 .../{kernels => ks}/opencl/oneway/index_min_rowwise.cl | 0 .../{kernels => ks}/opencl/oneway/index_min_small.cl | 0 .../{kernels => ks}/opencl/oneway/inplace_philox_randn.cl | 0 .../{kernels => ks}/opencl/oneway/inplace_set_eye.cl | 0 .../{kernels => ks}/opencl/oneway/inplace_xorwow32_randi.cl | 0 .../{kernels => ks}/opencl/oneway/inplace_xorwow32_randu.cl | 0 .../{kernels => ks}/opencl/oneway/inplace_xorwow64_randi.cl | 0 .../{kernels => ks}/opencl/oneway/inplace_xorwow64_randu.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/linspace.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/logspace.cl | 0 .../{kernels => ks}/opencl/oneway/ltri_set_zero.cl | 0 .../include/bandicoot_bits/{kernels => ks}/opencl/oneway/max.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/max_abs.cl | 0 .../{kernels => ks}/opencl/oneway/max_abs_small.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/max_small.cl | 0 .../include/bandicoot_bits/{kernels => ks}/opencl/oneway/min.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/min_small.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/mul_colwise.cl | 0 .../{kernels => ks}/opencl/oneway/mul_colwise_trans.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/mul_rowwise.cl | 0 .../{kernels => ks}/opencl/oneway/mul_rowwise_trans.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/prod.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/prod_small.cl | 0 .../{kernels => ks}/opencl/oneway/radix_sort_asc.cl | 0 .../{kernels => ks}/opencl/oneway/radix_sort_colwise_asc.cl | 0 .../{kernels => ks}/opencl/oneway/radix_sort_colwise_desc.cl | 0 .../{kernels => ks}/opencl/oneway/radix_sort_desc.cl | 0 .../{kernels => ks}/opencl/oneway/radix_sort_index_asc.cl | 0 .../{kernels => ks}/opencl/oneway/radix_sort_index_desc.cl | 0 .../opencl/oneway/radix_sort_index_multi_wg_shuffle.cl | 0 .../opencl/oneway/radix_sort_multi_wg_bit_count.cl | 0 .../opencl/oneway/radix_sort_multi_wg_shuffle.cl | 0 .../{kernels => ks}/opencl/oneway/radix_sort_rowwise_asc.cl | 0 .../{kernels => ks}/opencl/oneway/radix_sort_rowwise_desc.cl | 0 .../{kernels => ks}/opencl/oneway/regspace_desc.cl | 0 .../{kernels => ks}/opencl/oneway/reorder_cols.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/rotate_180.cl | 0 .../opencl/oneway/shifted_prefix_sum_add_offset.cl | 0 .../{kernels => ks}/opencl/oneway/shifted_prefix_sum_small.cl | 0 .../opencl/oneway/shifted_prefix_sum_subgroups.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/shuffle.cl | 0 .../{kernels => ks}/opencl/oneway/shuffle_large.cl | 0 .../opencl/oneway/stable_radix_sort_index_asc.cl | 0 .../opencl/oneway/stable_radix_sort_index_desc.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/submat_var.cl | 0 .../{kernels => ks}/opencl/oneway/submat_var_small.cl | 0 .../{kernels => ks}/opencl/oneway/symmatl_inplace.cl | 0 .../{kernels => ks}/opencl/oneway/symmatu_inplace.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/trace.cl | 0 .../include/bandicoot_bits/{kernels => ks}/opencl/oneway/var.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/var_colwise.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/var_rowwise.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/oneway/var_small.cl | 0 .../{kernels => ks}/opencl/oneway_integral/and_reduce.cl | 0 .../{kernels => ks}/opencl/oneway_integral/and_reduce_small.cl | 0 .../{kernels => ks}/opencl/oneway_integral/ipiv_det.cl | 0 .../{kernels => ks}/opencl/oneway_integral/ipiv_det_small.cl | 0 .../{kernels => ks}/opencl/oneway_integral/or_reduce.cl | 0 .../{kernels => ks}/opencl/oneway_integral/or_reduce_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/diag_prod.cl | 0 .../{kernels => ks}/opencl/oneway_real/diag_prod_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/extract_cx.cl | 0 .../{kernels => ks}/opencl/oneway_real/lu_extract_l.cl | 0 .../{kernels => ks}/opencl/oneway_real/lu_extract_p.cl | 0 .../{kernels => ks}/opencl/oneway_real/lu_extract_pivoted_l.cl | 0 .../{kernels => ks}/opencl/oneway_real/rel_any_inf.cl | 0 .../{kernels => ks}/opencl/oneway_real/rel_any_inf_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/rel_any_nan.cl | 0 .../{kernels => ks}/opencl/oneway_real/rel_any_nan_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/rel_any_nonfinite.cl | 0 .../opencl/oneway_real/rel_any_nonfinite_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/rel_isfinite.cl | 0 .../{kernels => ks}/opencl/oneway_real/rel_isnan.cl | 0 .../{kernels => ks}/opencl/oneway_real/rel_isnonfinite.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_1.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_1_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_2.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_2_robust.cl | 0 .../opencl/oneway_real/vec_norm_2_robust_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_2_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_k.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_k_small.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_min.cl | 0 .../{kernels => ks}/opencl/oneway_real/vec_norm_min_small.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_atan2.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_div_array.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_div_array_cube.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_hypot.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_max_array.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_min_array.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_minus_array.cl | 0 .../opencl/threeway/equ_array_minus_array_cube.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_mul_array.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_mul_array_cube.cl | 0 .../{kernels => ks}/opencl/threeway/equ_array_plus_array.cl | 0 .../opencl/threeway/equ_array_plus_array_cube.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_div_post.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_div_pre.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_minus_post.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_minus_pre.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_plus.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_schur.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_set.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_subset_div_post.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_subset_div_pre.cl | 0 .../opencl/twoway/broadcast_subset_minus_post.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_subset_minus_pre.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_subset_plus.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_subset_schur.cl | 0 .../{kernels => ks}/opencl/twoway/broadcast_subset_set.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/clamp.cl | 0 .../{kernels => ks}/opencl/twoway/convert_type.cl | 0 .../{kernels => ks}/opencl/twoway/convert_type_cube.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/cross.cl | 0 .../include/bandicoot_bits/{kernels => ks}/opencl/twoway/dot.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/dot_small.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_abs.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_acos_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_acos_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_acosh_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_acosh_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_asin_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_asin_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_asinh_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_asinh_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_atan_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_atan_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_atanh_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_atanh_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_ceil_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_ceil_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_cos_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_cos_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_cosh_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_cosh_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_div_scalar_post.cl | 0 .../opencl/twoway/equ_array_div_scalar_post_sve1.cl | 0 .../opencl/twoway/equ_array_div_scalar_post_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_div_scalar_pre.cl | 0 .../opencl/twoway/equ_array_div_scalar_pre_sve1.cl | 0 .../opencl/twoway/equ_array_div_scalar_pre_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_erf_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_erf_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_erfc_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_erfc_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_exp10_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_exp10_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_exp2_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_exp2_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_exp_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_exp_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_floor_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_floor_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_lgamma_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_lgamma_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_log10_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_log10_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_log2_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_log2_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_log_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_log_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_max_array_cube.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_min_array_cube.cl | 0 .../opencl/twoway/equ_array_minus_scalar_post.cl | 0 .../opencl/twoway/equ_array_minus_scalar_post_sve1.cl | 0 .../opencl/twoway/equ_array_minus_scalar_post_sve2.cl | 0 .../opencl/twoway/equ_array_minus_scalar_pre_post.cl | 0 .../opencl/twoway/equ_array_minus_scalar_pre_post_sve1.cl | 0 .../opencl/twoway/equ_array_minus_scalar_pre_post_sve2.cl | 0 .../opencl/twoway/equ_array_minus_scalar_pre_pre.cl | 0 .../opencl/twoway/equ_array_minus_scalar_pre_pre_sve1.cl | 0 .../opencl/twoway/equ_array_minus_scalar_pre_pre_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_mod_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_mul_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_mul_scalar_sve1.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_mul_scalar_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_neg_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_neg_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_plus_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_plus_scalar_sve1.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_plus_scalar_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_pow_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_pow_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_round_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_round_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sign_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sign_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sin_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sin_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sinc_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sinc_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sinh_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sinh_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sqrt_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_sqrt_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_square_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_square_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_tan_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_tan_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_tanh_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_tanh_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_trunc_exp_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_trunc_exp_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_trunc_log_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_trunc_log_pre.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_trunc_post.cl | 0 .../{kernels => ks}/opencl/twoway/equ_array_trunc_pre.cl | 0 .../{kernels => ks}/opencl/twoway/extract_sve1.cl | 0 .../{kernels => ks}/opencl/twoway/extract_sve2.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/htrans.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_div_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_div_sve1.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_eq_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_eq_sve1.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_minus_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_minus_sve1.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_mul_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_mul_sve1.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_plus_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve1_plus_sve1.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_div_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_div_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_eq_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_eq_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_minus_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_minus_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_mul_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_mul_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_plus_array.cl | 0 .../{kernels => ks}/opencl/twoway/inplace_sve2_plus_sve2.cl | 0 .../{kernels => ks}/opencl/twoway/max_colwise_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/max_colwise_conv_pre.cl | 0 .../{kernels => ks}/opencl/twoway/max_cube_col_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/max_cube_col_conv_pre.cl | 0 .../{kernels => ks}/opencl/twoway/max_rowwise_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/max_rowwise_conv_pre.cl | 0 .../{kernels => ks}/opencl/twoway/mean_colwise_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/mean_colwise_conv_pre.cl | 0 .../{kernels => ks}/opencl/twoway/mean_rowwise_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/mean_rowwise_conv_pre.cl | 0 .../{kernels => ks}/opencl/twoway/min_colwise_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/min_colwise_conv_pre.cl | 0 .../{kernels => ks}/opencl/twoway/min_cube_col_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/min_cube_col_conv_pre.cl | 0 .../{kernels => ks}/opencl/twoway/min_rowwise_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/min_rowwise_conv_pre.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/rel_all_neq.cl | 0 .../{kernels => ks}/opencl/twoway/rel_all_neq_colwise.cl | 0 .../{kernels => ks}/opencl/twoway/rel_all_neq_rowwise.cl | 0 .../{kernels => ks}/opencl/twoway/rel_all_neq_small.cl | 0 .../{kernels => ks}/opencl/twoway/rel_and_array.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/rel_any_neq.cl | 0 .../{kernels => ks}/opencl/twoway/rel_any_neq_colwise.cl | 0 .../{kernels => ks}/opencl/twoway/rel_any_neq_rowwise.cl | 0 .../{kernels => ks}/opencl/twoway/rel_any_neq_small.cl | 0 .../{kernels => ks}/opencl/twoway/rel_eq_array.cl | 0 .../{kernels => ks}/opencl/twoway/rel_eq_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/rel_gt_array.cl | 0 .../{kernels => ks}/opencl/twoway/rel_gt_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/rel_gteq_array.cl | 0 .../{kernels => ks}/opencl/twoway/rel_gteq_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/rel_lt_array.cl | 0 .../{kernels => ks}/opencl/twoway/rel_lt_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/rel_lteq_array.cl | 0 .../{kernels => ks}/opencl/twoway/rel_lteq_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/rel_neq_array.cl | 0 .../{kernels => ks}/opencl/twoway/rel_neq_scalar.cl | 0 .../{kernels => ks}/opencl/twoway/rel_or_array.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/replace.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/repmat.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/strans.cl | 0 .../{kernels => ks}/opencl/twoway/sum_colwise_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/sum_colwise_conv_pre.cl | 0 .../{kernels => ks}/opencl/twoway/sum_rowwise_conv_post.cl | 0 .../{kernels => ks}/opencl/twoway/sum_rowwise_conv_pre.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/symmatl.cl | 0 .../bandicoot_bits/{kernels => ks}/opencl/twoway/symmatu.cl | 0 .../opencl/zeroway/shuffle_large_compute_locs.cl | 0 src/Makevars.in | 2 +- 654 files changed, 1 insertion(+), 1 deletion(-) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/cuda_prelims.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/d_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/f_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/h_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/s16_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/s32_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/s64_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/s8_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/u16_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/u32_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/u64_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/defs/u8_defs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/deps/accu_subgroup_reduce.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/deps/and_subgroup_reduce_u32.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/deps/max_subgroup_reduce.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/deps/min_subgroup_reduce.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/deps/or_subgroup_reduce_u32.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/deps/prod_subgroup_reduce.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/deps/var_philox.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/accu.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/accu_simple.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/accu_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/approx_equal.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/approx_equal_cube.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/approx_equal_cube_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/approx_equal_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/count_nonzeros.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/fill.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/fill_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/fill_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/find.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/find_first.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/find_last.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_max.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_max_colwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_max_cube_col.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_max_rowwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_max_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_min.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_min_colwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_min_cube_col.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_min_rowwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/index_min_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/inplace_philox_randn.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/inplace_set_eye.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/inplace_xorwow32_randi.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/inplace_xorwow32_randu.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/inplace_xorwow64_randi.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/inplace_xorwow64_randu.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/linspace.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/logspace.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/ltri_set_zero.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/max.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/max_abs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/max_abs_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/max_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/min.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/min_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/mul_colwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/mul_colwise_trans.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/mul_rowwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/mul_rowwise_trans.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/prod.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/prod_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_asc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_colwise_asc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_colwise_desc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_desc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_index_asc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_index_desc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_index_multi_wg_shuffle.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_multi_wg_bit_count.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_multi_wg_shuffle.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_rowwise_asc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/radix_sort_rowwise_desc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/regspace_desc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/reorder_cols.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/rotate_180.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/shifted_prefix_sum_add_offset.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/shifted_prefix_sum_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/shifted_prefix_sum_subgroups.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/shuffle.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/shuffle_large.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/stable_radix_sort_index_asc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/stable_radix_sort_index_desc.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/submat_var.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/submat_var_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/symmatl_inplace.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/symmatu_inplace.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/trace.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/var.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/var_colwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/var_rowwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway/var_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_integral/and_reduce.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_integral/and_reduce_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_integral/ipiv_det.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_integral/ipiv_det_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_integral/or_reduce.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_integral/or_reduce_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/diag_prod.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/diag_prod_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/extract_cx.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/lu_extract_l.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/lu_extract_p.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/lu_extract_pivoted_l.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_any_inf.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_any_inf_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_any_nan.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_any_nan_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_any_nonfinite.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_any_nonfinite_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_isfinite.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_isnan.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/rel_isnonfinite.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_1_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_2_robust.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_2_robust_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_2_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_k.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_k_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_min.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/oneway_real/vec_norm_min_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_atan2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_div_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_div_array_cube.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_hypot.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_max_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_min_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_minus_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_minus_array_cube.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_mul_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_mul_array_cube.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_plus_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/threeway/equ_array_plus_array_cube.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_div_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_div_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_minus_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_minus_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_plus.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_schur.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_set.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_subset_div_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_subset_div_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_subset_minus_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_subset_minus_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_subset_plus.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_subset_schur.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/broadcast_subset_set.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/clamp.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/convert_type.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/convert_type_cube.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/cross.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/dot.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/dot_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_abs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_acos_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_acos_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_acosh_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_acosh_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_asin_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_asin_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_asinh_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_asinh_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_atan_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_atan_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_atanh_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_atanh_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_ceil_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_ceil_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_cos_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_cos_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_cosh_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_cosh_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_div_scalar_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_div_scalar_post_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_div_scalar_post_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_div_scalar_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_div_scalar_pre_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_div_scalar_pre_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_erf_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_erf_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_erfc_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_erfc_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_exp10_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_exp10_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_exp2_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_exp2_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_exp_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_exp_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_floor_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_floor_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_lgamma_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_lgamma_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_log10_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_log10_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_log2_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_log2_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_log_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_log_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_max_array_cube.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_min_array_cube.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_post_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_post_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_pre_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_pre_post_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_pre_post_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_pre_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_pre_pre_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_minus_scalar_pre_pre_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_mod_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_mul_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_mul_scalar_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_mul_scalar_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_neg_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_neg_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_plus_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_plus_scalar_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_plus_scalar_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_pow_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_pow_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_round_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_round_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sign_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sign_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sin_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sin_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sinc_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sinc_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sinh_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sinh_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sqrt_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_sqrt_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_square_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_square_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_tan_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_tan_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_tanh_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_tanh_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_trunc_exp_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_trunc_exp_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_trunc_log_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_trunc_log_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_trunc_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/equ_array_trunc_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/extract_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/extract_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/htrans.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_div_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_div_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_eq_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_eq_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_minus_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_minus_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_mul_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_mul_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_plus_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve1_plus_sve1.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_div_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_div_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_eq_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_eq_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_minus_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_minus_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_mul_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_mul_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_plus_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/inplace_sve2_plus_sve2.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/max_colwise_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/max_colwise_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/max_cube_col_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/max_cube_col_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/max_rowwise_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/max_rowwise_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/mean_colwise_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/mean_colwise_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/mean_rowwise_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/mean_rowwise_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/min_colwise_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/min_colwise_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/min_cube_col_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/min_cube_col_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/min_rowwise_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/min_rowwise_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_all_neq.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_all_neq_colwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_all_neq_rowwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_all_neq_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_and_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_any_neq.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_any_neq_colwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_any_neq_rowwise.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_any_neq_small.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_eq_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_eq_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_gt_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_gt_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_gteq_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_gteq_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_lt_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_lt_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_lteq_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_lteq_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_neq_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_neq_scalar.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/rel_or_array.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/replace.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/strans.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/sum_colwise_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/sum_colwise_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/sum_rowwise_conv_post.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/sum_rowwise_conv_pre.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/symmatl.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/twoway/symmatu.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/cuda/zeroway/shuffle_large_compute_locs.cu (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/d_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/f_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/h_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/opencl_prelims.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/s16_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/s32_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/s64_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/s8_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/u16_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/u32_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/u64_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/defs/u8_defs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/deps/accu_subgroup_reduce.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/deps/and_subgroup_reduce_u32.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/deps/max_subgroup_reduce.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/deps/min_subgroup_reduce.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/deps/or_subgroup_reduce_u32.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/deps/prod_subgroup_reduce.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/deps/var_philox.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/lansy_inf_lower.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/lansy_inf_upper.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/lansy_max_lower.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/lansy_max_upper.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/lascl_full.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/lascl_lower.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/lascl_upper.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/laset_band_lower.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/laset_band_upper.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/laset_full.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/laset_lower.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/laset_upper.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/laswp.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/transpose_inplace_even_magma.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/transpose_inplace_odd_magma.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/magma_real/transpose_magma.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/accu.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/accu_simple.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/accu_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/approx_equal.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/approx_equal_cube.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/approx_equal_cube_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/approx_equal_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/count_nonzeros.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/fill.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/fill_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/fill_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/find.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/find_first.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/find_last.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_max.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_max_colwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_max_cube_col.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_max_rowwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_max_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_min.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_min_colwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_min_cube_col.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_min_rowwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/index_min_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/inplace_philox_randn.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/inplace_set_eye.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/inplace_xorwow32_randi.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/inplace_xorwow32_randu.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/inplace_xorwow64_randi.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/inplace_xorwow64_randu.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/linspace.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/logspace.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/ltri_set_zero.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/max.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/max_abs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/max_abs_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/max_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/min.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/min_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/mul_colwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/mul_colwise_trans.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/mul_rowwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/mul_rowwise_trans.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/prod.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/prod_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_asc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_colwise_asc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_colwise_desc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_desc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_index_asc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_index_desc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_index_multi_wg_shuffle.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_multi_wg_bit_count.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_multi_wg_shuffle.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_rowwise_asc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/radix_sort_rowwise_desc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/regspace_desc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/reorder_cols.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/rotate_180.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/shifted_prefix_sum_add_offset.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/shifted_prefix_sum_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/shifted_prefix_sum_subgroups.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/shuffle.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/shuffle_large.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/stable_radix_sort_index_asc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/stable_radix_sort_index_desc.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/submat_var.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/submat_var_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/symmatl_inplace.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/symmatu_inplace.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/trace.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/var.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/var_colwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/var_rowwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway/var_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_integral/and_reduce.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_integral/and_reduce_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_integral/ipiv_det.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_integral/ipiv_det_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_integral/or_reduce.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_integral/or_reduce_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/diag_prod.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/diag_prod_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/extract_cx.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/lu_extract_l.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/lu_extract_p.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/lu_extract_pivoted_l.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_any_inf.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_any_inf_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_any_nan.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_any_nan_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_any_nonfinite.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_any_nonfinite_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_isfinite.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_isnan.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/rel_isnonfinite.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_1_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_2_robust.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_2_robust_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_2_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_k.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_k_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_min.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/oneway_real/vec_norm_min_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_atan2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_div_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_div_array_cube.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_hypot.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_max_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_min_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_minus_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_minus_array_cube.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_mul_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_mul_array_cube.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_plus_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/threeway/equ_array_plus_array_cube.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_div_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_div_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_minus_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_minus_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_plus.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_schur.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_set.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_subset_div_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_subset_div_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_subset_minus_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_subset_minus_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_subset_plus.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_subset_schur.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/broadcast_subset_set.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/clamp.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/convert_type.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/convert_type_cube.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/cross.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/dot.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/dot_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_abs.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_acos_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_acos_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_acosh_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_acosh_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_asin_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_asin_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_asinh_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_asinh_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_atan_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_atan_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_atanh_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_atanh_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_ceil_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_ceil_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_cos_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_cos_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_cosh_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_cosh_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_div_scalar_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_div_scalar_post_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_div_scalar_post_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_div_scalar_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_div_scalar_pre_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_div_scalar_pre_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_erf_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_erf_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_erfc_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_erfc_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_exp10_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_exp10_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_exp2_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_exp2_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_exp_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_exp_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_floor_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_floor_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_lgamma_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_lgamma_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_log10_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_log10_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_log2_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_log2_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_log_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_log_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_max_array_cube.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_min_array_cube.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_post_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_post_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_pre_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_pre_post_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_pre_post_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_pre_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_pre_pre_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_minus_scalar_pre_pre_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_mod_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_mul_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_mul_scalar_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_mul_scalar_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_neg_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_neg_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_plus_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_plus_scalar_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_plus_scalar_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_pow_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_pow_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_round_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_round_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sign_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sign_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sin_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sin_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sinc_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sinc_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sinh_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sinh_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sqrt_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_sqrt_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_square_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_square_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_tan_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_tan_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_tanh_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_tanh_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_trunc_exp_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_trunc_exp_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_trunc_log_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_trunc_log_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_trunc_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/equ_array_trunc_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/extract_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/extract_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/htrans.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_div_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_div_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_eq_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_eq_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_minus_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_minus_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_mul_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_mul_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_plus_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve1_plus_sve1.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_div_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_div_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_eq_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_eq_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_minus_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_minus_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_mul_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_mul_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_plus_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/inplace_sve2_plus_sve2.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/max_colwise_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/max_colwise_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/max_cube_col_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/max_cube_col_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/max_rowwise_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/max_rowwise_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/mean_colwise_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/mean_colwise_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/mean_rowwise_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/mean_rowwise_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/min_colwise_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/min_colwise_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/min_cube_col_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/min_cube_col_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/min_rowwise_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/min_rowwise_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_all_neq.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_all_neq_colwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_all_neq_rowwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_all_neq_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_and_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_any_neq.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_any_neq_colwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_any_neq_rowwise.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_any_neq_small.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_eq_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_eq_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_gt_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_gt_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_gteq_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_gteq_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_lt_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_lt_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_lteq_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_lteq_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_neq_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_neq_scalar.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/rel_or_array.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/replace.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/repmat.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/strans.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/sum_colwise_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/sum_colwise_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/sum_rowwise_conv_post.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/sum_rowwise_conv_pre.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/symmatl.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/twoway/symmatu.cl (100%) rename inst/include/bandicoot_bits/{kernels => ks}/opencl/zeroway/shuffle_large_compute_locs.cl (100%) diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/cuda_prelims.cu b/inst/include/bandicoot_bits/ks/cuda/defs/cuda_prelims.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/cuda_prelims.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/cuda_prelims.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/d_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/d_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/d_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/d_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/f_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/f_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/f_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/f_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/h_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/h_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/h_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/h_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/s16_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/s16_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/s16_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/s16_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/s32_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/s32_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/s32_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/s32_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/s64_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/s64_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/s64_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/s64_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/s8_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/s8_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/s8_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/s8_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/u16_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/u16_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/u16_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/u16_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/u32_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/u32_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/u32_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/u32_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/u64_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/u64_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/u64_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/u64_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/defs/u8_defs.cu b/inst/include/bandicoot_bits/ks/cuda/defs/u8_defs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/defs/u8_defs.cu rename to inst/include/bandicoot_bits/ks/cuda/defs/u8_defs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/deps/accu_subgroup_reduce.cu b/inst/include/bandicoot_bits/ks/cuda/deps/accu_subgroup_reduce.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/deps/accu_subgroup_reduce.cu rename to inst/include/bandicoot_bits/ks/cuda/deps/accu_subgroup_reduce.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/deps/and_subgroup_reduce_u32.cu b/inst/include/bandicoot_bits/ks/cuda/deps/and_subgroup_reduce_u32.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/deps/and_subgroup_reduce_u32.cu rename to inst/include/bandicoot_bits/ks/cuda/deps/and_subgroup_reduce_u32.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/deps/max_subgroup_reduce.cu b/inst/include/bandicoot_bits/ks/cuda/deps/max_subgroup_reduce.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/deps/max_subgroup_reduce.cu rename to inst/include/bandicoot_bits/ks/cuda/deps/max_subgroup_reduce.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/deps/min_subgroup_reduce.cu b/inst/include/bandicoot_bits/ks/cuda/deps/min_subgroup_reduce.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/deps/min_subgroup_reduce.cu rename to inst/include/bandicoot_bits/ks/cuda/deps/min_subgroup_reduce.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/deps/or_subgroup_reduce_u32.cu b/inst/include/bandicoot_bits/ks/cuda/deps/or_subgroup_reduce_u32.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/deps/or_subgroup_reduce_u32.cu rename to inst/include/bandicoot_bits/ks/cuda/deps/or_subgroup_reduce_u32.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/deps/prod_subgroup_reduce.cu b/inst/include/bandicoot_bits/ks/cuda/deps/prod_subgroup_reduce.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/deps/prod_subgroup_reduce.cu rename to inst/include/bandicoot_bits/ks/cuda/deps/prod_subgroup_reduce.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/deps/var_philox.cu b/inst/include/bandicoot_bits/ks/cuda/deps/var_philox.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/deps/var_philox.cu rename to inst/include/bandicoot_bits/ks/cuda/deps/var_philox.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/accu.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/accu.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/accu.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/accu.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/accu_simple.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/accu_simple.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/accu_simple.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/accu_simple.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/accu_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/accu_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/accu_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/accu_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/approx_equal.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/approx_equal.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/approx_equal.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/approx_equal.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/approx_equal_cube.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/approx_equal_cube.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/approx_equal_cube.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/approx_equal_cube.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/approx_equal_cube_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/approx_equal_cube_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/approx_equal_cube_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/approx_equal_cube_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/approx_equal_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/approx_equal_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/approx_equal_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/approx_equal_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/count_nonzeros.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/count_nonzeros.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/count_nonzeros.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/count_nonzeros.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/fill.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/fill.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/fill.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/fill.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/fill_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/fill_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/fill_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/fill_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/fill_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/fill_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/fill_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/fill_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/find.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/find.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/find.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/find.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/find_first.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/find_first.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/find_first.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/find_first.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/find_last.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/find_last.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/find_last.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/find_last.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_max.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_max.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_max.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_max.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_max_colwise.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_max_colwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_max_colwise.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_max_colwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_max_cube_col.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_max_cube_col.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_max_cube_col.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_max_cube_col.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_max_rowwise.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_max_rowwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_max_rowwise.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_max_rowwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_max_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_max_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_max_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_max_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_min.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_min.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_min.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_min.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_min_colwise.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_min_colwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_min_colwise.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_min_colwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_min_cube_col.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_min_cube_col.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_min_cube_col.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_min_cube_col.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_min_rowwise.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_min_rowwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_min_rowwise.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_min_rowwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/index_min_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/index_min_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/index_min_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/index_min_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_philox_randn.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/inplace_philox_randn.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_philox_randn.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/inplace_philox_randn.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_set_eye.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/inplace_set_eye.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_set_eye.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/inplace_set_eye.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_xorwow32_randi.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/inplace_xorwow32_randi.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_xorwow32_randi.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/inplace_xorwow32_randi.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_xorwow32_randu.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/inplace_xorwow32_randu.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_xorwow32_randu.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/inplace_xorwow32_randu.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_xorwow64_randi.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/inplace_xorwow64_randi.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_xorwow64_randi.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/inplace_xorwow64_randi.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_xorwow64_randu.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/inplace_xorwow64_randu.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/inplace_xorwow64_randu.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/inplace_xorwow64_randu.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/linspace.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/linspace.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/linspace.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/linspace.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/logspace.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/logspace.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/logspace.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/logspace.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/ltri_set_zero.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/ltri_set_zero.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/ltri_set_zero.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/ltri_set_zero.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/max.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/max.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/max.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/max.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/max_abs.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/max_abs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/max_abs.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/max_abs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/max_abs_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/max_abs_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/max_abs_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/max_abs_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/max_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/max_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/max_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/max_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/min.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/min.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/min.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/min.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/min_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/min_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/min_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/min_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/mul_colwise.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/mul_colwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/mul_colwise.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/mul_colwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/mul_colwise_trans.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/mul_colwise_trans.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/mul_colwise_trans.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/mul_colwise_trans.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/mul_rowwise.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/mul_rowwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/mul_rowwise.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/mul_rowwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/mul_rowwise_trans.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/mul_rowwise_trans.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/mul_rowwise_trans.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/mul_rowwise_trans.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/prod.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/prod.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/prod.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/prod.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/prod_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/prod_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/prod_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/prod_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_asc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_asc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_asc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_asc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_colwise_asc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_colwise_asc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_colwise_asc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_colwise_asc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_colwise_desc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_colwise_desc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_colwise_desc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_colwise_desc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_desc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_desc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_desc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_desc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_index_asc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_index_asc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_index_asc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_index_asc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_index_desc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_index_desc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_index_desc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_index_desc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_index_multi_wg_shuffle.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_index_multi_wg_shuffle.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_index_multi_wg_shuffle.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_index_multi_wg_shuffle.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_multi_wg_bit_count.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_multi_wg_bit_count.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_multi_wg_bit_count.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_multi_wg_bit_count.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_multi_wg_shuffle.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_multi_wg_shuffle.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_multi_wg_shuffle.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_multi_wg_shuffle.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_rowwise_asc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_rowwise_asc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_rowwise_asc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_rowwise_asc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_rowwise_desc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_rowwise_desc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/radix_sort_rowwise_desc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/radix_sort_rowwise_desc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/regspace_desc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/regspace_desc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/regspace_desc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/regspace_desc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/reorder_cols.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/reorder_cols.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/reorder_cols.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/reorder_cols.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/rotate_180.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/rotate_180.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/rotate_180.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/rotate_180.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/shifted_prefix_sum_add_offset.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/shifted_prefix_sum_add_offset.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/shifted_prefix_sum_add_offset.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/shifted_prefix_sum_add_offset.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/shifted_prefix_sum_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/shifted_prefix_sum_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/shifted_prefix_sum_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/shifted_prefix_sum_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/shifted_prefix_sum_subgroups.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/shifted_prefix_sum_subgroups.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/shifted_prefix_sum_subgroups.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/shifted_prefix_sum_subgroups.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/shuffle.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/shuffle.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/shuffle.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/shuffle.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/shuffle_large.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/shuffle_large.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/shuffle_large.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/shuffle_large.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/stable_radix_sort_index_asc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/stable_radix_sort_index_asc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/stable_radix_sort_index_asc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/stable_radix_sort_index_asc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/stable_radix_sort_index_desc.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/stable_radix_sort_index_desc.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/stable_radix_sort_index_desc.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/stable_radix_sort_index_desc.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/submat_var.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/submat_var.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/submat_var.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/submat_var.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/submat_var_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/submat_var_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/submat_var_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/submat_var_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/symmatl_inplace.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/symmatl_inplace.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/symmatl_inplace.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/symmatl_inplace.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/symmatu_inplace.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/symmatu_inplace.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/symmatu_inplace.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/symmatu_inplace.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/trace.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/trace.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/trace.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/trace.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/var.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/var.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/var.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/var.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/var_colwise.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/var_colwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/var_colwise.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/var_colwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/var_rowwise.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/var_rowwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/var_rowwise.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/var_rowwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway/var_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway/var_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway/var_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway/var_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_integral/and_reduce.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_integral/and_reduce.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_integral/and_reduce.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_integral/and_reduce.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_integral/and_reduce_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_integral/and_reduce_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_integral/and_reduce_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_integral/and_reduce_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_integral/ipiv_det.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_integral/ipiv_det.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_integral/ipiv_det.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_integral/ipiv_det.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_integral/ipiv_det_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_integral/ipiv_det_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_integral/ipiv_det_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_integral/ipiv_det_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_integral/or_reduce.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_integral/or_reduce.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_integral/or_reduce.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_integral/or_reduce.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_integral/or_reduce_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_integral/or_reduce_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_integral/or_reduce_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_integral/or_reduce_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/diag_prod.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/diag_prod.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/diag_prod.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/diag_prod.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/diag_prod_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/diag_prod_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/diag_prod_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/diag_prod_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/extract_cx.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/extract_cx.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/extract_cx.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/extract_cx.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/lu_extract_l.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/lu_extract_l.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/lu_extract_l.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/lu_extract_l.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/lu_extract_p.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/lu_extract_p.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/lu_extract_p.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/lu_extract_p.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/lu_extract_pivoted_l.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/lu_extract_pivoted_l.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/lu_extract_pivoted_l.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/lu_extract_pivoted_l.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_inf.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_inf.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_inf.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_inf.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_inf_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_inf_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_inf_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_inf_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_nan.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_nan.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_nan.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_nan.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_nan_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_nan_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_nan_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_nan_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_nonfinite.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_nonfinite.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_nonfinite.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_nonfinite.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_nonfinite_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_nonfinite_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_any_nonfinite_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_any_nonfinite_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_isfinite.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_isfinite.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_isfinite.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_isfinite.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_isnan.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_isnan.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_isnan.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_isnan.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_isnonfinite.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_isnonfinite.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/rel_isnonfinite.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/rel_isnonfinite.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_1.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_1.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_1_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_1_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_1_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_1_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_2.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_2.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_2_robust.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_2_robust.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_2_robust.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_2_robust.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_2_robust_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_2_robust_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_2_robust_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_2_robust_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_2_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_2_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_2_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_2_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_k.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_k.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_k.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_k.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_k_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_k_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_k_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_k_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_min.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_min.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_min.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_min.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_min_small.cu b/inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_min_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/oneway_real/vec_norm_min_small.cu rename to inst/include/bandicoot_bits/ks/cuda/oneway_real/vec_norm_min_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_atan2.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_atan2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_atan2.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_atan2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_div_array.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_div_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_div_array.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_div_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_div_array_cube.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_div_array_cube.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_div_array_cube.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_div_array_cube.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_hypot.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_hypot.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_hypot.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_hypot.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_max_array.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_max_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_max_array.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_max_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_min_array.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_min_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_min_array.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_min_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_minus_array.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_minus_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_minus_array.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_minus_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_minus_array_cube.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_minus_array_cube.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_minus_array_cube.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_minus_array_cube.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_mul_array.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_mul_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_mul_array.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_mul_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_mul_array_cube.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_mul_array_cube.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_mul_array_cube.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_mul_array_cube.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_plus_array.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_plus_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_plus_array.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_plus_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_plus_array_cube.cu b/inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_plus_array_cube.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/threeway/equ_array_plus_array_cube.cu rename to inst/include/bandicoot_bits/ks/cuda/threeway/equ_array_plus_array_cube.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_div_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_div_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_div_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_div_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_div_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_div_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_div_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_div_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_minus_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_minus_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_minus_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_minus_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_minus_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_minus_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_minus_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_minus_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_plus.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_plus.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_plus.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_plus.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_schur.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_schur.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_schur.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_schur.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_set.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_set.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_set.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_set.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_div_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_div_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_div_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_div_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_div_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_div_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_div_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_div_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_minus_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_minus_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_minus_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_minus_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_minus_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_minus_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_minus_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_minus_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_plus.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_plus.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_plus.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_plus.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_schur.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_schur.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_schur.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_schur.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_set.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_set.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/broadcast_subset_set.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/broadcast_subset_set.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/clamp.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/clamp.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/clamp.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/clamp.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/convert_type.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/convert_type.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/convert_type.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/convert_type.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/convert_type_cube.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/convert_type_cube.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/convert_type_cube.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/convert_type_cube.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/cross.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/cross.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/cross.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/cross.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/dot.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/dot.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/dot.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/dot.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/dot_small.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/dot_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/dot_small.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/dot_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_abs.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_abs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_abs.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_abs.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_acos_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_acos_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_acos_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_acos_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_acos_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_acos_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_acos_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_acos_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_acosh_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_acosh_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_acosh_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_acosh_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_acosh_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_acosh_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_acosh_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_acosh_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_asin_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_asin_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_asin_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_asin_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_asin_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_asin_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_asin_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_asin_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_asinh_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_asinh_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_asinh_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_asinh_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_asinh_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_asinh_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_asinh_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_asinh_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_atan_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_atan_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_atan_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_atan_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_atan_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_atan_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_atan_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_atan_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_atanh_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_atanh_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_atanh_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_atanh_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_atanh_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_atanh_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_atanh_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_atanh_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_ceil_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_ceil_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_ceil_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_ceil_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_ceil_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_ceil_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_ceil_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_ceil_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_cos_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_cos_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_cos_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_cos_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_cos_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_cos_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_cos_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_cos_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_cosh_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_cosh_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_cosh_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_cosh_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_cosh_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_cosh_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_cosh_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_cosh_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_post_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_post_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_post_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_post_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_post_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_post_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_post_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_post_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_pre_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_pre_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_pre_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_pre_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_pre_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_pre_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_div_scalar_pre_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_div_scalar_pre_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_erf_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_erf_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_erf_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_erf_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_erf_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_erf_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_erf_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_erf_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_erfc_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_erfc_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_erfc_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_erfc_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_erfc_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_erfc_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_erfc_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_erfc_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp10_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp10_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp10_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp10_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp10_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp10_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp10_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp10_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp2_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp2_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp2_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp2_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp2_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp2_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp2_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp2_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_exp_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_exp_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_floor_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_floor_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_floor_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_floor_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_floor_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_floor_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_floor_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_floor_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_lgamma_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_lgamma_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_lgamma_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_lgamma_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_lgamma_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_lgamma_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_lgamma_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_lgamma_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log10_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log10_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log10_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log10_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log10_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log10_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log10_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log10_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log2_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log2_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log2_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log2_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log2_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log2_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log2_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log2_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_log_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_log_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_max_array_cube.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_max_array_cube.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_max_array_cube.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_max_array_cube.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_min_array_cube.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_min_array_cube.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_min_array_cube.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_min_array_cube.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_post_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_post_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_post_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_post_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_post_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_post_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_post_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_post_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_post_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_post_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_post_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_post_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_post_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_post_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_post_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_post_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_pre_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_pre_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_pre_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_pre_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_pre_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_pre_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_minus_scalar_pre_pre_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_minus_scalar_pre_pre_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_mod_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_mod_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_mod_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_mod_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_mul_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_mul_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_mul_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_mul_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_mul_scalar_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_mul_scalar_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_mul_scalar_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_mul_scalar_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_mul_scalar_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_mul_scalar_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_mul_scalar_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_mul_scalar_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_neg_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_neg_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_neg_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_neg_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_neg_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_neg_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_neg_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_neg_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_plus_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_plus_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_plus_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_plus_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_plus_scalar_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_plus_scalar_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_plus_scalar_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_plus_scalar_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_plus_scalar_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_plus_scalar_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_plus_scalar_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_plus_scalar_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_pow_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_pow_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_pow_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_pow_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_pow_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_pow_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_pow_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_pow_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_round_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_round_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_round_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_round_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_round_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_round_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_round_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_round_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sign_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sign_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sign_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sign_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sign_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sign_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sign_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sign_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sin_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sin_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sin_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sin_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sin_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sin_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sin_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sin_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sinc_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sinc_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sinc_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sinc_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sinc_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sinc_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sinc_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sinc_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sinh_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sinh_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sinh_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sinh_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sinh_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sinh_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sinh_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sinh_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sqrt_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sqrt_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sqrt_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sqrt_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sqrt_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sqrt_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_sqrt_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_sqrt_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_square_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_square_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_square_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_square_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_square_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_square_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_square_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_square_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_tan_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_tan_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_tan_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_tan_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_tan_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_tan_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_tan_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_tan_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_tanh_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_tanh_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_tanh_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_tanh_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_tanh_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_tanh_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_tanh_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_tanh_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_exp_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_exp_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_exp_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_exp_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_exp_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_exp_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_exp_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_exp_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_log_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_log_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_log_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_log_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_log_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_log_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_log_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_log_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/equ_array_trunc_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/equ_array_trunc_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/extract_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/extract_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/extract_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/extract_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/extract_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/extract_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/extract_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/extract_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/htrans.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/htrans.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/htrans.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/htrans.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_div_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_div_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_div_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_div_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_div_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_div_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_div_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_div_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_eq_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_eq_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_eq_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_eq_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_eq_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_eq_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_eq_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_eq_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_minus_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_minus_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_minus_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_minus_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_minus_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_minus_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_minus_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_minus_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_mul_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_mul_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_mul_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_mul_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_mul_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_mul_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_mul_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_mul_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_plus_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_plus_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_plus_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_plus_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_plus_sve1.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_plus_sve1.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve1_plus_sve1.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve1_plus_sve1.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_div_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_div_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_div_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_div_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_div_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_div_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_div_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_div_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_eq_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_eq_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_eq_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_eq_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_eq_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_eq_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_eq_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_eq_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_minus_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_minus_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_minus_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_minus_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_minus_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_minus_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_minus_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_minus_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_mul_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_mul_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_mul_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_mul_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_mul_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_mul_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_mul_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_mul_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_plus_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_plus_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_plus_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_plus_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_plus_sve2.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_plus_sve2.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/inplace_sve2_plus_sve2.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/inplace_sve2_plus_sve2.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/max_colwise_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/max_colwise_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/max_colwise_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/max_colwise_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/max_colwise_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/max_colwise_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/max_colwise_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/max_colwise_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/max_cube_col_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/max_cube_col_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/max_cube_col_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/max_cube_col_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/max_cube_col_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/max_cube_col_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/max_cube_col_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/max_cube_col_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/max_rowwise_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/max_rowwise_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/max_rowwise_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/max_rowwise_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/max_rowwise_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/max_rowwise_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/max_rowwise_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/max_rowwise_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/mean_colwise_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/mean_colwise_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/mean_colwise_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/mean_colwise_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/mean_colwise_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/mean_colwise_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/mean_colwise_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/mean_colwise_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/mean_rowwise_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/mean_rowwise_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/mean_rowwise_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/mean_rowwise_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/mean_rowwise_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/mean_rowwise_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/mean_rowwise_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/mean_rowwise_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/min_colwise_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/min_colwise_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/min_colwise_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/min_colwise_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/min_colwise_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/min_colwise_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/min_colwise_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/min_colwise_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/min_cube_col_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/min_cube_col_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/min_cube_col_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/min_cube_col_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/min_cube_col_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/min_cube_col_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/min_cube_col_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/min_cube_col_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/min_rowwise_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/min_rowwise_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/min_rowwise_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/min_rowwise_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/min_rowwise_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/min_rowwise_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/min_rowwise_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/min_rowwise_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_all_neq.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_all_neq.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_all_neq.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_all_neq.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_all_neq_colwise.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_all_neq_colwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_all_neq_colwise.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_all_neq_colwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_all_neq_rowwise.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_all_neq_rowwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_all_neq_rowwise.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_all_neq_rowwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_all_neq_small.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_all_neq_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_all_neq_small.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_all_neq_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_and_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_and_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_and_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_and_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_any_neq.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_any_neq.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_any_neq.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_any_neq.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_any_neq_colwise.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_any_neq_colwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_any_neq_colwise.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_any_neq_colwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_any_neq_rowwise.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_any_neq_rowwise.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_any_neq_rowwise.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_any_neq_rowwise.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_any_neq_small.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_any_neq_small.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_any_neq_small.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_any_neq_small.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_eq_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_eq_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_eq_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_eq_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_eq_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_eq_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_eq_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_eq_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_gt_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_gt_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_gt_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_gt_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_gt_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_gt_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_gt_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_gt_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_gteq_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_gteq_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_gteq_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_gteq_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_gteq_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_gteq_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_gteq_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_gteq_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_lt_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_lt_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_lt_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_lt_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_lt_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_lt_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_lt_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_lt_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_lteq_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_lteq_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_lteq_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_lteq_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_lteq_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_lteq_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_lteq_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_lteq_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_neq_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_neq_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_neq_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_neq_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_neq_scalar.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_neq_scalar.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_neq_scalar.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_neq_scalar.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/rel_or_array.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/rel_or_array.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/rel_or_array.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/rel_or_array.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/replace.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/replace.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/replace.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/replace.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/strans.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/strans.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/strans.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/strans.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/sum_colwise_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/sum_colwise_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/sum_colwise_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/sum_colwise_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/sum_colwise_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/sum_colwise_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/sum_colwise_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/sum_colwise_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/sum_rowwise_conv_post.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/sum_rowwise_conv_post.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/sum_rowwise_conv_post.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/sum_rowwise_conv_post.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/sum_rowwise_conv_pre.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/sum_rowwise_conv_pre.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/sum_rowwise_conv_pre.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/sum_rowwise_conv_pre.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/symmatl.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/symmatl.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/symmatl.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/symmatl.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/twoway/symmatu.cu b/inst/include/bandicoot_bits/ks/cuda/twoway/symmatu.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/twoway/symmatu.cu rename to inst/include/bandicoot_bits/ks/cuda/twoway/symmatu.cu diff --git a/inst/include/bandicoot_bits/kernels/cuda/zeroway/shuffle_large_compute_locs.cu b/inst/include/bandicoot_bits/ks/cuda/zeroway/shuffle_large_compute_locs.cu similarity index 100% rename from inst/include/bandicoot_bits/kernels/cuda/zeroway/shuffle_large_compute_locs.cu rename to inst/include/bandicoot_bits/ks/cuda/zeroway/shuffle_large_compute_locs.cu diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/d_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/d_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/d_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/d_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/f_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/f_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/f_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/f_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/h_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/h_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/h_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/h_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/opencl_prelims.cl b/inst/include/bandicoot_bits/ks/opencl/defs/opencl_prelims.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/opencl_prelims.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/opencl_prelims.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/s16_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/s16_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/s16_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/s16_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/s32_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/s32_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/s32_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/s32_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/s64_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/s64_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/s64_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/s64_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/s8_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/s8_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/s8_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/s8_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/u16_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/u16_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/u16_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/u16_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/u32_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/u32_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/u32_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/u32_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/u64_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/u64_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/u64_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/u64_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/defs/u8_defs.cl b/inst/include/bandicoot_bits/ks/opencl/defs/u8_defs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/defs/u8_defs.cl rename to inst/include/bandicoot_bits/ks/opencl/defs/u8_defs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/deps/accu_subgroup_reduce.cl b/inst/include/bandicoot_bits/ks/opencl/deps/accu_subgroup_reduce.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/deps/accu_subgroup_reduce.cl rename to inst/include/bandicoot_bits/ks/opencl/deps/accu_subgroup_reduce.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/deps/and_subgroup_reduce_u32.cl b/inst/include/bandicoot_bits/ks/opencl/deps/and_subgroup_reduce_u32.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/deps/and_subgroup_reduce_u32.cl rename to inst/include/bandicoot_bits/ks/opencl/deps/and_subgroup_reduce_u32.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/deps/max_subgroup_reduce.cl b/inst/include/bandicoot_bits/ks/opencl/deps/max_subgroup_reduce.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/deps/max_subgroup_reduce.cl rename to inst/include/bandicoot_bits/ks/opencl/deps/max_subgroup_reduce.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/deps/min_subgroup_reduce.cl b/inst/include/bandicoot_bits/ks/opencl/deps/min_subgroup_reduce.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/deps/min_subgroup_reduce.cl rename to inst/include/bandicoot_bits/ks/opencl/deps/min_subgroup_reduce.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/deps/or_subgroup_reduce_u32.cl b/inst/include/bandicoot_bits/ks/opencl/deps/or_subgroup_reduce_u32.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/deps/or_subgroup_reduce_u32.cl rename to inst/include/bandicoot_bits/ks/opencl/deps/or_subgroup_reduce_u32.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/deps/prod_subgroup_reduce.cl b/inst/include/bandicoot_bits/ks/opencl/deps/prod_subgroup_reduce.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/deps/prod_subgroup_reduce.cl rename to inst/include/bandicoot_bits/ks/opencl/deps/prod_subgroup_reduce.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/deps/var_philox.cl b/inst/include/bandicoot_bits/ks/opencl/deps/var_philox.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/deps/var_philox.cl rename to inst/include/bandicoot_bits/ks/opencl/deps/var_philox.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/lansy_inf_lower.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/lansy_inf_lower.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/lansy_inf_lower.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/lansy_inf_lower.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/lansy_inf_upper.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/lansy_inf_upper.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/lansy_inf_upper.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/lansy_inf_upper.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/lansy_max_lower.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/lansy_max_lower.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/lansy_max_lower.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/lansy_max_lower.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/lansy_max_upper.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/lansy_max_upper.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/lansy_max_upper.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/lansy_max_upper.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/lascl_full.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/lascl_full.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/lascl_full.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/lascl_full.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/lascl_lower.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/lascl_lower.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/lascl_lower.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/lascl_lower.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/lascl_upper.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/lascl_upper.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/lascl_upper.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/lascl_upper.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_band_lower.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/laset_band_lower.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_band_lower.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/laset_band_lower.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_band_upper.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/laset_band_upper.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_band_upper.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/laset_band_upper.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_full.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/laset_full.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_full.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/laset_full.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_lower.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/laset_lower.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_lower.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/laset_lower.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_upper.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/laset_upper.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/laset_upper.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/laset_upper.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/laswp.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/laswp.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/laswp.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/laswp.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/transpose_inplace_even_magma.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/transpose_inplace_even_magma.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/transpose_inplace_even_magma.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/transpose_inplace_even_magma.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/transpose_inplace_odd_magma.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/transpose_inplace_odd_magma.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/transpose_inplace_odd_magma.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/transpose_inplace_odd_magma.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/magma_real/transpose_magma.cl b/inst/include/bandicoot_bits/ks/opencl/magma_real/transpose_magma.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/magma_real/transpose_magma.cl rename to inst/include/bandicoot_bits/ks/opencl/magma_real/transpose_magma.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/accu.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/accu.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/accu.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/accu.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/accu_simple.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/accu_simple.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/accu_simple.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/accu_simple.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/accu_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/accu_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/accu_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/accu_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/approx_equal.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/approx_equal.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/approx_equal.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/approx_equal.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/approx_equal_cube.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/approx_equal_cube.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/approx_equal_cube.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/approx_equal_cube.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/approx_equal_cube_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/approx_equal_cube_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/approx_equal_cube_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/approx_equal_cube_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/approx_equal_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/approx_equal_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/approx_equal_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/approx_equal_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/count_nonzeros.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/count_nonzeros.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/count_nonzeros.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/count_nonzeros.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/fill.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/fill.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/fill.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/fill.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/fill_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/fill_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/fill_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/fill_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/fill_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/fill_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/fill_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/fill_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/find.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/find.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/find.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/find.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/find_first.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/find_first.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/find_first.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/find_first.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/find_last.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/find_last.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/find_last.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/find_last.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_max.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_max.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_max.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_max.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_max_colwise.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_max_colwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_max_colwise.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_max_colwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_max_cube_col.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_max_cube_col.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_max_cube_col.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_max_cube_col.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_max_rowwise.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_max_rowwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_max_rowwise.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_max_rowwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_max_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_max_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_max_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_max_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_min.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_min.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_min.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_min.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_min_colwise.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_min_colwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_min_colwise.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_min_colwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_min_cube_col.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_min_cube_col.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_min_cube_col.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_min_cube_col.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_min_rowwise.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_min_rowwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_min_rowwise.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_min_rowwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/index_min_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/index_min_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/index_min_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/index_min_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_philox_randn.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/inplace_philox_randn.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_philox_randn.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/inplace_philox_randn.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_set_eye.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/inplace_set_eye.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_set_eye.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/inplace_set_eye.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_xorwow32_randi.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/inplace_xorwow32_randi.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_xorwow32_randi.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/inplace_xorwow32_randi.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_xorwow32_randu.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/inplace_xorwow32_randu.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_xorwow32_randu.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/inplace_xorwow32_randu.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_xorwow64_randi.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/inplace_xorwow64_randi.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_xorwow64_randi.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/inplace_xorwow64_randi.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_xorwow64_randu.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/inplace_xorwow64_randu.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/inplace_xorwow64_randu.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/inplace_xorwow64_randu.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/linspace.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/linspace.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/linspace.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/linspace.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/logspace.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/logspace.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/logspace.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/logspace.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/ltri_set_zero.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/ltri_set_zero.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/ltri_set_zero.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/ltri_set_zero.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/max.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/max.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/max.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/max.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/max_abs.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/max_abs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/max_abs.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/max_abs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/max_abs_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/max_abs_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/max_abs_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/max_abs_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/max_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/max_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/max_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/max_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/min.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/min.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/min.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/min.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/min_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/min_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/min_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/min_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/mul_colwise.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/mul_colwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/mul_colwise.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/mul_colwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/mul_colwise_trans.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/mul_colwise_trans.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/mul_colwise_trans.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/mul_colwise_trans.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/mul_rowwise.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/mul_rowwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/mul_rowwise.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/mul_rowwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/mul_rowwise_trans.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/mul_rowwise_trans.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/mul_rowwise_trans.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/mul_rowwise_trans.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/prod.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/prod.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/prod.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/prod.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/prod_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/prod_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/prod_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/prod_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_asc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_asc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_asc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_asc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_colwise_asc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_colwise_asc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_colwise_asc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_colwise_asc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_colwise_desc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_colwise_desc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_colwise_desc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_colwise_desc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_desc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_desc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_desc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_desc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_index_asc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_index_asc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_index_asc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_index_asc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_index_desc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_index_desc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_index_desc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_index_desc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_index_multi_wg_shuffle.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_index_multi_wg_shuffle.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_index_multi_wg_shuffle.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_index_multi_wg_shuffle.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_multi_wg_bit_count.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_multi_wg_bit_count.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_multi_wg_bit_count.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_multi_wg_bit_count.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_multi_wg_shuffle.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_multi_wg_shuffle.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_multi_wg_shuffle.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_multi_wg_shuffle.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_rowwise_asc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_rowwise_asc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_rowwise_asc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_rowwise_asc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_rowwise_desc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_rowwise_desc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/radix_sort_rowwise_desc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/radix_sort_rowwise_desc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/regspace_desc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/regspace_desc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/regspace_desc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/regspace_desc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/reorder_cols.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/reorder_cols.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/reorder_cols.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/reorder_cols.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/rotate_180.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/rotate_180.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/rotate_180.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/rotate_180.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/shifted_prefix_sum_add_offset.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/shifted_prefix_sum_add_offset.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/shifted_prefix_sum_add_offset.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/shifted_prefix_sum_add_offset.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/shifted_prefix_sum_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/shifted_prefix_sum_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/shifted_prefix_sum_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/shifted_prefix_sum_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/shifted_prefix_sum_subgroups.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/shifted_prefix_sum_subgroups.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/shifted_prefix_sum_subgroups.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/shifted_prefix_sum_subgroups.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/shuffle.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/shuffle.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/shuffle.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/shuffle.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/shuffle_large.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/shuffle_large.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/shuffle_large.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/shuffle_large.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/stable_radix_sort_index_asc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/stable_radix_sort_index_asc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/stable_radix_sort_index_asc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/stable_radix_sort_index_asc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/stable_radix_sort_index_desc.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/stable_radix_sort_index_desc.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/stable_radix_sort_index_desc.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/stable_radix_sort_index_desc.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/submat_var.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/submat_var.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/submat_var.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/submat_var.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/submat_var_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/submat_var_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/submat_var_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/submat_var_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/symmatl_inplace.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/symmatl_inplace.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/symmatl_inplace.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/symmatl_inplace.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/symmatu_inplace.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/symmatu_inplace.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/symmatu_inplace.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/symmatu_inplace.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/trace.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/trace.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/trace.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/trace.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/var.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/var.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/var.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/var.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/var_colwise.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/var_colwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/var_colwise.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/var_colwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/var_rowwise.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/var_rowwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/var_rowwise.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/var_rowwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway/var_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway/var_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway/var_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway/var_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_integral/and_reduce.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_integral/and_reduce.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_integral/and_reduce.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_integral/and_reduce.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_integral/and_reduce_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_integral/and_reduce_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_integral/and_reduce_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_integral/and_reduce_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_integral/ipiv_det.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_integral/ipiv_det.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_integral/ipiv_det.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_integral/ipiv_det.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_integral/ipiv_det_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_integral/ipiv_det_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_integral/ipiv_det_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_integral/ipiv_det_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_integral/or_reduce.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_integral/or_reduce.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_integral/or_reduce.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_integral/or_reduce.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_integral/or_reduce_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_integral/or_reduce_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_integral/or_reduce_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_integral/or_reduce_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/diag_prod.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/diag_prod.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/diag_prod.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/diag_prod.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/diag_prod_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/diag_prod_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/diag_prod_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/diag_prod_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/extract_cx.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/extract_cx.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/extract_cx.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/extract_cx.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/lu_extract_l.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/lu_extract_l.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/lu_extract_l.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/lu_extract_l.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/lu_extract_p.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/lu_extract_p.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/lu_extract_p.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/lu_extract_p.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/lu_extract_pivoted_l.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/lu_extract_pivoted_l.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/lu_extract_pivoted_l.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/lu_extract_pivoted_l.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_inf.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_inf.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_inf.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_inf.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_inf_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_inf_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_inf_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_inf_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_nan.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_nan.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_nan.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_nan.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_nan_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_nan_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_nan_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_nan_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_nonfinite.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_nonfinite.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_nonfinite.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_nonfinite.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_nonfinite_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_nonfinite_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_any_nonfinite_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_any_nonfinite_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_isfinite.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_isfinite.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_isfinite.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_isfinite.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_isnan.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_isnan.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_isnan.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_isnan.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_isnonfinite.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_isnonfinite.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/rel_isnonfinite.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/rel_isnonfinite.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_1.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_1.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_1_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_1_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_1_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_1_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_2.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_2.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_2_robust.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_2_robust.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_2_robust.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_2_robust.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_2_robust_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_2_robust_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_2_robust_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_2_robust_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_2_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_2_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_2_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_2_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_k.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_k.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_k.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_k.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_k_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_k_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_k_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_k_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_min.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_min.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_min.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_min.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_min_small.cl b/inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_min_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/oneway_real/vec_norm_min_small.cl rename to inst/include/bandicoot_bits/ks/opencl/oneway_real/vec_norm_min_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_atan2.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_atan2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_atan2.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_atan2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_div_array.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_div_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_div_array.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_div_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_div_array_cube.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_div_array_cube.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_div_array_cube.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_div_array_cube.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_hypot.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_hypot.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_hypot.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_hypot.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_max_array.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_max_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_max_array.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_max_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_min_array.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_min_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_min_array.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_min_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_minus_array.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_minus_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_minus_array.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_minus_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_minus_array_cube.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_minus_array_cube.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_minus_array_cube.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_minus_array_cube.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_mul_array.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_mul_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_mul_array.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_mul_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_mul_array_cube.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_mul_array_cube.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_mul_array_cube.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_mul_array_cube.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_plus_array.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_plus_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_plus_array.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_plus_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_plus_array_cube.cl b/inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_plus_array_cube.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/threeway/equ_array_plus_array_cube.cl rename to inst/include/bandicoot_bits/ks/opencl/threeway/equ_array_plus_array_cube.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_div_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_div_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_div_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_div_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_div_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_div_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_div_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_div_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_minus_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_minus_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_minus_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_minus_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_minus_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_minus_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_minus_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_minus_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_plus.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_plus.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_plus.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_plus.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_schur.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_schur.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_schur.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_schur.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_set.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_set.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_set.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_set.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_div_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_div_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_div_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_div_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_div_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_div_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_div_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_div_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_minus_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_minus_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_minus_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_minus_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_minus_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_minus_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_minus_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_minus_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_plus.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_plus.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_plus.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_plus.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_schur.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_schur.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_schur.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_schur.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_set.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_set.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/broadcast_subset_set.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/broadcast_subset_set.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/clamp.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/clamp.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/clamp.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/clamp.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/convert_type.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/convert_type.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/convert_type.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/convert_type.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/convert_type_cube.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/convert_type_cube.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/convert_type_cube.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/convert_type_cube.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/cross.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/cross.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/cross.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/cross.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/dot.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/dot.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/dot.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/dot.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/dot_small.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/dot_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/dot_small.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/dot_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_abs.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_abs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_abs.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_abs.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_acos_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_acos_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_acos_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_acos_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_acos_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_acos_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_acos_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_acos_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_acosh_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_acosh_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_acosh_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_acosh_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_acosh_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_acosh_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_acosh_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_acosh_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_asin_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_asin_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_asin_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_asin_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_asin_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_asin_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_asin_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_asin_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_asinh_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_asinh_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_asinh_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_asinh_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_asinh_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_asinh_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_asinh_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_asinh_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_atan_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_atan_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_atan_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_atan_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_atan_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_atan_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_atan_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_atan_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_atanh_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_atanh_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_atanh_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_atanh_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_atanh_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_atanh_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_atanh_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_atanh_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_ceil_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_ceil_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_ceil_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_ceil_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_ceil_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_ceil_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_ceil_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_ceil_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_cos_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_cos_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_cos_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_cos_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_cos_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_cos_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_cos_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_cos_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_cosh_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_cosh_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_cosh_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_cosh_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_cosh_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_cosh_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_cosh_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_cosh_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_post_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_post_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_post_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_post_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_post_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_post_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_post_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_post_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_pre_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_pre_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_pre_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_pre_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_pre_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_pre_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_div_scalar_pre_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_div_scalar_pre_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_erf_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_erf_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_erf_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_erf_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_erf_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_erf_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_erf_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_erf_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_erfc_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_erfc_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_erfc_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_erfc_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_erfc_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_erfc_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_erfc_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_erfc_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp10_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp10_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp10_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp10_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp10_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp10_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp10_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp10_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp2_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp2_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp2_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp2_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp2_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp2_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp2_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp2_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_exp_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_exp_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_floor_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_floor_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_floor_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_floor_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_floor_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_floor_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_floor_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_floor_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_lgamma_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_lgamma_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_lgamma_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_lgamma_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_lgamma_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_lgamma_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_lgamma_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_lgamma_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log10_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log10_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log10_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log10_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log10_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log10_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log10_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log10_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log2_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log2_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log2_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log2_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log2_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log2_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log2_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log2_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_log_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_log_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_max_array_cube.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_max_array_cube.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_max_array_cube.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_max_array_cube.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_min_array_cube.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_min_array_cube.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_min_array_cube.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_min_array_cube.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_post_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_post_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_post_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_post_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_post_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_post_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_post_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_post_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_post_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_post_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_post_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_post_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_post_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_post_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_post_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_post_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_pre_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_pre_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_pre_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_pre_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_pre_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_pre_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_minus_scalar_pre_pre_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_minus_scalar_pre_pre_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_mod_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_mod_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_mod_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_mod_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_mul_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_mul_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_mul_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_mul_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_mul_scalar_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_mul_scalar_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_mul_scalar_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_mul_scalar_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_mul_scalar_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_mul_scalar_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_mul_scalar_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_mul_scalar_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_neg_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_neg_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_neg_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_neg_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_neg_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_neg_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_neg_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_neg_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_plus_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_plus_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_plus_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_plus_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_plus_scalar_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_plus_scalar_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_plus_scalar_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_plus_scalar_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_plus_scalar_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_plus_scalar_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_plus_scalar_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_plus_scalar_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_pow_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_pow_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_pow_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_pow_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_pow_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_pow_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_pow_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_pow_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_round_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_round_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_round_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_round_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_round_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_round_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_round_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_round_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sign_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sign_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sign_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sign_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sign_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sign_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sign_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sign_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sin_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sin_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sin_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sin_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sin_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sin_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sin_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sin_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sinc_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sinc_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sinc_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sinc_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sinc_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sinc_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sinc_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sinc_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sinh_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sinh_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sinh_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sinh_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sinh_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sinh_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sinh_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sinh_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sqrt_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sqrt_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sqrt_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sqrt_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sqrt_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sqrt_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_sqrt_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_sqrt_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_square_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_square_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_square_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_square_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_square_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_square_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_square_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_square_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_tan_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_tan_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_tan_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_tan_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_tan_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_tan_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_tan_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_tan_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_tanh_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_tanh_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_tanh_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_tanh_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_tanh_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_tanh_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_tanh_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_tanh_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_exp_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_exp_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_exp_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_exp_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_exp_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_exp_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_exp_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_exp_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_log_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_log_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_log_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_log_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_log_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_log_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_log_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_log_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/equ_array_trunc_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/equ_array_trunc_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/extract_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/extract_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/extract_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/extract_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/extract_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/extract_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/extract_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/extract_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/htrans.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/htrans.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/htrans.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/htrans.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_div_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_div_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_div_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_div_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_div_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_div_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_div_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_div_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_eq_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_eq_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_eq_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_eq_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_eq_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_eq_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_eq_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_eq_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_minus_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_minus_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_minus_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_minus_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_minus_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_minus_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_minus_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_minus_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_mul_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_mul_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_mul_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_mul_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_mul_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_mul_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_mul_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_mul_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_plus_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_plus_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_plus_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_plus_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_plus_sve1.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_plus_sve1.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve1_plus_sve1.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve1_plus_sve1.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_div_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_div_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_div_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_div_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_div_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_div_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_div_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_div_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_eq_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_eq_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_eq_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_eq_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_eq_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_eq_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_eq_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_eq_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_minus_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_minus_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_minus_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_minus_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_minus_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_minus_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_minus_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_minus_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_mul_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_mul_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_mul_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_mul_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_mul_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_mul_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_mul_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_mul_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_plus_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_plus_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_plus_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_plus_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_plus_sve2.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_plus_sve2.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/inplace_sve2_plus_sve2.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/inplace_sve2_plus_sve2.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/max_colwise_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/max_colwise_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/max_colwise_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/max_colwise_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/max_colwise_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/max_colwise_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/max_colwise_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/max_colwise_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/max_cube_col_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/max_cube_col_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/max_cube_col_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/max_cube_col_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/max_cube_col_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/max_cube_col_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/max_cube_col_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/max_cube_col_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/max_rowwise_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/max_rowwise_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/max_rowwise_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/max_rowwise_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/max_rowwise_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/max_rowwise_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/max_rowwise_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/max_rowwise_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/mean_colwise_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/mean_colwise_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/mean_colwise_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/mean_colwise_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/mean_colwise_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/mean_colwise_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/mean_colwise_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/mean_colwise_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/mean_rowwise_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/mean_rowwise_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/mean_rowwise_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/mean_rowwise_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/mean_rowwise_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/mean_rowwise_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/mean_rowwise_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/mean_rowwise_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/min_colwise_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/min_colwise_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/min_colwise_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/min_colwise_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/min_colwise_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/min_colwise_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/min_colwise_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/min_colwise_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/min_cube_col_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/min_cube_col_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/min_cube_col_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/min_cube_col_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/min_cube_col_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/min_cube_col_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/min_cube_col_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/min_cube_col_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/min_rowwise_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/min_rowwise_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/min_rowwise_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/min_rowwise_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/min_rowwise_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/min_rowwise_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/min_rowwise_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/min_rowwise_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_all_neq.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_all_neq.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_all_neq.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_all_neq.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_all_neq_colwise.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_all_neq_colwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_all_neq_colwise.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_all_neq_colwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_all_neq_rowwise.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_all_neq_rowwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_all_neq_rowwise.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_all_neq_rowwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_all_neq_small.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_all_neq_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_all_neq_small.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_all_neq_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_and_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_and_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_and_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_and_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_any_neq.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_any_neq.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_any_neq.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_any_neq.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_any_neq_colwise.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_any_neq_colwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_any_neq_colwise.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_any_neq_colwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_any_neq_rowwise.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_any_neq_rowwise.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_any_neq_rowwise.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_any_neq_rowwise.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_any_neq_small.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_any_neq_small.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_any_neq_small.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_any_neq_small.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_eq_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_eq_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_eq_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_eq_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_eq_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_eq_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_eq_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_eq_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_gt_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_gt_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_gt_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_gt_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_gt_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_gt_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_gt_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_gt_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_gteq_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_gteq_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_gteq_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_gteq_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_gteq_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_gteq_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_gteq_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_gteq_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_lt_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_lt_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_lt_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_lt_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_lt_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_lt_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_lt_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_lt_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_lteq_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_lteq_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_lteq_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_lteq_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_lteq_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_lteq_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_lteq_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_lteq_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_neq_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_neq_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_neq_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_neq_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_neq_scalar.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_neq_scalar.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_neq_scalar.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_neq_scalar.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/rel_or_array.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/rel_or_array.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/rel_or_array.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/rel_or_array.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/replace.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/replace.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/replace.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/replace.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/repmat.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/repmat.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/repmat.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/repmat.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/strans.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/strans.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/strans.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/strans.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/sum_colwise_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/sum_colwise_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/sum_colwise_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/sum_colwise_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/sum_colwise_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/sum_colwise_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/sum_colwise_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/sum_colwise_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/sum_rowwise_conv_post.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/sum_rowwise_conv_post.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/sum_rowwise_conv_post.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/sum_rowwise_conv_post.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/sum_rowwise_conv_pre.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/sum_rowwise_conv_pre.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/sum_rowwise_conv_pre.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/sum_rowwise_conv_pre.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/symmatl.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/symmatl.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/symmatl.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/symmatl.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/twoway/symmatu.cl b/inst/include/bandicoot_bits/ks/opencl/twoway/symmatu.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/twoway/symmatu.cl rename to inst/include/bandicoot_bits/ks/opencl/twoway/symmatu.cl diff --git a/inst/include/bandicoot_bits/kernels/opencl/zeroway/shuffle_large_compute_locs.cl b/inst/include/bandicoot_bits/ks/opencl/zeroway/shuffle_large_compute_locs.cl similarity index 100% rename from inst/include/bandicoot_bits/kernels/opencl/zeroway/shuffle_large_compute_locs.cl rename to inst/include/bandicoot_bits/ks/opencl/zeroway/shuffle_large_compute_locs.cl diff --git a/src/Makevars.in b/src/Makevars.in index c38f0f7..3fcbb0c 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -3,7 +3,7 @@ ## This file is processed by configure to generate Makevars ## It includes GPU backend configuration (OpenCL, CUDA) and other settings -PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/kernels/"' +PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/ks/"' ## Compiler flags from configure PKG_CXXFLAGS = @BANDICOOT_CXXFLAGS@ From b58411129fb8b4580a656d79b78b0a93e76dcf08 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 18:40:37 -0600 Subject: [PATCH 08/12] File src/Makevars is a temporary so remove from git --- src/Makevars | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 src/Makevars diff --git a/src/Makevars b/src/Makevars deleted file mode 100644 index a4565dc..0000000 --- a/src/Makevars +++ /dev/null @@ -1,12 +0,0 @@ -## RcppBandicoot Makevars.in -## -## This file is processed by configure to generate Makevars -## It includes GPU backend configuration (OpenCL, CUDA) and other settings - -PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/kernels/"' - -## Compiler flags from configure -PKG_CXXFLAGS = -DCOOT_USE_OPENCL -DCOOT_USE_CLBLAST -DCOOT_USE_CLBLAS -DCOOT_USE_CUDA -DCOOT_CUDA_INCLUDE_PATH=/usr/include/ -DCOOT_DEFAULT_BACKEND=CUDA_BACKEND $(SHLIB_OPENMP_CXXFLAGS) - -## Linker flags from configure -PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) -lclBLAS -lclblast -lOpenCL -L/usr/lib64 -L/usr/lib -lcuda -lcudart -lcublas -lcusolver -lcurand -lnvrtc -llapack -lblas -lgfortran -lm -lquadmath From 616a6ca2de918dcd3f86b8e10dfa4997aca4f15a Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 18:44:13 -0600 Subject: [PATCH 09/12] Tweak following code review --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 90fe43e..973a09e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,7 @@ Description: 'Bandicoot' is a templated C++ GPU Linear Algebra library Note that the 'Bandicoot' library is licensed under Apache License version 2.0 and 'RcppBandicoot' (the 'Rcpp' bindings/bridge to 'Bandicoot') is licensed under the GNU GPL version 2 or later. Note that 'Bandicoot' requires a compiler that - supports 'C++14' and 'OpenCL' (>= 1.2) or 'CUDA' (>= 9.8). + supports 'OpenCL' (>= 1.2) or 'CUDA' (>= 9.8). License: GPL (>= 2) Depends: R (>= 4.4) LinkingTo: Rcpp (>= 1.1.0) From e5c85f308179b51bc699bf508732bad7722eb76f Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 19:26:20 -0600 Subject: [PATCH 10/12] Clean up .onLoad() removing commented-out code --- R/rcppbandicoot-package.R | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/R/rcppbandicoot-package.R b/R/rcppbandicoot-package.R index 107d7d6..19ff01e 100644 --- a/R/rcppbandicoot-package.R +++ b/R/rcppbandicoot-package.R @@ -8,25 +8,6 @@ NULL .onLoad <- function(libname, pkgname) { - # Set the kernel path for Bandicoot OpenCL kernels - #kernel_path <- system.file("include/bandicoot_bits/opencl/kernels", - # package = "RcppBandicoot") - - #if (nzchar(kernel_path)) { - # Sys.setenv(COOT_CL_KERNEL_PATH = kernel_path) - #} - - #cuda_path <- system.file("include/bandicoot_bits/cuda/kernels", - # package = "RcppBandicoot") - #if (nzchar(cuda_path)) { - # Sys.setenv(COOT_CUDA_KERNEL_PATH = kernel_path) - #} - - #kernel_path <- system.file("include/bandicoot_bits/", package = "RcppBandicoot") - #cat("kernel_path: ", kernel_path, "\n") - #if (nzchar(kernel_path)) { - # Sys.setenv(COOT_KERNEL_SOURCE_DIR = kernel_path) - #} - - #gpu_initialize("cuda", TRUE) + ## this function is now empty as the kernel path is a _compile-time_ + ## and not run-time issue } From 46bb31f71096727a977c9ea52d219e36923ef884 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 19:42:03 -0600 Subject: [PATCH 11/12] Configure now finds the kernels directory --- configure | 17 +++++++++++++++-- configure.ac | 13 ++++++++++++- src/Makevars.in | 2 +- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 457d354..ab3b742 100755 --- a/configure +++ b/configure @@ -639,6 +639,7 @@ ac_includes_default="\ ac_header_cxx_list= ac_subst_vars='LTLIBOBJS LIBOBJS +BANDICOOT_KERNELS_DIR OPENCL_TARGET_VERSION SDKPATH CUDA_HOME @@ -5258,14 +5259,21 @@ After installation, ensure GPU drivers are properly installed and configured. " "$LINENO" 5 fi +## Installation directory +## Note that we end on 'ks', mot 'kernels', to shorten the path. +## Note also that we ensure a final trailing '/' +BANDICOOT_KERNELS_DIR=$(${R_HOME}/bin/Rscript -e 'cat(paste(head(.libPaths(),1), "RcppBandicoot", "include", "bandicoot_bits", "ks", "", sep="/"))') + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: " >&5 printf "%s\n" "$as_me: " >&6;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: GPU Backend Configuration Summary:" >&5 printf "%s\n" "$as_me: GPU Backend Configuration Summary:" >&6;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Platform: ${RSysinfoName}" >&5 printf "%s\n" "$as_me: Platform: ${RSysinfoName}" >&6;} -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: C++ Standard: C++14" >&5 -printf "%s\n" "$as_me: C++ Standard: C++14" >&6;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: C++ Standard: C++14 or later" >&5 +printf "%s\n" "$as_me: C++ Standard: C++14 or later" >&6;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: OpenCL: ${HAVE_OPENCL}" >&5 printf "%s\n" "$as_me: OpenCL: ${HAVE_OPENCL}" >&6;} if test "x${HAVE_OPENCL}" = x1; then @@ -5280,6 +5288,8 @@ fi printf "%s\n" "$as_me: CUDA: ${HAVE_CUDA}" >&6;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Default: ${DEFAULT_BACKEND}" >&5 printf "%s\n" "$as_me: Default: ${DEFAULT_BACKEND}" >&6;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: Kernels: ${BANDICOOT_KERNELS_DIR}" >&5 +printf "%s\n" "$as_me: Kernels: ${BANDICOOT_KERNELS_DIR}" >&6;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: " >&5 printf "%s\n" "$as_me: " >&6;} @@ -5365,6 +5375,9 @@ BANDICOOT_LIBS="${BANDICOOT_LIBS} ${LAPACK_BLAS_LIBS}" ## OpenCL target version +## Bandicoot kernels + + ac_config_files="$ac_config_files src/Makevars R/flags.R" cat >confcache <<\_ACEOF diff --git a/configure.ac b/configure.ac index da65986..97287bb 100644 --- a/configure.ac +++ b/configure.ac @@ -387,10 +387,17 @@ After installation, ensure GPU drivers are properly installed and configured. ]) fi +## Installation directory +## Note that we end on 'ks', mot 'kernels', to shorten the path. +## Note also that we ensure a final trailing '/' +BANDICOOT_KERNELS_DIR=$(${R_HOME}/bin/Rscript -e 'cat(paste(head(.libPaths(),1), "RcppBandicoot", "include", "bandicoot_bits", "ks", "", sep="/"))') + + + AC_MSG_NOTICE([]) AC_MSG_NOTICE([GPU Backend Configuration Summary:]) AC_MSG_NOTICE([ Platform: ${RSysinfoName}]) -AC_MSG_NOTICE([ C++ Standard: C++14]) +AC_MSG_NOTICE([ C++ Standard: C++14 or later]) AC_MSG_NOTICE([ OpenCL: ${HAVE_OPENCL}]) if test "x${HAVE_OPENCL}" = x1; then AC_MSG_NOTICE([ CLBlast: ${HAVE_CLBLAST}]) @@ -399,6 +406,7 @@ if test "x${HAVE_OPENCL}" = x1; then fi AC_MSG_NOTICE([ CUDA: ${HAVE_CUDA}]) AC_MSG_NOTICE([ Default: ${DEFAULT_BACKEND}]) +AC_MSG_NOTICE([ Kernels: ${BANDICOOT_KERNELS_DIR}]) AC_MSG_NOTICE([]) ## ============================================================================= @@ -478,5 +486,8 @@ AC_SUBST([SDKPATH]) ## OpenCL target version AC_SUBST([OPENCL_TARGET_VERSION]) +## Bandicoot kernels +AC_SUBST([BANDICOOT_KERNELS_DIR]) + AC_CONFIG_FILES([src/Makevars R/flags.R]) AC_OUTPUT diff --git a/src/Makevars.in b/src/Makevars.in index 3fcbb0c..5e3f3d4 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -3,7 +3,7 @@ ## This file is processed by configure to generate Makevars ## It includes GPU backend configuration (OpenCL, CUDA) and other settings -PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"/usr/local/lib/R/site-library/RcppBandicoot/include/bandicoot_bits/ks/"' +PKG_CPPFLAGS = -I../inst/include -DCOOT_TARGET_OPENCL_VERSION=300 -DCOOT_KERNEL_SOURCE_DIR='"@BANDICOOT_KERNELS_DIR@"' ## Compiler flags from configure PKG_CXXFLAGS = @BANDICOOT_CXXFLAGS@ From 54eb296915d3086101f51f3ec70fcd1edccb0988 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 17 Dec 2025 19:54:06 -0600 Subject: [PATCH 12/12] Typo fix corrected per code review --- configure | 2 +- configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index ab3b742..77dc399 100755 --- a/configure +++ b/configure @@ -5260,7 +5260,7 @@ After installation, ensure GPU drivers are properly installed and configured. fi ## Installation directory -## Note that we end on 'ks', mot 'kernels', to shorten the path. +## Note that we end on 'ks', not 'kernels', to shorten the path. ## Note also that we ensure a final trailing '/' BANDICOOT_KERNELS_DIR=$(${R_HOME}/bin/Rscript -e 'cat(paste(head(.libPaths(),1), "RcppBandicoot", "include", "bandicoot_bits", "ks", "", sep="/"))') diff --git a/configure.ac b/configure.ac index 97287bb..e632dd7 100644 --- a/configure.ac +++ b/configure.ac @@ -388,7 +388,7 @@ After installation, ensure GPU drivers are properly installed and configured. fi ## Installation directory -## Note that we end on 'ks', mot 'kernels', to shorten the path. +## Note that we end on 'ks', not 'kernels', to shorten the path. ## Note also that we ensure a final trailing '/' BANDICOOT_KERNELS_DIR=$(${R_HOME}/bin/Rscript -e 'cat(paste(head(.libPaths(),1), "RcppBandicoot", "include", "bandicoot_bits", "ks", "", sep="/"))')