-
-
Notifications
You must be signed in to change notification settings - Fork 96
/
Copy pathoperators_blas_l2l3_opencl.nim
47 lines (36 loc) · 1.73 KB
/
operators_blas_l2l3_opencl.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Copyright (c) 2017-2018 the Arraymancer contributors
# Distributed under the Apache v2 License
# (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
import ./data_structure,
./backend/[opencl_backend, metadataArray],
./private/[p_init_opencl, p_checks]
proc openCL_MV_y_eq_aAx_p_by(
  alpha: float32, a, x: ClTensor[float32],
  beta: float32, y: var ClTensor[float32]) =
  ## Matrix-Vector product: ``y = alpha * A * x + beta * y``.
  ##
  ## Forwards to ``clblastSgemv`` (no transposition) on ``clQueue0``; the
  ## returned status is handed to ``check``. The result is written into ``y``.
  ##
  ## Only the matrix ``a`` has a layout constraint: it must be contiguous.
  ## ``x`` and ``y`` are passed with their own offset and first stride as
  ## the element increment.
  ##
  ## Raises ``ValueError`` when ``a`` is not contiguous.

  # TODO: remove this contiguous layout constraint
  if not a.isContiguous:
    raise newException(ValueError,
      "NotImplemented: for now the matrix should be contiguous")

  let
    a_is_rowMajor = a.is_C_contiguous
    layout = if a_is_rowMajor: CLBlastLayoutRowMajor
             else: CLBlastLayoutColMajor
    # Leading dimension of A: the stride of the slow-moving axis, i.e.
    # the row stride for row-major data, the column stride otherwise.
    lda = if a_is_rowMajor: a.strides[0]
          else: a.strides[1]

  check clblastSgemv(layout, CLBlastTransposeNo, a.shape[0], a.shape[1],
                     alpha,
                     a.toClPointer, a.offset, lda,
                     x.toClpointer, x.offset, x.strides[0],
                     beta,
                     y.toClpointer, y.offset, y.strides[0],
                     unsafeAddr clQueue0, nil)
proc `*`*[T: SomeReal](a, b: ClTensor[T]): ClTensor[T] =
  ## Matrix multiplication on OpenCL.
  ##
  ## Only the Matrix-Vector case (``a`` of rank 2, ``b`` of rank 1) with
  ## ``float32`` elements is supported at the moment. Returns a new rank-1
  ## tensor of length ``a.shape[0]``.
  ##
  ## Instantiating with any element type other than ``float32`` is a
  ## compile-time error.
  ##
  ## Raises ``ValueError`` when the operands are not a matrix and a vector.
  ## When assertions are enabled, a ``b`` whose rank is not 1 fails the
  ## assertion first.

  # The element type is known at instantiation time, so reject unsupported
  # types at compile time instead of through a runtime assert.
  when T isnot float32:
    {.error: "Only float32 is supported at the moment".}

  assert b.rank == 1, "Only Matrix-Vector product is supported at the moment"

  if a.rank == 2 and b.rank == 1:
    # NOTE(review): check_matvec presumably validates that the shapes of
    # `a` and `b` are compatible -- confirm in p_checks.
    when compileOption("boundChecks"):
      check_matvec(a, b)
    result = newClTensor[T]([a.shape[0]])
    # alpha = 1, beta = 0: plain product, prior content of `result` ignored.
    openCL_MV_y_eq_aAx_p_by(1.T, a, b, 0.T, result)
  else:
    raise newException(ValueError, "Matrix-Matrix or Matrix-Vector multiplication valid only if first Tensor is a Matrix and second is a Matrix or Vector")