Skip to content

Commit 9bff6f1

Browse files
authored
Opt-in gpu (#676)
* lock opencl and cuda tensor type declarations behind `defined` blocks * only include when using * `Tensor` is never `CudaTensor` * remove irrelevant comment
1 parent 2535236 commit 9bff6f1

File tree

3 files changed

+90
-78
lines changed

3 files changed

+90
-78
lines changed

Diff for: src/arraymancer/tensor/data_structure.nim

+80-66
Original file line numberDiff line numberDiff line change
@@ -16,73 +16,87 @@ import
1616
../laser/dynamic_stack_arrays,
1717
../laser/tensor/datatypes,
1818
nimblas,
19-
nimcuda/cuda12_5/[cuda_runtime_api, check],
2019
# Standard library
2120
std/[complex]
2221

2322
export nimblas.OrderType, complex
2423
export datatypes, dynamic_stack_arrays
2524

26-
type
27-
CudaTensorRefTrackerObj*[T: SomeFloat] = object
28-
value*: ptr UncheckedArray[T]
29-
30-
CudaTensorRefTracker*[T] = ref CudaTensorRefTrackerObj[T]
31-
32-
CudaStorage*[T: SomeFloat] = object
33-
## Opaque seq-like structure for storage on the Cuda backend.
34-
##
35-
## Nim garbage collector will automatically ask cuda to clear GPU memory if data becomes unused.
36-
##
37-
# TODO: Forward declaring this and making this completely private prevent assignment in newCudaStorage from working
38-
Flen*: int
39-
Fdata*: ptr UncheckedArray[T]
40-
Fref_tracking*: CudaTensorRefTracker[T] # We keep ref tracking for the GC in a separate field to avoid double indirection.
41-
42-
CudaTensor*[T: SomeFloat] = object
43-
## Tensor data structure stored on Nvidia GPU (Cuda)
44-
## - ``shape``: Dimensions of the CudaTensor
45-
## - ``strides``: Numbers of items to skip to get the next item along a dimension.
46-
## - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
47-
## - ``storage``: An opaque data storage for the CudaTensor
48-
##
49-
## Warning ⚠:
50-
## Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
51-
## However modification on metadata (shape, strides or offset) will not affect the other tensor.
52-
## Explicit copies can be made with ``clone``: ``var a = b.clone``
53-
shape*: Metadata
54-
strides*: Metadata
55-
offset*: int
56-
storage*: CudaStorage[T]
57-
58-
ClStorage*[T: SomeFloat] = object
59-
## Opaque seq-like structure for storage on the OpenCL backend.
60-
Flen*: int
61-
Fdata*: ptr UncheckedArray[T]
62-
Fref_tracking*: ref[ptr UncheckedArray[T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.
63-
64-
ClTensor*[T: SomeFloat] = object
65-
## Tensor data structure stored on OpenCL (CPU, GPU, FPGAs or other accelerators)
66-
## - ``shape``: Dimensions of the CudaTensor
67-
## - ``strides``: Numbers of items to skip to get the next item along a dimension.
68-
## - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
69-
## - ``storage``: An opaque data storage for the CudaTensor
70-
##
71-
## Warning ⚠:
72-
## Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
73-
## However modification on metadata (shape, strides or offset) will not affect the other tensor.
74-
## Explicit copies can be made with ``clone``: ``var a = b.clone``
75-
shape*: Metadata
76-
strides*: Metadata
77-
offset*: int
78-
storage*: ClStorage[T]
79-
80-
AnyTensor*[T] = Tensor[T] or CudaTensor[T] or ClTensor[T]
81-
82-
83-
proc deallocCuda*[T](p: CudaTensorRefTracker[T]) {.noSideEffect.}=
84-
if not p.value.isNil:
85-
check cudaFree(p.value)
25+
when defined(cuda):
26+
import nimcuda/cuda12_5/[cuda_runtime_api, check]
27+
28+
type
29+
CudaTensorRefTrackerObj*[T: SomeFloat] = object
30+
value*: ptr UncheckedArray[T]
31+
32+
CudaTensorRefTracker*[T] = ref CudaTensorRefTrackerObj[T]
33+
34+
CudaStorage*[T: SomeFloat] = object
35+
## Opaque seq-like structure for storage on the Cuda backend.
36+
##
37+
## Nim garbage collector will automatically ask cuda to clear GPU memory if data becomes unused.
38+
##
39+
# TODO: Forward declaring this and making this completely private prevent assignment in newCudaStorage from working
40+
Flen*: int
41+
Fdata*: ptr UncheckedArray[T]
42+
Fref_tracking*: CudaTensorRefTracker[T] # We keep ref tracking for the GC in a separate field to avoid double indirection.
43+
44+
CudaTensor*[T: SomeFloat] = object
45+
## Tensor data structure stored on Nvidia GPU (Cuda)
46+
## - ``shape``: Dimensions of the CudaTensor
47+
## - ``strides``: Numbers of items to skip to get the next item along a dimension.
48+
## - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
49+
## - ``storage``: An opaque data storage for the CudaTensor
50+
##
51+
## Warning ⚠:
52+
## Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
53+
## However modification on metadata (shape, strides or offset) will not affect the other tensor.
54+
## Explicit copies can be made with ``clone``: ``var a = b.clone``
55+
shape*: Metadata
56+
strides*: Metadata
57+
offset*: int
58+
storage*: CudaStorage[T]
59+
60+
proc deallocCuda*[T](p: CudaTensorRefTracker[T]) {.noSideEffect.}=
61+
if not p.value.isNil:
62+
check cudaFree(p.value)
63+
64+
when defined(opencl):
65+
type
66+
ClStorage*[T: SomeFloat] = object
67+
## Opaque seq-like structure for storage on the OpenCL backend.
68+
Flen*: int
69+
Fdata*: ptr UncheckedArray[T]
70+
Fref_tracking*: ref[ptr UncheckedArray[T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.
71+
72+
ClTensor*[T: SomeFloat] = object
73+
## Tensor data structure stored on OpenCL (CPU, GPU, FPGAs or other accelerators)
74+
## - ``shape``: Dimensions of the CudaTensor
75+
## - ``strides``: Numbers of items to skip to get the next item along a dimension.
76+
## - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
77+
## - ``storage``: An opaque data storage for the CudaTensor
78+
##
79+
## Warning ⚠:
80+
## Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
81+
## However modification on metadata (shape, strides or offset) will not affect the other tensor.
82+
## Explicit copies can be made with ``clone``: ``var a = b.clone``
83+
shape*: Metadata
84+
strides*: Metadata
85+
offset*: int
86+
storage*: ClStorage[T]
87+
88+
when defined(cuda) and defined(opencl):
89+
type AnyTensor*[T] = Tensor[T] or CudaTensor[T] or ClTensor[T]
90+
elif defined(cuda):
91+
type AnyTensor*[T] = Tensor[T] or CudaTensor[T]
92+
elif defined(opencl):
93+
type AnyTensor*[T] = Tensor[T] or ClTensor[T]
94+
else:
95+
type AnyTensor*[T] = Tensor[T]
96+
97+
type GpuTensor[T] = AnyTensor[T] and not Tensor[T]
98+
99+
86100

87101

88102
# ###############
@@ -102,10 +116,10 @@ proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.deprecated: "Use copyFromRaw ins
102116
# Tensor Metadata
103117
# ################
104118

105-
func rank*[T](t: CudaTensor[T] or ClTensor[T]): range[0 .. LASER_MAXRANK] {.inline.} =
119+
func rank*[T](t: GpuTensor[T]): range[0 .. LASER_MAXRANK] {.inline.} =
106120
t.shape.len
107121

108-
func size*[T](t: CudaTensor[T] or ClTensor[T]): Natural {.inline.} =
122+
func size*[T](t: GpuTensor[T]): Natural {.inline.} =
109123
t.shape.product
110124

111125
proc shape_to_strides*(shape: Metadata, layout: OrderType = rowMajor, result: var Metadata) {.noSideEffect.} =
@@ -131,7 +145,7 @@ proc shape_to_strides*(shape: Metadata, layout: OrderType = rowMajor, result: va
131145
accum *= shape[i]
132146
return
133147

134-
func is_C_contiguous*(t: CudaTensor or ClTensor): bool =
148+
func is_C_contiguous*(t: GpuTensor): bool =
135149
## Check if the tensor follows C convention / is row major
136150
var cur_size = 1
137151
for i in countdown(t.rank - 1,0):
@@ -182,14 +196,14 @@ proc get_offset_ptr*[T: KnownSupportsCopyMem](t: Tensor[T]): ptr T {.noSideEffec
182196
proc get_offset_ptr*[T: not KnownSupportsCopyMem](t: AnyTensor[T]): ptr T {.error: "`get_offset_ptr`" &
183197
" cannot be safely used for GC'ed types!".}
184198

185-
proc get_data_ptr*[T](t: CudaTensor[T] or ClTensor[T]): ptr T {.noSideEffect, inline.}=
199+
proc get_data_ptr*[T](t: GpuTensor[T]): ptr T {.noSideEffect, inline.}=
186200
## Input:
187201
## - A tensor
188202
## Returns:
189203
## - A pointer to the real start of its data (no offset)
190204
cast[ptr T](t.storage.Fdata)
191205

192-
proc get_offset_ptr*[T](t: CudaTensor[T] or ClTensor[T]): ptr T {.noSideEffect, inline.}=
206+
proc get_offset_ptr*[T](t: GpuTensor[T]): ptr T {.noSideEffect, inline.}=
193207
## Input:
194208
## - A tensor
195209
## Returns:

Diff for: src/arraymancer/tensor/exporting.nim

+6-11
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,12 @@ proc toRawSeq*[T](t:Tensor[T]): seq[T] {.noSideEffect, deprecated: "This proc ca
3333
## or that you raise your use-case in the issue tracker https://github.com/mratsim/Arraymancer/issues
3434
## so that more suitable primitives can be crafted
3535

36-
# Due to forward declaration this proc must be declared
37-
# after "cpu" proc are declared in init_cuda
38-
when t is Tensor:
39-
result = newSeq[T](t.size)
40-
for i in 0 ..< t.size:
41-
when T is KnownSupportsCopyMem:
42-
result[i] = t.unsafe_raw_offset()[i]
43-
else:
44-
result[i] = t.storage.raw_buffer[i]
45-
elif t is CudaTensor:
46-
return t.cpu.data
36+
result = newSeq[T](t.size)
37+
for i in 0 ..< t.size:
38+
when T is KnownSupportsCopyMem:
39+
result[i] = t.unsafe_raw_offset()[i]
40+
else:
41+
result[i] = t.storage.raw_buffer[i]
4742

4843
proc toFlatSeq*[T](t: Tensor[T]) : seq[T] =
4944
## Export the data of the Tensor flattened as a Seq

Diff for: src/arraymancer/tensor/private/p_checks.nim

+4-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ import ../../laser/private/nested_containers,
1919
when (NimMajor, NimMinor) < (1, 4):
2020
import ../../std_version_types
2121

22-
include ./p_checks_cuda, ./p_checks_opencl
22+
when defined(cuda):
23+
include ./p_checks_cuda
24+
when defined(opencl):
25+
include ./p_checks_opencl
2326

2427
func check_nested_elements*(shape: Metadata, len: int) {.inline.}=
2528
## Compare the detected shape from flatten with the real length of the data

0 commit comments

Comments (0)