@@ -16,73 +16,87 @@ import
16
16
../ laser/ dynamic_stack_arrays,
17
17
../ laser/ tensor/ datatypes,
18
18
nimblas,
19
- nimcuda/ cuda12_5/ [cuda_runtime_api, check],
20
19
# Standard library
21
20
std/ [complex]
22
21
23
22
export nimblas.OrderType , complex
24
23
export datatypes, dynamic_stack_arrays
25
24
26
- type
27
- CudaTensorRefTrackerObj * [T: SomeFloat ] = object
28
- value* : ptr UncheckedArray [T]
29
-
30
- CudaTensorRefTracker * [T] = ref CudaTensorRefTrackerObj [T]
31
-
32
- CudaStorage * [T: SomeFloat ] = object
33
- # # Opaque seq-like structure for storage on the Cuda backend.
34
- # #
35
- # # Nim garbage collector will automatically ask cuda to clear GPU memory if data becomes unused.
36
- # #
37
- # TODO : Forward declaring this and making this completely private prevent assignment in newCudaStorage from working
38
- Flen * : int
39
- Fdata * : ptr UncheckedArray [T]
40
- Fref_tracking* : CudaTensorRefTracker [T] # We keep ref tracking for the GC in a separate field to avoid double indirection.
41
-
42
- CudaTensor * [T: SomeFloat ] = object
43
- # # Tensor data structure stored on Nvidia GPU (Cuda)
44
- # # - ``shape``: Dimensions of the CudaTensor
45
- # # - ``strides``: Numbers of items to skip to get the next item along a dimension.
46
- # # - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
47
- # # - ``storage``: An opaque data storage for the CudaTensor
48
- # #
49
- # # Warning ⚠:
50
- # # Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
51
- # # However modification on metadata (shape, strides or offset) will not affect the other tensor.
52
- # # Explicit copies can be made with ``clone``: ``var a = b.clone``
53
- shape* : Metadata
54
- strides* : Metadata
55
- offset* : int
56
- storage* : CudaStorage [T]
57
-
58
- ClStorage * [T: SomeFloat ] = object
59
- # # Opaque seq-like structure for storage on the OpenCL backend.
60
- Flen * : int
61
- Fdata * : ptr UncheckedArray [T]
62
- Fref_tracking* : ref [ptr UncheckedArray [T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.
63
-
64
- ClTensor * [T: SomeFloat ] = object
65
- # # Tensor data structure stored on OpenCL (CPU, GPU, FPGAs or other accelerators)
66
- # # - ``shape``: Dimensions of the CudaTensor
67
- # # - ``strides``: Numbers of items to skip to get the next item along a dimension.
68
- # # - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
69
- # # - ``storage``: An opaque data storage for the CudaTensor
70
- # #
71
- # # Warning ⚠:
72
- # # Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
73
- # # However modification on metadata (shape, strides or offset) will not affect the other tensor.
74
- # # Explicit copies can be made with ``clone``: ``var a = b.clone``
75
- shape* : Metadata
76
- strides* : Metadata
77
- offset* : int
78
- storage* : ClStorage [T]
79
-
80
- AnyTensor * [T] = Tensor [T] or CudaTensor [T] or ClTensor [T]
81
-
82
-
83
- proc deallocCuda * [T](p: CudaTensorRefTracker [T]) {.noSideEffect .}=
84
- if not p.value.isNil:
85
- check cudaFree (p.value)
25
+ when defined (cuda):
26
+ import nimcuda/ cuda12_5/ [cuda_runtime_api, check]
27
+
28
+ type
29
+ CudaTensorRefTrackerObj * [T: SomeFloat ] = object
30
+ value* : ptr UncheckedArray [T] # Pointer that ``deallocCuda`` hands to ``cudaFree``.
31
+
32
+ CudaTensorRefTracker * [T] = ref CudaTensorRefTrackerObj [T]
33
+
34
+ CudaStorage * [T: SomeFloat ] = object
35
+ # # Opaque seq-like structure for storage on the Cuda backend.
36
+ # #
37
+ # # The Nim garbage collector will automatically ask Cuda to free GPU memory once the data becomes unused.
38
+ # #
39
+ # TODO : Forward declaring this and making this completely private prevents assignment in newCudaStorage from working
40
+ Flen * : int
41
+ Fdata * : ptr UncheckedArray [T]
42
+ Fref_tracking* : CudaTensorRefTracker [T] # We keep ref tracking for the GC in a separate field to avoid double indirection.
43
+
44
+ CudaTensor * [T: SomeFloat ] = object
45
+ # # Tensor data structure stored on Nvidia GPU (Cuda)
46
+ # # - ``shape``: Dimensions of the CudaTensor
47
+ # # - ``strides``: Number of items to skip to get the next item along a dimension.
48
+ # # - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
49
+ # # - ``storage``: An opaque data storage for the CudaTensor
50
+ # #
51
+ # # Warning ⚠:
52
+ # # Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
53
+ # # However, modification of metadata (shape, strides or offset) will not affect the other tensor.
54
+ # # Explicit copies can be made with ``clone``: ``var a = b.clone``
55
+ shape* : Metadata
56
+ strides* : Metadata
57
+ offset* : int
58
+ storage* : CudaStorage [T]
59
+
60
+ proc deallocCuda * [T](p: CudaTensorRefTracker [T]) {.noSideEffect .}= # Releases ``p.value`` through ``cudaFree`` when it is non-nil. NOTE(review): tagged noSideEffect despite the cudaFree call - confirm this is intentional.
61
+ if not p.value.isNil:
62
+ check cudaFree (p.value)
63
+
64
+ when defined (opencl):
65
+ type
66
+ ClStorage * [T: SomeFloat ] = object
67
+ # # Opaque seq-like structure for storage on the OpenCL backend.
68
+ Flen * : int
69
+ Fdata * : ptr UncheckedArray [T]
70
+ Fref_tracking* : ref [ptr UncheckedArray [T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.
71
+
72
+ ClTensor * [T: SomeFloat ] = object
73
+ # # Tensor data structure stored on OpenCL (CPU, GPU, FPGAs or other accelerators)
74
+ # # - ``shape``: Dimensions of the ClTensor
75
+ # # - ``strides``: Number of items to skip to get the next item along a dimension.
76
+ # # - ``offset``: Offset to get the first item of the ClTensor. Note: offset can be negative, in particular for slices.
77
+ # # - ``storage``: An opaque data storage for the ClTensor
78
+ # #
79
+ # # Warning ⚠:
80
+ # # Assignment ``var a = b`` does not copy the data. Data modification on one ClTensor will be reflected on the other.
81
+ # # However, modification of metadata (shape, strides or offset) will not affect the other tensor.
82
+ # # Explicit copies can be made with ``clone``: ``var a = b.clone``
83
+ shape* : Metadata
84
+ strides* : Metadata
85
+ offset* : int
86
+ storage* : ClStorage [T]
87
+
88
+ when defined (cuda) and defined (opencl): # AnyTensor is Tensor plus whichever backend tensor types were compiled in above.
89
+ type AnyTensor * [T] = Tensor [T] or CudaTensor [T] or ClTensor [T]
90
+ elif defined (cuda):
91
+ type AnyTensor * [T] = Tensor [T] or CudaTensor [T]
92
+ elif defined (opencl):
93
+ type AnyTensor * [T] = Tensor [T] or ClTensor [T]
94
+ else :
95
+ type AnyTensor * [T] = Tensor [T] # Neither backend enabled: AnyTensor is just Tensor.
96
+
97
+ type GpuTensor [T] = AnyTensor [T] and not Tensor [T] # Members of AnyTensor other than Tensor; not exported (no ``*``). With neither backend defined this matches no type.
98
+
99
+
86
100
87
101
88
102
# ###############
@@ -102,10 +116,10 @@ proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.deprecated: "Use copyFromRaw ins
102
116
# Tensor Metadata
103
117
# ################
104
118
105
- func rank * [T](t: CudaTensor [T] or ClTensor [T]): range [0 .. LASER_MAXRANK ] {.inline .} =
119
+ func rank * [T](t: GpuTensor [T]): range [0 .. LASER_MAXRANK ] {.inline .} =
106
120
t.shape.len # Number of entries in ``shape``, i.e. the number of dimensions of the tensor.
107
121
108
- func size * [T](t: CudaTensor [T] or ClTensor [T]): Natural {.inline .} =
122
+ func size * [T](t: GpuTensor [T]): Natural {.inline .} =
109
123
t.shape.product # Presumably the total element count (product of all extents in ``shape``) - confirm against ``product`` on Metadata.
110
124
111
125
proc shape_to_strides * (shape: Metadata , layout: OrderType = rowMajor, result: var Metadata ) {.noSideEffect .} =
@@ -131,7 +145,7 @@ proc shape_to_strides*(shape: Metadata, layout: OrderType = rowMajor, result: va
131
145
accum *= shape[i]
132
146
return
133
147
134
- func is_C_contiguous * (t: CudaTensor or ClTensor ): bool =
148
+ func is_C_contiguous * (t: GpuTensor ): bool =
135
149
# # Check if the tensor follows C convention / is row major
136
150
var cur_size = 1
137
151
for i in countdown (t.rank - 1 ,0 ):
@@ -182,14 +196,14 @@ proc get_offset_ptr*[T: KnownSupportsCopyMem](t: Tensor[T]): ptr T {.noSideEffec
182
196
proc get_offset_ptr * [T: not KnownSupportsCopyMem ](t: AnyTensor [T]): ptr T {.error : " `get_offset_ptr`" &
183
197
" cannot be safely used for GC'ed types!" .} # Overload that only raises a compile-time error, selected when T is not KnownSupportsCopyMem.
184
198
185
- proc get_data_ptr * [T](t: CudaTensor [T] or ClTensor [T]): ptr T {.noSideEffect , inline .}=
199
+ proc get_data_ptr * [T](t: GpuTensor [T]): ptr T {.noSideEffect , inline .}=
186
200
# # Input:
187
201
# # - A tensor matched by ``GpuTensor`` (CudaTensor and/or ClTensor, depending on the enabled backends)
188
202
# # Returns:
189
203
# # - ``t.storage.Fdata`` cast to ``ptr T``: the real start of its data, with ``offset`` not applied
190
204
cast [ptr T](t.storage.Fdata )
191
205
192
- proc get_offset_ptr * [T](t: CudaTensor [T] or ClTensor [T]): ptr T {.noSideEffect , inline .}=
206
+ proc get_offset_ptr * [T](t: GpuTensor [T]): ptr T {.noSideEffect , inline .}=
193
207
# # Input:
194
208
# # - A tensor
195
209
# # Returns:
0 commit comments