Skip to content

Commit e6cb4fc

Browse files
authored
Merge pull request #1782 from CEED/jeremy/strided-fix
Add stop to CeedVectorSetValueStrided
2 parents 4b6745b + a637ca9 commit e6cb4fc

File tree

11 files changed

+124
-86
lines changed

11 files changed

+124
-86
lines changed

backends/cuda-ref/ceed-cuda-ref-operator.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1909,9 +1909,20 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C
19091909
if (!is_active) continue;
19101910

19111911
// Update unit vector
1912-
if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0));
1913-
else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s - 1, e_vec_size, 0.0));
1914-
CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s, e_vec_size, 1.0));
1912+
{
1913+
// Note: E-vec strides are node * (1) + comp * (elem_size * num_elem) + elem * (elem_size)
1914+
CeedInt node = (s - 1) % elem_size, comp = (s - 1) / elem_size;
1915+
CeedSize start = node * 1 + comp * (elem_size * num_elem);
1916+
CeedSize stop = (comp + 1) * (elem_size * num_elem);
1917+
1918+
if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0));
1919+
else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0));
1920+
1921+
node = s % elem_size, comp = s / elem_size;
1922+
start = node * 1 + comp * (elem_size * num_elem);
1923+
stop = (comp + 1) * (elem_size * num_elem);
1924+
CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 1.0));
1925+
}
19151926

19161927
// Basis action
19171928
CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));

backends/cuda-ref/ceed-cuda-ref-vector.c

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -223,20 +223,20 @@ static int CeedVectorSetArray_Cuda(const CeedVector vec, const CeedMemType mem_t
223223
//------------------------------------------------------------------------------
224224
// Copy host array to value strided
225225
//------------------------------------------------------------------------------
226-
static int CeedHostCopyStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *h_copy_array) {
227-
for (CeedSize i = start; i < length; i += step) h_copy_array[i] = h_array[i];
226+
static int CeedHostCopyStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) {
227+
for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i];
228228
return CEED_ERROR_SUCCESS;
229229
}
230230

231231
//------------------------------------------------------------------------------
232232
// Copy device array to value strided (impl in .cu file)
233233
//------------------------------------------------------------------------------
234-
int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array);
234+
int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array);
235235

236236
//------------------------------------------------------------------------------
237237
// Copy a vector to a value strided
238238
//------------------------------------------------------------------------------
239-
static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize step, CeedVector vec_copy) {
239+
static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) {
240240
CeedSize length;
241241
CeedVector_Cuda *impl;
242242

@@ -248,6 +248,7 @@ static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize s
248248
CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy));
249249
length = length_vec < length_copy ? length_vec : length_copy;
250250
}
251+
if (stop == -1) stop = length;
251252
// Set value for synced device/host array
252253
if (impl->d_array) {
253254
CeedScalar *copy_array;
@@ -260,21 +261,21 @@ static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize s
260261
CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
261262
CeedCallBackend(CeedGetCublasHandle_Cuda(ceed, &handle));
262263
#if defined(CEED_SCALAR_IS_FP32)
263-
CeedCallCublas(ceed, cublasScopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
264+
CeedCallCublas(ceed, cublasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
264265
#else /* CEED_SCALAR */
265-
CeedCallCublas(ceed, cublasDcopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
266+
CeedCallCublas(ceed, cublasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
266267
#endif /* CEED_SCALAR */
267268
CeedCallBackend(CeedDestroy(&ceed));
268269
#else /* CUDA_VERSION */
269-
CeedCallBackend(CeedDeviceCopyStrided_Cuda(impl->d_array, start, step, length, copy_array));
270+
CeedCallBackend(CeedDeviceCopyStrided_Cuda(impl->d_array, start, stop, step, copy_array));
270271
#endif /* CUDA_VERSION */
271272
CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
272273
impl->h_array = NULL;
273274
} else if (impl->h_array) {
274275
CeedScalar *copy_array;
275276

276277
CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, &copy_array));
277-
CeedCallBackend(CeedHostCopyStrided_Cuda(impl->h_array, start, step, length, copy_array));
278+
CeedCallBackend(CeedHostCopyStrided_Cuda(impl->h_array, start, stop, step, copy_array));
278279
CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
279280
impl->d_array = NULL;
280281
} else {
@@ -336,31 +337,32 @@ static int CeedVectorSetValue_Cuda(CeedVector vec, CeedScalar val) {
336337
//------------------------------------------------------------------------------
337338
// Set host array to value strided
338339
//------------------------------------------------------------------------------
339-
static int CeedHostSetValueStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) {
340-
for (CeedSize i = start; i < length; i += step) h_array[i] = val;
340+
static int CeedHostSetValueStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
341+
for (CeedSize i = start; i < stop; i += step) h_array[i] = val;
341342
return CEED_ERROR_SUCCESS;
342343
}
343344

344345
//------------------------------------------------------------------------------
345346
// Set device array to value strided (impl in .cu file)
346347
//------------------------------------------------------------------------------
347-
int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val);
348+
int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val);
348349

349350
//------------------------------------------------------------------------------
350351
// Set a vector to a value strided
351352
//------------------------------------------------------------------------------
352-
static int CeedVectorSetValueStrided_Cuda(CeedVector vec, CeedSize start, CeedSize step, CeedScalar val) {
353+
static int CeedVectorSetValueStrided_Cuda(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
353354
CeedSize length;
354355
CeedVector_Cuda *impl;
355356

356357
CeedCallBackend(CeedVectorGetData(vec, &impl));
357358
CeedCallBackend(CeedVectorGetLength(vec, &length));
358359
// Set value for synced device/host array
360+
if (stop == -1) stop = length;
359361
if (impl->d_array) {
360-
CeedCallBackend(CeedDeviceSetValueStrided_Cuda(impl->d_array, start, step, length, val));
362+
CeedCallBackend(CeedDeviceSetValueStrided_Cuda(impl->d_array, start, stop, step, val));
361363
impl->h_array = NULL;
362364
} else if (impl->h_array) {
363-
CeedCallBackend(CeedHostSetValueStrided_Cuda(impl->h_array, start, step, length, val));
365+
CeedCallBackend(CeedHostSetValueStrided_Cuda(impl->h_array, start, stop, step, val));
364366
impl->d_array = NULL;
365367
} else {
366368
return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");

backends/cuda-ref/kernels/cuda-ref-vector.cu

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,24 @@
1111
//------------------------------------------------------------------------------
1212
// Kernel for copy strided on device
1313
//------------------------------------------------------------------------------
14-
__global__ static void copyStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar *__restrict__ vec_copy) {
14+
__global__ static void copyStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *__restrict__ vec_copy) {
1515
const CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
1616

17-
if (index < size) {
18-
if ((index - start) % step == 0) vec_copy[index] = vec[index];
17+
if (index < stop - start) {
18+
if (index % step == 0) vec_copy[start + index] = vec[start + index];
1919
}
2020
}
2121

2222
//------------------------------------------------------------------------------
2323
// Copy strided on device memory
2424
//------------------------------------------------------------------------------
25-
extern "C" int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array) {
25+
extern "C" int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array) {
2626
const int block_size = 512;
27-
const CeedSize vec_size = length;
28-
int grid_size = vec_size / block_size;
27+
const CeedSize copy_size = stop - start;
28+
int grid_size = copy_size / block_size;
2929

30-
if (block_size * grid_size < vec_size) grid_size += 1;
31-
copyStridedK<<<grid_size, block_size>>>(d_array, start, step, length, d_copy_array);
30+
if (block_size * grid_size < copy_size) grid_size += 1;
31+
copyStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, d_copy_array);
3232
return 0;
3333
}
3434

@@ -57,24 +57,24 @@ extern "C" int CeedDeviceSetValue_Cuda(CeedScalar *d_array, CeedSize length, Cee
5757
//------------------------------------------------------------------------------
5858
// Kernel for set value strided on device
5959
//------------------------------------------------------------------------------
60-
__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar val) {
60+
__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
6161
const CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
6262

63-
if (index < size) {
64-
if ((index - start) % step == 0) vec[index] = val;
63+
if (index < stop - start) {
64+
if (index % step == 0) vec[start + index] = val;
6565
}
6666
}
6767

6868
//------------------------------------------------------------------------------
6969
// Set value strided on device memory
7070
//------------------------------------------------------------------------------
71-
extern "C" int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) {
71+
extern "C" int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
7272
const int block_size = 512;
73-
const CeedSize vec_size = length;
74-
int grid_size = vec_size / block_size;
73+
const CeedSize set_size = stop - start;
74+
int grid_size = set_size / block_size;
7575

76-
if (block_size * grid_size < vec_size) grid_size += 1;
77-
setValueStridedK<<<grid_size, block_size>>>(d_array, start, step, length, val);
76+
if (block_size * grid_size < set_size) grid_size += 1;
77+
setValueStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, val);
7878
return 0;
7979
}
8080

backends/hip-ref/ceed-hip-ref-operator.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1906,9 +1906,20 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce
19061906
if (!is_active) continue;
19071907

19081908
// Update unit vector
1909-
if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0));
1910-
else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s - 1, e_vec_size, 0.0));
1911-
CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s, e_vec_size, 1.0));
1909+
{
1910+
// Note: E-vec strides are node * (1) + comp * (elem_size * num_elem) + elem * (elem_size)
1911+
CeedInt node = (s - 1) % elem_size, comp = (s - 1) / elem_size;
1912+
CeedSize start = node * 1 + comp * (elem_size * num_elem);
1913+
CeedSize stop = (comp + 1) * (elem_size * num_elem);
1914+
1915+
if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0));
1916+
else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0));
1917+
1918+
node = s % elem_size, comp = s / elem_size;
1919+
start = node * 1 + comp * (elem_size * num_elem);
1920+
stop = (comp + 1) * (elem_size * num_elem);
1921+
CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 1.0));
1922+
}
19121923

19131924
// Basis action
19141925
CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));

backends/hip-ref/ceed-hip-ref-vector.c

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -223,20 +223,20 @@ static int CeedVectorSetArray_Hip(const CeedVector vec, const CeedMemType mem_ty
223223
//------------------------------------------------------------------------------
224224
// Copy host array to value strided
225225
//------------------------------------------------------------------------------
226-
static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *h_copy_array) {
227-
for (CeedSize i = start; i < length; i += step) h_copy_array[i] = h_array[i];
226+
static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) {
227+
for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i];
228228
return CEED_ERROR_SUCCESS;
229229
}
230230

231231
//------------------------------------------------------------------------------
232232
// Copy device array to value strided (impl in .hip.cpp file)
233233
//------------------------------------------------------------------------------
234-
int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array);
234+
int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array);
235235

236236
//------------------------------------------------------------------------------
237237
// Copy a vector to a value strided
238238
//------------------------------------------------------------------------------
239-
static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize step, CeedVector vec_copy) {
239+
static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) {
240240
CeedSize length;
241241
CeedVector_Hip *impl;
242242

@@ -248,6 +248,7 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st
248248
CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy));
249249
length = length_vec < length_copy ? length_vec : length_copy;
250250
}
251+
if (stop == -1) stop = length;
251252
// Set value for synced device/host array
252253
if (impl->d_array) {
253254
CeedScalar *copy_array;
@@ -260,12 +261,12 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st
260261
CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
261262
CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle));
262263
#if defined(CEED_SCALAR_IS_FP32)
263-
CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
264+
CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
264265
#else /* CEED_SCALAR */
265-
CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
266+
CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
266267
#endif /* CEED_SCALAR */
267268
#else /* HIP_VERSION */
268-
CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, step, length, copy_array));
269+
CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, stop, step, copy_array));
269270
#endif /* HIP_VERSION */
270271
CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
271272
impl->h_array = NULL;
@@ -274,7 +275,7 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st
274275
CeedScalar *copy_array;
275276

276277
CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, &copy_array));
277-
CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, step, length, copy_array));
278+
CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, stop, step, copy_array));
278279
CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
279280
impl->d_array = NULL;
280281
} else {
@@ -336,31 +337,32 @@ static int CeedVectorSetValue_Hip(CeedVector vec, CeedScalar val) {
336337
//------------------------------------------------------------------------------
337338
// Set host array to value strided
338339
//------------------------------------------------------------------------------
339-
static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) {
340-
for (CeedSize i = start; i < length; i += step) h_array[i] = val;
340+
static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
341+
for (CeedSize i = start; i < stop; i += step) h_array[i] = val;
341342
return CEED_ERROR_SUCCESS;
342343
}
343344

344345
//------------------------------------------------------------------------------
345346
// Set device array to value strided (impl in .hip.cpp file)
346347
//------------------------------------------------------------------------------
347-
int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val);
348+
int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val);
348349

349350
//------------------------------------------------------------------------------
350351
// Set a vector to a value strided
351352
//------------------------------------------------------------------------------
352-
static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize step, CeedScalar val) {
353+
static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
353354
CeedSize length;
354355
CeedVector_Hip *impl;
355356

356357
CeedCallBackend(CeedVectorGetData(vec, &impl));
357358
CeedCallBackend(CeedVectorGetLength(vec, &length));
358359
// Set value for synced device/host array
360+
if (stop == -1) stop = length;
359361
if (impl->d_array) {
360-
CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, step, length, val));
362+
CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, stop, step, val));
361363
impl->h_array = NULL;
362364
} else if (impl->h_array) {
363-
CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, step, length, val));
365+
CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, stop, step, val));
364366
impl->d_array = NULL;
365367
} else {
366368
return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");

0 commit comments

Comments
 (0)