@@ -74,7 +74,7 @@ inline __device__ void InterpTransposeAtPoints1d(SharedData_Cuda &data, const Ce
7474 // Contract x direction
7575 if (p < NUM_POINTS) {
7676 for (CeedInt i = 0 ; i < Q_1D; i++) {
77- atomicAdd (&data.slice [comp * Q_1D + (i + p ) % Q_1D], chebyshev_x[(i + p ) % Q_1D] * r_U[comp]);
77+ atomicAdd (&data.slice [comp * Q_1D + (i + data. t_id_x ) % Q_1D], chebyshev_x[(i + data. t_id_x ) % Q_1D] * r_U[comp]);
7878 }
7979 }
8080 // Pull from shared to register
@@ -120,7 +120,7 @@ inline __device__ void GradTransposeAtPoints1d(SharedData_Cuda &data, const Ceed
120120 // Contract x direction
121121 if (p < NUM_POINTS) {
122122 for (CeedInt i = 0 ; i < Q_1D; i++) {
123- atomicAdd (&data.slice [comp * Q_1D + (i + p ) % Q_1D], chebyshev_x[(i + p ) % Q_1D] * r_U[comp]);
123+ atomicAdd (&data.slice [comp * Q_1D + (i + data. t_id_x ) % Q_1D], chebyshev_x[(i + data. t_id_x ) % Q_1D] * r_U[comp]);
124124 }
125125 }
126126 // Pull from shared to register
@@ -186,10 +186,10 @@ inline __device__ void InterpTransposeAtPoints2d(SharedData_Cuda &data, const Ce
186186 if (p < NUM_POINTS) {
187187 for (CeedInt i = 0 ; i < Q_1D; i++) {
188188 // Note: shifting to avoid atomic adds
189- const CeedInt ii = (i + (p / Q_1D) ) % Q_1D;
189+ const CeedInt ii = (i + data. t_id_x ) % Q_1D;
190190
191191 for (CeedInt j = 0 ; j < Q_1D; j++) {
192- const CeedInt jj = (j + p ) % Q_1D;
192+ const CeedInt jj = (j + data. t_id_y ) % Q_1D;
193193
194194 atomicAdd (&data.slice [jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
195195 }
@@ -261,10 +261,10 @@ inline __device__ void GradTransposeAtPoints2d(SharedData_Cuda &data, const Ceed
261261 if (p < NUM_POINTS) {
262262 for (CeedInt i = 0 ; i < Q_1D; i++) {
263263 // Note: shifting to avoid atomic adds
264- const CeedInt ii = (i + (p / Q_1D) ) % Q_1D;
264+ const CeedInt ii = (i + data. t_id_x ) % Q_1D;
265265
266266 for (CeedInt j = 0 ; j < Q_1D; j++) {
267- const CeedInt jj = (j + p ) % Q_1D;
267+ const CeedInt jj = (j + data. t_id_y ) % Q_1D;
268268
269269 atomicAdd (&data.slice [jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
270270 }
@@ -343,10 +343,10 @@ inline __device__ void InterpTransposeAtPoints3d(SharedData_Cuda &data, const Ce
343343 if (p < NUM_POINTS) {
344344 for (CeedInt i = 0 ; i < Q_1D; i++) {
345345 // Note: shifting to avoid atomic adds
346- const CeedInt ii = (i + (p / Q_1D) ) % Q_1D;
346+ const CeedInt ii = (i + data. t_id_x ) % Q_1D;
347347
348348 for (CeedInt j = 0 ; j < Q_1D; j++) {
349- const CeedInt jj = (( j + p ) % Q_1D) ;
349+ const CeedInt jj = (j + data. t_id_y ) % Q_1D;
350350
351351 atomicAdd (&data.slice [jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
352352 }
@@ -430,10 +430,10 @@ inline __device__ void GradTransposeAtPoints3d(SharedData_Cuda &data, const Ceed
430430 if (p < NUM_POINTS) {
431431 for (CeedInt i = 0 ; i < Q_1D; i++) {
432432 // Note: shifting to avoid atomic adds
433- const CeedInt ii = (i + (p / Q_1D) ) % Q_1D;
433+ const CeedInt ii = (i + data. t_id_x ) % Q_1D;
434434
435435 for (CeedInt j = 0 ; j < Q_1D; j++) {
436- const CeedInt jj = (( j + p ) % Q_1D) ;
436+ const CeedInt jj = (j + data. t_id_y ) % Q_1D;
437437
438438 atomicAdd (&data.slice [jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
439439 }
0 commit comments