@@ -376,22 +376,20 @@ inline __device__ void GradAtPoints3d(SharedData_Cuda &data, const CeedInt p, co
376376 CeedScalar buffer[Q_1D];
377377 CeedScalar chebyshev_x[Q_1D];
378378
379- for (CeedInt comp = 0 ; comp < NUM_COMP; comp++) {
380- // Get z contraction value
381- ChebyshevPolynomialsAtPoint<Q_1D>(r_X[2 ], chebyshev_x);
382- CeedScalar z = chebyshev_x[k];
379+ // Get z contraction values
380+ ChebyshevPolynomialsAtPoint<Q_1D>(r_X[2 ], chebyshev_x);
381+ const CeedScalar z = chebyshev_x[k];
382+
383+ ChebyshevDerivativeAtPoint<Q_1D>(r_X[2 ], chebyshev_x);
384+ const CeedScalar dz = chebyshev_x[k];
383385
386+ for (CeedInt comp = 0 ; comp < NUM_COMP; comp++) {
384387 // Load coefficients
385388 __syncthreads ();
386389 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) data.slice [data.t_id_x + data.t_id_y * Q_1D] = r_C[k + comp * Q_1D];
387390 __syncthreads ();
388391 // Gradient directions
389392 for (CeedInt dim = 0 ; dim < 3 ; dim++) {
390- // Update z value for final pass
391- if (dim == 2 ) {
392- ChebyshevDerivativeAtPoint<Q_1D>(r_X[2 ], chebyshev_x);
393- z = chebyshev_x[k];
394- }
395393 // Contract x direction
396394 if (dim == 0 ) ChebyshevDerivativeAtPoint<Q_1D>(r_X[0 ], chebyshev_x);
397395 else ChebyshevPolynomialsAtPoint<Q_1D>(r_X[0 ], chebyshev_x);
@@ -404,8 +402,10 @@ inline __device__ void GradAtPoints3d(SharedData_Cuda &data, const CeedInt p, co
404402 // Contract y and z direction
405403 if (dim == 1 ) ChebyshevDerivativeAtPoint<Q_1D>(r_X[1 ], chebyshev_x);
406404 else ChebyshevPolynomialsAtPoint<Q_1D>(r_X[1 ], chebyshev_x);
405+ const CeedScalar zz = dim == 3 ? dz : z;
406+
407407 for (CeedInt i = 0 ; i < Q_1D; i++) {
408- r_V[comp + dim * NUM_COMP] += chebyshev_x[i] * buffer[i] * z ;
408+ r_V[comp + dim * NUM_COMP] += chebyshev_x[i] * buffer[i] * zz ;
409409 }
410410 }
411411 }
@@ -422,26 +422,26 @@ inline __device__ void GradTransposeAtPoints3d(SharedData_Cuda &data, const Ceed
422422 CeedScalar buffer[Q_1D];
423423 CeedScalar chebyshev_x[Q_1D];
424424
425- for (CeedInt comp = 0 ; comp < NUM_COMP; comp++) {
426- // Get z contraction value
427- ChebyshevPolynomialsAtPoint<Q_1D>(r_X[2 ], chebyshev_x);
428- CeedScalar z = chebyshev_x[k];
425+ // Get z contraction values
426+ ChebyshevPolynomialsAtPoint<Q_1D>(r_X[2 ], chebyshev_x);
427+ const CeedScalar z = chebyshev_x[k];
428+
429+ ChebyshevDerivativeAtPoint<Q_1D>(r_X[2 ], chebyshev_x);
430+ const CeedScalar dz = chebyshev_x[k];
429431
432+ for (CeedInt comp = 0 ; comp < NUM_COMP; comp++) {
430433 // Clear shared memory
431434 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) data.slice [data.t_id_x + data.t_id_y * Q_1D] = 0.0 ;
432435 __syncthreads ();
433436 // Gradient directions
434437 for (CeedInt dim = 0 ; dim < 3 ; dim++) {
435- // Update z value for final pass
436- if (dim == 2 ) {
437- ChebyshevDerivativeAtPoint<Q_1D>(r_X[2 ], chebyshev_x);
438- z = chebyshev_x[k];
439- }
440438 // Contract y and z direction
441439 if (dim == 1 ) ChebyshevDerivativeAtPoint<Q_1D>(r_X[1 ], chebyshev_x);
442440 else ChebyshevPolynomialsAtPoint<Q_1D>(r_X[1 ], chebyshev_x);
441+ const CeedScalar zz = dim == 3 ? dz : z;
442+
443443 for (CeedInt i = 0 ; i < Q_1D; i++) {
444- buffer[i] = chebyshev_x[i] * r_U[comp + dim * NUM_COMP] * z ;
444+ buffer[i] = chebyshev_x[i] * r_U[comp + dim * NUM_COMP] * zz ;
445445 }
446446 // Contract x direction
447447 if (dim == 0 ) ChebyshevDerivativeAtPoint<Q_1D>(r_X[0 ], chebyshev_x);
0 commit comments