|
| 1 | +// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. |
| 2 | +// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. |
| 3 | +// All Rights reserved. See files LICENSE and NOTICE for details. |
| 4 | +// |
| 5 | +// This file is part of CEED, a collection of benchmarks, miniapps, software |
| 6 | +// libraries and APIs for efficient high-order finite element and spectral |
| 7 | +// element discretizations for exascale applications. For more information and |
| 8 | +// source code availability see http://github.com/ceed. |
| 9 | +// |
| 10 | +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, |
| 11 | +// a collaborative effort of two U.S. Department of Energy organizations (Office |
| 12 | +// of Science and the National Nuclear Security Administration) responsible for |
| 13 | +// the planning and preparation of a capable exascale ecosystem, including |
| 14 | +// software, applications, hardware, advanced system engineering and early |
| 15 | +// testbed platforms, in support of the nation's exascale computing imperative. |
| 16 | + |
| 17 | +#include <ceed-impl.h> |
| 18 | +#include <string.h> |
| 19 | +#include "ceed-ref.h" |
| 20 | + |
| 21 | +// Contracts on the middle index |
| 22 | +// NOTRANSPOSE: V_ajc = T_jb U_abc |
| 23 | +// TRANSPOSE: V_ajc = T_bj U_abc |
| 24 | +// If Add != 0, "=" is replaced by "+=" |
| 25 | +static int CeedTensorContract_Ref(Ceed ceed, |
| 26 | + CeedInt A, CeedInt B, CeedInt C, CeedInt J, |
| 27 | + const CeedScalar *t, CeedTransposeMode tmode, |
| 28 | + const CeedInt Add, |
| 29 | + const CeedScalar *u, CeedScalar *v) { |
| 30 | + CeedInt tstride0 = B, tstride1 = 1; |
| 31 | + if (tmode == CEED_TRANSPOSE) { |
| 32 | + tstride0 = 1; tstride1 = J; |
| 33 | + } |
| 34 | + |
| 35 | + for (CeedInt a=0; a<A; a++) { |
| 36 | + for (CeedInt j=0; j<J; j++) { |
| 37 | + if (!Add) { |
| 38 | + for (CeedInt c=0; c<C; c++) |
| 39 | + v[(a*J+j)*C+c] = 0; |
| 40 | + } |
| 41 | + for (CeedInt b=0; b<B; b++) { |
| 42 | + for (CeedInt c=0; c<C; c++) { |
| 43 | + v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; |
| 44 | + } |
| 45 | + } |
| 46 | + } |
| 47 | + } |
| 48 | + return 0; |
| 49 | +} |
| 50 | + |
| 51 | +static int CeedBasisApply_Ref(CeedBasis basis, CeedTransposeMode tmode, |
| 52 | + CeedEvalMode emode, |
| 53 | + const CeedScalar *u, CeedScalar *v) { |
| 54 | + int ierr; |
| 55 | + const CeedInt dim = basis->dim; |
| 56 | + const CeedInt ndof = basis->ndof; |
| 57 | + const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); |
| 58 | + const CeedInt add = (tmode == CEED_TRANSPOSE); |
| 59 | + |
| 60 | + if (tmode == CEED_TRANSPOSE) { |
| 61 | + const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); |
| 62 | + for (CeedInt i = 0; i < vsize; i++) |
| 63 | + v[i] = (CeedScalar) 0; |
| 64 | + } |
| 65 | + if (emode & CEED_EVAL_INTERP) { |
| 66 | + CeedInt P = basis->P1d, Q = basis->Q1d; |
| 67 | + if (tmode == CEED_TRANSPOSE) { |
| 68 | + P = basis->Q1d; Q = basis->P1d; |
| 69 | + } |
| 70 | + CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; |
| 71 | + CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; |
| 72 | + for (CeedInt d=0; d<dim; d++) { |
| 73 | + ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d, |
| 74 | + tmode, add&&(d==dim-1), |
| 75 | + d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); |
| 76 | + CeedChk(ierr); |
| 77 | + pre /= P; |
| 78 | + post *= Q; |
| 79 | + } |
| 80 | + if (tmode == CEED_NOTRANSPOSE) { |
| 81 | + v += nqpt; |
| 82 | + } else { |
| 83 | + u += nqpt; |
| 84 | + } |
| 85 | + } |
| 86 | + if (emode & CEED_EVAL_GRAD) { |
| 87 | + CeedInt P = basis->P1d, Q = basis->Q1d; |
| 88 | + // In CEED_NOTRANSPOSE mode: |
| 89 | + // u is (P^dim x nc), column-major layout (nc = ndof) |
| 90 | + // v is (Q^dim x nc x dim), column-major layout (nc = ndof) |
| 91 | + // In CEED_TRANSPOSE mode, the sizes of u and v are switched. |
| 92 | + if (tmode == CEED_TRANSPOSE) { |
| 93 | + P = basis->Q1d, Q = basis->P1d; |
| 94 | + } |
| 95 | + CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; |
| 96 | + for (CeedInt p = 0; p < dim; p++) { |
| 97 | + CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; |
| 98 | + for (CeedInt d=0; d<dim; d++) { |
| 99 | + ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, |
| 100 | + (p==d)?basis->grad1d:basis->interp1d, |
| 101 | + tmode, add&&(d==dim-1), |
| 102 | + d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); |
| 103 | + CeedChk(ierr); |
| 104 | + pre /= P; |
| 105 | + post *= Q; |
| 106 | + } |
| 107 | + if (tmode == CEED_NOTRANSPOSE) { |
| 108 | + v += nqpt; |
| 109 | + } else { |
| 110 | + u += nqpt; |
| 111 | + } |
| 112 | + } |
| 113 | + } |
| 114 | + if (emode & CEED_EVAL_WEIGHT) { |
| 115 | + if (tmode == CEED_TRANSPOSE) |
| 116 | + return CeedError(basis->ceed, 1, |
| 117 | + "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); |
| 118 | + CeedInt Q = basis->Q1d; |
| 119 | + for (CeedInt d=0; d<dim; d++) { |
| 120 | + CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); |
| 121 | + for (CeedInt i=0; i<pre; i++) { |
| 122 | + for (CeedInt j=0; j<Q; j++) { |
| 123 | + for (CeedInt k=0; k<post; k++) { |
| 124 | + v[(i*Q + j)*post + k] = basis->qweight1d[j] |
| 125 | + * (d == 0 ? 1 : v[(i*Q + j)*post + k]); |
| 126 | + } |
| 127 | + } |
| 128 | + } |
| 129 | + } |
| 130 | + } |
| 131 | + return 0; |
| 132 | +} |
| 133 | + |
| 134 | +static int CeedBasisDestroy_Ref(CeedBasis basis) { |
| 135 | + return 0; |
| 136 | +} |
| 137 | + |
| 138 | +int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d, |
| 139 | + CeedInt Q1d, const CeedScalar *interp1d, |
| 140 | + const CeedScalar *grad1d, |
| 141 | + const CeedScalar *qref1d, |
| 142 | + const CeedScalar *qweight1d, |
| 143 | + CeedBasis basis) { |
| 144 | + basis->Apply = CeedBasisApply_Ref; |
| 145 | + basis->Destroy = CeedBasisDestroy_Ref; |
| 146 | + return 0; |
| 147 | +} |
0 commit comments