|
| 1 | +// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. |
| 2 | +// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. |
| 3 | +// All Rights reserved. See files LICENSE and NOTICE for details. |
| 4 | +// |
| 5 | +// This file is part of CEED, a collection of benchmarks, miniapps, software |
| 6 | +// libraries and APIs for efficient high-order finite element and spectral |
| 7 | +// element discretizations for exascale applications. For more information and |
| 8 | +// source code availability see http://github.com/ceed. |
| 9 | +// |
| 10 | +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, |
| 11 | +// a collaborative effort of two U.S. Department of Energy organizations (Office |
| 12 | +// of Science and the National Nuclear Security Administration) responsible for |
| 13 | +// the planning and preparation of a capable exascale ecosystem, including |
| 14 | +// software, applications, hardware, advanced system engineering and early |
| 15 | +// testbed platforms, in support of the nation's exascale computing imperative. |
| 16 | + |
| 17 | +#include <ceed/ceed.h> |
| 18 | +#include <ceed/backend.h> |
| 19 | +#ifdef __ARM_FEATURE_SVE |
| 20 | +#include <arm_sve.h> |
| 21 | +#endif |
| 22 | +#include <stdbool.h> |
| 23 | +#include "ceed-sve.h" |
| 24 | + |
| 25 | +//------------------------------------------------------------------------------ |
| 26 | +// Blocked Tensor Contract |
| 27 | +//------------------------------------------------------------------------------ |
| 28 | +static inline int CeedTensorContract_Sve_Blocked(CeedTensorContract contract, |
| 29 | + CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, |
| 30 | + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, |
| 31 | + double *restrict v, const CeedInt JJ) { |
| 32 | + CeedInt t_stride_0 = B, t_stride_1 = 1; |
| 33 | + if (t_mode == CEED_TRANSPOSE) { |
| 34 | + t_stride_0 = 1; t_stride_1 = J; |
| 35 | + } |
| 36 | + |
| 37 | + for (CeedInt a=0; a<A; a++) |
| 38 | + for (CeedInt b=0; b<B; b++) |
| 39 | + // Blocks of JJ rows |
| 40 | + for (CeedInt j=0; j<(J/JJ)*JJ; j+=JJ) |
| 41 | + for (CeedInt jj=0; jj<JJ; jj++) { // unroll |
| 42 | + // C vectorization by compiler |
| 43 | + int32_t c = 0; |
| 44 | + svbool_t pg = svwhilelt_b64(c, C); |
| 45 | + do { |
| 46 | + // Load u, v into vectors |
| 47 | + svfloat64_t u_vec = svld1(pg, &u[(a*B+b)*C+c]); |
| 48 | + svfloat64_t v_vec = svld1(pg, &v[(a*J+j+jj)*C+c]); |
| 49 | + // Basis matrix value |
| 50 | + double tq = t[(j+jj)*t_stride_0 + b*t_stride_1]; |
| 51 | + // fmadd |
| 52 | + svst1(pg, &v[(a*J+j+jj)*C+c], svmla_x(pg, v_vec, u_vec, tq)); |
| 53 | + // Loop update |
| 54 | + c += svcntd(); |
| 55 | + pg = svwhilelt_b64(c, C); |
| 56 | + } while (svptest_any(svptrue_b64(), pg)); |
| 57 | + } |
| 58 | + // Remainder of rows |
| 59 | + CeedInt j=(J/JJ)*JJ; |
| 60 | + if (j < J) { |
| 61 | + for (CeedInt a=0; a<A; a++) |
| 62 | + for (CeedInt b=0; b<B; b++) |
| 63 | + // Blocks of JJ rows |
| 64 | + for (CeedInt jj=0; jj<J-j; jj++) { // not unrolled |
| 65 | + // C vectorization by compiler |
| 66 | + int32_t c = 0; |
| 67 | + svbool_t pg = svwhilelt_b64(c, C); |
| 68 | + do { |
| 69 | + // Load u, v into vectors |
| 70 | + svfloat64_t u_vec = svld1(pg, &u[(a*B+b)*C+c]); |
| 71 | + svfloat64_t v_vec = svld1(pg, &v[(a*J+j+jj)*C+c]); |
| 72 | + // Basis matrix value |
| 73 | + double tq = t[(j+jj)*t_stride_0 + b*t_stride_1]; |
| 74 | + // fmadd |
| 75 | + svst1(pg, &v[(a*J+j+jj)*C+c], svmla_x(pg, v_vec, u_vec, tq)); |
| 76 | + // Loop update |
| 77 | + c += svcntd(); |
| 78 | + pg = svwhilelt_b64(c, C); |
| 79 | + } while (svptest_any(svptrue_b64(), pg)); |
| 80 | + } |
| 81 | + } |
| 82 | + return CEED_ERROR_SUCCESS; |
| 83 | +} |
| 84 | + |
| 85 | +//------------------------------------------------------------------------------ |
| 86 | +// Serial Tensor Contract |
| 87 | +//------------------------------------------------------------------------------ |
| 88 | +static inline int CeedTensorContract_Sve_Serial(CeedTensorContract contract, |
| 89 | + CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, |
| 90 | + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, |
| 91 | + double *restrict v, const CeedInt JJ) { |
| 92 | + CeedInt t_stride_0 = B, t_stride_1 = 1; |
| 93 | + if (t_mode == CEED_TRANSPOSE) { |
| 94 | + t_stride_0 = 1; t_stride_1 = J; |
| 95 | + } |
| 96 | + |
| 97 | + for (CeedInt a=0; a<A; a++) |
| 98 | + for (CeedInt b=0; b<B; b++) |
| 99 | + for (CeedInt j=0; j<(J/JJ)*JJ; j+=JJ) |
| 100 | + for (CeedInt jj=0; jj<JJ; jj++) // unroll |
| 101 | + v[a*J+(j+jj)] += t[(j+jj)*t_stride_0 + b*t_stride_1] * u[a*B+b]; |
| 102 | + |
| 103 | + CeedInt j=(J/JJ)*JJ; |
| 104 | + if (j < J) |
| 105 | + for (CeedInt a=0; a<A; a++) |
| 106 | + for (CeedInt b=0; b<B; b++) |
| 107 | + for (CeedInt jj=0; jj<J-j; jj++) // not unrolled |
| 108 | + v[a*J+(j+jj)] += t[(j+jj)*t_stride_0 + b*t_stride_1] * u[a*B+b]; |
| 109 | + |
| 110 | + return CEED_ERROR_SUCCESS; |
| 111 | +} |
| 112 | + |
| 113 | +//------------------------------------------------------------------------------ |
| 114 | +// Tensor Contract - Common Sizes |
| 115 | +//------------------------------------------------------------------------------ |
| 116 | +static int CeedTensorContract_Sve_Blocked_8(CeedTensorContract contract, |
| 117 | + CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, |
| 118 | + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, |
| 119 | + double *restrict v) { |
| 120 | + return CeedTensorContract_Sve_Blocked(contract, A, B, C, J, t, t_mode, add, u, |
| 121 | + v, 8); |
| 122 | +} |
| 123 | +static int CeedTensorContract_Sve_Serial_8(CeedTensorContract contract, |
| 124 | + CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, |
| 125 | + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, |
| 126 | + double *restrict v) { |
| 127 | + return CeedTensorContract_Sve_Serial(contract, A, B, C, J, t, t_mode, add, u, v, |
| 128 | + 8); |
| 129 | +} |
| 130 | + |
| 131 | +//------------------------------------------------------------------------------ |
| 132 | +// Tensor Contract Apply |
| 133 | +//------------------------------------------------------------------------------ |
| 134 | +static int CeedTensorContractApply_Sve(CeedTensorContract contract, CeedInt A, |
| 135 | + CeedInt B, CeedInt C, CeedInt J, |
| 136 | + const double *restrict t, |
| 137 | + CeedTransposeMode t_mode, |
| 138 | + const CeedInt add, |
| 139 | + const double *restrict u, |
| 140 | + double *restrict v) { |
| 141 | + if (!add) |
| 142 | + for (CeedInt q=0; q<A*J*C; q++) |
| 143 | + v[q] = (double) 0.0; |
| 144 | + |
| 145 | + if (C == 1) |
| 146 | + CeedTensorContract_Sve_Serial_8(contract, A, B, C, J, t, t_mode, true, u, v); |
| 147 | + else |
| 148 | + CeedTensorContract_Sve_Blocked_8(contract, A, B, C, J, t, t_mode, true, u, v); |
| 149 | + |
| 150 | + return CEED_ERROR_SUCCESS; |
| 151 | +} |
| 152 | + |
| 153 | +//------------------------------------------------------------------------------ |
| 154 | +// Tensor Contract Destroy |
| 155 | +//------------------------------------------------------------------------------ |
| 156 | +static int CeedTensorContractDestroy_Sve(CeedTensorContract contract) { |
| 157 | + return CEED_ERROR_SUCCESS; |
| 158 | +} |
| 159 | + |
| 160 | +//------------------------------------------------------------------------------ |
| 161 | +// Tensor Contract Create |
| 162 | +//------------------------------------------------------------------------------ |
| 163 | +int CeedTensorContractCreate_f64_Sve(CeedBasis basis, |
| 164 | + CeedTensorContract contract) { |
| 165 | + int ierr; |
| 166 | + Ceed ceed; |
| 167 | + ierr = CeedTensorContractGetCeed(contract, &ceed); CeedChkBackend(ierr); |
| 168 | + |
| 169 | + ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Apply", |
| 170 | + CeedTensorContractApply_Sve); CeedChkBackend(ierr); |
| 171 | + ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Destroy", |
| 172 | + CeedTensorContractDestroy_Sve); CeedChkBackend(ierr); |
| 173 | + |
| 174 | + return CEED_ERROR_SUCCESS; |
| 175 | +} |
| 176 | +//------------------------------------------------------------------------------ |
0 commit comments