@@ -116,6 +116,14 @@ function kernel_WilsonDiracOperator4D!(i,C,U1,U2,U3,U4,κ,ψdata,::Val{NC1},::Va
116116
117117end
118118
"""
    muladdmulti(a1, b1, a2, b2, a3, b3)

Return `a1*b1 + a2*b2 + a3*b3`, evaluated as a chain of three `muladd` calls so
the compiler may fuse each multiply-add where that is profitable.

The accumulator starts as the zero of `promote_type(typeof(a1), typeof(b1))`, so
it already has the promoted type of the first product when `a1` and `b1` differ
in type (e.g. a real coefficient times a complex value). When `a1` and `b1`
share a type this is the same seed as `zero(typeof(a1))`.
"""
@inline function muladdmulti(a1, b1, a2, b2, a3, b3)
    # Seed with a zero of the promoted operand type so `acc` does not change
    # type after the first multiply-add.
    acc = zero(promote_type(typeof(a1), typeof(b1)))
    acc = muladd(a1, b1, acc)
    acc = muladd(a2, b2, acc)
    acc = muladd(a3, b3, acc)
    return acc
end
126+
119127@inline function kernel_Umgammax_p!(C,κ,U,ψdata,indices,indices_p,oneminusγ)
120128 v11,v12,v13,v14 = mul_op(oneminusγ, ψdata, 1 ,indices_p)
121129 v21,v22,v23,v24 = mul_op(oneminusγ, ψdata, 2 ,indices_p)
@@ -131,22 +139,23 @@ end
131139 U32 = U[3 , 2 , indices... ]
132140 U33 = U[3 , 3 , indices... ]
133141
134- C[1 , 1 , indices... ] += - κ* (U11* v11 + U12* v21 + U13* v31)
135- C[2 , 1 , indices... ] += - κ* (U21* v11 + U22* v21 + U23* v31)
136- C[3 , 1 , indices... ] += - κ* (U31* v11 + U32* v21 + U33* v31)
142+ # C[1, 1, indices...] += -κ*(U11*v11 + U12*v21 + U13*v31)
143+ C[1 , 1 , indices... ] += - κ* muladdmulti(U11,v11,U12,v21,U13,v31)
144+ C[2 , 1 , indices... ] += - κ* muladdmulti(U21,v11,U22,v21,U23,v31)
145+ C[3 , 1 , indices... ] += - κ* muladdmulti(U31,v11,U32,v21,U33,v31)
137146
138- C[1 , 2 , indices... ] += - κ* (U11* v12 + U12* v22 + U13* v32)
139- C[2 , 2 , indices... ] += - κ* (U21* v12 + U22* v22 + U23* v32)
140- C[3 , 2 , indices... ] += - κ* (U31* v12 + U32* v22 + U33* v32)
147+ C[1 , 2 , indices... ] += - κ* muladdmulti (U11, v12, U12, v22, U13, v32)
148+ C[2 , 2 , indices... ] += - κ* muladdmulti (U21, v12, U22, v22, U23, v32)
149+ C[3 , 2 , indices... ] += - κ* muladdmulti (U31, v12, U32, v22, U33, v32)
141150
142151
143- C[1 , 3 , indices... ] += - κ* (U11* v13 + U12* v23 + U13* v33)
144- C[2 , 3 , indices... ] += - κ* (U21* v13 + U22* v23 + U23* v33)
145- C[3 , 3 , indices... ] += - κ* (U31* v13 + U32* v23 + U33* v33)
152+ C[1 , 3 , indices... ] += - κ* muladdmulti (U11, v13, U12, v23, U13, v33)
153+ C[2 , 3 , indices... ] += - κ* muladdmulti (U21, v13, U22, v23, U23, v33)
154+ C[3 , 3 , indices... ] += - κ* muladdmulti (U31, v13, U32, v23, U33, v33)
146155
147- C[1 , 4 , indices... ] += - κ* (U11* v14 + U12* v24 + U13* v34)
148- C[2 , 4 , indices... ] += - κ* (U21* v14 + U22* v24 + U23* v34)
149- C[3 , 4 , indices... ] += - κ* (U31* v14 + U32* v24 + U33* v34)
156+ C[1 , 4 , indices... ] += - κ* muladdmulti (U11, v14, U12, v24, U13, v34)
157+ C[2 , 4 , indices... ] += - κ* muladdmulti (U21, v14, U22, v24, U23, v34)
158+ C[3 , 4 , indices... ] += - κ* muladdmulti (U31, v14, U32, v24, U33, v34)
150159end
151160
152161
@@ -165,22 +174,22 @@ end
165174 U32 = U[2 , 3 , indices_m... ]'
166175 U33 = U[3 , 3 , indices_m... ]'
167176
168- C[1 , 1 , indices... ] += - κ* (U11* v11 + U12* v21 + U13* v31)
169- C[2 , 1 , indices... ] += - κ* (U21* v11 + U22* v21 + U23* v31)
170- C[3 , 1 , indices... ] += - κ* (U31* v11 + U32* v21 + U33* v31)
177+ C[1 , 1 , indices... ] += - κ* muladdmulti (U11, v11, U12, v21, U13, v31)
178+ C[2 , 1 , indices... ] += - κ* muladdmulti (U21, v11, U22, v21, U23, v31)
179+ C[3 , 1 , indices... ] += - κ* muladdmulti (U31, v11, U32, v21, U33, v31)
171180
172- C[1 , 2 , indices... ] += - κ* (U11* v12 + U12* v22 + U13* v32)
173- C[2 , 2 , indices... ] += - κ* (U21* v12 + U22* v22 + U23* v32)
174- C[3 , 2 , indices... ] += - κ* (U31* v12 + U32* v22 + U33* v32)
181+ C[1 , 2 , indices... ] += - κ* muladdmulti (U11, v12, U12, v22, U13, v32)
182+ C[2 , 2 , indices... ] += - κ* muladdmulti (U21, v12, U22, v22, U23, v32)
183+ C[3 , 2 , indices... ] += - κ* muladdmulti (U31, v12, U32, v22, U33, v32)
175184
176185
177- C[1 , 3 , indices... ] += - κ* (U11* v13 + U12* v23 + U13* v33)
178- C[2 , 3 , indices... ] += - κ* (U21* v13 + U22* v23 + U23* v33)
179- C[3 , 3 , indices... ] += - κ* (U31* v13 + U32* v23 + U33* v33)
186+ C[1 , 3 , indices... ] += - κ* muladdmulti (U11, v13, U12, v23, U13, v33)
187+ C[2 , 3 , indices... ] += - κ* muladdmulti (U21, v13, U22, v23, U23, v33)
188+ C[3 , 3 , indices... ] += - κ* muladdmulti (U31, v13, U32, v23, U33, v33)
180189
181- C[1 , 4 , indices... ] += - κ* (U11* v14 + U12* v24 + U13* v34)
182- C[2 , 4 , indices... ] += - κ* (U21* v14 + U22* v24 + U23* v34)
183- C[3 , 4 , indices... ] += - κ* (U31* v14 + U32* v24 + U33* v34)
190+ C[1 , 4 , indices... ] += - κ* muladdmulti (U11, v14, U12, v24, U13, v34)
191+ C[2 , 4 , indices... ] += - κ* muladdmulti (U21, v14, U22, v24, U23, v34)
192+ C[3 , 4 , indices... ] += - κ* muladdmulti (U31, v14, U32, v24, U33, v34)
184193end
185194
186195
@@ -206,7 +215,7 @@ function kernel_WilsonDiracOperator4D!(i,C,U1,U2,U3,U4,κ,ψdata,::Val{3},::Val{
206215 C[3 , 4 , indices... ] = ψdata[3 ,4 , indices... ]
207216
208217 # @inbounds for ν=1:4
209- # @inbounds begin
218+ @inbounds begin
210219 indices_p = shiftindices(indices, shift_1p)
211220 kernel_Umgammax_p!(C,κ,U1,ψdata,indices,indices_p,oneminusγ1)
212221
@@ -232,7 +241,7 @@ function kernel_WilsonDiracOperator4D!(i,C,U1,U2,U3,U4,κ,ψdata,::Val{3},::Val{
232241
233242 indices_m = shiftindices(indices, shift_4m)
234243 kernel_Updaggammax_m!(C,κ,U4,ψdata,indices,indices_m,oneplusγ4)
235- # end
244+ end
236245
237246 # end
238247
0 commit comments