Skip to content

Commit 34a37ad

Browse files
committed
Small revision to the Dirac operator
1 parent 488d19c commit 34a37ad

File tree

2 files changed

+38
-27
lines changed

2 files changed

+38
-27
lines changed

src/Operators/DiracOperators.jl

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,14 @@ function kernel_WilsonDiracOperator4D!(i,C,U1,U2,U3,U4,κ,ψdata,::Val{NC1},::Va
116116

117117
end
118118

119+
"""
    muladdmulti(a1, b1, a2, b2, a3, b3)

Return the three-term sum of products `a1*b1 + a2*b2 + a3*b3`.

The sum is accumulated left to right with `muladd`, starting from an additive
zero of `a1`'s type, so each step may be lowered to a fused multiply-add where
the element type and hardware support it. Because of that, the floating-point
result can differ in the last bits from the plain `a1*b1 + a2*b2 + a3*b3`
expression.

# Arguments
- `a1`, `a2`, `a3`: left factors of the three products.
- `b1`, `b2`, `b3`: right factors of the three products.

# Examples
```julia
julia> muladdmulti(1, 2, 3, 4, 5, 6)
44
```
"""
@inline function muladdmulti(a1, b1, a2, b2, a3, b3)
    # Seed with the additive identity of the first factor; for scalar number
    # types this is the same value as `zero(typeof(a1))`.
    acc = zero(a1)
    # Keep the term order fixed (1, 2, 3) so rounding stays reproducible.
    acc = muladd(a1, b1, acc)
    acc = muladd(a2, b2, acc)
    acc = muladd(a3, b3, acc)
    return acc
end
126+
119127
@inline function kernel_Umgammax_p!(C,κ,U,ψdata,indices,indices_p,oneminusγ)
120128
v11,v12,v13,v14 = mul_op(oneminusγ, ψdata, 1,indices_p)
121129
v21,v22,v23,v24 = mul_op(oneminusγ, ψdata, 2,indices_p)
@@ -131,22 +139,23 @@ end
131139
U32 = U[3, 2, indices...]
132140
U33 = U[3, 3, indices...]
133141

134-
C[1, 1, indices...] += -κ*(U11*v11 + U12*v21 + U13*v31)
135-
C[2, 1, indices...] += -κ*(U21*v11 + U22*v21 + U23*v31)
136-
C[3, 1, indices...] += -κ*(U31*v11 + U32*v21 + U33*v31)
142+
#C[1, 1, indices...] += -κ*(U11*v11 + U12*v21 + U13*v31)
143+
C[1, 1, indices...] += -κ*muladdmulti(U11,v11,U12,v21,U13,v31)
144+
C[2, 1, indices...] += -κ*muladdmulti(U21,v11,U22,v21,U23,v31)
145+
C[3, 1, indices...] += -κ*muladdmulti(U31,v11,U32,v21,U33,v31)
137146

138-
C[1, 2, indices...] += -κ*(U11*v12 + U12*v22 + U13*v32)
139-
C[2, 2, indices...] += -κ*(U21*v12 + U22*v22 + U23*v32)
140-
C[3, 2, indices...] += -κ*(U31*v12 + U32*v22 + U33*v32)
147+
C[1, 2, indices...] += -κ*muladdmulti(U11,v12,U12,v22,U13,v32)
148+
C[2, 2, indices...] += -κ*muladdmulti(U21,v12,U22,v22,U23,v32)
149+
C[3, 2, indices...] += -κ*muladdmulti(U31,v12,U32,v22,U33,v32)
141150

142151

143-
C[1, 3, indices...] += -κ*(U11*v13 + U12*v23 + U13*v33)
144-
C[2, 3, indices...] += -κ*(U21*v13 + U22*v23 + U23*v33)
145-
C[3, 3, indices...] += -κ*(U31*v13 + U32*v23 + U33*v33)
152+
C[1, 3, indices...] += -κ*muladdmulti(U11,v13,U12,v23,U13,v33)
153+
C[2, 3, indices...] += -κ*muladdmulti(U21,v13,U22,v23,U23,v33)
154+
C[3, 3, indices...] += -κ*muladdmulti(U31,v13,U32,v23,U33,v33)
146155

147-
C[1, 4, indices...] += -κ*(U11*v14 + U12*v24 + U13*v34)
148-
C[2, 4, indices...] += -κ*(U21*v14 + U22*v24 + U23*v34)
149-
C[3, 4, indices...] += -κ*(U31*v14 + U32*v24 + U33*v34)
156+
C[1, 4, indices...] += -κ*muladdmulti(U11,v14,U12,v24,U13,v34)
157+
C[2, 4, indices...] += -κ*muladdmulti(U21,v14,U22,v24,U23,v34)
158+
C[3, 4, indices...] += -κ*muladdmulti(U31,v14,U32,v24,U33,v34)
150159
end
151160

152161

@@ -165,22 +174,22 @@ end
165174
U32 = U[2, 3, indices_m...]'
166175
U33 = U[3, 3, indices_m...]'
167176

168-
C[1, 1, indices...] += -κ*(U11*v11 + U12*v21 + U13*v31)
169-
C[2, 1, indices...] += -κ*(U21*v11 + U22*v21 + U23*v31)
170-
C[3, 1, indices...] += -κ*(U31*v11 + U32*v21 + U33*v31)
177+
C[1, 1, indices...] += -κ*muladdmulti(U11,v11,U12,v21,U13,v31)
178+
C[2, 1, indices...] += -κ*muladdmulti(U21,v11,U22,v21,U23,v31)
179+
C[3, 1, indices...] += -κ*muladdmulti(U31,v11,U32,v21,U33,v31)
171180

172-
C[1, 2, indices...] += -κ*(U11*v12 + U12*v22 + U13*v32)
173-
C[2, 2, indices...] += -κ*(U21*v12 + U22*v22 + U23*v32)
174-
C[3, 2, indices...] += -κ*(U31*v12 + U32*v22 + U33*v32)
181+
C[1, 2, indices...] += -κ*muladdmulti(U11,v12,U12,v22,U13,v32)
182+
C[2, 2, indices...] += -κ*muladdmulti(U21,v12,U22,v22,U23,v32)
183+
C[3, 2, indices...] += -κ*muladdmulti(U31,v12,U32,v22,U33,v32)
175184

176185

177-
C[1, 3, indices...] += -κ*(U11*v13 + U12*v23 + U13*v33)
178-
C[2, 3, indices...] += -κ*(U21*v13 + U22*v23 + U23*v33)
179-
C[3, 3, indices...] += -κ*(U31*v13 + U32*v23 + U33*v33)
186+
C[1, 3, indices...] += -κ*muladdmulti(U11,v13,U12,v23,U13,v33)
187+
C[2, 3, indices...] += -κ*muladdmulti(U21,v13,U22,v23,U23,v33)
188+
C[3, 3, indices...] += -κ*muladdmulti(U31,v13,U32,v23,U33,v33)
180189

181-
C[1, 4, indices...] += -κ*(U11*v14 + U12*v24 + U13*v34)
182-
C[2, 4, indices...] += -κ*(U21*v14 + U22*v24 + U23*v34)
183-
C[3, 4, indices...] += -κ*(U31*v14 + U32*v24 + U33*v34)
190+
C[1, 4, indices...] += -κ*muladdmulti(U11,v14,U12,v24,U13,v34)
191+
C[2, 4, indices...] += -κ*muladdmulti(U21,v14,U22,v24,U23,v34)
192+
C[3, 4, indices...] += -κ*muladdmulti(U31,v14,U32,v24,U33,v34)
184193
end
185194

186195

@@ -206,7 +215,7 @@ function kernel_WilsonDiracOperator4D!(i,C,U1,U2,U3,U4,κ,ψdata,::Val{3},::Val{
206215
C[3, 4, indices...] = ψdata[3,4, indices...]
207216

208217
#@inbounds for ν=1:4
209-
#@inbounds begin
218+
@inbounds begin
210219
indices_p = shiftindices(indices, shift_1p)
211220
kernel_Umgammax_p!(C,κ,U1,ψdata,indices,indices_p,oneminusγ1)
212221

@@ -232,7 +241,7 @@ function kernel_WilsonDiracOperator4D!(i,C,U1,U2,U3,U4,κ,ψdata,::Val{3},::Val{
232241

233242
indices_m = shiftindices(indices, shift_4m)
234243
kernel_Updaggammax_m!(C,κ,U4,ψdata,indices,indices_m,oneplusγ4)
235-
#end
244+
end
236245

237246
#end
238247

test/runtests.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -800,7 +800,7 @@ function indextest(dim)
800800
end
801801
802802
function wilsondiractest(NC)
803-
NX = 16
803+
NX = 32
804804
dim = 4
805805
nprocs = MPI.Comm_size(MPI.COMM_WORLD)
806806
myrank = MPI.Comm_rank(MPI.COMM_WORLD)
@@ -939,6 +939,8 @@ function main()
939939
end
940940

941941

942+
return
943+
942944
for dim = 2:4
943945
for NC = 2:4
944946
@testset "NC = $NC, dim = $dim" begin

0 commit comments

Comments
 (0)