|
| 1 | +function add!(b, Y, a, X) #b*Y + a*X -> Y |
| 2 | + LinearAlgebra.axpby!(a, X, b, Y) #X*a + Y*b -> Y |
| 3 | +end |
| 4 | + |
| 5 | +function add!(Y, a, X) #Y + a*X -> Y |
| 6 | + LinearAlgebra.axpby!(a, X, 1, Y) #X*a + Y -> Y |
| 7 | +end |
| 8 | + |
| 9 | + |
| 10 | + |
| 11 | + |
| 12 | +function cg(x, A, b, temps; eps=1e-10, maxsteps=5000, verboselevel=2) #Ax=b |
| 13 | + #temps = get_blockoraryvectors_forCG(A) |
| 14 | + |
| 15 | + if verboselevel >= 3 |
| 16 | + println("--------------------------------------") |
| 17 | + println("cg method") |
| 18 | + end |
| 19 | + |
| 20 | + bnorm = sqrt(real(b ⋅ b)) |
| 21 | + #= |
| 22 | + res = deepcopy(b) |
| 23 | + temp1 = similar(x) |
| 24 | + mul!(temp1,A,x) |
| 25 | + add!(res,-1,temp1) |
| 26 | + q = similar(x) |
| 27 | + p = deepcopy(res) |
| 28 | + =# |
| 29 | + |
| 30 | + |
| 31 | + res, it_res = get_block(temps) |
| 32 | + #res = temps[1] |
| 33 | + substitute!(res, b) |
| 34 | + set_halo!(res) |
| 35 | + temp1, it_temp1 = get_block(temps) |
| 36 | + #temp1 = temps[2] |
| 37 | + #println("in CG $(sum(abs.(x.f)))") |
| 38 | + mul!(temp1, A, x) |
| 39 | + set_halo!(temp1) |
| 40 | + |
| 41 | + |
| 42 | + add!(res, -1, temp1) |
| 43 | + q, it_q = get_block(temps) |
| 44 | + p, it_p = get_block(temps) |
| 45 | + #q = temps[3] |
| 46 | + #p = temps[4] |
| 47 | + substitute!(p, res) |
| 48 | + set_halo!(p) |
| 49 | + |
| 50 | + #p = deepcopy(res) |
| 51 | + |
| 52 | + rnorm = sqrt(real(res ⋅ res)) / bnorm |
| 53 | + |
| 54 | + #println(rnorm) |
| 55 | + |
| 56 | + if rnorm < eps |
| 57 | + unused!(temps, it_res) |
| 58 | + unused!(temps, it_temp1) |
| 59 | + unused!(temps, it_q) |
| 60 | + unused!(temps, it_p) |
| 61 | + return |
| 62 | + end |
| 63 | + |
| 64 | + c1 = p ⋅ p |
| 65 | + |
| 66 | + |
| 67 | + for i = 1:maxsteps |
| 68 | + mul!(q, A, p) |
| 69 | + set_halo!(q) |
| 70 | + c2 = real(dot(p, q)) |
| 71 | + #println("c2 = $c2") |
| 72 | + |
| 73 | + #c2 = p ⋅ q |
| 74 | + |
| 75 | + α = c1 / c2 |
| 76 | + #! ... x = x + alpha * p |
| 77 | + #println("add2") |
| 78 | + add!(x, α, p) |
| 79 | + set_halo!(x) |
| 80 | + #... res = res - alpha * q |
| 81 | + #println("add1") |
| 82 | + add!(res, -α, q) |
| 83 | + c3 = res ⋅ res |
| 84 | + rnorm = sqrt(real(c3)) / bnorm |
| 85 | + if verboselevel >= 3 |
| 86 | + println("$i-th eps: $rnorm") |
| 87 | + end |
| 88 | + |
| 89 | + |
| 90 | + |
| 91 | + |
| 92 | + if rnorm < eps |
| 93 | + #println("$i eps: $eps rnorm $rnorm") |
| 94 | + if verboselevel >= 3 |
| 95 | + println("Converged at $i-th step. eps: $rnorm") |
| 96 | + println("--------------------------------------") |
| 97 | + end |
| 98 | + unused!(temps, it_res) |
| 99 | + unused!(temps, it_temp1) |
| 100 | + unused!(temps, it_q) |
| 101 | + unused!(temps, it_p) |
| 102 | + return |
| 103 | + end |
| 104 | + |
| 105 | + β = c3 / c1 |
| 106 | + c1 = c3 |
| 107 | + |
| 108 | + #println("add3") |
| 109 | + add!(β, p, 1, res) #p = beta*p + s |
| 110 | + set_halo!(p) |
| 111 | + |
| 112 | + end |
| 113 | + |
| 114 | + |
| 115 | + |
| 116 | + error(""" |
| 117 | + The CG is not converged! with maxsteps = $(maxsteps) |
| 118 | + residual is $rnorm |
| 119 | + maxsteps should be larger.""") |
| 120 | + |
| 121 | + |
| 122 | +end |
0 commit comments