WaterLilyMPIExt.jl
module WaterLilyMPIExt
if isdefined(Base, :get_extension)
using MPI
else
using ..MPI
end
using StaticArrays
using WaterLily
import WaterLily: init_mpi,me,mpi_grid,finalize_mpi,get_extents
import WaterLily: BC!,perBC!,exitBC!,L₂,L∞,loc,grid_loc,_dot,CFL,residual!,sim_step!
const NDIMS_MPI = 3 # Internally, we always set the number of dimensions to 3 for calls to MPI. This ensures a fixed size for MPI coords, neighbors, etc., and keeps the code simple and easy to read.
const NNEIGHBORS_PER_DIM = 2
"""
halos(dims,d)
Return the CartesianIndices of the halos in dimension `±d` of an array of size `dims`.
"""
function halos(dims::NTuple{N},j) where N
CartesianIndices(ntuple( i-> i==abs(j) ? j<0 ? (1:2) : (dims[i]-1:dims[i]) : (1:dims[i]), N))
end
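# Illustration (hypothetical 10×10 padded array): the two ghost layers on each face are
#   halos((10,10), -1) == CartesianIndices((1:2,  1:10))   # lower-x halo
#   halos((10,10), +2) == CartesianIndices((1:10, 9:10))   # upper-y halo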
"""
buff(dims,d)
Return the CartesianIndices of the buffer in dimension `±d` of an array of size `dims`.
"""
function buff(dims::NTuple{N},j) where N
CartesianIndices(ntuple( i-> i==abs(j) ? j<0 ? (3:4) : (dims[i]-3:dims[i]-2) : (1:dims[i]), N))
end
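# Illustration (same hypothetical array): the interior layers that are shipped to neighbors are
#   buff((10,10), -1) == CartesianIndices((3:4,  1:10))   # lower-x buffer
#   buff((10,10), +2) == CartesianIndices((1:10, 7:8))    # upper-y buffer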
"""
mpi_swap!(send1,recv1,send2,recv2,neighbor,comm)
Swap halo data with the two neighboring ranks: `send1` is sent to `neighbor[1]` and the matching message from
`neighbor[1]` is received into `recv1`; likewise `send2` is exchanged with `neighbor[2]` into `recv2`.
The transfers use non-blocking sends and receives, and the function returns once all of them have completed.
"""
function mpi_swap!(send1,recv1,send2,recv2,neighbor,comm)
reqs=MPI.Request[]
# Send to / receive from neighbor 1 in dimension d
push!(reqs,MPI.Isend(send1, neighbor[1], 0, comm))
push!(reqs,MPI.Irecv!(recv1, neighbor[1], 1, comm))
# Send to / receive from neighbor 2 in dimension d
push!(reqs,MPI.Irecv!(recv2, neighbor[2], 0, comm))
push!(reqs,MPI.Isend(send2, neighbor[2], 1, comm))
# wait for all transfers to complete
MPI.Waitall!(reqs)
end
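# Tag convention: messages tagged 0 travel toward `neighbor[1]` (the lower neighbor) and messages
# tagged 1 toward `neighbor[2]` (the upper neighbor), so each Irecv! above is matched by the
# corresponding Isend on the neighboring rank. If a neighbor is MPI.PROC_NULL (a physical wall),
# the calls complete immediately and the receive buffer is left unchanged.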
"""
    perBC!(a,perdir)
Fill the halo cells of the array `a` in every direction by exchanging interior buffer layers with the
neighboring MPI ranks; faces that sit on a physical domain wall (no neighbor) are left untouched.
"""
perBC!(a::MPIArray,::Tuple{}) = _perBC!(a, size(a), true)
perBC!(a::MPIArray, perdir, N = size(a)) = _perBC!(a, N, true)
_perBC!(a, N, mpi::Bool) = for d ∈ eachindex(N)
# get data to transfer @TODO use @views
send1 = a[buff(N,-d)]; send2 = a[buff(N,+d)]
recv1 = zero(send1); recv2 = zero(send2)
# swap
mpi_swap!(send1,recv1,send2,recv2,neighbors(d),mpi_grid().comm)
# this sets the BCs
!mpi_wall(d,1) && (a[halos(N,-d)] .= recv1) # halo swap
!mpi_wall(d,2) && (a[halos(N,+d)] .= recv2) # halo swap
end
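# Typical use (see residual! below): perBC!(p.x, p.perdir) exchanges the halos of the Poisson
# solution field across ranks before the residual is evaluated.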
"""
    BC!(a,A,saveexit=false,perdir=())
Apply boundary conditions to the vector field `a` using the MPI grid: halo layers are exchanged with the
neighboring ranks, while on ranks owning a physical domain wall the wall-normal component is set to the
prescribed flux `A` (left to `exitBC!` on the exit plane when `saveexit` is set) and the other components
get a zero-gradient (reflected) condition.
"""
# function BC!(a::MPIArray,A,saveexit=false,perdir=())
# N,n = WaterLily.size_u(a)
# for i ∈ 1:n, d ∈ 1:n
# # get data to transfer @TODO use @views
# send1 = a[buff(N,-d),i]; send2 = a[buff(N,+d),i]
# recv1 = zero(send1); recv2 = zero(send2)
# # swap
# mpi_swap!(send1,recv1,send2,recv2,neighbors(d),mpi_grid().comm)
# # this sets the BCs on the domain boundary and transfers the data
# if mpi_wall(d,1) # left wall
# if i==d # set flux
# a[halos(N,-d),i] .= A[i]
# a[WaterLily.slice(N,3,d),i] .= A[i]
# else # zero gradient
# a[halos(N,-d),i] .= reverse(send1; dims=d)
# end
# else # neighbor on the left
# a[halos(N,-d),i] .= recv1
# end
# if mpi_wall(d,2) # right wall
# if i==d && (!saveexit || i>1) # convection exit
# a[halos(N,+d),i] .= A[i]
# else # zero gradient
# a[halos(N,+d),i] .= reverse(send2; dims=d)
# end
# else # neighbor on the right
# a[halos(N,+d),i] .= recv2
# end
# end
# end
using EllipsisNotation
function BC!(a::MPIArray,A,saveexit=false,perdir=())
N,n = WaterLily.size_u(a)
for d ∈ 1:n # transfer full halos in each direction
# get data to transfer @TODO use @views
send1 = a[buff(N,-d),:]; send2 = a[buff(N,+d),:]
recv1 = zero(send1); recv2 = zero(send2)
# swap
mpi_swap!(send1,recv1,send2,recv2,neighbors(d),mpi_grid().comm)
# mpi boundary swap
!mpi_wall(d,1) && (a[halos(N,-d),:] .= recv1) # halo swap
!mpi_wall(d,2) && (a[halos(N,+d),:] .= recv2) # halo swap
for i ∈ 1:n # this sets the BCs on the physical boundary
if mpi_wall(d,1) # left wall
if i==d # set flux
a[halos(N,-d),i] .= A[i]
a[WaterLily.slice(N,3,d),i] .= A[i]
else # zero gradient
a[halos(N,-d),i] .= reverse(send1[..,i]; dims=d)
end
end
if mpi_wall(d,2) # right wall
if i==d && (!saveexit || i>1) # convection exit
a[halos(N,+d),i] .= A[i]
else # zero gradient
a[halos(N,+d),i] .= reverse(send2[..,i]; dims=d)
end
end
end
end
end
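"""
    exitBC!(u,u⁰,U,Δt)
First-order convective exit condition on the outlet plane, u = u⁰ - U[1]Δt(u⁰[I]-u⁰[I-δ]), followed by a
correction of the exit-plane velocity so that its average matches the prescribed `U[1]`. The commented
lines sketch a variant that reduces the flux imbalance over all ranks and corrects only on ranks that
own the right wall.
"""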
function exitBC!(u::MPIArray,u⁰,U,Δt)
N,_ = WaterLily.size_u(u)
exitR = WaterLily.slice(N.-2,N[1]-2,1,3) # exit slice excluding ghosts
# ∮udA = 0
# if mpi_wall(1,2) #right wall
@WaterLily.loop u[I,1] = u⁰[I,1]-U[1]*Δt*(u⁰[I,1]-u⁰[I-δ(1,I),1]) over I ∈ exitR
∮u = sum(u[exitR,1])/length(exitR)-U[1] # mass flux imbalance
# end
# ∮u = MPI.Allreduce(∮udA,+,mpi_grid().comm) # domain imbalance
# mpi_wall(1,2) && (@WaterLily.loop u[I,1] -= ∮u over I ∈ exitR) # correct flux only on right wall
@WaterLily.loop u[I,1] -= ∮u over I ∈ exitR # correct flux only on right wall
end
struct MPIGrid #{I,C<:MPI.Comm,N<:AbstractVector,M<:AbstractArray,G<:AbstractVector}
me::Int # rank
comm::MPI.Comm # communicator
coords::AbstractVector # coordinates
neighbors::AbstractArray # neighbors
global_loc::AbstractVector # the location of the lower left corner in global index space
end
const MPI_GRID_NULL = MPIGrid(-1,MPI.COMM_NULL,[-1,-1,-1],[-1 -1 -1; -1 -1 -1],[0,0,0])
let
global MPIGrid, set_mpi_grid, mpi_grid, mpi_initialized, check_mpi
# allows access to the global MPI grid
_mpi_grid::MPIGrid = MPI_GRID_NULL
mpi_grid()::MPIGrid = (check_mpi(); _mpi_grid::MPIGrid)
set_mpi_grid(grid::MPIGrid) = (_mpi_grid = grid;)
mpi_initialized() = (_mpi_grid.comm != MPI.COMM_NULL)
check_mpi() = !mpi_initialized() && error("MPI not initialized")
end
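# Once init_mpi (below) has populated the grid, it can be queried from anywhere in the extension,
# e.g. mpi_grid().coords gives this rank's Cartesian coordinates and mpi_grid().comm the
# communicator used for all halo exchanges and global reductions in this file.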
function init_mpi(Dims::NTuple{D};dims=[0, 0, 0],periods=[0, 0, 0],comm::MPI.Comm=MPI.COMM_WORLD,
disp::Integer=1,reorder::Bool=true) where D
# MPI
MPI.Init()
nprocs = MPI.Comm_size(comm)
# create cartesian communicator
MPI.Dims_create!(nprocs, dims)
comm_cart = MPI.Cart_create(comm, dims, periods, reorder)
me = MPI.Comm_rank(comm_cart)
coords = MPI.Cart_coords(comm_cart)
# make the cart comm
neighbors = fill(MPI.PROC_NULL, NNEIGHBORS_PER_DIM, NDIMS_MPI);
for i = 1:NDIMS_MPI
neighbors[:,i] .= MPI.Cart_shift(comm_cart, i-1, disp);
end
# global index coordinate in grid space
global_loc = SVector([coords[i]*Dims[i] for i in 1:D]...)
set_mpi_grid(MPIGrid(me,comm_cart,coords,neighbors,global_loc))
return me # this is the most useful MPI variable to have locally
end
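# Rough driver sketch (illustrative only; the grid size and Simulation set-up are assumptions,
# not part of this extension):
#   rank = init_mpi((64,64))   # every rank passes the size of its *local* block
#   # ... build the rank-local Simulation on MPIArray-backed fields and call sim_step! ...
#   finalize_mpi()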
finalize_mpi() = MPI.Finalize()
# global coordinate in grid space
grid_loc(grid::MPIGrid=mpi_grid()) = grid.global_loc
me()= mpi_grid().me
neighbors(dim) = mpi_grid().neighbors[:,dim]
mpi_wall(dim,i) = mpi_grid().neighbors[i,dim]==MPI.PROC_NULL
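# The norms, dot product and CFL condition below combine a local reduction with an MPI.Allreduce,
# so every rank agrees on the same scalar (e.g. the pressure-solver step size or the time step Δt).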
L₂(a::MPIArray{T}) where T = MPI.Allreduce(sum(T,abs2,@inbounds(a[I]) for I ∈ inside(a)),+,mpi_grid().comm)
function L₂(p::Poisson{T,S}) where {T,S<:MPIArray{T}} # should work on the GPU
MPI.Allreduce(sum(T,@inbounds(p.r[I]*p.r[I]) for I ∈ inside(p.r)),+,mpi_grid().comm)
end
L∞(a::MPIArray) = MPI.Allreduce(maximum(abs.(a)),Base.max,mpi_grid().comm)
L∞(p::Poisson{T,S}) where {T,S<:MPIArray{T}} = MPI.Allreduce(maximum(abs.(p.r)),Base.max,mpi_grid().comm)
function _dot(a::MPIArray{T},b::MPIArray{T}) where T
MPI.Allreduce(sum(T,@inbounds(a[I]*b[I]) for I ∈ inside(a)),+,mpi_grid().comm)
end
function CFL(a::Flow{D,T,S};Δt_max=10) where {D,T,S<:MPIArray{T}}
@inside a.σ[I] = WaterLily.flux_out(I,a.u)
MPI.Allreduce(min(Δt_max,inv(maximum(a.σ)+5a.ν)),Base.min,mpi_grid().comm)
end
# note: this adds a global communication every time residual! is called
function residual!(p::Poisson{T,S}) where {T,S<:MPIArray{T}}
WaterLily.perBC!(p.x,p.perdir)
@inside p.r[I] = ifelse(p.iD[I]==0,0,p.z[I]-WaterLily.mult(I,p.L,p.D,p.x))
# s = sum(p.r)/length(inside(p.r))
s = MPI.Allreduce(sum(p.r)/length(inside(p.r)),+,mpi_grid().comm)
abs(s) <= 2eps(eltype(s)) && return
@inside p.r[I] = p.r[I]-s
end
function sim_step!(sim::Simulation{D,T,S},t_end;remeasure=true,
max_steps=typemax(Int),verbose=false) where {D,T,S<:MPIArray{T}}
steps₀ = length(sim.flow.Δt)
while sim_time(sim) < t_end && length(sim.flow.Δt) - steps₀ < max_steps
sim_step!(sim; remeasure)
(verbose && me()==0) && println("tU/L=",round(sim_time(sim),digits=4),
", Δt=",round(sim.flow.Δt[end],digits=3))
end
end
# helper function for the vtk writer: gathers each rank's global index extents
function get_extents(a::MPIArray)
xs = Tuple(ifelse(x==0,1,x+1):ifelse(x==0,n+4,n+x+4) for (n,x) in zip(size(inside(a)),grid_loc()))
MPI.Allgather(xs, mpi_grid().comm)
end
end # module