using CUDA
const α = 1f-4 # Diffusivity
const L = 1f-1 # Length
const W = 1f-1 # Width
const Nx = 66 # No.of steps in x-axis
const Ny = 66 # No.of steps in y-axis
const Δx = Float32(L/(Nx-1)) # x-grid spacing
const Δy = Float32(W/(Ny-1)) # y-grid spacing
const Δt = Float32(Δx^2 * Δy^2 / (2f0 * α * (Δx^2 + Δy^2))) # Largest stable time step
function diffuse!(du, u, p,t)
dijij = view(du, 2:Nx-1, 2:Ny-1)
dij = view(u, 2:Nx-1, 2:Ny-1)
di1j = view(u, 1:Nx-2, 2:Ny-1)
dij1 = view(u, 2:Nx-1, 1:Ny-2)
di2j = view(u, 3:Nx , 2:Ny-1)
dij2 = view(u, 2:Nx-1, 3:Ny ) # Stencil Computations
@. dijij = α * (
(di1j - 2 * dij + di2j)/Δx^2 +
(dij1 - 2 * dij + dij2)/Δy^2) # Apply diffusion
@views @. du[1, :] += α * (2*u[2, :] - 2*u[1, :])/Δx^2
@views @. du[Nx, :] += α * (2*u[Nx-1, :] - 2*u[Ny, :])/Δx^2
@views @. du[:, 1] += α * (2*u[:, 2]-2*u[:, 1])/Δy^2
@views @. du[:, Ny] += α * (2*u[:, Nx-1]-2*u[:, Ny])/Δy^2 # update boundary condition (Neumann BCs)
end
u = zeros(Nx,Ny)
du = similar(u)
u[25:35, 25:35] .= 50f0;
@time for i in 1:1000 # Apply the diffuse 1000 time to let the heat spread a long the rod
diffuse!(du, u,0,0)
@. u = u + Δt * du
end;
u_GPU= CUDA.zeros(Nx,Ny)
du_GPU = similar(u_GPU)
u_GPU[25:35, 25:35] .= 50f0;
@time for i in 1:1000 # Apply the diffuse 1000 time to let the heat spread a long the rod
diffuse!(du_GPU, u_GPU,0,0)
u_GPU = u_GPU + Δt * du_GPU
end;
f(du,u) = du .+= 2 .* u
@time for i in 1:10000; f(du,u) end # 0.021198 seconds
@time for i in 1:10000; f(du_GPU,u_GPU) end # 0.163096 seconds
using OrdinaryDiffEq
tspan = (0f0, 1f-2)
prob = ODEProblem(diffuse!, u_GPU, tspan)
sol = solve(prob, Euler(), dt=Δt) # Success
sol = solve(prob, Rodas5(), dt=Δt) # Success
sol = solve(prob, TRBDF2(), dt=Δt) # Success
sol = solve(prob, ROCK2(), dt=Δt) # Success
sol2 = solve(prob, QNDF())
sol3 = solve(prob, FBDF())
@YingboMa these two seem to be mixing in some CPU arrays?