Skip to content

GPU Compatibility of QNDF and FBDF #1637

@ChrisRackauckas

Description

@ChrisRackauckas
using CUDA

const α  = 1f-4                                                                             # Diffusivity
const L  = 1f-1                                                                             # Length
const W  = 1f-1                                                                             # Width
const Nx = 66                                                                               # No.of steps in x-axis
const Ny = 66                                                                               # No.of steps in y-axis
const Δx = Float32(L/(Nx-1))                                                                # x-grid spacing
const Δy = Float32(W/(Ny-1))                                                                # y-grid spacing
const Δt = Float32(Δx^2 * Δy^2 / (2f0 * α * (Δx^2 + Δy^2)))                                 # Largest stable time step

function diffuse!(du, u, p,t)
    dijij = view(du, 2:Nx-1, 2:Ny-1)
    dij  = view(u, 2:Nx-1, 2:Ny-1)
    di1j = view(u, 1:Nx-2, 2:Ny-1)
    dij1 = view(u, 2:Nx-1, 1:Ny-2)
    di2j = view(u, 3:Nx  , 2:Ny-1)
    dij2 = view(u, 2:Nx-1, 3:Ny  )                                                  # Stencil Computations

    @. dijij = α  * (
        (di1j - 2 * dij + di2j)/Δx^2 +
        (dij1 - 2 * dij + dij2)/Δy^2)                                               # Apply diffusion

    @views @. du[1, :] += α  * (2*u[2, :] - 2*u[1, :])/Δx^2
    @views @. du[Nx, :] += α  * (2*u[Nx-1, :] - 2*u[Ny, :])/Δx^2
    @views @. du[:, 1] += α   * (2*u[:, 2]-2*u[:, 1])/Δy^2
    @views @. du[:, Ny] += α * (2*u[:, Nx-1]-2*u[:, Ny])/Δy^2                  # update boundary condition (Neumann BCs)

end

u = zeros(Nx,Ny)
du = similar(u)     
u[25:35, 25:35] .= 50f0;

@time for i in 1:1000                                                                     # Apply the diffuse 1000 time to let the heat spread a long the rod      

    diffuse!(du, u,0,0)

    @. u = u + Δt * du

end;

u_GPU= CUDA.zeros(Nx,Ny)
du_GPU = similar(u_GPU)     
u_GPU[25:35, 25:35] .= 50f0;

@time for i in 1:1000                                                                     # Apply the diffuse 1000 time to let the heat spread a long the rod      

    diffuse!(du_GPU, u_GPU,0,0)

    u_GPU = u_GPU + Δt * du_GPU

end;

f(du,u) = du .+= 2 .* u
@time for i in 1:10000; f(du,u) end # 0.021198 seconds
@time for i in 1:10000; f(du_GPU,u_GPU) end # 0.163096 seconds

using OrdinaryDiffEq

tspan = (0f0, 1f-2)
prob = ODEProblem(diffuse!, u_GPU, tspan)
sol = solve(prob, Euler(), dt=Δt) # Success
sol = solve(prob, Rodas5(), dt=Δt) # Success
sol = solve(prob, TRBDF2(), dt=Δt) # Success
sol = solve(prob, ROCK2(), dt=Δt) # Success
sol2 = solve(prob, QNDF()) 
sol3 = solve(prob, FBDF())

@YingboMa these two seem to be mixing in some CPU arrays?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions