GPU Compatibility of QNDF and FBDF

```julia
using CUDA

const α  = 1f-4                                                                             # Diffusivity
const L  = 1f-1                                                                             # Length
const W  = 1f-1                                                                             # Width
const Nx = 66                                                                               # No.of steps in x-axis
const Ny = 66                                                                               # No.of steps in y-axis
const Δx = Float32(L/(Nx-1))                                                                # x-grid spacing
const Δy = Float32(W/(Ny-1))                                                                # y-grid spacing
const Δt = Float32(Δx^2 * Δy^2 / (2f0 * α * (Δx^2 + Δy^2)))                                 # Largest stable time step

function diffuse!(du, u, p,t)
    dijij = view(du, 2:Nx-1, 2:Ny-1)
    dij  = view(u, 2:Nx-1, 2:Ny-1)
    di1j = view(u, 1:Nx-2, 2:Ny-1)
    dij1 = view(u, 2:Nx-1, 1:Ny-2)
    di2j = view(u, 3:Nx  , 2:Ny-1)
    dij2 = view(u, 2:Nx-1, 3:Ny  )                                                  # Stencil Computations

    @. dijij = α  * (
        (di1j - 2 * dij + di2j)/Δx^2 +
        (dij1 - 2 * dij + dij2)/Δy^2)                                               # Apply diffusion

    @views @. du[1, :] += α  * (2*u[2, :] - 2*u[1, :])/Δx^2
    @views @. du[Nx, :] += α  * (2*u[Nx-1, :] - 2*u[Ny, :])/Δx^2
    @views @. du[:, 1] += α   * (2*u[:, 2]-2*u[:, 1])/Δy^2
    @views @. du[:, Ny] += α * (2*u[:, Nx-1]-2*u[:, Ny])/Δy^2                  # update boundary condition (Neumann BCs)

end

u = zeros(Nx,Ny)
du = similar(u)     
u[25:35, 25:35] .= 50f0;

@time for i in 1:1000                                                                     # Apply the diffuse 1000 time to let the heat spread a long the rod      

    diffuse!(du, u,0,0)

    @. u = u + Δt * du

end;

u_GPU= CUDA.zeros(Nx,Ny)
du_GPU = similar(u_GPU)     
u_GPU[25:35, 25:35] .= 50f0;

@time for i in 1:1000                                                                     # Apply the diffuse 1000 time to let the heat spread a long the rod      

    diffuse!(du_GPU, u_GPU,0,0)

    u_GPU = u_GPU + Δt * du_GPU

end;

f(du,u) = du .+= 2 .* u
@time for i in 1:10000; f(du,u) end # 0.021198 seconds
@time for i in 1:10000; f(du_GPU,u_GPU) end # 0.163096 seconds

using OrdinaryDiffEq

tspan = (0f0, 1f-2)
prob = ODEProblem(diffuse!, u_GPU, tspan)
sol = solve(prob, Euler(), dt=Δt) # Success
sol = solve(prob, Rodas5(), dt=Δt) # Success
sol = solve(prob, TRBDF2(), dt=Δt) # Success
sol = solve(prob, ROCK2(), dt=Δt) # Success
sol2 = solve(prob, QNDF()) 
sol3 = solve(prob, FBDF())
```

@YingboMa these two seem to be mixing in some CPU arrays?

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

GPU Compatibility of QNDF and FBDF #1637

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Uh oh!

GPU Compatibility of QNDF and FBDF #1637

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions