Skip to content

Commit

Permalink
Shock capturing with nonconservative_terms::True (#46)
Browse files Browse the repository at this point in the history
* Start

* Remove 3D condition

* Complete 2D

* Complete 3D

* Add tests
  • Loading branch information
huiyuxie authored Sep 19, 2024
1 parent cb499a0 commit d56bcbb
Show file tree
Hide file tree
Showing 6 changed files with 999 additions and 194 deletions.
72 changes: 37 additions & 35 deletions src/solvers/dg_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,41 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,
return nothing
end

# Kernel for calculating DG volume integral contribution
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr, equations::AbstractEquations{1})
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3))
# length(element_ids_dg) == size(du, 3)
# length(element_ids_dgfv) == size(du, 3)

element_dg = element_ids_dg[k] # check if `element_dg` is zero
element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
alpha_element = alpha[k]

@inbounds begin
if element_dg != 0 # bad
for ii in axes(du, 2)
du[i, j, element_dg] += derivative_split[j, ii] *
volume_flux_arr[i, j, ii, element_dg]
end
end

if element_dgfv != 0 # bad
for ii in axes(du, 2)
du[i, j, element_dgfv] += (1 - alpha_element) * derivative_split[j, ii] *
volume_flux_arr[i, j, ii, element_dgfv]
end
end
end
end

return nothing
end

# Kernel for calculating pure DG and DG-FV volume fluxes
function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
element_ids_dgfv, derivative_split,
Expand Down Expand Up @@ -235,41 +270,6 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, f
return nothing
end

# Kernel for calculating DG volume integral contribution
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr, equations::AbstractEquations{1})
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3))
# length(element_ids_dg) == size(du, 3)
# length(element_ids_dgfv) == size(du, 3)

element_dg = element_ids_dg[k] # check if `element_dg` is zero
element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
alpha_element = alpha[k]

@inbounds begin
if element_dg != 0 # bad
for ii in axes(du, 2)
du[i, j, element_dg] += derivative_split[j, ii] *
volume_flux_arr[i, j, ii, element_dg]
end
end

if element_dgfv != 0 # bad
for ii in axes(du, 2)
du[i, j, element_dgfv] += (1 - alpha_element) * derivative_split[j, ii] *
volume_flux_arr[i, j, ii, element_dgfv]
end
end
end
end

return nothing
end

# Kernel for calculating DG volume integral contribution
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr, noncons_flux_arr,
Expand Down Expand Up @@ -805,6 +805,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv;
configurator_2d(volume_flux_dgfv_kernel, size_arr)...)

derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split`

volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg,
element_ids_dgfv,
alpha,
Expand Down
234 changes: 234 additions & 0 deletions src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,148 @@ function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha,
return nothing
end

# Kernel for calculating pure DG and DG-FV volume fluxes
function volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
noncons_flux_arr2, fstar1_L, fstar1_R, fstar2_L, fstar2_R,
u, element_ids_dgfv, derivative_split,
equations::AbstractEquations{2},
volume_flux_dg::Any, nonconservative_flux_dg::Any,
volume_flux_fv::Any, nonconservative_flux_fv::Any)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(u, 2)^3 && k <= size(u, 4))
# length(element_ids_dgfv) == size(u, 4)
u2 = size(u, 2)

j1 = div(j - 1, u2^2) + 1
j2 = div(rem(j - 1, u2^2), u2) + 1
j3 = rem(rem(j - 1, u2^2), u2) + 1

element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero

# The sets of `get_node_vars` operations may be combined
# into a single set of operation for better performance (to be explored).

u_node = get_node_vars(u, equations, j1, j2, k)
u_node1 = get_node_vars(u, equations, j3, j2, k)
u_node2 = get_node_vars(u, equations, j1, j3, k)

volume_flux_node1 = volume_flux_dg(u_node, u_node1, 1, equations)
volume_flux_node2 = volume_flux_dg(u_node, u_node2, 2, equations)

noncons_flux_node1 = nonconservative_flux_dg(u_node, u_node1, 1, equations)
noncons_flux_node2 = nonconservative_flux_dg(u_node, u_node2, 2, equations)

@inbounds begin
for ii in axes(u, 1)
volume_flux_arr1[ii, j1, j3, j2, k] = derivative_split[j1, j3] *
volume_flux_node1[ii]
volume_flux_arr2[ii, j1, j2, j3, k] = derivative_split[j2, j3] *
volume_flux_node2[ii]
noncons_flux_arr1[ii, j1, j3, j2, k] = noncons_flux_node1[ii]
noncons_flux_arr2[ii, j1, j2, j3, k] = noncons_flux_node2[ii]
end
end

if j1 != 1 && j3 == 1 && element_dgfv != 0 # bad
u_ll = get_node_vars(u, equations, j1 - 1, j2, element_dgfv)
u_rr = get_node_vars(u, equations, j1, j2, element_dgfv)

f1_node = volume_flux_fv(u_ll, u_rr, 1, equations)

f1_L_node = nonconservative_flux_fv(u_ll, u_rr, 1, equations)
f1_R_node = nonconservative_flux_fv(u_rr, u_ll, 1, equations)

@inbounds begin
for ii in axes(u, 1)
fstar1_L[ii, j1, j2, element_dgfv] = f1_node[ii] + 0.5 * f1_L_node[ii]
fstar1_R[ii, j1, j2, element_dgfv] = f1_node[ii] + 0.5 * f1_R_node[ii]
end
end
end

if j2 != 1 && j3 == 1 && element_dgfv != 0 # bad
u_ll = get_node_vars(u, equations, j1, j2 - 1, element_dgfv)
u_rr = get_node_vars(u, equations, j1, j2, element_dgfv)

f2_node = volume_flux_fv(u_ll, u_rr, 2, equations)

f2_L_node = nonconservative_flux_fv(u_ll, u_rr, 2, equations)
f2_R_node = nonconservative_flux_fv(u_rr, u_ll, 2, equations)

@inbounds begin
for ii in axes(u, 1)
fstar2_L[ii, j1, j2, element_dgfv] = f2_node[ii] + 0.5 * f2_L_node[ii]
fstar2_R[ii, j1, j2, element_dgfv] = f2_node[ii] + 0.5 * f2_R_node[ii]
end
end
end
end

return nothing
end

# Kernel for calculating DG volume integral contribution
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr1, volume_flux_arr2,
noncons_flux_arr1, noncons_flux_arr2,
equations::AbstractEquations{2})
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

if (i <= size(du, 1) && j <= size(du, 2)^2 && k <= size(du, 4))
# length(element_ids_dg) == size(du, 4)
# length(element_ids_dgfv) == size(du, 4)

j1 = div(j - 1, size(du, 2)) + 1
j2 = rem(j - 1, size(du, 2)) + 1

element_dg = element_ids_dg[k] # check if `element_dg` is zero
element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
alpha_element = alpha[k]

@inbounds begin
if element_dg != 0 # bad
integral_contribution = 0.0

for ii in axes(du, 2)
du[i, j1, j2, element_dg] += volume_flux_arr1[i, j1, ii, j2, element_dg]
du[i, j1, j2, element_dg] += volume_flux_arr2[i, j1, j2, ii, element_dg]

integral_contribution += derivative_split[j1, ii] *
noncons_flux_arr1[i, j1, ii, j2, element_dg]
integral_contribution += derivative_split[j2, ii] *
noncons_flux_arr2[i, j1, j2, ii, element_dg]
end

du[i, j1, j2, element_dg] += 0.5 * integral_contribution
end

if element_dgfv != 0 # bad
integral_contribution = 0.0

for ii in axes(du, 2)
du[i, j1, j2, element_dgfv] += (1 - alpha_element) *
volume_flux_arr1[i, j1, ii, j2, element_dgfv]
du[i, j1, j2, element_dgfv] += (1 - alpha_element) *
volume_flux_arr2[i, j1, j2, ii, element_dgfv]

integral_contribution += derivative_split[j1, ii] *
noncons_flux_arr1[i, j1, ii, j2, element_dgfv]
integral_contribution += derivative_split[j2, ii] *
noncons_flux_arr2[i, j1, j2, ii, element_dgfv]
end

du[i, j1, j2, element_dgfv] += 0.5 * (1 - alpha_element) * integral_contribution
end
end
end

return nothing
end

# Kernel for calculating FV volume integral contribution
function volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, fstar2_L, fstar2_R,
inverse_weights, element_ids_dgfv, alpha)
Expand Down Expand Up @@ -1050,6 +1192,98 @@ end
# Pack kernels for calculating volume integrals
function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::True, equations,
volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
volume_flux_dg, nonconservative_flux_dg = dg.volume_integral.volume_flux_dg
volume_flux_fv, nonconservative_flux_fv = dg.volume_integral.volume_flux_fv
indicator = dg.volume_integral.indicator

# TODO: Get copies of `u` and `du` on both device and host
alpha = indicator(Array(u), mesh, equations, dg, cache)
alpha = CuArray{Float64}(alpha)

# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))

pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
element_ids_dgfv,
alpha,
atol)
pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol;
configurator_1d(pure_blended_element_count_kernel, alpha)...)

derivative_split = dg.basis.derivative_split
set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`

derivative_split = CuArray{Float64}(derivative_split)
volume_flux_arr1 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
size(u, 4))
volume_flux_arr2 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
size(u, 4))
noncons_flux_arr1 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
size(u, 4))
noncons_flux_arr2 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
size(u, 4))

inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4)))
fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4)))
fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4)))
fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4)))

size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4))

volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr1,
volume_flux_arr2,
noncons_flux_arr1,
noncons_flux_arr2,
fstar1_L, fstar1_R,
fstar2_L, fstar2_R,
u, element_ids_dgfv,
derivative_split,
equations,
volume_flux_dg,
nonconservative_flux_dg,
volume_flux_fv,
nonconservative_flux_fv)
volume_flux_dgfv_kernel(volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
noncons_flux_arr2, fstar1_L, fstar1_R, fstar2_L, fstar2_R, u,
element_ids_dgfv, derivative_split, equations, volume_flux_dg,
nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv;
configurator_2d(volume_flux_dgfv_kernel, size_arr)...)

derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split`

size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^2, size(du, 4))

volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg,
element_ids_dgfv,
alpha,
derivative_split,
volume_flux_arr1,
volume_flux_arr2,
noncons_flux_arr1,
noncons_flux_arr2,
equations)
volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
noncons_flux_arr2, equations;
configurator_3d(volume_integral_dg_kernel, size_arr)...)

size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 4))

volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L,
fstar1_R,
fstar2_L, fstar2_R,
inverse_weights,
element_ids_dgfv,
alpha)
volume_integral_fv_kernel(du, fstar1_L, fstar1_R, fstar2_L, fstar2_R, inverse_weights,
element_ids_dgfv, alpha;
configurator_2d(volume_integral_fv_kernel, size_arr)...)

return nothing
end

Expand Down
Loading

0 comments on commit d56bcbb

Please sign in to comment.