Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReadVTK = "dc215faf-f008-4882-a9f7-a79a826fadc3"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SIMD = "fdea26ae-647d-5447-a871-4b548cad5224"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand Down Expand Up @@ -62,6 +63,7 @@ Polyester = "0.7.10"
ReadVTK = "0.2"
RecipesBase = "1"
Reexport = "1"
SIMD = "3.7.2"
SciMLBase = "2"
StaticArrays = "1"
Statistics = "1"
Expand Down
108 changes: 108 additions & 0 deletions dualsphysics_2d_benchmark_2m_particles.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
2D benchmark 2M particles

DualSPHysics:
1.55ms

DualSPHysics no density diffusion:
1.1ms

main FP32, no density diffusion:
3.1ms

now with compact vov
main mixed precision:
5ms (3.2x)

main FP32:
4ms (2.6x)

with dd, sorted by cell coords:
4.8ms

sorted by cell index:
4.9ms

blocks of three cells:
4.9ms

optimizations (inbounds, zero distance skip):
4.7ms

simplified kernel gradient:
4.3ms

no density diffusion:
3.1ms

unrolled continuity equation:
3.1ms

unrolled viscosity:
2.9ms

pull *_a quantities before the neighbor loop:
2.7ms

split dv and drho:
2.55ms (2.3x)

add to arrays after loop:
2.17ms (1.97x)

optimized day 1:
1.3ms

optimized day 1 mixed precision:
3.7ms

full grid NHS:
1.5ms (2.1x)


3D DualSPHysics 638400 fluid particles:
4.9ms
+ 200µs KerUpdatePosCell

main 3D TP, no DD:
12ms

3D TP stripped (above with pull quantities...) and compact vov:
8.3ms (3.1x, 1.7x?)

add to arrays after loop:
7.5ms (2.8x, 1.5x?)

split dv and drho:
7.4ms

fixed @inbounds:
6.8ms

fastmath if distance:
4.7ms

first vloada:
4.6ms

second vloada:
4.3ms

mixed precision:
21.8ms???

kernel grad fastmath:
4ms

targeted fastdiv with sorted NHS:
4ms
fluid-boundary: 0.4ms. fluid-*: 4.4ms

full grid NHS:
4.9ms (2.4x)
total 5.3ms

full grid with foreach_neighbor inbounds:
4.8ms

full grid with foreach_point_neighbor:
7.7ms
75 changes: 75 additions & 0 deletions examples/fluid/dam_break_2d_dualsphysics.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Modify the 01_DamBreak example of DualSPHysics like this:
# <parameter key="StepAlgorithm" value="2" comment="Step Algorithm 1:Verlet, 2:Symplectic (default=1)" />
# <parameter key="DensityDT" value="1" comment="Density Diffusion Term 0:None, 1:Molteni, 2:Fourtakas, 3:Fourtakas(full) (default=0)" />
# <parameter key="TimeMax" value="1.0" comment="Time of simulation" units_comment="seconds" />
#
# When comparing with high resolution, change the resolution here:
# <definition dp="0.002" units_comment="metres (m)">
# With this resolution, use:
# <parameter key="DtFixed" value="1e-5" comment="Fixed Dt value. Use 0 to disable (default=disabled)" units_comment="seconds" />

using TrixiParticles, TrixiParticles.PointNeighbors

fluid_particle_spacing = 0.002

# Load setup from dam break example
trixi_include(@__MODULE__,
joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
fluid_particle_spacing=fluid_particle_spacing,
smoothing_length=1.414216 * fluid_particle_spacing,
tank_size=(4.0, 3.0), W=1.0, H=2.0,
spacing_ratio=1, boundary_layers=1,
sol=nothing, ode=nothing)

tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density,
n_layers=boundary_layers, spacing_ratio=spacing_ratio,
coordinates_eltype=Float64)

tank.fluid.coordinates .+= 0.005
tank.boundary.coordinates .+= 0.005

# Define a GPU-compatible neighborhood search
min_corner = minimum(tank.boundary.coordinates, dims=2)
max_corner = maximum(tank.boundary.coordinates, dims=2)
cell_list = FullGridCellList(; min_corner, max_corner)#, backend=PointNeighbors.CompactVectorOfVectors{Int32})
neighborhood_search = GridNeighborhoodSearch{2}(; cell_list,
update_strategy=ParallelUpdate())

search_radius = TrixiParticles.get_neighborhood_search(fluid_system, fluid_system, semi).search_radius
nhs = PointNeighbors.copy_neighborhood_search(neighborhood_search, search_radius, 0)
cell_coords(x) = PointNeighbors.cell_coords(x, nhs)
cell_index(x) = PointNeighbors. cell_index(nhs.cell_list, cell_coords(x))
coords = reinterpret(reshape, SVector{ndims(fluid_system), eltype(tank.fluid.coordinates)}, tank.fluid.coordinates)
sort!(coords, by=cell_index)

# function cells(coordinates, system, semi)
# nhs = TrixiParticles.get_neighborhood_search(fluid_system, fluid_system, semi)
# coords = reinterpret(reshape, SVector{ndims(system), eltype(coordinates)}, coordinates)
# return TrixiParticles.PointNeighbors.cell_coords.(coords, Ref(nhs))
# end

# For benchmarking, run:
# trixi_include_changeprecision(Float32, "../TrixiParticles.jl/examples/fluid/dam_break_2d_dualsphysics.jl", parallelization_backend=CUDABackend(), tspan=(0.0f0, 1.0f-10), fluid_particle_spacing=0.001, coordinates_eltype=Float32);
# dv_ode, du_ode = copy(sol.u[end]).x; v_ode, u_ode = copy(sol.u[end]).x; semi = ode.p; system = semi.systems[1]; dv = TrixiParticles.wrap_v(dv_ode, system, semi); v = TrixiParticles.wrap_v(v_ode, system, semi); u = TrixiParticles.wrap_u(u_ode, system, semi);
# @benchmark TrixiParticles.interact_reassembled!($dv, $v, $u, $v, $u, $system, $system, $semi)

# Run the dam break simulation with this neighborhood search
trixi_include(@__MODULE__,
joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
tank=tank,
smoothing_length=1.414216 * fluid_particle_spacing,
time_integration_scheme=SymplecticPositionVerlet(),
boundary_density_calculator=ContinuityDensity(),
fluid_particle_spacing=fluid_particle_spacing,
tank_size=(4.0, 3.0), W=1.0, H=2.0,
spacing_ratio=1, boundary_layers=1,
tspan=(0.0, 1.0), cfl=0.2,
neighborhood_search=neighborhood_search,
viscosity_wall=viscosity_fluid,
# This is the same saving frequency as in DualSPHysics for easier comparison
saving_callback=SolutionSavingCallback(dt=0.01),
# extra_callback=SortingCallback(interval=1),
density_diffusion=nothing, # TODO only for benchmark
# For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving:
dt=1e-5, stepsize_callback=nothing, #saving_callback=nothing,
parallelization_backend=PolyesterBackend())
8 changes: 6 additions & 2 deletions examples/fluid/dam_break_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,14 @@ boundary_system = WallBoundarySystem(tank.boundary, boundary_model)
# ==========================================================================================
# ==== Simulation
semi = Semidiscretization(fluid_system, boundary_system,
neighborhood_search=GridNeighborhoodSearch{3}(),
parallelization_backend=PolyesterBackend())
ode = semidiscretize(semi, tspan)

info_callback = InfoCallback(interval=100)
saving_callback = SolutionSavingCallback(dt=0.1, prefix="")
callbacks = CallbackSet(info_callback, saving_callback)
extra_callback = nothing
callbacks = CallbackSet(info_callback, saving_callback, extra_callback)

# Use a Runge-Kutta method with automatic (error based) time step size control.
# Limiting of the maximum stepsize is necessary to prevent crashing.
Expand All @@ -80,8 +82,10 @@ callbacks = CallbackSet(info_callback, saving_callback)
# Sometimes, the method fails to do so because forces become extremely large when
# fluid particles are very close to boundary particles, and the time integration method
# interprets this as an instability.
sol = solve(ode, RDPK3SpFSAL35(),
time_integration_scheme = RDPK3SpFSAL35()
sol = solve(ode, time_integration_scheme,
abstol=1e-5, # Default abstol is 1e-6 (may need to be tuned to prevent boundary penetration)
reltol=1e-4, # Default reltol is 1e-3 (may need to be tuned to prevent boundary penetration)
dtmax=1e-2, # Limit stepsize to prevent crashing
dt=1.0,
save_everystep=false, callback=callbacks);
69 changes: 69 additions & 0 deletions examples/fluid/dam_break_3d_dualsphysics.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Modify the 01_DamBreak example of DualSPHysics like this:
# <parameter key="StepAlgorithm" value="2" comment="Step Algorithm 1:Verlet, 2:Symplectic (default=1)" />
# <parameter key="DensityDT" value="1" comment="Density Diffusion Term 0:None, 1:Molteni, 2:Fourtakas, 3:Fourtakas(full) (default=0)" />
# <parameter key="TimeMax" value="1.0" comment="Time of simulation" units_comment="seconds" />
#
# When comparing with high resolution, change the resolution here:
# <definition dp="0.002" units_comment="metres (m)">
# With this resolution, use:
# <parameter key="DtFixed" value="1e-5" comment="Fixed Dt value. Use 0 to disable (default=disabled)" units_comment="seconds" />

using TrixiParticles, TrixiParticles.PointNeighbors, OrdinaryDiffEq

fluid_particle_spacing = 0.0085

smoothing_length = 1.7320508 * fluid_particle_spacing
tank_size = (1.6, 0.665, 0.4)
initial_fluid_size = (0.4, 0.665, 0.3)
acceleration = (0.0, 0.0, -9.81)
spacing_ratio = 1
boundary_layers = 1
fluid_density = 1000.0
sound_speed = 20 * sqrt(9.81 * tank_size[2])
state_equation = StateEquationCole(; sound_speed, reference_density=fluid_density,
exponent=7)

tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density,
n_layers=boundary_layers, spacing_ratio=spacing_ratio,
coordinates_eltype=Float64)

tank.fluid.coordinates .+= 0.005
tank.boundary.coordinates .+= 0.005

# Define a GPU-compatible neighborhood search
min_corner = minimum(tank.boundary.coordinates, dims=2)
max_corner = maximum(tank.boundary.coordinates, dims=2)
cell_list = FullGridCellList(; min_corner, max_corner)#, backend=PointNeighbors.CompactVectorOfVectors{Int32})
neighborhood_search = GridNeighborhoodSearch{3}(; cell_list,
update_strategy=ParallelUpdate())

search_radius = TrixiParticles.compact_support(WendlandC2Kernel{3}(), smoothing_length)
nhs = PointNeighbors.copy_neighborhood_search(neighborhood_search, search_radius, 0)
cell_coords(x) = PointNeighbors.cell_coords(x, nhs)
cell_index(x) = PointNeighbors. cell_index(nhs.cell_list, cell_coords(x))
coords = reinterpret(reshape, SVector{ndims(nhs), eltype(tank.fluid.coordinates)}, tank.fluid.coordinates)
sort!(coords, by=cell_index)

# Run the dam break simulation with this neighborhood search
trixi_include(@__MODULE__,
joinpath(examples_dir(), "fluid", "dam_break_3d.jl"),
tank=tank,
smoothing_length=1.7320508 * fluid_particle_spacing,
time_integration_scheme=SymplecticPositionVerlet(),
boundary_density_calculator=ContinuityDensity(),
state_equation=state_equation,
fluid_particle_spacing=fluid_particle_spacing,
tank_size=tank_size, initial_fluid_size=initial_fluid_size,
acceleration=acceleration,
alpha=0.1,
spacing_ratio=spacing_ratio, boundary_layers=boundary_layers,
tspan=(0.0, 1.0), #cfl=0.2,
neighborhood_search=neighborhood_search,
# viscosity_wall=viscosity_fluid, TODO
# This is the same saving frequency as in DualSPHysics for easier comparison
# saving_callback=SolutionSavingCallback(dt=0.01),
# extra_callback=SortingCallback(interval=1),
density_diffusion=nothing, # TODO only for benchmark
# For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving:
dt=8e-5, #stepsize_callback=nothing, saving_callback=nothing,
parallelization_backend=PolyesterBackend())
3 changes: 2 additions & 1 deletion src/TrixiParticles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ using Random: seed!
using SciMLBase: SciMLBase, CallbackSet, DiscreteCallback, DynamicalODEProblem, u_modified!,
get_tmp_cache, set_proposed_dt!, ODESolution, ODEProblem, terminate!,
add_tstop!
using SIMD: vloada, Vec
@reexport using StaticArrays: SVector
using StaticArrays: @SMatrix, SMatrix, setindex
using Statistics: Statistics
Expand Down Expand Up @@ -73,7 +74,7 @@ export WeaklyCompressibleSPHSystem, EntropicallyDampedSPHSystem, TotalLagrangian
export BoundaryZone, InFlow, OutFlow, BidirectionalFlow
export InfoCallback, SolutionSavingCallback, DensityReinitializationCallback,
PostprocessCallback, StepsizeCallback, UpdateCallback, SteadyStateReachedCallback,
SplitIntegrationCallback
SplitIntegrationCallback, SortingCallback
export ContinuityDensity, SummationDensity
export PenaltyForceGanzenmueller, TransportVelocityAdami, ParticleShiftingTechnique,
ParticleShiftingTechniqueSun2017, ConsistentShiftingSun2019,
Expand Down
1 change: 1 addition & 0 deletions src/callbacks/callbacks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,4 @@ include("stepsize.jl")
include("update.jl")
include("steady_state_reached.jl")
include("split_integration.jl")
include("sorting.jl")
Loading