Skip to content

Commit 1719dc1

Browse files
committed
Use vloada on both CPUs and GPUs
1 parent 4251a3d commit 1719dc1

File tree

1 file changed

+10
-15
lines changed
  • src/schemes/fluid/weakly_compressible_sph

1 file changed

+10
-15
lines changed

src/schemes/fluid/weakly_compressible_sph/rhs.jl

Lines changed: 10 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ function interact!(dv, v_particle_system, u_particle_system,
3535
# which gives a significant speedup on GPUs.
3636
(v_a,
3737
rho_a) = @inbounds velocity_and_density(v_particle_system, particle_system,
38-
backend, particle)
38+
particle)
3939

4040
# Accumulate the RHS contributions over all neighbors before writing to `dv`,
4141
# to reduce the number of memory writes.
@@ -61,7 +61,7 @@ function interact!(dv, v_particle_system, u_particle_system,
6161
m_b = @inbounds hydrodynamic_mass(neighbor_system, neighbor)
6262
(v_b,
6363
rho_b) = @inbounds velocity_and_density(v_neighbor_system, neighbor_system,
64-
backend, neighbor)
64+
neighbor)
6565

6666
# The following call is equivalent to
6767
# `p_b = current_pressure(v_neighbor_system, neighbor_system, neighbor)`
@@ -141,35 +141,30 @@ end
141141
end
142142

143143
@propagate_inbounds function velocity_and_density(v, system::WeaklyCompressibleSPHSystem,
144-
backend, particle)
144+
particle)
145145
(; density_calculator) = system
146146

147-
return velocity_and_density(v, density_calculator, system, backend, particle)
147+
return velocity_and_density(v, density_calculator, system, particle)
148148
end
149149

150-
@propagate_inbounds function velocity_and_density(v, system, backend, particle)
150+
@propagate_inbounds function velocity_and_density(v, system, particle)
151151
# Call the default method below
152-
return velocity_and_density(v, nothing, system, backend, particle)
152+
return velocity_and_density(v, nothing, system, particle)
153153
end
154154

155155
# Default method, which simply calls `current_velocity` and `current_density` separately.
156-
@propagate_inbounds function velocity_and_density(v, _, system, backend, particle)
156+
@propagate_inbounds function velocity_and_density(v, _, system, particle)
157157
v_particle = current_velocity(v, system, particle)
158158
rho_particle = current_density(v, system, particle)
159159

160160
return v_particle, rho_particle
161161
end
162162

163-
# Optimized version for WCSPH with `ContinuityDensity` in 3D on GPUs,
163+
# Optimized version for WCSPH with `ContinuityDensity` in 3D,
164164
# which combines the velocity and density load into one wide load.
165-
# This is slightly slower on CPUs, so we only use it with GPU backends.
166-
# Note that we cannot dispatch by `AbstractGPUArray` because this is called from within
167-
# a kernel, where the arrays are device arrays (like `CuDeviceArray`),
168-
# which are not `AbstractGPUArray`s.
165+
# This is significantly faster on GPUs.
169166
@inline function velocity_and_density(v, ::ContinuityDensity,
170-
::WeaklyCompressibleSPHSystem{3},
171-
::KernelAbstractions.GPU,
172-
particle)
167+
::WeaklyCompressibleSPHSystem{3}, particle)
173168
# Since `v` is stored as a 4 x N matrix, this aligned load extracts one column
174169
# of `v` corresponding to `particle`.
175170
# As opposed to `extract_svector`, this will translate to a single wide load instruction

0 commit comments

Comments
 (0)