@@ -35,7 +35,7 @@ function interact!(dv, v_particle_system, u_particle_system,
3535 # which gives a significant speedup on GPUs.
3636 (v_a,
3737 rho_a) = @inbounds velocity_and_density (v_particle_system, particle_system,
38- backend, particle)
38+ particle)
3939
4040 # Accumulate the RHS contributions over all neighbors before writing to `dv`,
4141 # to reduce the number of memory writes.
@@ -61,7 +61,7 @@ function interact!(dv, v_particle_system, u_particle_system,
6161 m_b = @inbounds hydrodynamic_mass (neighbor_system, neighbor)
6262 (v_b,
6363 rho_b) = @inbounds velocity_and_density (v_neighbor_system, neighbor_system,
64- backend, neighbor)
64+ neighbor)
6565
6666 # The following call is equivalent to
6767 # `p_b = current_pressure(v_neighbor_system, neighbor_system, neighbor)`
@@ -141,35 +141,30 @@ end
141141end
142142
143143@propagate_inbounds function velocity_and_density (v, system:: WeaklyCompressibleSPHSystem ,
144- backend, particle)
144+ particle)
145145 (; density_calculator) = system
146146
147- return velocity_and_density (v, density_calculator, system, backend, particle)
147+ return velocity_and_density (v, density_calculator, system, particle)
148148end
149149
150- @propagate_inbounds function velocity_and_density (v, system, backend, particle)
150+ @propagate_inbounds function velocity_and_density (v, system, particle)
151151 # Call the default method below
152- return velocity_and_density (v, nothing , system, backend, particle)
152+ return velocity_and_density (v, nothing , system, particle)
153153end
154154
155155# Default method, which simply calls `current_velocity` and `current_density` separately.
156- @propagate_inbounds function velocity_and_density (v, _, system, backend, particle)
156+ @propagate_inbounds function velocity_and_density (v, _, system, particle)
157157 v_particle = current_velocity (v, system, particle)
158158 rho_particle = current_density (v, system, particle)
159159
160160 return v_particle, rho_particle
161161end
162162
163- # Optimized version for WCSPH with `ContinuityDensity` in 3D on GPUs ,
163+ # Optimized version for WCSPH with `ContinuityDensity` in 3D,
164164# which combines the velocity and density load into one wide load.
165- # This is slightly slower on CPUs, so we only use it with GPU backends.
166- # Note that we cannot dispatch by `AbstractGPUArray` because this is called from within
167- # a kernel, where the arrays are device arrays (like `CuDeviceArray`),
168- # which are not `AbstractGPUArray`s.
165+ # This is significantly faster on GPUs.
169166@inline function velocity_and_density (v, :: ContinuityDensity ,
170- :: WeaklyCompressibleSPHSystem{3} ,
171- :: KernelAbstractions.GPU ,
172- particle)
167+ :: WeaklyCompressibleSPHSystem{3} , particle)
173168 # Since `v` is stored as a 4 x N matrix, this aligned load extracts one column
174169 # of `v` corresponding to `particle`.
175170 # As opposed to `extract_svector`, this will translate to a single wide load instruction
0 commit comments