|
19 | 19 |
|
20 | 20 | # Everything here is done in the initial coordinates |
21 | 21 | system_coords = initial_coordinates(system) |
| 22 | + neighborhood_search = get_neighborhood_search(system, semi) |
| 23 | + backend = semi.parallelization_backend |
22 | 24 |
|
23 | 25 | # For `distance == 0`, the analytical gradient is zero, but the unsafe gradient |
24 | 26 | # and the density diffusion divide by zero. |
|
29 | 31 | h = initial_smoothing_length(system) |
30 | 32 | almostzero = sqrt(eps(h^2)) |
31 | 33 |
|
32 | | - # Loop over all pairs of particles and neighbors within the kernel cutoff. |
33 | | - # For structure-structure interaction, this has to happen in the initial coordinates. |
34 | | - foreach_point_neighbor(system, system, system_coords, system_coords, semi; |
35 | | - points=each_integrated_particle(system)) do particle, neighbor, |
36 | | - initial_pos_diff, |
37 | | - initial_distance |
38 | | - # Skip neighbors with the same position because the kernel gradient is zero. |
39 | | - # Note that `return` only exits the closure, i.e., skips the current neighbor. |
40 | | - skip_zero_distance(system) && initial_distance < almostzero && return |
41 | | - |
42 | | - # Now that we know that `distance` is not zero, we can safely call the unsafe |
43 | | - # version of the kernel gradient to avoid redundant zero checks. |
44 | | - grad_kernel = smoothing_kernel_grad_unsafe(system, initial_pos_diff, |
45 | | - initial_distance, particle) |
46 | | - |
47 | | - rho_a = @inbounds system.material_density[particle] |
48 | | - rho_b = @inbounds system.material_density[neighbor] |
49 | | - |
| 34 | + @threaded semi for particle in each_integrated_particle(system) |
| 35 | + # We are looping over the particles of `system`, so it is guaranteed |
| 36 | + # that `particle` is in bounds of `system`. |
50 | 37 | m_a = @inbounds system.mass[particle] |
51 | | - m_b = @inbounds system.mass[neighbor] |
52 | | - |
| 38 | + rho_a = @inbounds system.material_density[particle] |
53 | 39 | # PK1 / rho^2 |
54 | 40 | pk1_rho2_a = @inbounds pk1_rho2(system, particle) |
55 | | - pk1_rho2_b = @inbounds pk1_rho2(system, neighbor) |
56 | | - |
57 | | - current_pos_diff_ = @inbounds current_coords(system, particle) - |
58 | | - current_coords(system, neighbor) |
59 | | - # On GPUs, convert `Float64` coordinates to `Float32` after computing the difference |
60 | | - current_pos_diff = convert.(eltype(system), current_pos_diff_) |
61 | | - current_distance = norm(current_pos_diff) |
62 | | - |
63 | | - dv_stress = m_b * (pk1_rho2_a + pk1_rho2_b) * grad_kernel |
64 | | - |
65 | | - dv_penalty_force_ = @inbounds dv_penalty_force(penalty_force, particle, neighbor, |
66 | | - initial_pos_diff, initial_distance, |
67 | | - current_pos_diff, current_distance, |
68 | | - system, m_a, m_b, rho_a, rho_b) |
69 | | - |
70 | | - dv_particle = Ref(dv_stress + dv_penalty_force_) |
71 | | - @inbounds dv_viscosity_tlsph!(dv_particle, system, v_system, particle, neighbor, |
72 | | - current_pos_diff, current_distance, |
73 | | - m_a, m_b, rho_a, rho_b, grad_kernel) |
| 41 | + current_coords_a = @inbounds current_coords(system, particle) |
| 42 | + F_a = @inbounds deformation_gradient(system, particle) |
| 43 | + |
| 44 | + # Accumulate the RHS contributions over all neighbors before writing to `dv` |
| 45 | + # to reduce the number of memory writes. |
| 46 | + # Note that we need a `Ref` in order to be able to update these variables |
| 47 | + # inside the closure in the `foreach_neighbor` loop. |
| 48 | + dv_particle = Ref(zero(current_coords_a)) |
| 49 | + |
| 50 | + # Loop over all neighbors within the kernel cutoff |
| 51 | + @inbounds foreach_neighbor(system_coords, system_coords, |
| 52 | + neighborhood_search, backend, |
| 53 | + particle) do particle, neighbor, |
| 54 | + initial_pos_diff, initial_distance |
| 55 | + # Skip neighbors with the same position because the kernel gradient is zero. |
| 56 | + # Note that `return` only exits the closure, i.e., skips the current neighbor. |
| 57 | + skip_zero_distance(system) && initial_distance < almostzero && return |
| 58 | + |
| 59 | + # Now that we know that `distance` is not zero, we can safely call the unsafe |
| 60 | + # version of the kernel gradient to avoid redundant zero checks. |
| 61 | + grad_kernel = smoothing_kernel_grad_unsafe(system, initial_pos_diff, |
| 62 | + initial_distance, particle) |
| 63 | + |
| 64 | + rho_b = @inbounds system.material_density[neighbor] |
| 65 | + m_b = @inbounds system.mass[neighbor] |
| 66 | + # PK1 / rho^2 |
| 67 | + pk1_rho2_b = @inbounds pk1_rho2(system, neighbor) |
| 68 | + current_coords_b = @inbounds current_coords(system, neighbor) |
| 69 | + |
| 70 | + # The compiler is smart enough to optimize this away if no penalty force is used |
| 71 | + F_b = @inbounds deformation_gradient(system, neighbor) |
| 72 | + |
| 73 | + current_pos_diff_ = current_coords_a - current_coords_b |
| 74 | + # On GPUs, convert `Float64` coordinates to `Float32` after computing the difference |
| 75 | + current_pos_diff = convert.(eltype(system), current_pos_diff_) |
| 76 | + current_distance = norm(current_pos_diff) |
| 77 | + |
| 78 | + dv_particle[] += m_b * (pk1_rho2_a + pk1_rho2_b) * grad_kernel |
| 79 | + |
| 80 | + @inbounds dv_penalty_force!(dv_particle, penalty_force, particle, neighbor, |
| 81 | + initial_pos_diff, initial_distance, |
| 82 | + current_pos_diff, current_distance, |
| 83 | + system, m_a, m_b, rho_a, rho_b, F_a, F_b) |
| 84 | + |
| 85 | + @inbounds dv_viscosity_tlsph!(dv_particle, system, v_system, particle, neighbor, |
| 86 | + current_pos_diff, current_distance, |
| 87 | + m_a, m_b, rho_a, rho_b, F_a, grad_kernel) |
| 88 | + end |
74 | 89 |
|
75 | 90 | for i in 1:ndims(system) |
76 | 91 | @inbounds dv[i, particle] += dv_particle[][i] |
|
0 commit comments