|
| 1 | +/* |
| 2 | +!===================================================================== |
| 3 | +! |
| 4 | +! S p e c f e m 3 D |
| 5 | +! ----------------- |
| 6 | +! |
| 7 | +! Main historical authors: Dimitri Komatitsch and Jeroen Tromp |
| 8 | +! CNRS, France |
| 9 | +! and Princeton University, USA |
| 10 | +! (there are currently many more authors!) |
| 11 | +! (c) October 2017 |
| 12 | +! |
| 13 | +! This program is free software; you can redistribute it and/or modify |
| 14 | +! it under the terms of the GNU General Public License as published by |
| 15 | +! the Free Software Foundation; either version 3 of the License, or |
| 16 | +! (at your option) any later version. |
| 17 | +! |
| 18 | +! This program is distributed in the hope that it will be useful, |
| 19 | +! but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 21 | +! GNU General Public License for more details. |
| 22 | +! |
| 23 | +! You should have received a copy of the GNU General Public License along |
| 24 | +! with this program; if not, write to the Free Software Foundation, Inc., |
| 25 | +! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| 26 | +! |
| 27 | +!===================================================================== |
| 28 | +*/ |
| 29 | + |
| 30 | + |
| 31 | + |
| 32 | +/* ----------------------------------------------------------------------------------------------- */ |
| 33 | +// prepare/assemble kernels |
| 34 | +/* ----------------------------------------------------------------------------------------------- */ |
| 35 | + |
// Packs acceleration values of the listed boundary nodes into the send buffer.
//
// One thread handles one list entry and copies its three components.
//
//   d_accel                          read at 3*node + {0,1,2}
//   d_send_accel_buffer              written at 3*entry + {0,1,2}
//   num_interface_p_refine_boundary  number of list entries to process
//   interface_p_refine_boundary      node list; stored values are decremented by one
//                                    before being used as an index into d_accel
//   max_nibool_interfaces_boundary   stride between consecutive levels in the list
//   ilevel                           level selector; row (ilevel-1) of the list is read
//
// Only threadIdx.x is used inside a block; blockIdx.y is folded into the flat index.
__global__ void prepare_reduced_boundary_lts_accel_on_device(realw* d_accel,
                                                             realw* d_send_accel_buffer,
                                                             int num_interface_p_refine_boundary,
                                                             int* interface_p_refine_boundary,
                                                             int max_nibool_interfaces_boundary,
                                                             int ilevel) {

  // flat thread index over a (possibly 2D) grid of 1D blocks
  const int entry = (blockIdx.y*gridDim.x + blockIdx.x)*blockDim.x + threadIdx.x;

  // threads beyond the end of the node list have nothing to do
  if (entry >= num_interface_p_refine_boundary) return;

  // start of this level's row in the node list
  const int level_offset = (ilevel-1)*max_nibool_interfaces_boundary;
  const int node = interface_p_refine_boundary[entry + level_offset] - 1;

  // copy the three components of this node
  for (int icomp = 0; icomp < 3; icomp++) {
    d_send_accel_buffer[3*entry + icomp] = d_accel[3*node + icomp];
  }
}
| 55 | + |
| 56 | +/* ----------------------------------------------------------------------------------------------- */ |
| 57 | + |
// Adds buffered acceleration contributions back onto the listed boundary nodes.
//
// One thread handles one list entry and accumulates its three components
// into d_accel with atomicAdd.
//
//   d_accel                          updated at 3*node + {0,1,2}
//   d_send_accel_buffer              read at 3*entry + {0,1,2}
//   num_interface_p_refine_boundary  number of list entries to process
//   interface_p_refine_boundary      node list; stored values are decremented by one
//                                    before being used as an index into d_accel
//   max_nibool_interfaces_boundary   stride between consecutive levels in the list
//   ilevel                           level selector; row (ilevel-1) of the list is read
//
// Only threadIdx.x is used inside a block; blockIdx.y is folded into the flat index.
__global__ void assemble_reduced_boundary_lts_accel_on_device(realw* d_accel,
                                                              realw* d_send_accel_buffer,
                                                              int num_interface_p_refine_boundary,
                                                              int* interface_p_refine_boundary,
                                                              int max_nibool_interfaces_boundary,
                                                              int ilevel) {

  // flat thread index over a (possibly 2D) grid of 1D blocks
  const int entry = (blockIdx.y*gridDim.x + blockIdx.x)*blockDim.x + threadIdx.x;

  // threads beyond the end of the node list have nothing to do
  if (entry >= num_interface_p_refine_boundary) return;

  // start of this level's row in the node list
  const int level_offset = (ilevel-1)*max_nibool_interfaces_boundary;
  const int node = interface_p_refine_boundary[entry + level_offset] - 1;

  // NOTE(review): atomics presumably guard against the same node being listed
  // more than once -- confirm against the host-side list construction
  for (int icomp = 0; icomp < 3; icomp++) {
    atomicAdd(&d_accel[3*node + icomp], d_send_accel_buffer[3*entry + icomp]);
  }
}
| 76 | + |
| 77 | +/* ----------------------------------------------------------------------------------------------- */ |
| 78 | + |
| 79 | + |
// Adds buffered acceleration contributions onto the interface nodes of one level,
// for every interface in turn.
//
// Each thread owns one position within an interface and visits all interfaces
// serially; for every interface that has a node at that position, the three
// components are accumulated into d_accel with atomicAdd.
//
//   d_accel                         updated at 3*node + {0,1,2}
//   d_send_accel_buffer             read at 3*(pos + iface*max_nibool_interfaces_ext_mesh) + {0,1,2}
//   num_interfaces_ext_mesh         number of interfaces to loop over
//   num_interface_p_refine_ibool    node count per (interface, level), indexed as
//                                   iface + num_interfaces_ext_mesh*(ilevel-1)
//   interface_p_refine_ibool        node list per (interface, level); stored values are
//                                   decremented by one before indexing d_accel
//   max_nibool_interfaces_ext_mesh  stride between interfaces in the list and the buffer
//   ilevel                          level selector; (ilevel-1) picks the level slice
//
// Only threadIdx.x is used inside a block; blockIdx.y is folded into the flat index.
__global__ void assemble_boundary_lts_accel_on_device(realw* d_accel,
                                                      realw* d_send_accel_buffer,
                                                      int num_interfaces_ext_mesh,
                                                      int* num_interface_p_refine_ibool,
                                                      int* interface_p_refine_ibool,
                                                      int max_nibool_interfaces_ext_mesh,
                                                      int ilevel) {

  // flat thread index over a (possibly 2D) grid of 1D blocks
  const int pos = (blockIdx.y*gridDim.x + blockIdx.x)*blockDim.x + threadIdx.x;

  for (int iface = 0; iface < num_interfaces_ext_mesh; iface++) {
    // combined (interface, level) slot shared by the count and node arrays
    const int slot = iface + num_interfaces_ext_mesh*(ilevel-1);

    // this interface has no node at our position
    if (pos >= num_interface_p_refine_ibool[slot]) continue;

    const int node = interface_p_refine_ibool[pos + max_nibool_interfaces_ext_mesh*slot] - 1;
    const int ibuf = pos + iface*max_nibool_interfaces_ext_mesh;

    for (int icomp = 0; icomp < 3; icomp++) {
      atomicAdd(&d_accel[3*node + icomp], d_send_accel_buffer[3*ibuf + icomp]);
    }
  }
}
| 102 | + |
| 103 | +/* ----------------------------------------------------------------------------------------------- */ |
| 104 | + |
| 105 | + |
// Packs acceleration values of the interface nodes of one level into the send
// buffer, for every interface in turn.
//
// Each thread owns one position within an interface and visits all interfaces
// serially; for every interface that has a node at that position, the three
// components are copied from d_accel into the buffer.
//
//   d_accel                         read at 3*node + {0,1,2}
//   d_send_accel_buffer             written at 3*(pos + iface*max_nibool_interfaces_ext_mesh) + {0,1,2}
//   num_interfaces_ext_mesh         number of interfaces to loop over
//   num_interface_p_refine_ibool    node count per (interface, level), indexed as
//                                   iface + num_interfaces_ext_mesh*(ilevel-1)
//   interface_p_refine_ibool        node list per (interface, level); stored values are
//                                   decremented by one before indexing d_accel
//   max_nibool_interfaces_ext_mesh  stride between interfaces in the list and the buffer
//   ilevel                          level selector; (ilevel-1) picks the level slice
//
// Only threadIdx.x is used inside a block; blockIdx.y is folded into the flat index.
__global__ void prepare_boundary_lts_accel_on_device(realw* d_accel,
                                                     realw* d_send_accel_buffer,
                                                     int num_interfaces_ext_mesh,
                                                     int* num_interface_p_refine_ibool,
                                                     int* interface_p_refine_ibool,
                                                     int max_nibool_interfaces_ext_mesh,
                                                     int ilevel) {

  // flat thread index over a (possibly 2D) grid of 1D blocks
  const int pos = (blockIdx.y*gridDim.x + blockIdx.x)*blockDim.x + threadIdx.x;

  for (int iface = 0; iface < num_interfaces_ext_mesh; iface++) {
    // combined (interface, level) slot shared by the count and node arrays
    const int slot = iface + num_interfaces_ext_mesh*(ilevel-1);

    // this interface has no node at our position
    if (pos >= num_interface_p_refine_ibool[slot]) continue;

    const int node = interface_p_refine_ibool[pos + max_nibool_interfaces_ext_mesh*slot] - 1;
    const int ibuf = pos + iface*max_nibool_interfaces_ext_mesh;

    for (int icomp = 0; icomp < 3; icomp++) {
      d_send_accel_buffer[3*ibuf + icomp] = d_accel[3*node + icomp];
    }
  }
}
0 commit comments