Skip to content

Commit 071e112

Browse files
committed
adds GPU support for local-time stepping
1 parent 1d4cbaa commit 071e112

19 files changed

+3636
-969
lines changed

src/gpu/kernels/kernel_cuda.mk

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ cuda_kernels_OBJS := \
3030
$O/kernel_3_acoustic_cuda_device.cuda-kernel.o \
3131
$O/kernel_3_cuda_device.cuda-kernel.o \
3232
$O/kernel_3_veloc_cuda_device.cuda-kernel.o \
33+
$O/lts_assembly_mpi_kernel.cuda-kernel.o \
34+
$O/lts_compute_forces_viscoelastic_kernel.cuda-kernel.o \
3335
$O/noise_read_add_surface_movie_cuda_kernel.cuda-kernel.o \
3436
$O/pml_impose_boundary_condition_cuda_kernel.cuda-kernel.o \
3537
$O/pml_kernel_2_viscoelastic_impl.cuda-kernel.o \

src/gpu/kernels/kernel_proto.cu.h

Lines changed: 345 additions & 327 deletions
Large diffs are not rendered by default.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
!=====================================================================
3+
!
4+
! S p e c f e m 3 D
5+
! -----------------
6+
!
7+
! Main historical authors: Dimitri Komatitsch and Jeroen Tromp
8+
! CNRS, France
9+
! and Princeton University, USA
10+
! (there are currently many more authors!)
11+
! (c) October 2017
12+
!
13+
! This program is free software; you can redistribute it and/or modify
14+
! it under the terms of the GNU General Public License as published by
15+
! the Free Software Foundation; either version 3 of the License, or
16+
! (at your option) any later version.
17+
!
18+
! This program is distributed in the hope that it will be useful,
19+
! but WITHOUT ANY WARRANTY; without even the implied warranty of
20+
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21+
! GNU General Public License for more details.
22+
!
23+
! You should have received a copy of the GNU General Public License along
24+
! with this program; if not, write to the Free Software Foundation, Inc.,
25+
! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26+
!
27+
!=====================================================================
28+
*/
29+
30+
#include "lts_assembly_mpi_kernel.cu"
31+
32+
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
!=====================================================================
3+
!
4+
! S p e c f e m 3 D
5+
! -----------------
6+
!
7+
! Main historical authors: Dimitri Komatitsch and Jeroen Tromp
8+
! CNRS, France
9+
! and Princeton University, USA
10+
! (there are currently many more authors!)
11+
! (c) October 2017
12+
!
13+
! This program is free software; you can redistribute it and/or modify
14+
! it under the terms of the GNU General Public License as published by
15+
! the Free Software Foundation; either version 3 of the License, or
16+
! (at your option) any later version.
17+
!
18+
! This program is distributed in the hope that it will be useful,
19+
! but WITHOUT ANY WARRANTY; without even the implied warranty of
20+
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21+
! GNU General Public License for more details.
22+
!
23+
! You should have received a copy of the GNU General Public License along
24+
! with this program; if not, write to the Free Software Foundation, Inc.,
25+
! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26+
!
27+
!=====================================================================
28+
*/
29+
30+
31+
32+
/* ----------------------------------------------------------------------------------------------- */
33+
// prepare/assemble kernels
34+
/* ----------------------------------------------------------------------------------------------- */
35+
36+
// Gathers acceleration values into the send buffer for the reduced boundary
// point list of one level.
//
// Each thread handles one entry of the list: it looks up the point index
// stored for (ilevel, entry) and copies the three consecutive values of
// d_accel at that point into slot `entry` of d_send_accel_buffer.
//
//   d_accel                          - source array, read at 3*iglob .. 3*iglob+2
//   d_send_accel_buffer              - destination array, written at 3*entry .. 3*entry+2
//   num_interface_p_refine_boundary  - number of list entries to process
//   interface_p_refine_boundary      - point list; row (ilevel-1) with row length
//                                      max_nibool_interfaces_boundary; stored values
//                                      are decremented by one before use as an index
//   ilevel                           - level selector (ilevel-1 is used as the row index)
__global__ void prepare_reduced_boundary_lts_accel_on_device(realw* d_accel,
                                                             realw* d_send_accel_buffer,
                                                             int num_interface_p_refine_boundary,
                                                             int* interface_p_refine_boundary,
                                                             int max_nibool_interfaces_boundary,
                                                             int ilevel) {

  // flattened thread index across blockIdx.x / blockIdx.y
  const int ipoin = threadIdx.x + blockDim.x*(blockIdx.x + blockIdx.y*gridDim.x);

  // threads past the end of the list have nothing to do
  if (ipoin >= num_interface_p_refine_boundary) return;

  // point index for this list entry on the requested level
  const int iglob = interface_p_refine_boundary[ipoin + (ilevel-1)*max_nibool_interfaces_boundary] - 1;

  // copy the three components in order
  for (int icomp = 0; icomp < 3; icomp++) {
    d_send_accel_buffer[3*ipoin + icomp] = d_accel[3*iglob + icomp];
  }
}
55+
56+
/* ----------------------------------------------------------------------------------------------- */
57+
58+
// Adds buffered values back onto the acceleration array for the reduced
// boundary point list of one level.
//
// Each thread handles one entry of the list: it looks up the point index
// stored for (ilevel, entry) and atomically adds the three values in slot
// `entry` of d_send_accel_buffer onto the three consecutive values of d_accel
// at that point.
//
//   d_accel                          - array updated via atomicAdd at 3*iglob .. 3*iglob+2
//   d_send_accel_buffer              - array read at 3*entry .. 3*entry+2
//   num_interface_p_refine_boundary  - number of list entries to process
//   interface_p_refine_boundary      - point list; row (ilevel-1) with row length
//                                      max_nibool_interfaces_boundary; stored values
//                                      are decremented by one before use as an index
//   ilevel                           - level selector (ilevel-1 is used as the row index)
__global__ void assemble_reduced_boundary_lts_accel_on_device(realw* d_accel,
                                                              realw* d_send_accel_buffer,
                                                              int num_interface_p_refine_boundary,
                                                              int* interface_p_refine_boundary,
                                                              int max_nibool_interfaces_boundary,
                                                              int ilevel) {

  // flattened thread index across blockIdx.x / blockIdx.y
  const int ipoin = threadIdx.x + blockDim.x*(blockIdx.x + blockIdx.y*gridDim.x);

  // threads past the end of the list have nothing to do
  if (ipoin >= num_interface_p_refine_boundary) return;

  // point index for this list entry on the requested level
  const int iglob = interface_p_refine_boundary[ipoin + (ilevel-1)*max_nibool_interfaces_boundary] - 1;

  // accumulate the three components in order
  for (int icomp = 0; icomp < 3; icomp++) {
    atomicAdd(&d_accel[3*iglob + icomp], d_send_accel_buffer[3*ipoin + icomp]);
  }
}
76+
77+
/* ----------------------------------------------------------------------------------------------- */
78+
79+
80+
// Adds buffered values back onto the acceleration array for every interface
// of one level.
//
// Each thread owns one position `ipoin` and walks over all interfaces. For an
// interface whose point count on this level exceeds `ipoin`, the thread looks
// up the point index and atomically adds the three buffered values onto the
// three consecutive values of d_accel at that point.
//
//   d_accel                        - array updated via atomicAdd at 3*iglob .. 3*iglob+2
//   d_send_accel_buffer            - array read at 3*(ipoin + iinterface*max_nibool_interfaces_ext_mesh) ..
//   num_interfaces_ext_mesh        - number of interfaces to loop over
//   num_interface_p_refine_ibool   - point counts, indexed by iinterface + num_interfaces_ext_mesh*(ilevel-1)
//   interface_p_refine_ibool       - point lists, indexed by
//                                    ipoin + max_nibool_interfaces_ext_mesh*(iinterface + num_interfaces_ext_mesh*(ilevel-1));
//                                    stored values are decremented by one before use as an index
//   max_nibool_interfaces_ext_mesh - stride between interfaces in the list and in the buffer
//   ilevel                         - level selector (ilevel-1 is used in the index arithmetic)
__global__ void assemble_boundary_lts_accel_on_device(realw* d_accel,
                                                      realw* d_send_accel_buffer,
                                                      int num_interfaces_ext_mesh,
                                                      int* num_interface_p_refine_ibool,
                                                      int* interface_p_refine_ibool,
                                                      int max_nibool_interfaces_ext_mesh,
                                                      int ilevel) {

  // flattened thread index across blockIdx.x / blockIdx.y
  const int ipoin = threadIdx.x + blockDim.x*(blockIdx.x + blockIdx.y*gridDim.x);

  int iface = 0;
  while (iface < num_interfaces_ext_mesh) {
    // combined (interface, level) slot shared by the count and list lookups
    const int slot = iface + num_interfaces_ext_mesh*(ilevel-1);

    // only threads within this interface's point count contribute
    if (ipoin < num_interface_p_refine_ibool[slot]) {
      const int iglob = interface_p_refine_ibool[ipoin + max_nibool_interfaces_ext_mesh*slot] - 1;
      const int ibuf = ipoin + iface*max_nibool_interfaces_ext_mesh;

      // accumulate the three components in order
      for (int icomp = 0; icomp < 3; icomp++) {
        atomicAdd(&d_accel[3*iglob + icomp], d_send_accel_buffer[3*ibuf + icomp]);
      }
    }

    iface++;
  }
}
102+
103+
/* ----------------------------------------------------------------------------------------------- */
104+
105+
106+
// Gathers acceleration values into the send buffer for every interface of one
// level.
//
// Each thread owns one position `ipoin` and walks over all interfaces. For an
// interface whose point count on this level exceeds `ipoin`, the thread looks
// up the point index and copies the three consecutive values of d_accel at
// that point into the buffer slot for (interface, ipoin).
//
//   d_accel                        - source array, read at 3*iglob .. 3*iglob+2
//   d_send_accel_buffer            - destination array, written at
//                                    3*(ipoin + iinterface*max_nibool_interfaces_ext_mesh) ..
//   num_interfaces_ext_mesh        - number of interfaces to loop over
//   num_interface_p_refine_ibool   - point counts, indexed by iinterface + num_interfaces_ext_mesh*(ilevel-1)
//   interface_p_refine_ibool       - point lists, indexed by
//                                    ipoin + max_nibool_interfaces_ext_mesh*(iinterface + num_interfaces_ext_mesh*(ilevel-1));
//                                    stored values are decremented by one before use as an index
//   max_nibool_interfaces_ext_mesh - stride between interfaces in the list and in the buffer
//   ilevel                         - level selector (ilevel-1 is used in the index arithmetic)
__global__ void prepare_boundary_lts_accel_on_device(realw* d_accel,
                                                     realw* d_send_accel_buffer,
                                                     int num_interfaces_ext_mesh,
                                                     int* num_interface_p_refine_ibool,
                                                     int* interface_p_refine_ibool,
                                                     int max_nibool_interfaces_ext_mesh,
                                                     int ilevel) {

  // flattened thread index across blockIdx.x / blockIdx.y
  const int ipoin = threadIdx.x + blockDim.x*(blockIdx.x + blockIdx.y*gridDim.x);

  int iface = 0;
  while (iface < num_interfaces_ext_mesh) {
    // combined (interface, level) slot shared by the count and list lookups
    const int slot = iface + num_interfaces_ext_mesh*(ilevel-1);

    // only threads within this interface's point count copy data
    if (ipoin < num_interface_p_refine_ibool[slot]) {
      const int iglob = interface_p_refine_ibool[ipoin + max_nibool_interfaces_ext_mesh*slot] - 1;
      const int ibuf = ipoin + iface*max_nibool_interfaces_ext_mesh;

      // copy the three components in order
      for (int icomp = 0; icomp < 3; icomp++) {
        d_send_accel_buffer[3*ibuf + icomp] = d_accel[3*iglob + icomp];
      }
    }

    iface++;
  }
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
!=====================================================================
3+
!
4+
! S p e c f e m 3 D
5+
! -----------------
6+
!
7+
! Main historical authors: Dimitri Komatitsch and Jeroen Tromp
8+
! CNRS, France
9+
! and Princeton University, USA
10+
! (there are currently many more authors!)
11+
! (c) October 2017
12+
!
13+
! This program is free software; you can redistribute it and/or modify
14+
! it under the terms of the GNU General Public License as published by
15+
! the Free Software Foundation; either version 3 of the License, or
16+
! (at your option) any later version.
17+
!
18+
! This program is distributed in the hope that it will be useful,
19+
! but WITHOUT ANY WARRANTY; without even the implied warranty of
20+
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21+
! GNU General Public License for more details.
22+
!
23+
! You should have received a copy of the GNU General Public License along
24+
! with this program; if not, write to the Free Software Foundation, Inc.,
25+
! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26+
!
27+
!=====================================================================
28+
*/
29+
30+
#include "lts_compute_forces_viscoelastic_kernel.cu"
31+

0 commit comments

Comments
 (0)