Skip to content

Commit 1327244

Browse files
authored
Fixed bug for z-pencils with z-implicit diffusion on CPUs. (CaNS-World#148)
1 parent 72a67b6 commit 1327244

File tree

2 files changed

+67
-62
lines changed

2 files changed

+67
-62
lines changed

src/solver.f90

Lines changed: 60 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
module mod_solver
88
use, intrinsic :: iso_c_binding, only: C_PTR
99
use decomp_2d
10-
use mod_fft , only: fft
10+
use mod_common_mpi, only: ipencil_axis
11+
use mod_fft , only: fft
1112
use mod_types
1213
implicit none
1314
private
@@ -26,30 +27,32 @@ subroutine solver(n,ng,arrplan,normfft,lambdaxy,a,b,c,bc,c_or_f,p)
2627
character(len=1), dimension(0:1,3), intent(in) :: bc
2728
character(len=1), intent(in), dimension(3) :: c_or_f
2829
real(rp), intent(inout), dimension(0:,0:,0:) :: p
29-
real(rp), dimension(xsize(1),xsize(2),xsize(3)) :: px
30-
real(rp), dimension(ysize(1),ysize(2),ysize(3)) :: py
31-
real(rp), dimension(zsize(1),zsize(2),zsize(3)) :: pz
30+
real(rp), allocatable, dimension(:,:,:) :: px,py,pz
3231
integer :: q
3332
integer, dimension(3) :: n_z
3433
!
3534
n_z(:) = zsize(:)
36-
#if !defined(_DECOMP_Y) && !defined(_DECOMP_Z)
37-
!$OMP PARALLEL WORKSHARE
38-
px(:,:,:) = p(1:n(1),1:n(2),1:n(3))
39-
!$OMP END PARALLEL WORKSHARE
40-
#elif defined(_DECOMP_Y)
41-
!$OMP PARALLEL WORKSHARE
42-
py(:,:,:) = p(1:n(1),1:n(2),1:n(3))
43-
!$OMP END PARALLEL WORKSHARE
44-
call transpose_y_to_x(py,px)
45-
#elif defined(_DECOMP_Z)
46-
!$OMP PARALLEL WORKSHARE
47-
pz(:,:,:) = p(1:n(1),1:n(2),1:n(3))
48-
!$OMP END PARALLEL WORKSHARE
49-
!call transpose_z_to_x(pz,px)
50-
call transpose_z_to_y(pz,py)
51-
call transpose_y_to_x(py,px)
52-
#endif
35+
allocate(px(xsize(1),xsize(2),xsize(3)))
36+
allocate(py(ysize(1),ysize(2),ysize(3)))
37+
allocate(pz(zsize(1),zsize(2),zsize(3)))
38+
select case(ipencil_axis)
39+
case(1)
40+
!$OMP PARALLEL WORKSHARE
41+
px(:,:,:) = p(1:n(1),1:n(2),1:n(3))
42+
!$OMP END PARALLEL WORKSHARE
43+
case(2)
44+
!$OMP PARALLEL WORKSHARE
45+
py(:,:,:) = p(1:n(1),1:n(2),1:n(3))
46+
!$OMP END PARALLEL WORKSHARE
47+
call transpose_y_to_x(py,px)
48+
case(3)
49+
!$OMP PARALLEL WORKSHARE
50+
pz(:,:,:) = p(1:n(1),1:n(2),1:n(3))
51+
!$OMP END PARALLEL WORKSHARE
52+
!call transpose_z_to_x(pz,px)
53+
call transpose_z_to_y(pz,py)
54+
call transpose_y_to_x(py,px)
55+
end select
5356
call fft(arrplan(1,1),px) ! fwd transform in x
5457
!
5558
call transpose_x_to_y(px,py)
@@ -210,30 +213,31 @@ subroutine solver_gaussel_z(n,a,b,c,bcz,c_or_f,p)
210213
character(len=1), dimension(0:1), intent(in) :: bcz
211214
character(len=1), intent(in), dimension(3) :: c_or_f
212215
real(rp), intent(inout), dimension(0:,0:,0:) :: p
213-
real(rp), allocatable, dimension(:,:,:), save :: px,py,pz
216+
real(rp), allocatable, dimension(:,:,:) :: px,py,pz
214217
integer :: q
215218
integer, dimension(3) :: n_z
216219
logical :: is_no_decomp_z
217220
!
218221
n_z(:) = zsize(:)
219-
is_no_decomp_z = xsize(3) == n_z(3) ! not decomposed along z: xsize(3) == ysize(3) == ng(3) when dims(2) = 1
222+
is_no_decomp_z = xsize(3) == n_z(3).or.ipencil_axis == 3 ! not decomposed along z: xsize(3) == ysize(3) == ng(3) when dims(2) = 1
220223
if(.not.is_no_decomp_z) then
221-
if(.not.allocated(px)) allocate(px(xsize(1),xsize(2),xsize(3)))
222-
if(.not.allocated(py)) allocate(py(ysize(1),ysize(2),ysize(3)))
223-
if(.not.allocated(pz)) allocate(pz(zsize(1),zsize(2),zsize(3)))
224-
#if !defined(_DECOMP_Y) && !defined(_DECOMP_Z)
225-
!$OMP PARALLEL WORKSHARE
226-
px(:,:,:) = p(1:n(1),1:n(2),1:n(3))
227-
!$OMP END PARALLEL WORKSHARE
228-
!call transpose_x_to_z(px,pz)
229-
call transpose_x_to_y(px,py)
230-
call transpose_y_to_z(py,pz)
231-
#elif defined(_DECOMP_Y)
232-
!$OMP PARALLEL WORKSHARE
233-
py(:,:,:) = p(1:n(1),1:n(2),1:n(3))
234-
!$OMP END PARALLEL WORKSHARE
235-
call transpose_y_to_z(py,pz)
236-
#endif
224+
allocate(px(xsize(1),xsize(2),xsize(3)))
225+
allocate(py(ysize(1),ysize(2),ysize(3)))
226+
allocate(pz(zsize(1),zsize(2),zsize(3)))
227+
select case(ipencil_axis)
228+
case(1)
229+
!$OMP PARALLEL WORKSHARE
230+
px(:,:,:) = p(1:n(1),1:n(2),1:n(3))
231+
!$OMP END PARALLEL WORKSHARE
232+
!call transpose_x_to_z(px,pz)
233+
call transpose_x_to_y(px,py)
234+
call transpose_y_to_z(py,pz)
235+
case(2)
236+
!$OMP PARALLEL WORKSHARE
237+
py(:,:,:) = p(1:n(1),1:n(2),1:n(3))
238+
!$OMP END PARALLEL WORKSHARE
239+
call transpose_y_to_z(py,pz)
240+
end select
237241
end if
238242
q = 0
239243
if(c_or_f(3) == 'f'.and.bcz(1) == 'D') q = 1
@@ -244,27 +248,29 @@ subroutine solver_gaussel_z(n,a,b,c,bcz,c_or_f,p)
244248
call gaussel( n_z(1),n_z(2),n_z(3)-q,0,a,b,c,pz)
245249
end if
246250
else
251+
n_z(:) = n(:)
247252
if(bcz(0)//bcz(1) == 'PP') then
248-
call gaussel_periodic(n(1),n(2),n(3)-q,1,a,b,c,p)
253+
call gaussel_periodic(n_z(1),n_z(2),n_z(3)-q,1,a,b,c,p )
249254
else
250-
call gaussel( n(1),n(2),n(3)-q,1,a,b,c,p)
255+
call gaussel( n_z(1),n_z(2),n_z(3)-q,1,a,b,c,p )
251256
end if
252257
end if
253258
!
254259
if(.not.is_no_decomp_z) then
255-
#if !defined(_DECOMP_Y) && !defined(_DECOMP_Z)
256-
!call transpose_z_to_x(pz,px)
257-
call transpose_z_to_y(pz,py)
258-
call transpose_y_to_x(py,px)
259-
!$OMP PARALLEL WORKSHARE
260-
p(1:n(1),1:n(2),1:n(3)) = px(:,:,:)
261-
!$OMP END PARALLEL WORKSHARE
262-
#elif defined(_DECOMP_Y)
263-
call transpose_z_to_y(pz,py)
264-
!$OMP PARALLEL WORKSHARE
265-
p(1:n(1),1:n(2),1:n(3)) = py(:,:,:)
266-
!$OMP END PARALLEL WORKSHARE
267-
#endif
260+
select case(ipencil_axis)
261+
case(1)
262+
!call transpose_z_to_x(pz,px)
263+
call transpose_z_to_y(pz,py)
264+
call transpose_y_to_x(py,px)
265+
!$OMP PARALLEL WORKSHARE
266+
p(1:n(1),1:n(2),1:n(3)) = px(:,:,:)
267+
!$OMP END PARALLEL WORKSHARE
268+
case(2)
269+
call transpose_z_to_y(pz,py)
270+
!$OMP PARALLEL WORKSHARE
271+
p(1:n(1),1:n(2),1:n(3)) = py(:,:,:)
272+
!$OMP END PARALLEL WORKSHARE
273+
end select
268274
end if
269275
end subroutine solver_gaussel_z
270276
!

src/solver_gpu.f90

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ subroutine solver_gaussel_z_gpu(n,a,b,c,bcz,c_or_f,p)
398398
n_x(:) = ap_x%shape(:)
399399
n_y(:) = ap_y%shape(:)
400400
n_z(:) = ap_z%shape(:)
401-
is_no_decomp_z = n_x(3) == n_z(3) ! not decomposed along z: xsize(3) == ysize(3) == ng(3) when dims(2) = 1
401+
is_no_decomp_z = n_x(3) == n_z(3).or.ipencil_axis == 3 ! not decomposed along z: xsize(3) == ysize(3) == ng(3) when dims(2) = 1
402402
if(.not.is_no_decomp_z) then
403403
px(1:n_x(1),1:n_x(2),1:n_x(3)) => solver_buf_0(1:product(n_x(:)))
404404
if(cudecomp_is_t_in_place) then
@@ -444,21 +444,20 @@ subroutine solver_gaussel_z_gpu(n,a,b,c,bcz,c_or_f,p)
444444
end select
445445
end if
446446
!
447-
if(ipencil_axis /= 3 .and. .not.is_no_decomp_z) then
448-
q = 0
449-
if(c_or_f(3) == 'f'.and.bcz(1) == 'D') q = 1
447+
q = 0
448+
if(c_or_f(3) == 'f'.and.bcz(1) == 'D') q = 1
449+
if(.not.is_no_decomp_z) then
450450
if(bcz(0)//bcz(1) == 'PP') then
451451
call gaussel_periodic_gpu(n_z_0(1),n_z_0(2),n_z_0(3)-q,0,a,b,c,pz,work,pz_aux_1,pz_aux_2)
452452
else
453453
call gaussel_gpu( n_z_0(1),n_z_0(2),n_z_0(3)-q,0,a,b,c,pz,work)
454454
end if
455455
else
456-
q = 0
457-
if(c_or_f(3) == 'f'.and.bcz(1) == 'D') q = 1
456+
n_z_0(:) = n(:)
458457
if(bcz(0)//bcz(1) == 'PP') then
459-
call gaussel_periodic_gpu(n(1),n(2),n(3)-q,1,a,b,c,p,work,pz_aux_1,pz_aux_2)
458+
call gaussel_periodic_gpu(n_z_0(1),n_z_0(2),n_z_0(3)-q,1,a,b,c,p ,work,pz_aux_1,pz_aux_2)
460459
else
461-
call gaussel_gpu( n(1),n(2),n(3)-q,1,a,b,c,p,work)
460+
call gaussel_gpu( n_z_0(1),n_z_0(2),n_z_0(3)-q,1,a,b,c,p ,work)
462461
end if
463462
end if
464463
!

0 commit comments

Comments
 (0)