Skip to content

Commit 0330613

Browse files
performance enhancements from ESCOMP/CAM#1365
1 parent 5dd4566 commit 0330613

File tree

2 files changed

+310
-316
lines changed

2 files changed

+310
-316
lines changed

src/dynamics/se/dycore/fvm_consistent_se_cslam.F90

Lines changed: 31 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#define FVM_TIMERS .FALSE.
12
module fvm_consistent_se_cslam
23
use shr_kind_mod, only: r8=>shr_kind_r8
34
use dimensions_mod, only: nc, nhe, nlev, ntrac, np, nhr, nhc, ngpc, ns, nht
@@ -40,7 +41,7 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
4041
use bndry_mod , only: ghost_exchange
4142
use hybvcoord_mod , only: hvcoord_t
4243
!Un-comment once constituents are enabled -JN:
43-
! use constituents , only: qmin
44+
use constituents , only: qmin
4445
use dimensions_mod , only: large_Courant_incr,irecons_tracer_lev
4546
use thread_mod , only: vert_num_threads, omp_set_nested
4647
implicit none
@@ -108,7 +109,7 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
108109
endif
109110

110111
kblk = kmax-kmin+1
111-
!call t_startf('fvm:before_Qnhc')
112+
if(FVM_TIMERS) call t_startf('fvm:before_Qnhc')
112113
do ie=nets,nete
113114
do k=kmin,kmax
114115
elem(ie)%sub_elem_mass_flux(:,:,:,k) = dt_fvm*elem(ie)%sub_elem_mass_flux(:,:,:,k)*fvm(ie)%dp_ref_inverse(k)
@@ -121,11 +122,11 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
121122
call ghostpack(ghostbufQnhc,fvm(ie)%c(1-nhc:nc+nhc,1-nhc:nc+nhc,kmin:kmax,q),kblk,kptr,ie)
122123
enddo
123124
end do
124-
!call t_stopf('fvm:before_Qnhc')
125-
!call t_startf('fvm:ghost_exchange:Qnhc')
125+
if(FVM_TIMERS) call t_stopf('fvm:before_Qnhc')
126+
if(FVM_TIMERS) call t_startf('fvm:ghost_exchange:Qnhc')
126127
call ghost_exchange(hybridnew,ghostbufQnhc,location='ghostbufQnhc')
127-
!call t_stopf('fvm:ghost_exchange:Qnhc')
128-
!call t_startf('fvm:orthogonal_swept_areas')
128+
if(FVM_TIMERS) call t_stopf('fvm:ghost_exchange:Qnhc')
129+
if(FVM_TIMERS) call t_startf('fvm:orthogonal_swept_areas')
129130
do ie=nets,nete
130131
do k=kmin,kmax
131132
fvm(ie)%se_flux (1:nc,1:nc,:,k) = elem(ie)%sub_elem_mass_flux(:,:,:,k)
@@ -153,14 +154,14 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
153154
end do
154155
enddo
155156

156-
!call t_stopf('fvm:orthogonal_swept_areas')
157+
if(FVM_TIMERS) call t_stopf('fvm:orthogonal_swept_areas')
157158
do ie=nets,nete
158159
! Intel compiler version 2023.0.0 on derecho had significant slowdown on subroutine interface without
159160
! these pointers.
160161
fcube => fvm(ie)%c(:,:,:,:)
161162
spherecentroid => fvm(ie)%spherecentroid(:,1-nhe:nc+nhe,1-nhe:nc+nhe)
162163
do k=kmin,kmax
163-
!call t_startf('FVM:tracers_reconstruct')
164+
if(FVM_TIMERS) call t_startf('FVM:tracers_reconstruct')
164165
call reconstruction(fcube,nlev,k,&
165166
ctracer(:,:,:,:),irecons_tracer,llimiter,ntrac,&
166167
nc,nhe,nhr,nhc,nht,ns,nhr+(nhe-1),&
@@ -171,10 +172,10 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
171172
fvm(ie)%rot_matrix,fvm(ie)%centroid_stretch,&
172173
fvm(ie)%vertex_recons_weights,fvm(ie)%vtx_cart,&
173174
irecons_tracer_lev(k))
174-
!call t_stopf('FVM:tracers_reconstruct')
175-
!call t_startf('fvm:swept_flux')
175+
if(FVM_TIMERS) call t_stopf('FVM:tracers_reconstruct')
176+
if(FVM_TIMERS) call t_startf('fvm:swept_flux')
176177
call swept_flux(elem(ie),fvm(ie),k,ctracer,irecons_tracer_lev(k),gsweights,gspts)
177-
!call t_stopf('fvm:swept_flux')
178+
if(FVM_TIMERS) call t_stopf('fvm:swept_flux')
178179
end do
179180
end do
180181
!
@@ -194,7 +195,7 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
194195
!
195196
!
196197
if (large_Courant_incr) then
197-
!call t_startf('fvm:fill_halo_fvm:large_Courant')
198+
if(FVM_TIMERS) call t_startf('fvm:fill_halo_fvm:large_Courant')
198199
!if (kmin_jet<kmin.or.kmax_jet>kmax) then
199200
! call endrun('ERROR: kmax_jet must be .le. kmax passed to run_consistent_se_cslam')
200201
!end if
@@ -204,19 +205,19 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
204205
kmax_jet_local = min(kmax_jet,kmax)
205206
klev = kmax_jet-kmin_jet+1
206207
call fill_halo_fvm(ghostbufQ1,elem,fvm,hybridnew,nets,nete,1,kmin_jet_local,kmax_jet_local,klev,active=ActiveJetThread)
207-
!call t_stopf('fvm:fill_halo_fvm:large_Courant')
208-
!call t_startf('fvm:large_Courant_number_increment')
208+
if(FVM_TIMERS) call t_stopf('fvm:fill_halo_fvm:large_Courant')
209+
if(FVM_TIMERS) call t_startf('fvm:large_Courant_number_increment')
209210
if(ActiveJetThread) then
210211
do k=kmin_jet_local,kmax_jet_local !1,nlev
211212
do ie=nets,nete
212213
call large_courant_number_increment(fvm(ie),k)
213214
end do
214215
end do
215216
endif
216-
!call t_stopf('fvm:large_Courant_number_increment')
217+
if(FVM_TIMERS) call t_stopf('fvm:large_Courant_number_increment')
217218
end if
218219

219-
!call t_startf('fvm:end_of_reconstruct_subroutine')
220+
if(FVM_TIMERS) call t_startf('fvm:end_of_reconstruct_subroutine')
220221
do k=kmin,kmax
221222
!
222223
! convert to mixing ratio
@@ -234,9 +235,8 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
234235
! convert to mixing ratio
235236
fvm(ie)%c(i,j,k,itr) = fvm(ie)%c(i,j,k,itr)*inv_dp_area(i,j)
236237
! remove round-off undershoots
237-
!fvm(ie)%c(i,j,k,itr) = MAX(fvm(ie)%c(i,j,k,itr),qmin(itr))
238238
!Remove once constituents are enabled and ucomment above line -JN:
239-
fvm(ie)%c(i,j,k,itr) = MAX(fvm(ie)%c(i,j,k,itr), 0._r8)
239+
fvm(ie)%c(i,j,k,itr) = MAX(fvm(ie)%c(i,j,k,itr),qmin(itr))
240240
end do
241241
end do
242242
end do
@@ -254,7 +254,7 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
254254
elem(ie)%sub_elem_mass_flux(:,:,:,k)=0
255255
end do
256256
end do
257-
!call t_stopf('fvm:end_of_reconstruct_subroutine')
257+
if(FVM_TIMERS) call t_stopf('fvm:end_of_reconstruct_subroutine')
258258
!$OMP END PARALLEL
259259
call omp_set_nested(.false.)
260260
end subroutine run_consistent_se_cslam
@@ -284,7 +284,7 @@ subroutine swept_flux(elem,fvm,ilev,ctracer,irecons_tracer_actual,gsweights,gspt
284284
REAL(KIND=r8), dimension(2,8) :: x_start, dgam_vec
285285
REAL(KIND=r8) :: gamma_max, displ_first_guess
286286

287-
REAL(KIND=r8) :: flux,flux_tracer(ntrac)
287+
REAL(KIND=r8) :: flux,flux_tracer(ntrac),w
288288

289289
REAL(KIND=r8), dimension(num_area) :: dp_area
290290

@@ -309,7 +309,6 @@ subroutine swept_flux(elem,fvm,ilev,ctracer,irecons_tracer_actual,gsweights,gspt
309309
!
310310
! prepare for air/tracer update
311311
!
312-
! dp = fvm%dp_fvm(1-nhe:nc+nhe,1-nhe:nc+nhe,ilev)
313312
dp = fvm%dp_fvm(1-nhc:nc+nhc,1-nhc:nc+nhc,ilev)
314313
fvm%dp_fvm(1:nc,1:nc,ilev) = fvm%dp_fvm(1:nc,1:nc,ilev)*fvm%area_sphere
315314
do itr=1,ntrac
@@ -541,14 +540,14 @@ subroutine swept_flux(elem,fvm,ilev,ctracer,irecons_tracer_actual,gsweights,gspt
541540
!
542541
! iterate to get flux area
543542
!
544-
!call t_startf('fvm:swept_area:get_gamma')
543+
if(FVM_TIMERS) call t_startf('fvm:swept_area:get_gamma')
545544
do iarea=1,num_area
546545
dp_area(iarea) = dp(idx(1,iarea,i,j,iside),idx(2,iarea,i,j,iside))
547546
end do
548547
call get_flux_segments_area_iterate(x,x_static,dx_static,dx,x_start,dgam_vec,num_seg,num_seg_static,&
549548
num_seg_max,num_area,dp_area,flowcase,gamma,mass_flux_se(i,j,iside),0.0_r8,gamma_max, &
550549
gsweights,gspts,ilev)
551-
!call t_stopf('fvm:swept_area:get_gamma')
550+
if(FVM_TIMERS) call t_stopf('fvm:swept_area:get_gamma')
552551
!
553552
! pack segments for high-order weights computation
554553
!
@@ -563,27 +562,28 @@ subroutine swept_flux(elem,fvm,ilev,ctracer,irecons_tracer_actual,gsweights,gspt
563562
!
564563
! compute higher-order weights
565564
!
566-
!call t_startf('fvm:swept_area:get_high_order_w')
565+
if(FVM_TIMERS) call t_startf('fvm:swept_area:get_high_order_w')
567566
call get_high_order_weights_over_areas(x,dx,num_seg,num_seg_max,num_area,weights,ngpc,&
568567
gsweights, gspts,irecons_tracer)
569-
!call t_stopf('fvm:swept_area:get_high_order_w')
568+
if(FVM_TIMERS) call t_stopf('fvm:swept_area:get_high_order_w')
570569
!
571570
!**************************************************
572571
!
573572
! remap air and tracers
574573
!
575574
!**************************************************
576575
!
577-
!call t_startf('fvm:swept_area:remap')
576+
if(FVM_TIMERS) call t_startf('fvm:swept_area:remap')
578577
flux=0.0_r8; flux_tracer=0.0_r8
579578
do iarea=1,num_area
580579
if (num_seg(iarea)>0) then
581580
ii=idx(1,iarea,i,j,iside); jj=idx(2,iarea,i,j,iside)
582581
flux=flux+weights(1,iarea)*dp(ii,jj)
583-
do itr=1,ntrac
584-
do iw=1,irecons_tracer_actual
585-
flux_tracer(itr) = flux_tracer(itr)+weights(iw,iarea)*ctracer(iw,ii,jj,itr)
586-
end do
582+
do iw=1,irecons_tracer_actual
583+
w = weights(iw,iarea)
584+
do itr=1,ntrac
585+
flux_tracer(itr) = flux_tracer(itr)+w*ctracer(iw,ii,jj,itr)
586+
end do
587587
end do
588588
end if
589589
end do
@@ -617,7 +617,7 @@ subroutine swept_flux(elem,fvm,ilev,ctracer,irecons_tracer_actual,gsweights,gspt
617617
fvm%dp_fvm(i-1,j,ilev ) = fvm%dp_fvm(i-1,j,ilev )+flux
618618
fvm% c(i-1,j,ilev,1:ntrac) = fvm% c(i-1,j,ilev,1:ntrac)+flux_tracer(1:ntrac)
619619
end if
620-
!call t_stopf('fvm:swept_area:remap')
620+
if(FVM_TIMERS) call t_stopf('fvm:swept_area:remap')
621621
end if
622622
end do
623623
end do
@@ -655,7 +655,6 @@ subroutine large_courant_number_increment(fvm,ilev)
655655
fvm%se_flux(i,j,iside,ilev)*inv_dp_area(i,j)
656656
end if
657657
#endif
658-
659658
do itr=1,ntrac
660659
flux_tracer(itr) = fvm%se_flux(i,j,iside,ilev)*c_tmp(i,j,itr)*inv_dp_area(i,j)
661660
end do

0 commit comments

Comments
 (0)