Skip to content

Commit 8838e74

Browse files
committed
Reduce threading barriers in ocean
1 parent 0c12a20 commit 8838e74

12 files changed

+2
-79
lines changed

src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F

Lines changed: 1 addition & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -397,7 +397,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
397397
398398
call mpas_pool_get_subpool(domain % blocklist % structs, 'diagnostics', diagnosticsPool)
399399
400-
call mpas_threading_barrier()
401400
! --- update halos for diagnostic ocean boundary layer depth
402401
if (config_use_cvmix_kpp) then
403402
call mpas_timer_start("se halo diag obd")
@@ -438,15 +437,12 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
438437
end do
439438
call mpas_timer_stop("se freq-filtered-thick computations")
440439
441-
call mpas_threading_barrier()
442-
443440
call mpas_timer_start("se freq-filtered-thick halo update")
444441
445442
call mpas_dmpar_field_halo_exch(domain, 'tendHighFreqThickness')
446443
call mpas_dmpar_field_halo_exch(domain, 'tendLowFreqDivergence')
447444
448445
call mpas_timer_stop("se freq-filtered-thick halo update")
449-
call mpas_threading_barrier()
450446
451447
block => domain % blocklist
452448
do while (associated(block))
@@ -609,8 +605,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
609605
block => block % next
610606
end do
611607

612-
call mpas_threading_barrier()
613-
614608
call mpas_timer_start("se halo normalBaroclinicVelocity")
615609
call mpas_dmpar_field_halo_exch(domain, 'normalBaroclinicVelocity', timeLevel=2)
616610
call mpas_timer_stop("se halo normalBaroclinicVelocity")
@@ -761,21 +755,16 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
761755
edgeHaloComputeCounter = 0
762756
neededHalos = 1 + config_n_btr_cor_iter
763757

764-
call mpas_threading_barrier()
765-
766758
call mpas_timer_start('btr se subcycle loop')
767759
do j = 1, nBtrSubcycles * config_btr_subcycle_loop_factor
768760
if(cellHaloComputeCounter < neededHalos) then
769761

770-
call mpas_threading_barrier()
771762
call mpas_timer_start('se halo subcycle')
772763
call mpas_dmpar_exch_group_reuse_halo_exch(domain, subcycleGroupName, timeLevel=oldBtrSubcycleTime)
773-
call mpas_threading_barrier()
774764
call mpas_timer_stop('se halo subcycle')
775765

776766
cellHaloComputeCounter = config_num_halos - mod( config_num_halos, neededHalos )
777767
edgeHaloComputeCounter = config_num_halos + 1 - mod( config_num_halos, neededHalos )
778-
call mpas_threading_barrier()
779768
end if
780769

781770
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -1235,7 +1224,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
12351224
end do ! j=1,nBtrSubcycles
12361225
call mpas_timer_stop('btr se subcycle loop')
12371226
1238-
call mpas_threading_barrier()
12391227
call mpas_pool_get_subpool(domain % blocklist % structs, 'scratch', scratchPool)
12401228
call mpas_pool_get_field(scratchPool, 'btrvel_temp', btrvel_tempField)
12411229
call mpas_deallocate_scratch_field(btrvel_tempField, .false.)
@@ -1276,8 +1264,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
12761264
end do ! block
12771265
call mpas_timer_stop('btr se norm')
12781266
1279-
call mpas_threading_barrier()
1280-
12811267
! boundary update on F
12821268
call mpas_timer_start("se halo F and btr vel")
12831269
call mpas_dmpar_exch_group_create(domain, finalBtrGroupName)
@@ -1287,12 +1273,11 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
12871273
12881274
call mpas_threading_barrier()
12891275
call mpas_dmpar_exch_group_full_halo_exch(domain, finalBtrGroupName)
1276+
call mpas_threading_barrier()
12901277
12911278
call mpas_dmpar_exch_group_destroy(domain, finalBtrGroupName)
12921279
call mpas_timer_stop("se halo F and btr vel")
12931280
1294-
call mpas_threading_barrier()
1295-
12961281
! Check that you can compute SSH using the total sum or the individual increments
12971282
! over the barotropic subcycles.
12981283
! efficiency: This next block of code is really a check for debugging, and can
@@ -1443,17 +1428,13 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
14431428
end do
14441429
call mpas_timer_stop('se thick tend')
14451430
1446-
call mpas_threading_barrier()
1447-
14481431
! update halo for thickness tendencies
14491432
call mpas_timer_start("se halo thickness")
14501433
14511434
call mpas_dmpar_field_halo_exch(domain, 'tendLayerThickness')
14521435
14531436
call mpas_timer_stop("se halo thickness")
14541437
1455-
call mpas_threading_barrier()
1456-
14571438
call mpas_timer_start('se tracer tend', .false.)
14581439
block => domain % blocklist
14591440
do while (associated(block))
@@ -1473,8 +1454,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
14731454
end do
14741455
call mpas_timer_stop('se tracer tend')
14751456
1476-
call mpas_threading_barrier()
1477-
14781457
! update halo for tracer tendencies
14791458
call mpas_timer_start("se halo tracers")
14801459
call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tendPool)
@@ -1491,8 +1470,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
14911470
end do
14921471
call mpas_timer_stop("se halo tracers")
14931472
1494-
call mpas_threading_barrier()
1495-
14961473
call mpas_timer_start('se loop fini')
14971474
block => domain % blocklist
14981475
do while (associated(block))
@@ -1807,8 +1784,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
18071784
block => block % next
18081785
end do
18091786
1810-
call mpas_threading_barrier()
1811-
18121787
! Update halo on u and tracers, which were just updated for implicit vertical mixing. If not done,
18131788
! this leads to lack of volume conservation. It is required because halo updates in stage 3 are only
18141789
! conducted on tendencies, not on the velocity and tracer fields. So this update is required to
@@ -1832,8 +1807,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
18321807
18331808
call mpas_timer_stop("se implicit vert mix")
18341809
1835-
call mpas_threading_barrier()
1836-
18371810
call mpas_timer_start('se fini')
18381811
block => domain % blocklist
18391812
do while (associated(block))
@@ -1930,7 +1903,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{
19301903
!$omp end do
19311904

19321905
call ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forcingPool, 2)
1933-
call mpas_threading_barrier()
19341906

19351907
if (config_use_standardGM) then
19361908
call ocn_reconstruct_gm_vectors(diagnosticsPool, meshPool)

src/core_ocean/shared/mpas_ocn_diagnostics.F

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -390,7 +390,6 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic
390390
call mpas_pool_get_field(scratchPool, 'kineticEnergyVertexOnCells', kineticEnergyVertexOnCellsField)
391391
call mpas_allocate_scratch_field(kineticEnergyVertexField, .true.)
392392
call mpas_allocate_scratch_field(kineticEnergyVertexOnCellsField, .true.)
393-
call mpas_threading_barrier()
394393

395394
kineticEnergyVertex => kineticEnergyVertexField % array
396395
kineticEnergyVertexOnCells => kineticEnergyVertexOnCellsField % array
@@ -435,7 +434,6 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic
435434
end do
436435
!$omp end do
437436

438-
call mpas_threading_barrier()
439437
call mpas_deallocate_scratch_field(kineticEnergyVertexField, .true.)
440438
call mpas_deallocate_scratch_field(kineticEnergyVertexOnCellsField, .true.)
441439
end if
@@ -447,7 +445,6 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic
447445
call mpas_pool_get_field(scratchPool, 'normalizedPlanetaryVorticityVertex', normalizedPlanetaryVorticityVertexField)
448446
call mpas_allocate_scratch_field(normalizedRelativeVorticityVertexField, .true., .false.)
449447
call mpas_allocate_scratch_field(normalizedPlanetaryVorticityVertexField, .true., .false.)
450-
call mpas_threading_barrier()
451448

452449
normalizedPlanetaryVorticityVertex => normalizedPlanetaryVorticityVertexField % array
453450
normalizedRelativeVorticityVertex => normalizedRelativeVorticityVertexField % array
@@ -517,7 +514,6 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic
517514
call mpas_pool_get_field(scratchPool, 'vorticityGradientTangentialComponent', vorticityGradientTangentialComponentField)
518515
call mpas_allocate_scratch_field(vorticityGradientNormalComponentField, .true.)
519516
call mpas_allocate_scratch_field(vorticityGradientTangentialComponentField, .true.)
520-
call mpas_threading_barrier()
521517

522518
vorticityGradientNormalComponent => vorticityGradientNormalComponentField % array
523519
vorticityGradientTangentialComponent => vorticityGradientTangentialComponentField % array
@@ -564,13 +560,11 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic
564560
enddo
565561
!$omp end do
566562

567-
call mpas_threading_barrier()
568563
call mpas_deallocate_scratch_field(vorticityGradientNormalComponentField, .true.)
569564
call mpas_deallocate_scratch_field(vorticityGradientTangentialComponentField, .true.)
570565

571566
endif
572567

573-
call mpas_threading_barrier()
574568
call mpas_deallocate_scratch_field(normalizedRelativeVorticityVertexField, .true.)
575569
call mpas_deallocate_scratch_field(normalizedPlanetaryVorticityVertexField, .true.)
576570

@@ -862,8 +856,6 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic
862856
end do
863857
!$omp end do
864858
865-
call mpas_threading_barrier()
866-
867859
call mpas_deallocate_scratch_field(normalizedRelativeVorticityVertexField, .true.)
868860
call mpas_deallocate_scratch_field(normalizedPlanetaryVorticityVertexField, .true.)
869861
call mpas_timer_stop('diagnostic solve')
@@ -983,8 +975,6 @@ subroutine ocn_vert_transport_velocity_top(meshPool, verticalMeshPool, scratchPo
983975
call mpas_allocate_scratch_field(projectedSSHField, .true.)
984976
call mpas_allocate_scratch_field(ALE_ThicknessField, .true.)
985977
986-
call mpas_threading_barrier()
987-
988978
div_hu => div_huField % array
989979
projectedSSH => projectedSSHField % array
990980
ALE_Thickness => ALE_ThicknessField % array
@@ -1041,7 +1031,6 @@ subroutine ocn_vert_transport_velocity_top(meshPool, verticalMeshPool, scratchPo
10411031
end do
10421032
!$omp end do
10431033
1044-
call mpas_threading_barrier()
10451034
call mpas_deallocate_scratch_field(div_huField, .true.)
10461035
call mpas_deallocate_scratch_field(projectedSSHField, .true.)
10471036
call mpas_deallocate_scratch_field(ALE_ThicknessField, .true.)
@@ -1084,7 +1073,6 @@ subroutine ocn_fuperp(statePool, meshPool, timeLevelIn)!{{{
10841073
end if
10851074
10861075
call mpas_timer_start("ocn_fuperp")
1087-
call mpas_threading_barrier()
10881076
10891077
call mpas_pool_get_array(statePool, 'normalVelocity', normalVelocity, timeLevel)
10901078
call mpas_pool_get_array(statePool, 'normalBaroclinicVelocity', normalBaroclinicVelocity, timeLevel)
@@ -1122,7 +1110,6 @@ subroutine ocn_fuperp(statePool, meshPool, timeLevelIn)!{{{
11221110
end do
11231111
!$omp end do
11241112
1125-
call mpas_threading_barrier()
11261113
call mpas_timer_stop("ocn_fuperp")
11271114
11281115
end subroutine ocn_fuperp!}}}
@@ -1435,7 +1422,6 @@ subroutine ocn_compute_KPP_input_fields(statePool, forcingPool, meshPool, diagno
14351422
call mpas_allocate_scratch_field(densitySurfaceDisplacedField, .true., .false.)
14361423
call mpas_allocate_scratch_field(thermalExpansionCoeffField, .true., .false.)
14371424
call mpas_allocate_scratch_field(salineContractionCoeffField, .true., .false.)
1438-
call mpas_threading_barrier()
14391425
14401426
densitySurfaceDisplaced => densitySurfaceDisplacedField % array
14411427
thermalExpansionCoeff => thermalExpansionCoeffField % array
@@ -1695,7 +1681,7 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, &
16951681
call mpas_pool_get_field(scratchPool, 'boundaryLayerSalinityScratch', boundaryLayerSalinityField)
16961682
call mpas_allocate_scratch_field(boundaryLayerTemperatureField, .true.)
16971683
call mpas_allocate_scratch_field(boundaryLayerSalinityField, .true.)
1698-
call mpas_threading_barrier()
1684+
16991685
blTempScratch => boundaryLayerTemperatureField % array
17001686
blSaltScratch => boundaryLayerSalinityField % array
17011687
@@ -1806,7 +1792,6 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, &
18061792
!$omp end do
18071793
end if
18081794
1809-
call mpas_threading_barrier()
18101795
call mpas_deallocate_scratch_field(boundaryLayerTemperatureField, .true.)
18111796
call mpas_deallocate_scratch_field(boundaryLayerSalinityField, .true.)
18121797

src/core_ocean/shared/mpas_ocn_gm.F

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -172,8 +172,6 @@ subroutine ocn_gm_compute_Bolus_velocity(diagnosticsPool, meshPool, scratchPool)
172172
call mpas_allocate_scratch_field(gradZMidTopOfEdgeField, .True., .false.)
173173
call mpas_allocate_scratch_field(areaCellSumField, .True., .false.)
174174

175-
call mpas_threading_barrier()
176-
177175
gradDensityEdge => gradDensityEdgeField % array
178176
gradDensityTopOfEdge => gradDensityTopOfEdgeField % array
179177
gradDensityConstZTopOfEdge => gradDensityConstZTopOfEdgeField % array
@@ -615,8 +613,6 @@ subroutine ocn_gm_compute_Bolus_velocity(diagnosticsPool, meshPool, scratchPool)
615613
deallocate(tridiagB)
616614
deallocate(tridiagC)
617615

618-
call mpas_threading_barrier()
619-
620616
! Deallocate scratch variables
621617
call mpas_deallocate_scratch_field(gradDensityEdgeField, .true.)
622618
call mpas_deallocate_scratch_field(gradDensityTopOfEdgeField, .true.)

src/core_ocean/shared/mpas_ocn_tendency.F

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,6 @@ subroutine ocn_tend_vel(tendPool, statePool, forcingPool, diagnosticsPool, meshP
345345
! velocity tendency: vertical mixing d/dz( nu_v du/dz))
346346
!
347347
call mpas_timer_stop("ocn_tend_vel")
348-
call mpas_threading_barrier()
349348

350349
end subroutine ocn_tend_vel!}}}
351350

@@ -534,7 +533,6 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me
534533
!allocate(normalThicknessFlux(nVertLevels, nEdges+1))
535534
call mpas_pool_get_field(scratchPool, 'normalThicknessFlux', normalThicknessFluxField)
536535
call mpas_allocate_scratch_field(normalThicknessFluxField, .true.)
537-
call mpas_threading_barrier()
538536

539537
normalThicknessFlux => normalThicknessFluxField % array
540538

@@ -891,7 +889,6 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me
891889
end if
892890
end do
893891

894-
call mpas_threading_barrier()
895892
call mpas_deallocate_scratch_field(normalThicknessFluxField, .true.)
896893

897894
call mpas_timer_stop("ocn_tend_tracer")

src/core_ocean/shared/mpas_ocn_time_average_coupled.F

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,6 @@ subroutine ocn_time_average_coupled_init(forcingPool)!{{{
178178
!$omp end do
179179
endif
180180

181-
call mpas_threading_barrier()
182181
!$omp master
183182

184183
nAccumulatedCoupled = 0
@@ -401,7 +400,6 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci
401400
!$omp end do
402401
endif
403402

404-
call mpas_threading_barrier()
405403
!$omp master
406404

407405
nAccumulatedCoupled = nAccumulatedCoupled + 1

src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,6 @@ subroutine ocn_tracer_advection_mono_tend(tracers, adv_coefs, adv_coefs_3rd, nAd
167167
call mpas_allocate_scratch_field(hProvField, .true., .false.)
168168
call mpas_allocate_scratch_field(lowOrderFluxField, .true., .false.)
169169
call mpas_allocate_scratch_field(highOrderFluxField, .true., .false.)
170-
call mpas_threading_barrier()
171170

172171
! allocate nCells arrays
173172
h_prov_inv => hProvInvField % array
@@ -685,17 +684,9 @@ subroutine ocn_tracer_advection_mono_tend(tracers, adv_coefs, adv_coefs_3rd, nAd
685684
#endif
686685
end do ! iTracer loop
687686

688-
#ifdef _ADV_TIMERS
689-
call mpas_timer_start('half step')
690-
#endif
691-
#ifdef _ADV_TIMERS
692-
call mpas_timer_stop('half step')
693-
#endif
694-
695687
#ifdef _ADV_TIMERS
696688
call mpas_timer_start('deallocates')
697689
#endif
698-
call mpas_threading_barrier()
699690
call mpas_deallocate_scratch_field(tracerCurField, .true.)
700691
call mpas_deallocate_scratch_field(workTendencyField, .true.)
701692
call mpas_deallocate_scratch_field(hNewInvField, .true.)

src/core_ocean/shared/mpas_ocn_tracer_advection_std.F

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,6 @@ subroutine ocn_tracer_advection_std_tend(tracers, adv_coefs, adv_coefs_3rd, nAdv
116116
call mpas_allocate_scratch_field(highOrderHorizFluxField, .true.)
117117
call mpas_allocate_scratch_field(tracerCurField, .true.)
118118
call mpas_allocate_scratch_field(highOrderVertFluxField, .true.)
119-
call mpas_threading_barrier()
120119

121120
high_order_horiz_flux => highOrderHorizFluxField % array
122121
tracer_cur => tracerCurField % array
@@ -223,7 +222,6 @@ subroutine ocn_tracer_advection_std_tend(tracers, adv_coefs, adv_coefs_3rd, nAdv
223222
!$omp end do
224223
end do ! iTracer loop
225224

226-
call mpas_threading_barrier()
227225
call mpas_deallocate_scratch_field(highOrderHorizFluxField, .true.)
228226
call mpas_deallocate_scratch_field(tracerCurField, .true.)
229227
call mpas_deallocate_scratch_field(highOrderVertFluxField, .true.)

src/core_ocean/shared/mpas_ocn_tracer_hmix_del4.F

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -173,8 +173,6 @@ subroutine ocn_tracer_hmix_del4_tend(meshPool, scratchPool, layerThicknessEdge,
173173

174174
call mpas_allocate_scratch_field(delsq_tracerField, .true.)
175175

176-
call mpas_threading_barrier()
177-
178176
delsq_tracer => delsq_tracerField % array
179177

180178
! Need 1 halo around owned cells

src/core_ocean/shared/mpas_ocn_tracer_hmix_redi.F

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,6 @@ subroutine ocn_tracer_hmix_redi_tend(meshPool, scratchPool, layerThicknessEdge,
190190
call mpas_allocate_scratch_field(dTracerdZTopOfCellField, .true., .false.)
191191
call mpas_allocate_scratch_field(dTracerdZTopOfEdgeField, .true., .false.)
192192
call mpas_allocate_scratch_field(areaCellSumField, .true., .false.)
193-
call mpas_threading_barrier()
194193

195194
gradTracerEdge => gradTracerEdgeField % array
196195
gradTracerTopOfEdge => gradTracerTopOfEdgeField % array
@@ -253,8 +252,6 @@ subroutine ocn_tracer_hmix_redi_tend(meshPool, scratchPool, layerThicknessEdge,
253252

254253
! Compute vertical derivative of tracers at cell center and top of layer
255254
do iTracer = 1, num_tracers
256-
! Sync threads before starting on tracers
257-
call mpas_threading_barrier()
258255

259256
nCells = nCellsArray( 2 )
260257
!$omp do schedule(runtime)
@@ -389,7 +386,6 @@ subroutine ocn_tracer_hmix_redi_tend(meshPool, scratchPool, layerThicknessEdge,
389386

390387
end do ! iTracer
391388

392-
call mpas_threading_barrier()
393389
call mpas_deallocate_scratch_field(gradTracerEdgeField, .true.)
394390
call mpas_deallocate_scratch_field(gradTracerTopOfEdgeField, .true.)
395391
call mpas_deallocate_scratch_field(gradHTracerSlopedTopOfCellField, .true.)

0 commit comments

Comments
 (0)