|
| 1 | +program main() |
| 2 | + |
| 3 | + use shr_kind_mod , only : r8 => shr_kind_r8 |
| 4 | + use update_accMod |
| 5 | + use elm_varctl |
| 6 | + use filterMod |
| 7 | + use decompMod ,only : get_clump_bounds_gpu, gpu_clumps,gpu_procinfo |
| 8 | + use decompMod , only : get_proc_bounds, get_clump_bounds,procinfo,clumps, init_proc_clump_info |
| 9 | + use verificationMod |
| 10 | + use cudafor |
| 11 | + use landunit_varcon , only : istice, istice_mec, istsoil |
| 12 | + use soilorder_varcon |
| 13 | + use timeInfoMod |
| 14 | + use pftvarcon |
| 15 | + use GridcellType |
| 16 | + use TopounitType |
| 17 | + use LandunitType |
| 18 | + use ColumnType |
| 19 | + use VegetationType |
| 20 | + use VegetationPropertiesType |
| 21 | + use elm_instMod |
| 22 | + use elm_initializeMod |
| 23 | + use LakeCon |
| 24 | + !#USE_START |
| 25 | + !#py use elm_varorb |
| 26 | + !#py use GridcellDataType |
| 27 | + use TopounitDataType |
| 28 | + !#py use LandunitDataType |
| 29 | + use ColumnDataType |
| 30 | + use VegetationDataType |
| 31 | + !#py use dynPriorWeightsMod |
| 32 | + use SharedParamsMod |
| 33 | + !#py use SoilLittDecompMod |
| 34 | + !#py use DecompCascadeCNMod |
| 35 | + !#py use DecompCascadeBGCMod |
| 36 | + !#py use AllocationMod |
| 37 | + !#py use NitrifDenitrifMod |
| 38 | + use CNDecompCascadeConType |
| 39 | + !#py use GapMortalityMod |
| 40 | + !#py use SoilLittVertTranspMod |
| 41 | + use UrbanParamsType |
| 42 | + use CH4Mod |
| 43 | + !#py use PhotosynthesisMod |
| 44 | + use domainMod |
| 45 | + !#py use DaylengthMod |
| 46 | + !#py use atm2lndMod |
| 47 | + !#py use CanopyHydrologyMod |
| 48 | + !#py use SurfaceRadiationMod |
| 49 | + !#py use UrbanRadiationMod |
| 50 | + !#py use CanopyFluxesMod |
| 51 | + !#py use CanopyTemperatureMod |
| 52 | + !#py use BareGroundFluxesMod |
| 53 | + !#py use UrbanFluxesMod |
| 54 | + !#py use LakeFluxesMod |
| 55 | + !#py use DUSTMod |
| 56 | + use LakeTemperatureMod |
| 57 | + !#py use SoilTemperatureMod |
| 58 | + !#py use SoilFluxesMod |
| 59 | + !#py use HydrologyNoDrainageMod |
| 60 | + !#py use AerosolMod |
| 61 | + !#py use SnowSnicarMod |
| 62 | + !#py use LakeHydrologyMod |
| 63 | + !#py use EcosystemDynMod |
| 64 | + !#py use SedYieldMod |
| 65 | + !#py use AnnualUpdateMod |
| 66 | + !#py use DryDepVelocity |
| 67 | + !#py use Ch4Mod |
| 68 | + !#py use dynInitColumnsMod |
| 69 | + !#py use dynConsBiogeophysMod |
| 70 | + !#py use dynConsBiogeochemMod |
| 71 | + !#py use reweightMod |
| 72 | + !#py use subgridWeightsMod |
| 73 | + !#py use NitrogenDynamicsMod |
| 74 | + !#py use CarbonStateUpdate1Mod |
| 75 | + !#py use NitrogenStateUpdate1Mod |
| 76 | + !#py use PhosphorusStateUpdate1Mod |
| 77 | + !#py use FireMod |
| 78 | + !#py use dynPriorWeightsMod |
| 79 | + !#py use dynSubgridDriverMod |
| 80 | + !#py use dynPatchStateUpdaterMod |
| 81 | + !#py use dynColumnStateUpdaterMod |
| 82 | + !#py use BalanceCheckMod |
| 83 | + !#py use EcosystemBalanceCheckMod |
| 84 | + !#py use SurfaceAlbedoMod |
| 85 | + !#py use UrbanAlbedoMod |
| 86 | + !#py use VerticalProfileMod |
| 87 | + !#py use glc2lndMod |
| 88 | + !#py use shr_orb_mod_elm |
| 89 | + !#USE_END |
| 90 | + |
| 91 | + !=======================================! |
| 92 | + implicit none |
| 93 | + type(bounds_type) :: bounds_clump, bounds_proc |
| 94 | + integer :: beg=1,fin=10, p, nclumps, nc, step_count |
| 95 | + real*8 :: temp |
| 96 | + integer :: err |
| 97 | +#if _CUDA |
| 98 | + integer(kind=cuda_count_kind) :: heapsize,free1,free2,total |
| 99 | + integer :: istat, val |
| 100 | +#endif |
| 101 | + character(len=50) :: clump_input_char,pproc_input_char |
| 102 | + integer :: clump_input,pproc_input, fc, c, l, fp,g,j |
| 103 | + logical :: found_thawlayer |
| 104 | + integer :: k_frz |
| 105 | + real(r8) :: declin, declinp1 |
| 106 | + real :: startt, stopt |
| 107 | + !========================== Initialize/Allocate variables =======================! |
| 108 | + !First, make sure the right number of inputs have been provided |
| 109 | + IF(COMMAND_ARGUMENT_COUNT() == 1) THEN |
| 110 | + WRITE(*,*)'ONE COMMAND-LINE ARGUMENT DETECTED, Defaulting to 1 site per clump' |
| 111 | + call get_command_argument(1,clump_input_char) |
| 112 | + READ(clump_input_char,*) clump_input |
| 113 | + pproc_input = 1 !1 site per clump |
| 114 | + |
| 115 | + ELSEIF(COMMAND_ARGUMENT_COUNT() == 2) THEN |
| 116 | + call get_command_argument(1,clump_input_char) |
| 117 | + call get_command_argument(2,pproc_input_char) |
| 118 | + READ(clump_input_char,*) clump_input |
| 119 | + READ(pproc_input_char,*) pproc_input |
| 120 | + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| 121 | + END IF |
| 122 | + |
| 123 | + call elm_init(clump_input,pproc_input, dtime_mod, year_curr) |
| 124 | + declin = -0.4030289369547867 |
| 125 | + step_count = 0 |
| 126 | + nclumps = procinfo%nclumps |
| 127 | + print *, "number of clumps", nclumps |
| 128 | + print *, "step:", step_count |
| 129 | + if(step_count == 0 ) then |
| 130 | + print *, "transferring data to GPU" |
| 131 | + call init_proc_clump_info() |
| 132 | + #ifdef _CUDA |
| 133 | + istat = cudaMemGetInfo(free1, total) |
| 134 | + print *, "Free1:",free1 |
| 135 | + #endif |
| 136 | + !NOTE: Moved some update directives from |
| 137 | + ! update_acc_variables() due to ICE |
| 138 | + !$acc update device( & |
| 139 | + !$acc spinup_state & |
| 140 | + !$acc , nyears_ad_carbon_only & |
| 141 | + !$acc , spinup_mortality_factor & |
| 142 | + !$acc , carbon_only & |
| 143 | + !$acc , carbonphosphorus_only & |
| 144 | + !$acc , carbonnitrogen_only & |
| 145 | + !$acc ,use_crop & |
| 146 | + !$acc ,use_snicar_frc & |
| 147 | + !$acc ,use_snicar_ad & |
| 148 | + !$acc ,use_vancouver & |
| 149 | + !$acc ,use_mexicocity & |
| 150 | + !$acc ,use_noio & |
| 151 | + !$acc ,use_var_soil_thick & |
| 152 | + !$acc ,NFIX_PTASE_plant & |
| 153 | + !$acc ,tw_irr & |
| 154 | + !$acc ,use_erosion & |
| 155 | + !$acc ,ero_ccycle & |
| 156 | + !$acc ,anoxia & |
| 157 | + !$acc , glc_do_dynglacier & |
| 158 | + !$acc , all_active & |
| 159 | + !$acc , co2_ppmv & |
| 160 | + !$acc , const_climate_hist & |
| 161 | + !$acc ) |
| 162 | + !$acc update device( & |
| 163 | + !$acc KM_DECOMP_NH4 & |
| 164 | + !$acc ,KM_DECOMP_NO3 & |
| 165 | + !$acc ,KM_DECOMP_P & |
| 166 | + !$acc ,KM_NIT & |
| 167 | + !$acc ,KM_DEN & |
| 168 | + !$acc ,decompmicc_patch_vr(:,:) & |
| 169 | + !$acc ,alpha_nfix(:) & |
| 170 | + !$acc ,alpha_ptase(:) & |
| 171 | + !$acc ,ccost_nfix(:) & |
| 172 | + !$acc ,pcost_nfix(:) & |
| 173 | + !$acc ,ccost_ptase(:) & |
| 174 | + !$acc ,ncost_ptase(:) & |
| 175 | + !$acc ,VMAX_NFIX(:) & |
| 176 | + !$acc ,KM_NFIX(:) & |
| 177 | + !$acc ,VMAX_PTASE(:) & |
| 178 | + !$acc ,KM_PTASE & |
| 179 | + !$acc ,lamda_ptase & |
| 180 | + !$acc ,i_vc(:) & |
| 181 | + !$acc ,s_vc(:) & |
| 182 | + !$acc ,leafcn_obs(:) & |
| 183 | + !$acc ,frootcn_obs(:) & |
| 184 | + !$acc ,livewdcn_obs(:) & |
| 185 | + !$acc ,deadwdcn_obs(:) & |
| 186 | + !$acc ,leafcp_obs(:) & |
| 187 | + !$acc ,frootcp_obs(:) & |
| 188 | + !$acc ,livewdcp_obs(:) & |
| 189 | + !$acc ,deadwdcp_obs(:) & |
| 190 | + !$acc ,leafcn_obs_flex(:,:) & |
| 191 | + !$acc ,frootcn_obs_flex(:,:) & |
| 192 | + !$acc ,livewdcn_obs_flex(:,:) & |
| 193 | + !$acc ,deadwdcn_obs_flex(:,:) & |
| 194 | + !$acc ,leafcp_obs_flex(:,:) & |
| 195 | + !$acc ,frootcp_obs_flex(:,:) & |
| 196 | + !$acc ,livewdcp_obs_flex(:,:) & |
| 197 | + !$acc ,deadwdcp_obs_flex(:,:) & |
| 198 | + !$acc ,fnr(:) & |
| 199 | + !$acc ,act25(:) & |
| 200 | + !$acc ,kcha(:) & |
| 201 | + !$acc ,koha(:) & |
| 202 | + !$acc ,cpha(:) & |
| 203 | + !$acc ,vcmaxha(:) & |
| 204 | + !$acc ,jmaxha(:) & |
| 205 | + !$acc ,tpuha(:) & |
| 206 | + !$acc ,lmrha(:) & |
| 207 | + !$acc ,vcmaxhd(:) & |
| 208 | + !$acc ,jmaxhd(:) & |
| 209 | + !$acc ,tpuhd(:) & |
| 210 | + !$acc ,lmrhd(:) & |
| 211 | + !$acc ,lmrse(:) & |
| 212 | + !$acc ,qe(:) & |
| 213 | + !$acc ,theta_cj(:) & |
| 214 | + !$acc ,bbbopt(:) & |
| 215 | + !$acc ,mbbopt(:) & |
| 216 | + !$acc ,nstor(:) & |
| 217 | + !$acc ,br_xr(:) & |
| 218 | + !$acc ,tc_stress & |
| 219 | + !$acc ,vcmax_np1(:) & |
| 220 | + !$acc ,vcmax_np2(:) & |
| 221 | + !$acc ,vcmax_np3(:) & |
| 222 | + !$acc ,vcmax_np4(:) & |
| 223 | + !$acc ,jmax_np1 & |
| 224 | + !$acc ,jmax_np2 & |
| 225 | + !$acc ,jmax_np3 & |
| 226 | + !$acc ,laimax & |
| 227 | + !$acc ,rsub_top_globalmax & |
| 228 | + !------------- LakeCon ------------------! |
| 229 | + !$acc ,fcrit & |
| 230 | + !$acc ,minz0lake & |
| 231 | + !$acc ,pudz & |
| 232 | + !$acc ,depthcrit & |
| 233 | + !$acc ,mixfact & |
| 234 | + !$acc ,betavis & |
| 235 | + !$acc ,lakepuddling & |
| 236 | + !$acc ,lake_no_ed ) |
| 237 | + |
| 238 | + !!!$acc update device(first_step, nlevgrnd, eccen, obliqr, lambm0, mvelpp ) |
| 239 | + call update_acc_variables() |
| 240 | + |
| 241 | + !Note: copy/paste enter data directives here for FUT. |
| 242 | + ! Will make this automatic in the future |
| 243 | + !#ACC_COPYIN |
| 244 | + !$acc enter data copyin( & |
| 245 | + !$acc ch4_vars , & |
| 246 | + !$acc col_ef , & |
| 247 | + !$acc col_es , & |
| 248 | + !$acc col_pp , & |
| 249 | + !$acc col_wf , & |
| 250 | + !$acc col_ws , & |
| 251 | + !$acc grc_pp , & |
| 252 | + !$acc lakestate_vars , & |
| 253 | + !$acc lun_pp , & |
| 254 | + !$acc soilstate_vars , & |
| 255 | + !$acc solarabs_vars , & |
| 256 | + !$acc top_pp , & |
| 257 | + !$acc veg_ef , & |
| 258 | + !$acc veg_pp & |
| 259 | + !$acc ) |
| 260 | + |
| 261 | + call get_proc_bounds(bounds_proc) |
| 262 | + !$acc enter data copyin(filter(:),gpu_clumps(:), gpu_procinfo, proc_filter, bounds_proc ) |
| 263 | + call setProcFilters(bounds_proc, proc_filter, .false.) |
| 264 | + |
| 265 | +#if _CUDA |
| 266 | + ! Heap Limit may need to be increased for certain routines |
| 267 | + ! if using routine directives with many automatic arrays |
| 268 | + ! should be adjusted based on problem size |
| 269 | + istat = cudaDeviceGetLimit(heapsize, cudaLimitMallocHeapSize) |
| 270 | + print *, "SETTING Heap Limit from", heapsize |
| 271 | + heapsize = 10_8*1024_8*1024_8 |
| 272 | + print *, "TO:",heapsize |
| 273 | + istat = cudaDeviceSetLimit(cudaLimitMallocHeapSize,heapsize) |
| 274 | + istat = cudaMemGetInfo(free1, total) |
| 275 | + print *, "Free1:",free1/1.E+9 |
| 276 | +#endif |
| 277 | + end if |
| 278 | + |
| 279 | + !NOTE: This may be adjusted depending on the timestep data |
| 280 | + ! is output from |
| 281 | + !TODO: make this info apart of input file itself |
| 282 | + |
| 283 | + !$acc enter data copyin( doalb, declinp1, declin ) |
| 284 | + !$acc update device(dtime_mod, dayspyr_mod, & |
| 285 | + !$acc year_curr, mon_curr, day_curr, secs_curr, nstep_mod, thiscalday_mod & |
| 286 | + !$acc , nextsw_cday_mod, end_cd_mod, doalb ) |
| 287 | + |
| 288 | + ! Note: should add these to writeConstants in the future (as arguments?) |
| 289 | + !$acc serial |
| 290 | + declin = -0.4023686267583503 |
| 291 | + declinp1 = -0.4023686267583503 |
| 292 | + !$acc end serial |
| 293 | + |
| 294 | + #ifdef _OPENACC |
| 295 | + #define gpuflag 1 |
| 296 | + #else |
| 297 | + #define gpuflag 0 |
| 298 | + #endif |
| 299 | + |
| 300 | + !NOTE: Put ELM Subroutine call here |
| 301 | + ! Default is currently LakeTemperature for others to reproduce |
| 302 | + ! and gain familiarity with this framework. |
| 303 | + ! SPEL will be updated to auto-insert the all the appropriate |
| 304 | + ! calls for a given FUT in the future |
| 305 | + |
| 306 | + ! This is the "Naive" Implementation |
| 307 | + ! !$acc parallel loop independent gang vector default(present) private(bounds_clump) |
| 308 | + ! do nc=1, nclumps |
| 309 | + ! call get_clump_bounds_gpu(nc, bounds_clump) |
| 310 | + ! ! Set lake temperature |
| 311 | + ! if(filter(nc)%num_lakec > 0 ) then |
| 312 | + ! call LakeTemperature(bounds_clump, filter(nc)%num_lakec, filter(nc)%lakec, & |
| 313 | + ! filter(nc)%num_lakep, filter(nc)%lakep, & |
| 314 | + ! solarabs_vars, soilstate_vars, ch4_vars, & |
| 315 | + ! lakestate_vars) |
| 316 | + ! end if |
| 317 | + ! end do |
| 318 | + |
| 319 | + ! This call should be used if running SPEL with "opt = True" and "add_acc = True" |
| 320 | + ! and all internal loops have been accelerated -- must comment out the above |
| 321 | + |
| 322 | + call cpu_time(startt) |
| 323 | + call LakeTemperature(bounds_proc, & |
| 324 | + proc_filter%num_lakec, proc_filter%lakec, & |
| 325 | + proc_filter%num_lakep, proc_filter%lakep, & |
| 326 | + solarabs_vars, soilstate_vars, ch4_vars, & |
| 327 | + lakestate_vars) |
| 328 | + call cpu_time(stopt) |
| 329 | + print *, "LakeTemperature :: ",(stopt-startt)*1.E+3,"ms" |
| 330 | + call update_vars_LakeTemperature(gpuflag,"END") |
| 331 | + |
| 332 | + #if _CUDA |
| 333 | + istat = cudaMemGetInfo(free1, total) |
| 334 | + print *, "free after kernel:",free1/1.E+9 |
| 335 | + #endif |
| 336 | + |
| 337 | + print *, "done with unit-test execution" |
| 338 | + |
| 339 | +end Program main |
0 commit comments