Skip to content

Commit 27db2ad

Browse files
committed
Merge branch 'jgfouca/gw_port_front' into master (PR E3SM-Project#7804)
Merge 2 for this PR. Forgot to update local branch before merging. [BFB]
2 parents b59f750 + 625937f commit 27db2ad

File tree

2 files changed

+76
-37
lines changed

2 files changed

+76
-37
lines changed

components/eamxx/src/physics/gw/impl/gw_gw_oro_src_impl.hpp

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -58,23 +58,40 @@ void Functions<S,D>::gw_oro_src(
5858
const Real hdsp = 2 * sgh; // Surface streamline displacement height (2*sgh).
5959

6060
Int k = pver-1;
61-
src_level = k;
6261

6362
// Averages over source region.
64-
Real rsrc = pmid(k+1)/(C::Rair*t(k)) * dpm(k); // Density.
63+
Real rsrc = pmid(k)/(C::Rair*t(k)) * dpm(k); // Density.
6564
Real usrc = u(k) * dpm(k); // Zonal wind.
6665
Real vsrc = v(k) * dpm(k); // Meridional wind.
6766
Real nsrc = nm(k)* dpm(k); // B-V frequency.
6867

69-
for (k = pver - 2; k >= pver/2 -1; --k) {
70-
if (hdsp > std::sqrt(zm(k)*zm(k+1))) {
71-
src_level = k;
72-
rsrc = rsrc + pmid(k+1) / (C::Rair*t(k))* dpm(k);
73-
usrc = usrc + u(k) * dpm(k);
74-
vsrc = vsrc + v(k) * dpm(k);
75-
nsrc = nsrc + nm(k)* dpm(k);
76-
}
77-
}
68+
Real rsrc_sum, usrc_sum, vsrc_sum, nsrc_sum;
69+
Kokkos::parallel_reduce(
70+
Kokkos::TeamVectorRange(team, pver/2 - 1, pver-1), [&] (const int k, Real& lrsrc, Real& lusrc, Real& lvsrc, Real& lnsrc) {
71+
if (hdsp > std::sqrt(zm(k)*zm(k+1))) {
72+
lrsrc += pmid(k) / (C::Rair*t(k))* dpm(k);
73+
lusrc += u(k) * dpm(k);
74+
lvsrc += v(k) * dpm(k);
75+
lnsrc += (nm(k)* dpm(k));
76+
}
77+
}, rsrc_sum, usrc_sum, vsrc_sum, nsrc_sum);
78+
79+
Kokkos::parallel_reduce(
80+
Kokkos::TeamVectorRange(team, pver/2 - 1, pver-1), [&] (const int k, Int& lmin) {
81+
if (lmin > pver - 1) {
82+
lmin = pver - 1;
83+
}
84+
if (hdsp > std::sqrt(zm(k)*zm(k+1)) && lmin > k) {
85+
lmin = k;
86+
}
87+
}, Kokkos::Min<Int>(src_level));
88+
89+
team.team_barrier();
90+
91+
rsrc += rsrc_sum;
92+
usrc += usrc_sum;
93+
vsrc += vsrc_sum;
94+
nsrc += nsrc_sum;
7895

7996
// Difference in interface pressure across source region.
8097
const Real dpsrc = pint(pver) - pint(src_level);
@@ -88,16 +105,21 @@ void Functions<S,D>::gw_oro_src(
88105
get_unit_vector(usrc, vsrc, xv, yv, ubi(pver));
89106

90107
// Project the local wind at midpoints onto the source wind.
91-
for (k = 0; k < pver; ++k) {
92-
ubm(k) = dot_2d(u(k), v(k), xv, yv);
93-
}
108+
Kokkos::parallel_for(
109+
Kokkos::TeamVectorRange(team, pver), [&] (const int k) {
110+
ubm(k) = dot_2d(u(k), v(k), xv, yv);
111+
});
112+
113+
team.team_barrier();
94114

95115
// Compute the interface wind projection by averaging the midpoint winds.
96116
// Use the top level wind at the top interface.
97117
ubi(0) = ubm(0);
98118

99119
midpoint_interp(team, ubm, ekat::subview(ubi, Kokkos::pair<int, int>{1, pver}));
100120

121+
team.team_barrier();
122+
101123
// Determine the orographic c=0 source term following McFarlane (1987).
102124
// Set the source top interface index to pver, if the orographic term is
103125
// zero.
@@ -115,18 +137,24 @@ void Functions<S,D>::gw_oro_src(
115137
// Set the phase speeds and wave numbers in the direction of the source
116138
// wind. Set the source stress magnitude (positive only, note that the
117139
// sign of the stress is the same as (c-u)).
118-
for (k = pver; k >= src_level; --k) {
119-
tau(0, k) = tauoro;
120-
}
140+
Kokkos::parallel_for(
141+
Kokkos::TeamVectorRange(team, src_level, pver+1), [&] (const int k) {
142+
tau(pgwv, k) = tauoro;
143+
});
121144

122145
// Allow wind tendencies all the way to the model bottom.
123146
tend_level = pver - 1;
124-
--src_level;
147+
148+
// Adjust to C indexing. Up to this point, src_level was used to index into 0:pver arrays.
149+
Kokkos::single(Kokkos::PerTeam(team), [&] {
150+
--src_level;
151+
});
125152

126153
// No spectrum; phase speed is just 0.
127-
for (k = 0; k < (Int)c.size(); ++k) {
128-
c(k) = 0;
129-
}
154+
Kokkos::parallel_for(
155+
Kokkos::TeamVectorRange(team, c.size()), [&] (const int k) {
156+
c(k) = 0;
157+
});
130158
}
131159

132160
} // namespace gw

components/eamxx/src/physics/gw/tests/gw_gw_oro_src_tests.cpp

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ struct UnitWrap::UnitTest<D>::TestGwOroSrc : public UnitWrap::UnitTest<D>::Base
3636
// Generate random input data
3737
// Alternatively, you can use the baseline_data constructors/initializer lists to hardcode data
3838
for (auto& d : baseline_data) {
39-
d.randomize(engine);
39+
d.randomize(engine, { {d.sgh, {2, 7}} });
4040
}
4141

4242
// Create copies of data for use by test. Needs to happen before read calls so that
@@ -57,41 +57,52 @@ struct UnitWrap::UnitTest<D>::TestGwOroSrc : public UnitWrap::UnitTest<D>::Base
5757

5858
// Get data from test
5959
for (auto& d : test_data) {
60-
gw_oro_src(d);
60+
if (this->m_baseline_action == GENERATE) {
61+
gw_oro_src_f(d);
62+
}
63+
else {
64+
gw_oro_src(d);
65+
}
6166
}
6267

68+
// We need a tolerance since the order of operations is different from f90.
69+
// This tol can be removed once we are no longer using
70+
// fortran to generate baselines.
71+
const auto margin = std::numeric_limits<Real>::epsilon() *
72+
(ekat::is_single_precision<Real>::value ? 1000 : 1);
73+
74+
6375
// Verify BFB results, all data should be in C layout
6476
if (SCREAM_BFB_TESTING && this->m_baseline_action == COMPARE) {
6577
for (Int i = 0; i < num_runs; ++i) {
6678
GwOroSrcData& d_baseline = baseline_data[i];
6779
GwOroSrcData& d_test = test_data[i];
80+
REQUIRE(d_baseline.total(d_baseline.tau) == d_test.total(d_test.tau));
6881
for (Int k = 0; k < d_baseline.total(d_baseline.tau); ++k) {
69-
REQUIRE(d_baseline.total(d_baseline.tau) == d_test.total(d_test.tau));
70-
REQUIRE(d_baseline.tau[k] == d_test.tau[k]);
82+
REQUIRE(d_baseline.tau[k] == Approx(d_test.tau[k]).margin(margin));
7183
}
84+
REQUIRE(d_baseline.total(d_baseline.ubm) == d_test.total(d_test.ubm));
7285
for (Int k = 0; k < d_baseline.total(d_baseline.ubm); ++k) {
73-
REQUIRE(d_baseline.total(d_baseline.ubm) == d_test.total(d_test.ubm));
74-
REQUIRE(d_baseline.ubm[k] == d_test.ubm[k]);
86+
REQUIRE(d_baseline.ubm[k] == Approx(d_test.ubm[k]).margin(margin));
7587
}
88+
REQUIRE(d_baseline.total(d_baseline.ubi) == d_test.total(d_test.ubi));
7689
for (Int k = 0; k < d_baseline.total(d_baseline.ubi); ++k) {
77-
REQUIRE(d_baseline.total(d_baseline.ubi) == d_test.total(d_test.ubi));
78-
REQUIRE(d_baseline.ubi[k] == d_test.ubi[k]);
90+
REQUIRE(d_baseline.ubi[k] == Approx(d_test.ubi[k]).margin(margin));
7991
}
92+
REQUIRE(d_baseline.total(d_baseline.xv) == d_test.total(d_test.xv));
93+
REQUIRE(d_baseline.total(d_baseline.xv) == d_test.total(d_test.yv));
94+
REQUIRE(d_baseline.total(d_baseline.xv) == d_test.total(d_test.src_level));
95+
REQUIRE(d_baseline.total(d_baseline.xv) == d_test.total(d_test.tend_level));
8096
for (Int k = 0; k < d_baseline.total(d_baseline.xv); ++k) {
81-
REQUIRE(d_baseline.total(d_baseline.xv) == d_test.total(d_test.xv));
82-
REQUIRE(d_baseline.xv[k] == d_test.xv[k]);
83-
REQUIRE(d_baseline.total(d_baseline.xv) == d_test.total(d_test.yv));
84-
REQUIRE(d_baseline.yv[k] == d_test.yv[k]);
85-
REQUIRE(d_baseline.total(d_baseline.xv) == d_test.total(d_test.src_level));
97+
REQUIRE(d_baseline.xv[k] == Approx(d_test.xv[k]).margin(margin));
98+
REQUIRE(d_baseline.yv[k] == Approx(d_test.yv[k]).margin(margin));
8699
REQUIRE(d_baseline.src_level[k] == d_test.src_level[k]);
87-
REQUIRE(d_baseline.total(d_baseline.xv) == d_test.total(d_test.tend_level));
88100
REQUIRE(d_baseline.tend_level[k] == d_test.tend_level[k]);
89101
}
102+
REQUIRE(d_baseline.total(d_baseline.c) == d_test.total(d_test.c));
90103
for (Int k = 0; k < d_baseline.total(d_baseline.c); ++k) {
91-
REQUIRE(d_baseline.total(d_baseline.c) == d_test.total(d_test.c));
92104
REQUIRE(d_baseline.c[k] == d_test.c[k]);
93105
}
94-
95106
}
96107
}
97108
else if (this->m_baseline_action == GENERATE) {

0 commit comments

Comments
 (0)