Skip to content

Commit f2eb368

Browse files
authored
Merge pull request #4 from E3SM-Project/develop
Merge develop into master for intial release.
2 parents 0a960bb + bfb3e3d commit f2eb368

37 files changed

+6775
-1
lines changed

CMakeLists.txt

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
cmake_minimum_required (VERSION 3.5)
2+
3+
project (compose CXX)
4+
set (CMAKE_CXX_STANDARD 11)
5+
6+
function (prc var)
7+
message ("${var}: ${${var}}")
8+
endfunction ()
9+
10+
find_package (MPI REQUIRED)
11+
12+
if (Kokkos_DIR)
13+
include (${Kokkos_DIR}/kokkos.cmake)
14+
set (Kokkos_INCLUDE ${Kokkos_DIR}/include)
15+
else ()
16+
message (FATAL_ERROR "COMPOSE requires Kokkos_DIR")
17+
endif ()
18+
19+
set (SOURCES
20+
cedr/cedr_caas.cpp
21+
cedr/cedr_local.cpp
22+
cedr/cedr_mpi.cpp
23+
cedr/cedr_qlt.cpp
24+
cedr/cedr_test.cpp
25+
cedr/cedr_test_1d_transport.cpp
26+
cedr/cedr_test_randomized.cpp
27+
cedr/cedr_util.cpp)
28+
29+
set (HEADERS
30+
cedr/cedr.hpp
31+
cedr/cedr_caas.hpp
32+
cedr/cedr_caas_inl.hpp
33+
cedr/cedr_cdr.hpp
34+
cedr/cedr_kokkos.hpp
35+
cedr/cedr_local.hpp
36+
cedr/cedr_local_inl.hpp
37+
cedr/cedr_mpi.hpp
38+
cedr/cedr_mpi_inl.hpp
39+
cedr/cedr_qlt.hpp
40+
cedr/cedr_qlt_inl.hpp
41+
cedr/cedr_test.hpp
42+
cedr/cedr_test_randomized.hpp
43+
cedr/cedr_util.hpp
44+
siqk/siqk.hpp
45+
siqk/siqk_defs.hpp
46+
siqk/siqk_geometry.hpp
47+
siqk/siqk_intersect.hpp
48+
siqk/siqk_quadrature.hpp
49+
siqk/siqk_search.hpp
50+
siqk/siqk_sqr.hpp)
51+
52+
if (NOT COMPOSE_TEST_MPIRUN)
53+
set (COMPOSE_TEST_MPIRUN mpirun)
54+
endif ()
55+
if (NOT COMPOSE_TEST_NRANK)
56+
set (COMPOSE_TEST_NRANK 8)
57+
endif ()
58+
59+
set (COMPOSE_COMPILE_FLAGS "${MPI_COMPILE_FLAGS} ${KOKKOS_CXXFLAGS} ${CMAKE_CXX_FLAGS}")
60+
set (COMPOSE_LINK_FLAGS "${MPI_LINK_FLAGS} ${KOKKOS_LDFLAGS}")
61+
set (COMPOSE_INCLUDES "${Kokkos_INCLUDE}")
62+
set (COMPOSE_LIBRARIES ${MPI_LIBRARIES} ${KOKKOS_LIBS})
63+
64+
prc(MPI_COMPILE_FLAGS)
65+
prc(MPI_LINK_FLAGS)
66+
prc(MPI_LIBRARIES)
67+
add_library (${PROJECT_NAME} ${SOURCES})
68+
set_target_properties (${PROJECT_NAME} PROPERTIES
69+
COMPILE_FLAGS ${COMPOSE_COMPILE_FLAGS}
70+
LINK_FLAGS ${COMPOSE_LINK_FLAGS})
71+
target_include_directories (${PROJECT_NAME} PUBLIC cedr siqk)
72+
target_include_directories (${PROJECT_NAME} PRIVATE siqk cedr)
73+
target_include_directories (${PROJECT_NAME} PUBLIC ${COMPOSE_INCLUDES})
74+
target_link_libraries (${PROJECT_NAME} ${COMPOSE_LIBRARIES})
75+
76+
install (TARGETS ${PROJECT_NAME} ARCHIVE DESTINATION lib)
77+
install (FILES ${HEADERS} DESTINATION include/compose)
78+
79+
enable_testing ()
80+
add_subdirectory(siqk)
81+
add_subdirectory(cedr)

README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,22 @@
11
# COMPOSE
2-
Compact Multi-moment Performance-Portable Semi-Lagrangian methods for non-hydrostatic dynamics
2+
Compact Multi-moment Performance-Portable Semi-Lagrangian methods
3+
4+
COMPOSE provides libraries for semi-Lagrangian transport and, together or
5+
separately, property preservation.
6+
7+
CEDR: Communication-Efficient Constrained Density Reconstructors.
8+
SIQK: Sphereical Polygon Intersection and Quadrature.
9+
10+
First, install Kokkos:
11+
https://github.com/kokkos/kokkos
12+
For example, in a typical environment using OpenMP, a simple build line is:
13+
./kokkos/generate_makefile.bash --with-serial --with-openmp --prefix=/path/to/my/libs --compiler=g++
14+
make -j8 install
15+
16+
Second, configure, build, and test COMPOSE:
17+
cmake \
18+
-D Kokkos_DIR=/path/to/my/kokkos/install \
19+
-D CMAKE_INSTALL_PREFIX=/path/to/my/compose/install \
20+
/path/to/compose/repo
21+
make -j8
22+
ctest

cedr/CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
add_executable (cedr_test cedr_test.cpp)
2+
set_target_properties (cedr_test PROPERTIES
3+
COMPILE_FLAGS ${COMPOSE_COMPILE_FLAGS}
4+
LINK_FLAGS ${COMPOSE_LINK_FLAGS})
5+
6+
target_include_directories (cedr_test PRIVATE ${COMPOSE_INCLUDES})
7+
target_link_libraries (cedr_test ${PROJECT_NAME} ${COMPOSE_LIBRARIES})
8+
9+
add_test (NAME cedr-test-unit
10+
COMMAND $<TARGET_FILE:cedr_test> -t)
11+
add_test (NAME cedr-test-unit-mpi
12+
COMMAND ${COMPOSE_TEST_MPIRUN} -np ${COMPOSE_TEST_NRANK}
13+
$<TARGET_FILE:cedr_test> -t --proc-random -nc 111 -nt 11)
14+
add_test (NAME cedr-test-t1d
15+
COMMAND $<TARGET_FILE:cedr_test> -t -t1d -nc 111)

cedr/cedr.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#ifndef INCLUDE_CEDR_HPP
2+
#define INCLUDE_CEDR_HPP
3+
4+
#include "cedr_kokkos.hpp"
5+
6+
// Communication-Efficient Constrained Density Reconstructors
7+
namespace cedr {
8+
typedef int Int;
9+
typedef long int Long;
10+
typedef std::size_t Size;
11+
typedef double Real;
12+
13+
// CDRs in general implement
14+
// * tracer mass, Qm, conservation;
15+
// * mixing ratio, q, shape preservation, either local bound preservation or
16+
// dynamic range preservation; and
17+
// * tracer consistency, which follows from dynamic range preservation or
18+
// stronger (including local bound preservation) with rhom coming from the
19+
// dynamics.
20+
//
21+
// One can solve a subset of these.
22+
// If !conserve, then the CDR does not alter the tracer mass, but it does not
23+
// correct for any failure in mass conservation in the field given to it.
24+
// If consistent but !shapepreserve, the the CDR solves the dynamic range
25+
// preservation problem rather than the local bound preservation problem.
26+
struct ProblemType {
27+
enum : Int { conserve = 1, shapepreserve = 1 << 1, consistent = 1 << 2 };
28+
};
29+
}
30+
31+
#endif

cedr/cedr_caas.cpp

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
#include "cedr_caas.hpp"
2+
#include "cedr_util.hpp"
3+
#include "cedr_test_randomized.hpp"
4+
5+
namespace cedr {
6+
namespace caas {
7+
8+
template <typename ES>
9+
CAAS<ES>::CAAS (const mpi::Parallel::Ptr& p, const Int nlclcells)
10+
: p_(p), nlclcells_(nlclcells), nrhomidxs_(0), need_conserve_(false)
11+
{
12+
cedr_throw_if(nlclcells == 0, "CAAS does not support 0 cells on a rank.");
13+
tracer_decls_ = std::make_shared<std::vector<Decl> >();
14+
}
15+
16+
template <typename ES>
17+
void CAAS<ES>::declare_tracer(int problem_type, const Int& rhomidx) {
18+
cedr_throw_if( ! (problem_type & ProblemType::shapepreserve),
19+
"CAAS is a WIP; ! shapepreserve is not supported yet.");
20+
cedr_throw_if(rhomidx > 0, "rhomidx > 0 is not supported yet.");
21+
tracer_decls_->push_back(Decl(problem_type, rhomidx));
22+
if (problem_type & ProblemType::conserve)
23+
need_conserve_ = true;
24+
nrhomidxs_ = std::max(nrhomidxs_, rhomidx+1);
25+
}
26+
27+
template <typename ES>
28+
void CAAS<ES>::end_tracer_declarations () {
29+
cedr_throw_if(tracer_decls_->size() == 0, "#tracers is 0.");
30+
cedr_throw_if(nrhomidxs_ == 0, "#rhomidxs is 0.");
31+
probs_ = IntList("CAAS probs", static_cast<Int>(tracer_decls_->size()));
32+
t2r_ = IntList("CAAS t2r", static_cast<Int>(tracer_decls_->size()));
33+
for (Int i = 0; i < probs_.extent_int(0); ++i) {
34+
probs_(i) = (*tracer_decls_)[i].probtype;
35+
t2r_(i) = (*tracer_decls_)[i].rhomidx;
36+
}
37+
tracer_decls_ = nullptr;
38+
// (rho, Qm, Qm_min, Qm_max, [Qm_prev])
39+
const Int e = need_conserve_ ? 1 : 0;
40+
d_ = RealList("CAAS data", nlclcells_ * ((3+e)*probs_.size() + 1));
41+
const auto nslots = 4*probs_.size();
42+
// (e'Qm_clip, e'Qm, e'Qm_min, e'Qm_max, [e'Qm_prev])
43+
send_ = RealList("CAAS send", nslots);
44+
recv_ = RealList("CAAS recv", nslots);
45+
}
46+
47+
template <typename ES>
48+
int CAAS<ES>::get_problem_type (const Int& tracer_idx) const {
49+
cedr_assert(tracer_idx >= 0 && tracer_idx < probs_.extent_int(0));
50+
return probs_[tracer_idx];
51+
}
52+
53+
template <typename ES>
54+
Int CAAS<ES>::get_num_tracers () const {
55+
return probs_.extent_int(0);
56+
}
57+
58+
template <typename ES>
59+
void CAAS<ES>::reduce_locally () {
60+
const Int nt = probs_.size();
61+
Int k = 0;
62+
Int os = nlclcells_;
63+
// Qm_clip
64+
for ( ; k < nt; ++k) {
65+
Real Qm_sum = 0, Qm_clip_sum = 0;
66+
for (Int i = 0; i < nlclcells_; ++i) {
67+
const Real Qm = d_(os+i);
68+
Qm_sum += (probs_(k) & ProblemType::conserve ?
69+
d_(os + nlclcells_*3*nt + i) /* Qm_prev */ :
70+
Qm);
71+
const Real Qm_min = d_(os + nlclcells_* nt + i);
72+
const Real Qm_max = d_(os + nlclcells_*2*nt + i);
73+
const Real Qm_clip = cedr::impl::min(Qm_max, cedr::impl::max(Qm_min, Qm));
74+
Qm_clip_sum += Qm_clip;
75+
d_(os+i) = Qm_clip;
76+
}
77+
send_( k) = Qm_clip_sum;
78+
send_(nt + k) = Qm_sum;
79+
os += nlclcells_;
80+
}
81+
k += nt;
82+
// Qm_min, Qm_max
83+
for ( ; k < 4*nt; ++k) {
84+
Real accum = 0;
85+
for (Int i = 0; i < nlclcells_; ++i)
86+
accum += d_(os+i);
87+
send_(k) = accum;
88+
os += nlclcells_;
89+
}
90+
}
91+
92+
template <typename ES>
93+
void CAAS<ES>::reduce_globally () {
94+
int err = mpi::all_reduce(*p_, send_.data(), recv_.data(), send_.size(), MPI_SUM);
95+
cedr_throw_if(err != MPI_SUCCESS,
96+
"CAAS::reduce_globally MPI_Allreduce returned " << err);
97+
}
98+
99+
template <typename ES>
100+
void CAAS<ES>::finish_locally () {
101+
const Int nt = probs_.size();
102+
Int os = nlclcells_;
103+
for (Int k = 0; k < nt; ++k) {
104+
const Real Qm_clip_sum = recv_( k);
105+
const Real Qm_sum = recv_(nt + k);
106+
const Real m = Qm_sum - Qm_clip_sum;
107+
if (m < 0) {
108+
const Real Qm_min_sum = recv_(2*nt + k);
109+
Real fac = Qm_clip_sum - Qm_min_sum;
110+
if (fac > 0) {
111+
fac = m/fac;
112+
for (Int i = 0; i < nlclcells_; ++i) {
113+
const Real Qm_min = d_(os + nlclcells_* nt + i);
114+
Real& Qm = d_(os+i);
115+
Qm += fac*(Qm - Qm_min);
116+
}
117+
}
118+
} else if (m > 0) {
119+
const Real Qm_max_sum = recv_(3*nt + k);
120+
Real fac = Qm_max_sum - Qm_clip_sum;
121+
if (fac > 0) {
122+
fac = m/fac;
123+
for (Int i = 0; i < nlclcells_; ++i) {
124+
const Real Qm_max = d_(os + nlclcells_*2*nt + i);
125+
Real& Qm = d_(os+i);
126+
Qm += fac*(Qm_max - Qm);
127+
}
128+
}
129+
}
130+
os += nlclcells_;
131+
}
132+
}
133+
134+
template <typename ES>
135+
void CAAS<ES>::run () {
136+
reduce_locally();
137+
reduce_globally();
138+
finish_locally();
139+
}
140+
141+
namespace test {
142+
struct TestCAAS : public cedr::test::TestRandomized {
143+
typedef CAAS<Kokkos::DefaultExecutionSpace> CAAST;
144+
145+
TestCAAS (const mpi::Parallel::Ptr& p, const Int& ncells, const bool verbose)
146+
: TestRandomized("CAAS", p, ncells, verbose),
147+
p_(p)
148+
{
149+
const auto np = p->size(), rank = p->rank();
150+
nlclcells_ = ncells / np;
151+
const Int todo = ncells - nlclcells_ * np;
152+
if (rank < todo) ++nlclcells_;
153+
caas_ = std::make_shared<CAAST>(p, nlclcells_);
154+
init();
155+
}
156+
157+
CDR& get_cdr () override { return *caas_; }
158+
159+
void init_numbering () override {
160+
const auto np = p_->size(), rank = p_->rank();
161+
Int start = 0;
162+
for (Int lrank = 0; lrank < rank; ++lrank)
163+
start += get_nllclcells(ncells_, np, lrank);
164+
gcis_.resize(nlclcells_);
165+
for (Int i = 0; i < nlclcells_; ++i)
166+
gcis_[i] = start + i;
167+
}
168+
169+
void init_tracers () override {
170+
// CAAS doesn't yet support everything, so remove a bunch of the tracers.
171+
std::vector<TestRandomized::Tracer> tracers;
172+
Int idx = 0;
173+
for (auto& t : tracers_) {
174+
if ( ! (t.problem_type & ProblemType::shapepreserve) ||
175+
! t.local_should_hold)
176+
continue;
177+
t.idx = idx++;
178+
tracers.push_back(t);
179+
caas_->declare_tracer(t.problem_type, 0);
180+
}
181+
tracers_ = tracers;
182+
caas_->end_tracer_declarations();
183+
}
184+
185+
void run_impl (const Int trial) override {
186+
caas_->run();
187+
}
188+
189+
private:
190+
mpi::Parallel::Ptr p_;
191+
Int nlclcells_;
192+
CAAST::Ptr caas_;
193+
194+
static Int get_nllclcells (const Int& ncells, const Int& np, const Int& rank) {
195+
Int nlclcells = ncells / np;
196+
const Int todo = ncells - nlclcells * np;
197+
if (rank < todo) ++nlclcells;
198+
return nlclcells;
199+
}
200+
};
201+
202+
Int unittest (const mpi::Parallel::Ptr& p) {
203+
const auto np = p->size();
204+
Int nerr = 0;
205+
for (Int nlclcells : {1, 2, 4, 11}) {
206+
Long ncells = np*nlclcells;
207+
if (ncells > np) ncells -= np/2;
208+
nerr += TestCAAS(p, ncells, false).run(1, false);
209+
}
210+
return nerr;
211+
}
212+
} // namespace test
213+
} // namespace caas
214+
} // namespace cedr

0 commit comments

Comments
 (0)