|
| 1 | +#include "cedr_caas.hpp" |
| 2 | +#include "cedr_util.hpp" |
| 3 | +#include "cedr_test_randomized.hpp" |
| 4 | + |
| 5 | +namespace cedr { |
| 6 | +namespace caas { |
| 7 | + |
| 8 | +template <typename ES> |
| 9 | +CAAS<ES>::CAAS (const mpi::Parallel::Ptr& p, const Int nlclcells) |
| 10 | + : p_(p), nlclcells_(nlclcells), nrhomidxs_(0), need_conserve_(false) |
| 11 | +{ |
| 12 | + cedr_throw_if(nlclcells == 0, "CAAS does not support 0 cells on a rank."); |
| 13 | + tracer_decls_ = std::make_shared<std::vector<Decl> >(); |
| 14 | +} |
| 15 | + |
| 16 | +template <typename ES> |
| 17 | +void CAAS<ES>::declare_tracer(int problem_type, const Int& rhomidx) { |
| 18 | + cedr_throw_if( ! (problem_type & ProblemType::shapepreserve), |
| 19 | + "CAAS is a WIP; ! shapepreserve is not supported yet."); |
| 20 | + cedr_throw_if(rhomidx > 0, "rhomidx > 0 is not supported yet."); |
| 21 | + tracer_decls_->push_back(Decl(problem_type, rhomidx)); |
| 22 | + if (problem_type & ProblemType::conserve) |
| 23 | + need_conserve_ = true; |
| 24 | + nrhomidxs_ = std::max(nrhomidxs_, rhomidx+1); |
| 25 | +} |
| 26 | + |
| 27 | +template <typename ES> |
| 28 | +void CAAS<ES>::end_tracer_declarations () { |
| 29 | + cedr_throw_if(tracer_decls_->size() == 0, "#tracers is 0."); |
| 30 | + cedr_throw_if(nrhomidxs_ == 0, "#rhomidxs is 0."); |
| 31 | + probs_ = IntList("CAAS probs", static_cast<Int>(tracer_decls_->size())); |
| 32 | + t2r_ = IntList("CAAS t2r", static_cast<Int>(tracer_decls_->size())); |
| 33 | + for (Int i = 0; i < probs_.extent_int(0); ++i) { |
| 34 | + probs_(i) = (*tracer_decls_)[i].probtype; |
| 35 | + t2r_(i) = (*tracer_decls_)[i].rhomidx; |
| 36 | + } |
| 37 | + tracer_decls_ = nullptr; |
| 38 | + // (rho, Qm, Qm_min, Qm_max, [Qm_prev]) |
| 39 | + const Int e = need_conserve_ ? 1 : 0; |
| 40 | + d_ = RealList("CAAS data", nlclcells_ * ((3+e)*probs_.size() + 1)); |
| 41 | + const auto nslots = 4*probs_.size(); |
| 42 | + // (e'Qm_clip, e'Qm, e'Qm_min, e'Qm_max, [e'Qm_prev]) |
| 43 | + send_ = RealList("CAAS send", nslots); |
| 44 | + recv_ = RealList("CAAS recv", nslots); |
| 45 | +} |
| 46 | + |
| 47 | +template <typename ES> |
| 48 | +int CAAS<ES>::get_problem_type (const Int& tracer_idx) const { |
| 49 | + cedr_assert(tracer_idx >= 0 && tracer_idx < probs_.extent_int(0)); |
| 50 | + return probs_[tracer_idx]; |
| 51 | +} |
| 52 | + |
| 53 | +template <typename ES> |
| 54 | +Int CAAS<ES>::get_num_tracers () const { |
| 55 | + return probs_.extent_int(0); |
| 56 | +} |
| 57 | + |
| 58 | +template <typename ES> |
| 59 | +void CAAS<ES>::reduce_locally () { |
| 60 | + const Int nt = probs_.size(); |
| 61 | + Int k = 0; |
| 62 | + Int os = nlclcells_; |
| 63 | + // Qm_clip |
| 64 | + for ( ; k < nt; ++k) { |
| 65 | + Real Qm_sum = 0, Qm_clip_sum = 0; |
| 66 | + for (Int i = 0; i < nlclcells_; ++i) { |
| 67 | + const Real Qm = d_(os+i); |
| 68 | + Qm_sum += (probs_(k) & ProblemType::conserve ? |
| 69 | + d_(os + nlclcells_*3*nt + i) /* Qm_prev */ : |
| 70 | + Qm); |
| 71 | + const Real Qm_min = d_(os + nlclcells_* nt + i); |
| 72 | + const Real Qm_max = d_(os + nlclcells_*2*nt + i); |
| 73 | + const Real Qm_clip = cedr::impl::min(Qm_max, cedr::impl::max(Qm_min, Qm)); |
| 74 | + Qm_clip_sum += Qm_clip; |
| 75 | + d_(os+i) = Qm_clip; |
| 76 | + } |
| 77 | + send_( k) = Qm_clip_sum; |
| 78 | + send_(nt + k) = Qm_sum; |
| 79 | + os += nlclcells_; |
| 80 | + } |
| 81 | + k += nt; |
| 82 | + // Qm_min, Qm_max |
| 83 | + for ( ; k < 4*nt; ++k) { |
| 84 | + Real accum = 0; |
| 85 | + for (Int i = 0; i < nlclcells_; ++i) |
| 86 | + accum += d_(os+i); |
| 87 | + send_(k) = accum; |
| 88 | + os += nlclcells_; |
| 89 | + } |
| 90 | +} |
| 91 | + |
| 92 | +template <typename ES> |
| 93 | +void CAAS<ES>::reduce_globally () { |
| 94 | + int err = mpi::all_reduce(*p_, send_.data(), recv_.data(), send_.size(), MPI_SUM); |
| 95 | + cedr_throw_if(err != MPI_SUCCESS, |
| 96 | + "CAAS::reduce_globally MPI_Allreduce returned " << err); |
| 97 | +} |
| 98 | + |
| 99 | +template <typename ES> |
| 100 | +void CAAS<ES>::finish_locally () { |
| 101 | + const Int nt = probs_.size(); |
| 102 | + Int os = nlclcells_; |
| 103 | + for (Int k = 0; k < nt; ++k) { |
| 104 | + const Real Qm_clip_sum = recv_( k); |
| 105 | + const Real Qm_sum = recv_(nt + k); |
| 106 | + const Real m = Qm_sum - Qm_clip_sum; |
| 107 | + if (m < 0) { |
| 108 | + const Real Qm_min_sum = recv_(2*nt + k); |
| 109 | + Real fac = Qm_clip_sum - Qm_min_sum; |
| 110 | + if (fac > 0) { |
| 111 | + fac = m/fac; |
| 112 | + for (Int i = 0; i < nlclcells_; ++i) { |
| 113 | + const Real Qm_min = d_(os + nlclcells_* nt + i); |
| 114 | + Real& Qm = d_(os+i); |
| 115 | + Qm += fac*(Qm - Qm_min); |
| 116 | + } |
| 117 | + } |
| 118 | + } else if (m > 0) { |
| 119 | + const Real Qm_max_sum = recv_(3*nt + k); |
| 120 | + Real fac = Qm_max_sum - Qm_clip_sum; |
| 121 | + if (fac > 0) { |
| 122 | + fac = m/fac; |
| 123 | + for (Int i = 0; i < nlclcells_; ++i) { |
| 124 | + const Real Qm_max = d_(os + nlclcells_*2*nt + i); |
| 125 | + Real& Qm = d_(os+i); |
| 126 | + Qm += fac*(Qm_max - Qm); |
| 127 | + } |
| 128 | + } |
| 129 | + } |
| 130 | + os += nlclcells_; |
| 131 | + } |
| 132 | +} |
| 133 | + |
| 134 | +template <typename ES> |
| 135 | +void CAAS<ES>::run () { |
| 136 | + reduce_locally(); |
| 137 | + reduce_globally(); |
| 138 | + finish_locally(); |
| 139 | +} |
| 140 | + |
| 141 | +namespace test { |
| 142 | +struct TestCAAS : public cedr::test::TestRandomized { |
| 143 | + typedef CAAS<Kokkos::DefaultExecutionSpace> CAAST; |
| 144 | + |
| 145 | + TestCAAS (const mpi::Parallel::Ptr& p, const Int& ncells, const bool verbose) |
| 146 | + : TestRandomized("CAAS", p, ncells, verbose), |
| 147 | + p_(p) |
| 148 | + { |
| 149 | + const auto np = p->size(), rank = p->rank(); |
| 150 | + nlclcells_ = ncells / np; |
| 151 | + const Int todo = ncells - nlclcells_ * np; |
| 152 | + if (rank < todo) ++nlclcells_; |
| 153 | + caas_ = std::make_shared<CAAST>(p, nlclcells_); |
| 154 | + init(); |
| 155 | + } |
| 156 | + |
| 157 | + CDR& get_cdr () override { return *caas_; } |
| 158 | + |
| 159 | + void init_numbering () override { |
| 160 | + const auto np = p_->size(), rank = p_->rank(); |
| 161 | + Int start = 0; |
| 162 | + for (Int lrank = 0; lrank < rank; ++lrank) |
| 163 | + start += get_nllclcells(ncells_, np, lrank); |
| 164 | + gcis_.resize(nlclcells_); |
| 165 | + for (Int i = 0; i < nlclcells_; ++i) |
| 166 | + gcis_[i] = start + i; |
| 167 | + } |
| 168 | + |
| 169 | + void init_tracers () override { |
| 170 | + // CAAS doesn't yet support everything, so remove a bunch of the tracers. |
| 171 | + std::vector<TestRandomized::Tracer> tracers; |
| 172 | + Int idx = 0; |
| 173 | + for (auto& t : tracers_) { |
| 174 | + if ( ! (t.problem_type & ProblemType::shapepreserve) || |
| 175 | + ! t.local_should_hold) |
| 176 | + continue; |
| 177 | + t.idx = idx++; |
| 178 | + tracers.push_back(t); |
| 179 | + caas_->declare_tracer(t.problem_type, 0); |
| 180 | + } |
| 181 | + tracers_ = tracers; |
| 182 | + caas_->end_tracer_declarations(); |
| 183 | + } |
| 184 | + |
| 185 | + void run_impl (const Int trial) override { |
| 186 | + caas_->run(); |
| 187 | + } |
| 188 | + |
| 189 | +private: |
| 190 | + mpi::Parallel::Ptr p_; |
| 191 | + Int nlclcells_; |
| 192 | + CAAST::Ptr caas_; |
| 193 | + |
| 194 | + static Int get_nllclcells (const Int& ncells, const Int& np, const Int& rank) { |
| 195 | + Int nlclcells = ncells / np; |
| 196 | + const Int todo = ncells - nlclcells * np; |
| 197 | + if (rank < todo) ++nlclcells; |
| 198 | + return nlclcells; |
| 199 | + } |
| 200 | +}; |
| 201 | + |
| 202 | +Int unittest (const mpi::Parallel::Ptr& p) { |
| 203 | + const auto np = p->size(); |
| 204 | + Int nerr = 0; |
| 205 | + for (Int nlclcells : {1, 2, 4, 11}) { |
| 206 | + Long ncells = np*nlclcells; |
| 207 | + if (ncells > np) ncells -= np/2; |
| 208 | + nerr += TestCAAS(p, ncells, false).run(1, false); |
| 209 | + } |
| 210 | + return nerr; |
| 211 | +} |
| 212 | +} // namespace test |
| 213 | +} // namespace caas |
| 214 | +} // namespace cedr |
0 commit comments