Skip to content

Commit d76b199

Browse files
committed
Implement hierarchical unpacking
1 parent 1455ce4 commit d76b199

File tree

7 files changed

+793
-146
lines changed

7 files changed

+793
-146
lines changed

packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4747,9 +4747,7 @@ namespace Tpetra {
47474747
// Read parameters from the input ParameterList.
47484748
//
47494749
{
4750-
Details::ProfilingRegion region(
4751-
"Tpetra::CrsMatrix::fillCompete",
4752-
"ParameterList");
4750+
Details::ProfilingRegion region_fc("Tpetra::CrsMatrix::fillCompete", "ParameterList");
47534751

47544752
// If true, the caller promises that no process did nonlocal
47554753
// changes since the last call to fillComplete.
@@ -4798,9 +4796,7 @@ namespace Tpetra {
47984796
}
47994797
}
48004798
if (this->isStaticGraph ()) {
4801-
Details::ProfilingRegion region(
4802-
"Tpetra::CrsMatrix::fillCompete",
4803-
"isStaticGraph");
4799+
Details::ProfilingRegion region_isg("Tpetra::CrsMatrix::fillCompete", "isStaticGraph");
48044800
// FIXME (mfh 14 Nov 2016) In order to fix #843, I enable the
48054801
// checks below only in debug mode. It would be nicer to do a
48064802
// local check, then propagate the error state in a deferred
@@ -4850,9 +4846,7 @@ namespace Tpetra {
48504846
this->fillLocalMatrix (params);
48514847
}
48524848
else {
4853-
Details::ProfilingRegion region(
4854-
"Tpetra::CrsMatrix::fillCompete",
4855-
"isNotStaticGraph");
4849+
Details::ProfilingRegion region_insg("Tpetra::CrsMatrix::fillCompete", "isNotStaticGraph");
48564850
// Set the graph's domain and range Maps. This will clear the
48574851
// Import if the domain Map has changed (is a different
48584852
// pointer), and the Export if the range Map has changed (is a
@@ -4906,9 +4900,9 @@ namespace Tpetra {
49064900
}
49074901

49084902
{
4909-
Details::ProfilingRegion region(
4910-
"Tpetra::CrsMatrix::fillCompete",
4911-
"callComputeGlobalConstamnts");
4903+
Details::ProfilingRegion region_ccgc(
4904+
"Tpetra::CrsMatrix::fillCompete", "callComputeGlobalConstamnts"
4905+
);
49124906
const bool callComputeGlobalConstants = params.get () == nullptr ||
49134907
params->get ("compute global constants", true);
49144908
if (callComputeGlobalConstants) {
@@ -4920,9 +4914,9 @@ namespace Tpetra {
49204914

49214915
this->fillComplete_ = true; // Now we're fill complete!
49224916
{
4923-
Details::ProfilingRegion region(
4924-
"Tpetra::CrsMatrix::fillCompete",
4925-
"checkInternalState");
4917+
Details::ProfilingRegion region_cis(
4918+
"Tpetra::CrsMatrix::fillCompete", "checkInternalState"
4919+
);
49264920
this->checkInternalState ();
49274921
}
49284922
}

packages/tpetra/core/src/Tpetra_Details_Behavior.cpp

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,16 @@ namespace { // (anonymous)
189189
else {
190190
// This could throw invalid_argument or out_of_range.
191191
// Go ahead and let it do so.
192-
const long long val = std::stoll(stringToUpper(varVal));
193-
TEUCHOS_TEST_FOR_EXCEPTION
194-
(val < static_cast<long long>(0), std::out_of_range,
195-
prefix << "Environment variable \""
196-
<< environmentVariableName << "\" is supposed to be a size, "
197-
"but it has a negative integer value " << val << ".");
192+
long long val = std::stoll(stringToUpper(varVal));
193+
if (val < static_cast<long long>(0)) {
194+
// A negative value means the user has requested that the threshold be lifted.
195+
return std::numeric_limits<size_t>::max();
196+
}
197+
// TEUCHOS_TEST_FOR_EXCEPTION
198+
// (val < static_cast<long long>(0), std::out_of_range,
199+
// prefix << "Environment variable \""
200+
// << environmentVariableName << "\" is supposed to be a size, "
201+
// "but it has a negative integer value " << val << ".");
198202
if (sizeof(long long) > sizeof(size_t)) {
199203
// It's hard to test this code, but I want to try writing it
200204
// at least, in case we ever have to run on 32-bit machines or
@@ -284,6 +288,10 @@ namespace { // (anonymous)
284288
#endif // TPETRA_ASSUME_CUDA_AWARE_MPI
285289
}
286290

291+
constexpr bool hierarchicalUnpackDefault () {
292+
return true;
293+
}
294+
287295
} // namespace (anonymous)
288296

289297
bool Behavior::debug ()
@@ -459,6 +467,28 @@ bool Behavior::timing (const char name[])
459467
envVarName,
460468
defaultValue);
461469
}
470+
471+
void Behavior::enable_timing() {
472+
BehaviorDetails::timingDisabled_ = false;
473+
}
474+
475+
void Behavior::disable_timing() {
476+
BehaviorDetails::timingDisabled_ = true;
477+
}
478+
479+
bool Behavior::hierarchicalUnpack ()
480+
{
481+
constexpr char envVarName[] = "TPETRA_HIERARCHICAL_UNPACK";
482+
constexpr bool defaultValue = hierarchicalUnpackDefault();
483+
484+
static bool value_ = defaultValue;
485+
static bool initialized_ = false;
486+
return idempotentlyGetEnvironmentVariableAsBool (value_,
487+
initialized_,
488+
envVarName,
489+
defaultValue);
490+
}
491+
462492
} // namespace Details
463493
} // namespace Tpetra
464494

packages/tpetra/core/src/Tpetra_Details_Behavior.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ class Behavior {
168168
/// "CrsGraph::insertLocalIndices".
169169
static bool timing (const char name[]);
170170

171+
/// \brief Disable timing, programmatically
172+
static void disable_timing();
173+
174+
/// \brief Enable timing, programmatically
175+
static void enable_timing();
176+
171177
/// \brief Whether to assume that MPI is CUDA aware.
172178
///
173179
/// An MPI implementation is "CUDA aware" if it can accept CUDA
@@ -216,13 +222,16 @@ class Behavior {
216222

217223
/// \brief the threshold for transitioning from device to host
218224
///
219-
/// If the number of elements in the multivector does not exceed this
225+
/// If the number of elements in the multivector does not exceed this
220226
/// threshold and the data is on host, then run the calculation on
221227
/// host. Otherwise, run on device.
222228
/// By default this is 10000, but may be altered by the environment
223229
/// variable TPETRA_VECTOR_DEVICE_THRESHOLD
224230
static size_t multivectorKernelLocationThreshold ();
225231

232+
/// \brief Unpack rows of a matrix using hierarchical unpacking
233+
static bool hierarchicalUnpack ();
234+
226235
/// \brief Use Teuchos::Timer in Tpetra::ProfilingRegion
227236
///
228237
/// This is disabled by default. You may control this at run time via the

packages/tpetra/core/src/Tpetra_Details_packCrsMatrix_def.hpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -935,15 +935,14 @@ packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
935935

936936
template<typename ST, typename LO, typename GO, typename NT>
937937
void
938-
packCrsMatrixNew (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
939-
Kokkos::DualView<char*,
940-
typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
941-
const Kokkos::DualView<size_t*,
942-
typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
943-
const Kokkos::DualView<const LO*,
944-
typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
945-
size_t& constantNumPackets,
946-
Distributor& distor)
938+
packCrsMatrixNew(
939+
const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
940+
Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
941+
const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
942+
const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
943+
size_t& constantNumPackets,
944+
Distributor& distor
945+
)
947946
{
948947
using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
949948
using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;

0 commit comments

Comments
 (0)