Skip to content

Commit 2ea8fcc

Browse files
committed
#5: LB: abstract out information propagation calls
1 parent 9ba06ce commit 2ea8fcc

File tree

4 files changed

+31
-31
lines changed

4 files changed

+31
-31
lines changed

examples/test_example.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,6 @@ int main(int argc, char** argv) {
132132
std::make_unique<vt_lb::model::PhaseData>(phase_data)
133133
);
134134

135-
while (comm.poll()) {
136-
}
137-
138-
printf("out of poll\n");
139-
140135
comm.finalize();
141136
return 0;
142137
}

src/vt-lb/algo/temperedlb/cluster_summarizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ struct TaskClusterSummaryInfo {
7373
model::BytesType cluster_footprint = 0;
7474

7575
template <typename SerializerT>
76-
void serializer(SerializerT& s) {
76+
void serialize(SerializerT& s) {
7777
s | cluster_id;
7878
s | num_tasks_;
7979
s | cluster_load;

src/vt-lb/algo/temperedlb/symmetrize_comm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
// *****************************************************************************
4141
//@HEADER
4242
*/
43+
4344
#if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_SYMMETRIZE_COMM_H
4445
#define INCLUDED_VT_LB_ALGO_TEMPEREDLB_SYMMETRIZE_COMM_H
4546

src/vt-lb/algo/temperedlb/temperedlb.h

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,16 @@ struct InformationPropagation {
7979
* @param deterministic Whether to use deterministic selection
8080
*
8181
*/
82-
InformationPropagation(CommT& comm, int f, int k_max, bool deterministic, int seed)
82+
InformationPropagation(CommT& comm, Configuration const& config)
8383
: comm_(comm.clone()), // collective operation
84-
f_(f),
85-
k_max_(k_max),
86-
deterministic_(deterministic)
84+
f_(config.f_),
85+
k_max_(config.k_max_),
86+
deterministic_(config.deterministic_)
8787
{
8888
handle_ = comm_.template registerInstanceCollective<ThisType>(this);
8989

9090
if (deterministic_) {
91-
gen_select_.seed(seed + comm_.getRank());
91+
gen_select_.seed(config.seed_ + comm_.getRank());
9292
}
9393
}
9494

@@ -180,7 +180,7 @@ struct InformationPropagation {
180180
};
181181

182182
template <typename CommT>
183-
struct TemperedLB : baselb::BaseLB {
183+
struct TemperedLB final : baselb::BaseLB {
184184
using HandleType = typename CommT::template HandleType<TemperedLB<CommT>>;
185185

186186
// Assert that CommT conforms to the communication interface we expect
@@ -250,50 +250,54 @@ struct TemperedLB : baselb::BaseLB {
250250
}
251251
}
252252

253+
template <typename T>
254+
std::unordered_map<int, T> runInformationPropagation(T& initial_data) {
255+
InformationPropagation<CommT, T, TemperedLB<CommT>> ip(comm_, config_);
256+
auto gathered_info = ip.run(initial_data);
257+
printf("%d: gathered load info size=%zu\n", comm_.getRank(), gathered_info.size());
258+
return gathered_info;
259+
}
260+
253261
void run() {
262+
// Make communications symmetric before running trials so we only have to do it once
263+
makeCommunicationsSymmetric();
264+
254265
for (int trial = 0; trial < config_.num_trials_; ++trial) {
255266
printf("%d: Starting trial %d/%d\n", comm_.getRank(), trial + 1, config_.num_trials_);
256-
runTrial();
267+
runTrial(trial);
268+
printf("%d: Finished trial %d/%d\n", comm_.getRank(), trial + 1, config_.num_trials_);
257269
}
258270
}
259271

260-
void runTrial() {
272+
void runTrial(int trial) {
261273
// Save a clone of the phase data before load balancing
262274
savePhaseData();
263275

264276
auto total_load = computeLoad();
265277
printf("%d: initial total load: %f, num tasks: %zu\n", comm_.getRank(), total_load, numTasks());
266278

267-
// Make communications symmetric before distributed decisions
268-
makeCommunicationsSymmetric();
269-
270279
// Run the clustering algorithm if appropriate for the configuration
271280
doClustering();
272281

273282
// Generate visualization after symmetrization/clustering
274-
visualizeGraph("temperedlb2");
283+
visualizeGraph("temperedlb_rank" + std::to_string(comm_.getRank()) + "_trial" + std::to_string(trial));
275284

276285
auto& wm = config_.work_model_;
277286
if (wm.beta == 0.0 && wm.gamma == 0.0 && wm.delta == 0.0) {
278-
using LoadType = double;
279-
auto ip = InformationPropagation<CommT, LoadType, TemperedLB<CommT>>(
280-
comm_,
281-
config_.f_,
282-
config_.k_max_,
283-
config_.deterministic_,
284-
config_.seed_
285-
);
286-
auto info = ip.run(total_load);
287-
//printf("%d: gathered load info from %zu ranks\n", comm_.getRank(), info.size());
287+
auto info = runInformationPropagation(total_load);
288+
printf("%d: runTrial: gathered load info from %zu ranks\n", comm_.getRank(), info.size());
288289
} else {
289290
#if 0
290291
computeGlobalMaxClusters();
291292
#else
292293
// Just assume max of 1000 clusters per rank for now, until we have bcast
293294
#endif
294-
if (clusterer_ != nullptr) {
295-
buildClusterSummaries();
296-
}
295+
// For now, we will assume that if beta/gamma/delta are non-zero, clustering must occur.
296+
// Every task could be its own cluster, but clusters must exist
297+
assert(clusterer_ != nullptr && "Clusterer must be valid");
298+
auto local_summary = buildClusterSummaries();
299+
auto info = runInformationPropagation(local_summary);
300+
printf("%d: runTrial: gathered load info from %zu ranks\n", comm_.getRank(), info.size());
297301
}
298302

299303
// Before we restore phase data for the next trial, save the work and task distribution

0 commit comments

Comments
 (0)