@@ -79,16 +79,16 @@ struct InformationPropagation {
7979 * @param deterministic Whether to use deterministic selection
8080 *
8181 */
82- InformationPropagation (CommT& comm, int f, int k_max, bool deterministic, int seed )
82+ InformationPropagation (CommT& comm, Configuration const & config )
8383 : comm_(comm.clone()), // collective operation
84- f_ (f ),
85- k_max_(k_max ),
86- deterministic_(deterministic )
84+ f_ (config.f_ ),
85+ k_max_(config.k_max_ ),
86+ deterministic_(config.deterministic_ )
8787 {
8888 handle_ = comm_.template registerInstanceCollective <ThisType>(this );
8989
9090 if (deterministic_) {
91- gen_select_.seed (seed + comm_.getRank ());
91+ gen_select_.seed (config. seed_ + comm_.getRank ());
9292 }
9393 }
9494
@@ -180,7 +180,7 @@ struct InformationPropagation {
180180};
181181
182182template <typename CommT>
183- struct TemperedLB : baselb::BaseLB {
183+ struct TemperedLB final : baselb::BaseLB {
184184 using HandleType = typename CommT::template HandleType<TemperedLB<CommT>>;
185185
186186 // Assert that CommT conforms to the communication interface we expect
@@ -250,50 +250,54 @@ struct TemperedLB : baselb::BaseLB {
250250 }
251251 }
252252
253+ template <typename T>
254+ std::unordered_map<int , T> runInformationPropagation (T& initial_data) {
255+ InformationPropagation<CommT, T, TemperedLB<CommT>> ip (comm_, config_);
256+ auto gathered_info = ip.run (initial_data);
257+ printf (" %d: gathered load info size=%zu\n " , comm_.getRank (), gathered_info.size ());
258+ return gathered_info;
259+ }
260+
253261 void run () {
262+ // Make communications symmetric before running trials so we only have to do it once
263+ makeCommunicationsSymmetric ();
264+
254265 for (int trial = 0 ; trial < config_.num_trials_ ; ++trial) {
255266 printf (" %d: Starting trial %d/%d\n " , comm_.getRank (), trial + 1 , config_.num_trials_ );
256- runTrial ();
267+ runTrial (trial);
268+ printf (" %d: Finished trial %d/%d\n " , comm_.getRank (), trial + 1 , config_.num_trials_ );
257269 }
258270 }
259271
260- void runTrial () {
272+ void runTrial (int trial ) {
261273 // Save a clone of the phase data before load balancing
262274 savePhaseData ();
263275
264276 auto total_load = computeLoad ();
265277 printf (" %d: initial total load: %f, num tasks: %zu\n " , comm_.getRank (), total_load, numTasks ());
266278
267- // Make communications symmetric before distributed decisions
268- makeCommunicationsSymmetric ();
269-
270279 // Run the clustering algorithm if appropriate for the configuration
271280 doClustering ();
272281
273282 // Generate visualization after symmetrization/clustering
274- visualizeGraph (" temperedlb2 " );
283+ visualizeGraph (" temperedlb_rank " + std::to_string (comm_. getRank ()) + " _trial " + std::to_string (trial) );
275284
276285 auto & wm = config_.work_model_ ;
277286 if (wm.beta == 0.0 && wm.gamma == 0.0 && wm.delta == 0.0 ) {
278- using LoadType = double ;
279- auto ip = InformationPropagation<CommT, LoadType, TemperedLB<CommT>>(
280- comm_,
281- config_.f_ ,
282- config_.k_max_ ,
283- config_.deterministic_ ,
284- config_.seed_
285- );
286- auto info = ip.run (total_load);
287- // printf("%d: gathered load info from %zu ranks\n", comm_.getRank(), info.size());
287+ auto info = runInformationPropagation (total_load);
288+ printf (" %d: runTrial: gathered load info from %zu ranks\n " , comm_.getRank (), info.size ());
288289 } else {
289290#if 0
290291 computeGlobalMaxClusters();
291292#else
292293 // Just assume max of 1000 clusters per rank for now, until we have bcast
293294#endif
294- if (clusterer_ != nullptr ) {
295- buildClusterSummaries ();
296- }
295+ // For now, we will assume that if beta/gamma/delta are non-zero, clustering must occur.
296+ // Every task could be its own cluster, but clusters must exist
297+ assert (clusterer_ != nullptr && " Clusterer must be valid" );
298+ auto local_summary = buildClusterSummaries ();
299+ auto info = runInformationPropagation (local_summary);
300+ printf (" %d: runTrial: gathered load info from %zu ranks\n " , comm_.getRank (), info.size ());
297301 }
298302
299303 // Before we restore phase data for the next trial, save the work and task distribution
0 commit comments