5555#include < random>
5656#include < ostream>
5757#include < fstream>
58+ #include < cassert>
5859
5960#include < mpi.h>
6061
@@ -70,6 +71,17 @@ struct WorkModel {
7071 // / @brief Coefficient for shared-memory communication component
7172 double delta = 0.0 ;
7273
74+ // / @brief Whether memory information is available
75+ bool has_memory_info = true ;
/// @brief Whether task serialized memory information is available
77+ bool has_task_serialized_memory_info = true ;
/// @brief Whether task working memory information is available
79+ bool has_task_working_memory_info = true ;
/// @brief Whether task footprint memory information is available
81+ bool has_task_footprint_memory_info = true ;
/// @brief Whether shared block memory information is available
83+ bool has_shared_block_memory_info = true ;
84+
7385 double applyWorkFormula (
7486 double compute, double inter_comm_bytes, double intra_comm_bytes,
7587 double shared_comm_bytes
@@ -90,6 +102,20 @@ struct Configuration {
90102 k_max_ = std::ceil (std::sqrt (std::log (num_ranks)/std::log (2.0 )));
91103 }
92104
105+ bool hasMemoryInfo () const { return work_model_.has_memory_info ; }
106+ bool hasTaskSerializedMemoryInfo () const {
107+ return hasMemoryInfo () && work_model_.has_task_serialized_memory_info ;
108+ }
109+ bool hasTaskWorkingMemoryInfo () const {
110+ return hasMemoryInfo () && work_model_.has_task_working_memory_info ;
111+ }
112+ bool hasTaskFootprintMemoryInfo () const {
113+ return hasMemoryInfo () && work_model_.has_task_footprint_memory_info ;
114+ }
115+ bool hasSharedBlockMemoryInfo () const {
116+ return hasMemoryInfo () && work_model_.has_shared_block_memory_info ;
117+ }
118+
93119 // / @brief Number of trials to perform
94120 int num_trials_ = 1 ;
95121 // / @brief Number of iterations per trial
@@ -249,6 +275,31 @@ struct TaskClusterSummaryInfo {
249275 model::BytesType max_object_serialized_bytes = 0 ;
250276 model::BytesType max_object_serialized_bytes_outside = 0 ;
251277 model::BytesType cluster_footprint = 0 ;
278+
279+ template <typename SerializerT>
280+ void serializer (SerializerT& s) {
281+ s | cluster_id;
282+ s | num_tasks_;
283+ s | cluster_load;
284+ s | cluster_intra_send_bytes;
285+ s | cluster_intra_recv_bytes;
286+ s | inter_edges_;
287+ s | shared_block_bytes_;
288+ s | max_object_working_bytes;
289+ s | max_object_working_bytes_outside;
290+ s | max_object_serialized_bytes;
291+ s | max_object_serialized_bytes_outside;
292+ s | cluster_footprint;
293+ }
294+ };
295+
/// @brief Raw, per-component quantities from which a rank's work is computed
///
/// All components start at zero. The struct is a plain aggregate; the member
/// order is part of its interface for aggregate initialization.
struct WorkBreakdown {
  /// @brief Accumulated compute load
  double compute{0.0};
  /// @brief Volume received over edges whose endpoints are on different ranks
  double inter_node_recv_comm{0.0};
  /// @brief Volume sent over edges whose endpoints are on different ranks
  double inter_node_send_comm{0.0};
  /// @brief Volume received over edges whose endpoints are on the same rank
  double intra_node_recv_comm{0.0};
  /// @brief Volume sent over edges whose endpoints are on the same rank
  double intra_node_send_comm{0.0};
  /// @brief Accumulated size of shared blocks that must be communicated
  double shared_mem_comm{0.0};
};
253304
254305template <typename CommT>
@@ -347,6 +398,102 @@ struct TemperedLB : baselb::BaseLB {
347398
348399 Clusterer const * getClusterer () const { return clusterer_.get (); }
349400
401+ private:
402+ WorkBreakdown computeWorkBreakdown () const {
403+ WorkBreakdown breakdown;
404+ std::unordered_set<model::SharedBlockType> shared_blocks_here;
405+
406+ // Rank-alpha term
407+ for (auto const & [id, task] : this ->getPhaseData ().getTasksMap ()) {
408+ breakdown.compute += task.getLoad ();
409+ for (auto const & sb : task.getSharedBlocks ()) {
410+ shared_blocks_here.insert (sb);
411+ }
412+ }
413+
414+ // Communication terms
415+ for (auto const & e : this ->getPhaseData ().getCommunications ()) {
416+ assert (
417+ (e.getFromRank () == comm_.getRank () || e.getToRank () == comm_.getRank ()) &&
418+ " Edge does not belong to this rank"
419+ );
420+ if (e.getFromRank () != e.getToRank ()) {
421+ if (e.getToRank () == comm_.getRank ()) {
422+ breakdown.inter_node_recv_comm += e.getVolume ();
423+ } else {
424+ breakdown.inter_node_send_comm += e.getVolume ();
425+ }
426+ } else {
427+ if (e.getToRank () == comm_.getRank ()) {
428+ breakdown.intra_node_recv_comm += e.getVolume ();
429+ } else {
430+ breakdown.intra_node_send_comm += e.getVolume ();
431+ }
432+ }
433+ }
434+
435+ // Shared-memory communication term
436+ for (auto const & sb : shared_blocks_here) {
437+ assert (getPhaseData ().hasSharedBlock (sb) && " Shared block information missing" );
438+ auto info = getPhaseData ().getSharedBlock (sb);
439+ if (info->getHome () != comm_.getRank ()) {
440+ breakdown.shared_mem_comm += info->getSize ();
441+ }
442+ }
443+
444+ return breakdown;
445+ }
446+
447+ double computeWork (WorkBreakdown breakdown) const {
448+ return config_.work_model_ .applyWorkFormula (
449+ breakdown.compute ,
450+ std::max (breakdown.inter_node_recv_comm , breakdown.inter_node_send_comm ),
451+ std::max (breakdown.intra_node_recv_comm , breakdown.intra_node_send_comm ),
452+ breakdown.shared_mem_comm
453+ );
454+ }
455+
456+ double computeMemoryUsage () const {
457+ if (!config_.hasMemoryInfo ()) {
458+ return 0.0 ;
459+ }
460+
461+ double task_footprint_bytes_ = 0.0 ;
462+ double task_max_working_bytes_ = 0.0 ;
463+ double task_max_serialized_bytes_ = 0.0 ;
464+ double shared_blocks_bytes_ = 0.0 ;
465+ std::unordered_set<model::SharedBlockType> shared_blocks_here;
466+ for (auto const & [id, task] : this ->getPhaseData ().getTasksMap ()) {
467+ if (config_.hasTaskFootprintMemoryInfo ()) {
468+ task_footprint_bytes_ += task.getMemory ().footprint_bytes ;
469+ }
470+ if (config_.hasTaskWorkingMemoryInfo ()) {
471+ task_max_working_bytes_ = std::max (
472+ task_max_working_bytes_, task.getMemory ().working_bytes
473+ );
474+ }
475+ if (config_.hasTaskSerializedMemoryInfo ()) {
476+ task_max_serialized_bytes_ = std::max (
477+ task_max_serialized_bytes_, task.getMemory ().serialized_bytes
478+ );
479+ }
480+ if (config_.hasSharedBlockMemoryInfo ()) {
481+ for (auto const & sb : task.getSharedBlocks ()) {
482+ shared_blocks_here.insert (sb);
483+ }
484+ }
485+ }
486+ for (auto const & sb : shared_blocks_here) {
487+ assert (getPhaseData ().hasSharedBlock (sb) && " Shared block information missing" );
488+ auto info = getPhaseData ().getSharedBlock (sb);
489+ shared_blocks_bytes_ += info->getSize ();
490+ }
491+ return this ->getPhaseData ().getRankFootprintBytes () +
492+ task_footprint_bytes_ +
493+ task_max_working_bytes_ +
494+ shared_blocks_bytes_;
495+ }
496+
350497private:
351498 void computeGlobalMaxClusters () {
352499 // compute max number of clusters on any rank
0 commit comments