Skip to content

Commit f4c93bc

Browse files
committed
#1: termination: fix bug and put compile-time flag around debug prints
1 parent f5bf248 commit f4c93bc

File tree

1 file changed

+23
-5
lines changed

1 file changed

+23
-5
lines changed

src/vt-lb/comm/termination.cc

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
#include "termination.h"
4545
#include "vt-lb/comm/comm_mpi.h"
4646

47+
// Compile-time switch for the printf tracing in this file's termination
// detector methods. Defaults to 0 (tracing compiled out); it can be enabled
// without editing this file by defining it on the compiler command line,
// e.g. -DDEBUG_TERMINATION=1.
#ifndef DEBUG_TERMINATION
#define DEBUG_TERMINATION 0
#endif
48+
4749
namespace vt_lb::comm::detail {
4850

4951
void TerminationDetector::init(CommMPI& comm, ClassHandle<TerminationDetector> handle) {
@@ -65,21 +67,27 @@ void TerminationDetector::startFirstWave() {
6567
}
6668

6769
void TerminationDetector::sendControlToChildren() {
70+
#if DEBUG_TERMINATION
6871
printf("Rank %d: sending control to %d children\n", rank_, num_children_);
72+
#endif
6973
for (int i = 0; i < num_children_; i++) {
7074
handle_[first_child_ + i].sendTerm<&TerminationDetector::onControl>();
7175
}
7276
}
7377

7478
void TerminationDetector::sendResponseToParent(uint64_t in_sent, uint64_t in_recv) {
79+
#if DEBUG_TERMINATION
7580
printf("Rank %d: sending response to parent %d: sent=%lld, recv=%lld\n",
7681
rank_, parent_, in_sent, in_recv);
82+
#endif
7783
handle_[parent_].sendTerm<&TerminationDetector::onResponse>(in_sent, in_recv);
7884
}
7985

8086
void TerminationDetector::onControl() {
87+
#if DEBUG_TERMINATION
8188
printf("Rank %d: received control message, num_children_=%d\n",
8289
rank_, num_children_);
90+
#endif
8391
waiting_children_ = num_children_;
8492
// Forward control to children
8593
if (num_children_ > 0) {
@@ -91,24 +99,30 @@ void TerminationDetector::onControl() {
9199
}
92100

93101
void TerminationDetector::onResponse(uint64_t in_sent, uint64_t in_recv) {
94-
printf("Rank %d: received response: sent=%lld, recv=%lld, waiting_children=%d\n",
95-
rank_, in_sent, in_recv, waiting_children_);
96-
102+
#if DEBUG_TERMINATION
103+
printf("Rank %d: received response: sent=%lld, recv=%lld, global_sent1=%lld, global_recv1_=%lld waiting_children=%d\n",
104+
rank_, in_sent, in_recv, global_sent1_, global_recv1_, waiting_children_);
105+
#endif
106+
97107
global_sent1_ += in_sent;
98108
global_recv1_ += in_recv;
99109

100110
waiting_children_--;
101111

102112
if (waiting_children_ == 0) {
103113

114+
#if DEBUG_TERMINATION
104115
printf("Rank %d: aggregated total: sent=%lld, recv=%lld\n",
105116
rank_, global_sent1_, global_recv1_);
117+
#endif
106118

107119
if (rank_ == 0) {
108120
// Root checks for termination
109121

110-
printf("Root total: s1=%lld, r1=%lld, s2=%lld, r2=%lld\n",
122+
#if DEBUG_TERMINATION
123+
printf("Root total: s1=%lld, r1=%lld, s2=%lld, r2=%lld\n",
111124
global_sent1_, global_recv1_, global_sent2_, global_recv2_);
125+
#endif
112126

113127
if (global_sent1_ == global_recv1_ &&
114128
global_sent2_ == global_recv2_ &&
@@ -125,7 +139,7 @@ void TerminationDetector::onResponse(uint64_t in_sent, uint64_t in_recv) {
125139
}
126140
} else {
127141
// Send response up
128-
sendResponseToParent(global_sent1_, global_recv1_);
142+
sendResponseToParent(global_sent1_ + sent_, global_recv1_ + recv_);
129143
global_sent1_ = global_recv1_ = 0;
130144
waiting_children_ = num_children_;
131145
}
@@ -135,16 +149,20 @@ void TerminationDetector::onResponse(uint64_t in_sent, uint64_t in_recv) {
135149
void TerminationDetector::notifyMessageSend() {
136150
if (!terminated_) {
137151
sent_++;
152+
#if DEBUG_TERMINATION
138153
printf("Rank %d: notified send, counter: sent_=%lld, recv_=%lld\n",
139154
rank_, sent_, recv_);
155+
#endif
140156
}
141157
}
142158

143159
void TerminationDetector::notifyMessageReceive() {
144160
if (!terminated_) {
145161
recv_++;
162+
#if DEBUG_TERMINATION
146163
printf("Rank %d: notified receive, counter: sent=%lld, recv=%lld\n",
147164
rank_, sent_, recv_);
165+
#endif
148166
}
149167
}
150168

0 commit comments

Comments
 (0)