Skip to content

Commit 0391d48

Browse files
Merge pull request #3052 from AlexandreSinger/feature-fg-parallel-mutex-barrier
[FGParallelRouter] Updated Barrier to C++20 Std Barrier
2 parents debf7c1 + 571d3f4 commit 0391d48

File tree

2 files changed

+56
-7
lines changed

2 files changed

+56
-7
lines changed

vpr/src/route/parallel_connection_router.h

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "multi_queue_d_ary_heap.h"
77

88
#include <atomic>
9+
#include <barrier>
910
#include <thread>
1011
#include <mutex>
1112
#include <condition_variable>
@@ -47,7 +48,6 @@ class spin_lock_t {
4748
* condition variable to coordinate thread synchronization.
4849
*/
4950
class barrier_mutex_t {
50-
// FIXME: Try std::barrier (since C++20) to replace this mutex barrier
5151
std::mutex mutex_;
5252
std::condition_variable cv_;
5353
size_t count_;
@@ -60,17 +60,22 @@ class barrier_mutex_t {
6060
* @param num_threads Number of threads that must call wait() before
6161
* any thread is allowed to proceed
6262
*/
63-
explicit barrier_mutex_t(size_t num_threads)
63+
explicit inline barrier_mutex_t(size_t num_threads)
6464
: count_(num_threads)
6565
, max_count_(num_threads) {}
6666

67+
/**
68+
* @brief No-op: this barrier implementation requires no initialization.
69+
*/
70+
inline void init() {}
71+
6772
/**
6873
* @brief Blocks the calling thread until all threads have called wait()
6974
*
7075
* When the specified number of threads have called this method, all
7176
* threads are unblocked and the barrier is reset for the next use.
7277
*/
73-
void wait() {
78+
inline void wait() {
7479
std::unique_lock<std::mutex> lock{mutex_};
7580
size_t gen = generation_;
7681
if (--count_ == 0) {
@@ -110,13 +115,13 @@ class barrier_spin_t {
110115
* @param num_threads Number of threads that must call wait() before
111116
* any thread is allowed to proceed
112117
*/
113-
explicit barrier_spin_t(size_t num_threads) { num_threads_ = num_threads; }
118+
explicit inline barrier_spin_t(size_t num_threads) { num_threads_ = num_threads; }
114119

115120
/**
116121
* @brief Initializes the thread-local sense flag
117122
* @note Should be called by each thread before first using the barrier.
118123
*/
119-
void init() {
124+
inline void init() {
120125
local_sense_ = false;
121126
}
122127

@@ -127,7 +132,7 @@ class barrier_spin_t {
127132
* to arrive unblocks all waiting threads. This method avoids using locks or
128133
* condition variables, making it potentially more efficient for short waits.
129134
*/
130-
void wait() {
135+
inline void wait() {
131136
bool s = !local_sense_;
132137
local_sense_ = s;
133138
size_t num_arrivals = count_.fetch_add(1) + 1;
@@ -141,7 +146,41 @@ class barrier_spin_t {
141146
}
142147
};
143148

144-
using barrier_t = barrier_spin_t; // Using the spin-based thread barrier
149+
/**
150+
* @brief Thread barrier implementation using std::barrier
151+
*
152+
* It ensures all participating threads reach a synchronization point
153+
* before any are allowed to proceed further.
154+
*/
155+
class standard_barrier_t {
156+
/// @brief Internal barrier implementation.
157+
std::barrier<> barrier_;
158+
159+
public:
160+
/**
161+
* @brief Constructs a barrier for a specific number of threads
162+
*
163+
* @param num_threads
164+
* Number of threads that must call wait() before any thread is allowed
165+
* to proceed.
166+
*/
167+
explicit inline standard_barrier_t(size_t num_threads)
168+
: barrier_(num_threads) {}
169+
170+
/**
171+
* Initialization method goes unused by this barrier implementation.
172+
*/
173+
inline void init() {}
174+
175+
/**
176+
* @brief Blocks the calling thread until all threads have called wait()
177+
*/
178+
inline void wait() {
179+
barrier_.arrive_and_wait();
180+
}
181+
};
182+
183+
using barrier_t = standard_barrier_t; // Using the standard thread barrier
145184

146185
/**
147186
* @class ParallelConnectionRouter implements the MultiQueue-based parallel connection
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# This collects QoR data that is interesting for the Fine-Grained Parallel
2+
# Router running on a fixed channel width.
3+
4+
vpr_status;output.txt;vpr_status=(.*)
5+
crit_path_delay;vpr.out;Critical path: (.*) ns
6+
post_route_wirelength;vpr.out;\s*Total wirelength: (\d+)
7+
total_connection_pathsearch_time;vpr.out;.*Time spent on path search: (.*) seconds.
8+
route_runtime;vpr.out;Routing took (.*) seconds
9+
total_runtime;vpr.out;The entire flow of VPR took (.*) seconds
10+
magic_cookie;vpr.out;Serial number \(magic cookie\) for the routing is: (.*)

0 commit comments

Comments
 (0)