Skip to content

Commit 963737b

Browse files
[AP][Solver] Enabled Parallel Eigen
Eigen can use OpenMP to run its solver computations in parallel. The AP flow now passes the value of the num_workers option to Eigen as the number of threads it may use. VPR previously had no way to build with OpenMP from its CMake configuration, so this commit adds a CMake option (VPR_ENABLE_OPEN_MP) that lets the user enable OpenMP.
1 parent 327cee3 commit 963737b

8 files changed

+39
-0
lines changed

vpr/CMakeLists.txt

+15
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ set_property(CACHE VPR_PGO_CONFIG PROPERTY STRINGS prof_gen prof_use none)
1212

1313
set(VPR_PGO_DATA_DIR "." CACHE PATH "Where to store and retrieve PGO data")
1414

15+
set(VPR_ENABLE_OPEN_MP "off" CACHE STRING "Enable OpenMP when compiling VPR")
16+
1517
#Handle graphics setup
1618
set(GRAPHICS_DEFINES "")
1719

@@ -295,6 +297,19 @@ else()
295297
message(FATAL_ERROR "VPR: Unrecognized execution engine '${VPR_USE_EXECUTION_ENGINE}'")
296298
endif()
297299

300+
#
301+
# OpenMP configuration
302+
#
303+
if (VPR_ENABLE_OPEN_MP STREQUAL "on")
304+
find_package(OpenMP REQUIRED)
305+
if (OpenMP_CXX_FOUND)
306+
target_link_libraries(libvpr OpenMP::OpenMP_CXX)
307+
message(STATUS "OpenMP: Enabled")
308+
endif()
309+
else()
310+
message(STATUS "OpenMP: Disabled")
311+
endif()
312+
298313
#
299314
# Signal handler configuration
300315
#

vpr/src/analytical_place/analytical_placement_flow.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
144144
device_ctx.physical_tile_types,
145145
pre_cluster_timing_manager,
146146
ap_opts.ap_timing_tradeoff,
147+
ap_opts.num_threads,
147148
ap_opts.log_verbosity);
148149
return global_placer->place();
149150
}

vpr/src/analytical_place/analytical_solver.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#pragma GCC diagnostic push
3333
#pragma GCC diagnostic ignored "-Wnull-dereference"
3434

35+
#include <Eigen/src/Core/products/Parallelizer.h>
3536
#include <Eigen/src/SparseCore/SparseMatrix.h>
3637
#include <Eigen/SVD>
3738
#include <Eigen/Sparse>
@@ -48,7 +49,17 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
4849
const AtomNetlist& atom_netlist,
4950
const PreClusterTimingManager& pre_cluster_timing_manager,
5051
float ap_timing_tradeoff,
52+
unsigned num_threads,
5153
int log_verbosity) {
54+
#ifdef EIGEN_INSTALLED
55+
// Set the number of threads globally used by Eigen (if OpenMP is enabled).
56+
// NOTE: Since this is a global update, all solvers will have this number
57+
// of threads.
58+
Eigen::setNbThreads(num_threads);
59+
#else
60+
(void)num_threads;
61+
#endif // EIGEN_INSTALLED
62+
5263
// Based on the solver type passed in, build the solver.
5364
switch (solver_type) {
5465
case e_ap_analytical_solver::QP_Hybrid:

vpr/src/analytical_place/analytical_solver.h

+1
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
138138
const AtomNetlist& atom_netlist,
139139
const PreClusterTimingManager& pre_cluster_timing_manager,
140140
float ap_timing_tradeoff,
141+
unsigned num_threads,
141142
int log_verbosity);
142143

143144
// The Eigen library is used to solve matrix equations in the following solvers.

vpr/src/analytical_place/global_placer.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ std::unique_ptr<GlobalPlacer> make_global_placer(e_ap_analytical_solver analytic
3737
const std::vector<t_physical_tile_type>& physical_tile_types,
3838
const PreClusterTimingManager& pre_cluster_timing_manager,
3939
float ap_timing_tradeoff,
40+
unsigned num_threads,
4041
int log_verbosity) {
4142
return std::make_unique<SimPLGlobalPlacer>(analytical_solver_type,
4243
partial_legalizer_type,
@@ -48,6 +49,7 @@ std::unique_ptr<GlobalPlacer> make_global_placer(e_ap_analytical_solver analytic
4849
physical_tile_types,
4950
pre_cluster_timing_manager,
5051
ap_timing_tradeoff,
52+
num_threads,
5153
log_verbosity);
5254
}
5355

@@ -61,6 +63,7 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_ap_analytical_solver analytical_solver_ty
6163
const std::vector<t_physical_tile_type>& physical_tile_types,
6264
const PreClusterTimingManager& pre_cluster_timing_manager,
6365
float ap_timing_tradeoff,
66+
unsigned num_threads,
6467
int log_verbosity)
6568
: GlobalPlacer(ap_netlist, log_verbosity) {
6669
// This can be a long method. Good to time this to see how long it takes to
@@ -75,6 +78,7 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_ap_analytical_solver analytical_solver_ty
7578
atom_netlist,
7679
pre_cluster_timing_manager,
7780
ap_timing_tradeoff,
81+
num_threads,
7882
log_verbosity_);
7983

8084
// Build the density manager used by the partial legalizer.

vpr/src/analytical_place/global_placer.h

+2
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ std::unique_ptr<GlobalPlacer> make_global_placer(e_ap_analytical_solver analytic
8383
const std::vector<t_physical_tile_type>& physical_tile_types,
8484
const PreClusterTimingManager& pre_cluster_timing_manager,
8585
float ap_timing_tradeoff,
86+
unsigned num_threads,
8687
int log_verbosity);
8788

8889
/**
@@ -148,6 +149,7 @@ class SimPLGlobalPlacer : public GlobalPlacer {
148149
const std::vector<t_physical_tile_type>& physical_tile_types,
149150
const PreClusterTimingManager& pre_cluster_timing_manager,
150151
float ap_timing_tradeoff,
152+
unsigned num_threads,
151153
int log_verbosity);
152154

153155
/**

vpr/src/base/SetupVPR.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ void SetupAPOpts(const t_options& options,
559559
apOpts.detailed_placer_type = options.ap_detailed_placer.value();
560560
apOpts.ap_timing_tradeoff = options.ap_timing_tradeoff.value();
561561
apOpts.appack_max_dist_th = options.appack_max_dist_th.value();
562+
apOpts.num_threads = options.num_workers.value();
562563
apOpts.log_verbosity = options.ap_verbosity.value();
563564
}
564565

vpr/src/base/vpr_types.h

+4
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,8 @@ struct t_placer_opts {
11161116
* @param appack_max_dist_th
11171117
* Array of string passed by the user to configure the max candidate
11181118
* distance thresholds.
1119+
* @param num_threads
1120+
* The number of threads the AP flow can use.
11191121
* @param log_verbosity
11201122
* The verbosity level of log messages in the AP flow, with higher
11211123
* values leading to more verbose messages.
@@ -1135,6 +1137,8 @@ struct t_ap_opts {
11351137

11361138
std::vector<std::string> appack_max_dist_th;
11371139

1140+
unsigned num_threads;
1141+
11381142
int log_verbosity;
11391143
};
11401144

0 commit comments

Comments
 (0)