Skip to content

Commit 824e494

Browse files
Merge pull request #2994 from RonZ13/packer-feasible-candidates-list-to-priority-queue
[packer] Changing List of Feasible Candidates to Priority Queue
2 parents bae2aa0 + ccc395b commit 824e494

File tree

224 files changed

+3008
-2808
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

224 files changed

+3008
-2808
lines changed

vpr/src/pack/greedy_candidate_selector.cpp

+45-86
Large diffs are not rendered by default.

vpr/src/pack/greedy_candidate_selector.h

+24-11
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "vtr_vector.h"
2424
#include "vtr_random.h"
2525
#include "vtr_vector_map.h"
26+
#include "lazy_pop_unique_priority_queue.h"
2627

2728
// Forward declarations
2829
class AtomNetlist;
@@ -97,13 +98,6 @@ struct ClusterGainStats {
9798
/// with the cluster.
9899
AttractGroupId attraction_grp_id;
99100

100-
/// @brief Array of feasible blocks to select from [0..max_array_size-1]
101-
///
102-
/// Sorted in ascending gain order so that the last cluster_ctx.blocks is
103-
/// the most desirable (this makes it easy to pop blocks off the list.
104-
std::vector<PackMoleculeId> feasible_blocks;
105-
int num_feasible_blocks;
106-
107101
/// @brief The flat placement location of this cluster.
108102
///
109103
/// This is some function of the positions of the molecules which have been
@@ -126,6 +120,25 @@ struct ClusterGainStats {
126120
/// set when the stats are created based on the primitive pb type
127121
/// of the seed.
128122
bool is_memory = false;
123+
124+
/// @brief Collection of (feasible block, gain) pairs.
125+
/// The list is maintained in heap structure with the highest gain block
126+
/// at the front.
127+
LazyPopUniquePriorityQueue<PackMoleculeId, float> feasible_blocks;
128+
129+
/// @brief Indicator for the initial search for feasible blocks.
130+
bool initial_search_for_feasible_blocks;
131+
132+
/// @brief Limit for the number of candidates proposed at each stage.
133+
unsigned candidates_propose_limit;
134+
135+
/// @brief Counter for the number of candidates proposed at each stage.
136+
unsigned num_candidates_proposed;
137+
138+
/// @brief Check if the current stage candidates proposed limit is reached.
139+
bool current_stage_candidates_proposed_limit_reached() {
140+
// True once the per-stage counter has reached or passed the per-stage limit.
// Both members are only read here; neither is modified.
return num_candidates_proposed >= candidates_propose_limit;
141+
}
129142
};
130143

131144
/**
@@ -444,7 +457,7 @@ class GreedyCandidateSelector {
444457
// Cluster Candidate Selection
445458
// ===================================================================== //
446459

447-
/*
460+
/**
448461
* @brief Add molecules with strong connectedness to the current cluster to
449462
* the list of feasible blocks.
450463
*/
@@ -471,7 +484,7 @@ class GreedyCandidateSelector {
471484
LegalizationClusterId legalization_cluster_id,
472485
const ClusterLegalizer& cluster_legalizer);
473486

474-
/*
487+
/**
475488
* @brief Add molecules based on transitive connections (eg. 2 hops away)
476489
* with current cluster.
477490
*/
@@ -481,7 +494,7 @@ class GreedyCandidateSelector {
481494
const ClusterLegalizer& cluster_legalizer,
482495
AttractionInfo& attraction_groups);
483496

484-
/*
497+
/**
485498
* @brief Add molecules based on weak connectedness (connected by high
486499
* fanout nets) with current cluster.
487500
*/
@@ -491,7 +504,7 @@ class GreedyCandidateSelector {
491504
const ClusterLegalizer& cluster_legalizer,
492505
AttractionInfo& attraction_groups);
493506

494-
/*
507+
/**
495508
* @brief If the current cluster being packed has an attraction group
496509
* associated with it (i.e. there are atoms in it that belong to an
497510
* attraction group), this routine adds molecules from the associated
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
/**
2+
* @file
3+
* @author Rongbo Zhang
4+
* @date 2025-04-23
5+
* @brief This file contains the definition of the LazyPopUniquePriorityQueue class.
6+
*
7+
* The class LazyPopUniquePriorityQueue is a priority queue that allows for lazy deletion of elements.
8+
* The elements are pairs of a key and a sort-value. The key is a unique value that identifies the item, and the sort-value is used to order the items.
9+
* It is implemented using a vector and 2 sets, one set keeps track of the elements in the queue, and the other set keeps track of the elements that are pending deletion,
10+
* so that they can be removed from the queue when they are popped.
11+
*
12+
* Currently, the class supports the following functions:
13+
* LazyPopUniquePriorityQueue::push(): Pushes a key-sort-value (K-SV) pair into the priority queue and adds the key to the tracking set.
14+
* LazyPopUniquePriorityQueue::pop(): Returns the K-SV pair with the highest SV whose key is not pending deletion.
15+
* LazyPopUniquePriorityQueue::remove(): Removes an element from the priority queue immediately.
16+
* LazyPopUniquePriorityQueue::remove_at_pop_time(): Removes an element from the priority queue when it is popped.
17+
* LazyPopUniquePriorityQueue::empty(): Returns whether the queue is empty.
18+
* LazyPopUniquePriorityQueue::clear(): Clears the priority queue vector and the tracking sets.
19+
* LazyPopUniquePriorityQueue::size(): Returns the number of elements in the queue.
20+
* LazyPopUniquePriorityQueue::contains(): Returns true if the key is in the queue, false otherwise.
21+
*/
22+
23+
#pragma once
24+
25+
#include <unordered_set>
26+
#include <vector>
27+
#include <algorithm>
28+
29+
/**
30+
* @brief Lazy Pop Unique Priority Queue
31+
*
32+
* This is a priority queue that is used to sort items which are identified by the key
33+
* and sorted by the sort value.
34+
*
35+
* It uses a vector to store the key and sort value pair.
36+
* It uses a set to store the keys that are in the vector for uniqueness checking
37+
* and a set to store the delete pending keys which will be removed at pop time.
38+
*/
39+
40+
template<typename T_key, typename T_sort>
class LazyPopUniquePriorityQueue {
  public:
    /**
     * @brief The custom comparison struct for sorting the items in the priority queue.
     *
     * Only the sort value (the second element of the pair) is compared; the key
     * never takes part in the ordering.
     * A less-than comparison puts the item with the highest sort value at the front of the queue.
     * A greater-than comparison would put the item with the lowest sort value at the front.
     */
    struct LazyPopUniquePriorityQueueCompare {
        bool operator()(const std::pair<T_key, T_sort>& a,
                        const std::pair<T_key, T_sort>& b) const {
            return a.second < b.second;
        }
    };

    /// @brief The vector maintained as a heap to store the key and sort value pairs.
    ///        It also holds items that are pending deletion until they are popped.
    std::vector<std::pair<T_key, T_sort>> heap;

    /// @brief The set of keys currently stored in the heap. This is used to ensure uniqueness.
    std::unordered_set<T_key> content_set;

    /// @brief The set of keys referring to items that are pending deletion from the queue.
    std::unordered_set<T_key> delete_pending_set;

    /**
     * @brief Push the key and the sort value as a pair into the priority queue.
     *
     * If the key is already tracked by the queue, nothing happens. A key that
     * was marked by remove_at_pop_time() but has not been popped yet is still
     * tracked, so pushing it again is also a no-op.
     *
     * @param key
     *          The unique key for the item that will be pushed onto the queue.
     * @param value
     *          The sort value used for sorting the item.
     */
    void push(T_key key, T_sort value) {
        // insert() reports whether the key was new, so a single lookup both
        // checks uniqueness and starts tracking the key.
        if (!content_set.insert(key).second) {
            return;
        }

        // The new item is appended to the vector and push_heap then sifts it
        // up to its correct position in the heap structure.
        heap.emplace_back(key, value);
        std::push_heap(heap.begin(), heap.end(), LazyPopUniquePriorityQueueCompare());
    }

    /**
     * @brief Pop the top item from the priority queue.
     *
     * Items whose keys are pending deletion are discarded on the way.
     *
     * @return The key and sort value pair with the highest sort value whose key
     *         is not pending deletion. If no such item exists, a
     *         value-initialized pair is returned.
     */
    std::pair<T_key, T_sort> pop() {
        std::pair<T_key, T_sort> top_pair;
        while (!heap.empty()) {
            top_pair = heap.front();

            // pop_heap moves the top item to the end of the vector, and
            // pop_back then removes it. The key stops being tracked.
            std::pop_heap(heap.begin(), heap.end(), LazyPopUniquePriorityQueueCompare());
            heap.pop_back();
            content_set.erase(top_pair.first);

            // erase() returns the number of keys removed: zero means the item
            // was not pending deletion, so it is the item to return.
            if (delete_pending_set.erase(top_pair.first) == 0) {
                break;
            }

            // The item was pending deletion: drop it and look at the next top item.
            top_pair = std::pair<T_key, T_sort>();
        }

        // If there is zero non-pending-delete item left, clear the queue.
        if (empty()) {
            clear();
        }

        return top_pair;
    }

    /**
     * @brief Remove the item with the matching key from the priority queue.
     *        This immediately removes the item and re-heapifies the queue.
     *
     * This function is expensive: it performs a linear search, an erase from
     * the middle of the vector and a full re-heapify, each of which is O(n),
     * where n is the number of items stored in the heap.
     * It is recommended to use remove_at_pop_time() instead.
     *
     * @param key
     *          The key of the item to be deleted from the queue.
     */
    void remove(T_key key) {
        // If the key is not in the priority queue, do nothing.
        if (content_set.erase(key) == 0) {
            return;
        }
        delete_pending_set.erase(key);

        // Locate the (unique) heap entry carrying this key and erase it.
        auto entry = std::find_if(heap.begin(), heap.end(),
                                  [&key](const std::pair<T_key, T_sort>& item) {
                                      return item.first == key;
                                  });
        if (entry != heap.end()) {
            heap.erase(entry);
        }

        if (empty()) {
            // This delete left zero non-pending-delete items, clear the queue.
            clear();
        } else {
            // Erasing from the middle breaks the heap property, rebuild it.
            std::make_heap(heap.begin(), heap.end(), LazyPopUniquePriorityQueueCompare());
        }
    }

    /**
     * @brief Remove the item with the matching key from the priority queue at pop time.
     *        The key is added to the delete pending set for tracking,
     *        and the item is discarded when it reaches the top during pop().
     *
     * This function does not immediately delete the item from the underlying
     * heap, so do not expect the heap vector to shrink immediately. The value
     * reported by size() does drop, as it excludes pending-delete items.
     *
     * @param key
     *          The key of the item to be deleted from the queue at pop time.
     */
    void remove_at_pop_time(T_key key) {
        // If the key is not in the queue, do nothing.
        if (content_set.find(key) == content_set.end()) {
            return;
        }
        delete_pending_set.insert(key);

        // If this marks the last non-pending-delete item as to-be-deleted, clear the queue.
        if (empty()) {
            clear();
        }
    }

    /**
     * @brief Check if the priority queue is empty, i.e. there is zero non-pending-delete item.
     *
     * @return True if the priority queue is empty, false otherwise.
     */
    bool empty() const {
        return size() == 0;
    }

    /**
     * @brief Clears the priority queue and the tracking sets.
     */
    void clear() {
        heap.clear();
        content_set.clear();
        delete_pending_set.clear();
    }

    /**
     * @brief Get the number of non-pending-delete items in the priority queue.
     *
     * @return The number of items in the heap minus the number of keys that
     *         are pending deletion.
     */
    size_t size() const {
        return heap.size() - delete_pending_set.size();
    }

    /**
     * @brief Check if the item referred to by the key is in the priority queue.
     *
     * A key marked by remove_at_pop_time() is still reported as contained
     * until it is popped, removed or the queue is cleared.
     *
     * @param key
     *          The key of the item.
     * @return True if the key is tracked by the priority queue, false otherwise.
     */
    bool contains(T_key key) const {
        return content_set.find(key) != content_set.end();
    }
};
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time
2-
k4_N10_memSize16384_memData64.xml ch_intrinsics.v common 1.71 vpr 62.29 MiB -1 -1 0.45 18372 3 0.09 -1 -1 33140 -1 -1 71 99 1 0 success v8.0.0-11920-g63becbef4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-12-04T15:29:41 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/release/vtr-verilog-to-routing 63780 99 130 353 483 1 222 301 13 13 169 clb auto 22.7 MiB 0.06 730 30541 5185 13290 12066 62.3 MiB 0.05 0.00 28 1583 11 3.33e+06 2.25e+06 384474. 2275.00 0.18
3-
k4_N10_memSize16384_memData64.xml diffeq1.v common 3.90 vpr 66.30 MiB -1 -1 0.72 23492 23 0.30 -1 -1 34028 -1 -1 77 162 0 5 success v8.0.0-11920-g63becbef4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-12-04T15:29:41 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/release/vtr-verilog-to-routing 67888 162 96 1200 1141 1 675 340 13 13 169 clb auto 25.9 MiB 0.18 5120 92848 24971 61178 6699 66.3 MiB 0.19 0.00 52 9637 13 3.33e+06 2.76e+06 671819. 3975.26 1.14
4-
k4_N10_memSize16384_memData64.xml single_wire.v common 2.10 vpr 59.81 MiB -1 -1 0.16 16372 1 0.17 -1 -1 29680 -1 -1 0 1 0 0 success v8.0.0-11920-g63becbef4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-12-04T15:29:41 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/release/vtr-verilog-to-routing 61244 1 1 1 2 0 1 2 3 3 9 -1 auto 21.3 MiB 0.00 2 3 0 3 0 59.8 MiB 0.01 0.00 2 1 1 30000 0 1489.46 165.495 0.01
5-
k4_N10_memSize16384_memData64.xml single_ff.v common 2.13 vpr 59.62 MiB -1 -1 0.15 16244 1 0.17 -1 -1 29552 -1 -1 1 2 0 0 success v8.0.0-11920-g63becbef4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-12-04T15:29:41 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/release/vtr-verilog-to-routing 61048 2 1 3 4 1 3 4 3 3 9 -1 auto 21.2 MiB 0.00 6 9 6 0 3 59.6 MiB 0.01 0.00 16 5 1 30000 30000 2550.78 283.420 0.01
1+
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time
2+
k4_N10_memSize16384_memData64.xml ch_intrinsics.v common 1.20 vpr 63.47 MiB -1 -1 0.21 18728 3 0.06 -1 -1 32696 -1 -1 72 99 1 0 success v8.0.0-12648-g259ceba57-dirty release IPO VTR_ASSERT_LEVEL=2 Clang 18.1.3 on Linux-6.8.0-58-generic x86_64 2025-05-06T12:34:13 betzgrp-wintermute /home/zhan6738/VTR/vtr-verilog-to-routing/vtr_flow/tasks 64996 99 130 353 483 1 220 302 13 13 169 clb auto 23.7 MiB 0.03 1748 641 31674 5814 13912 11948 63.5 MiB 0.03 0.00 36 1209 9 3.33e+06 2.28e+06 481319. 2848.04 0.18
3+
k4_N10_memSize16384_memData64.xml diffeq1.v common 2.73 vpr 66.43 MiB -1 -1 0.30 23332 23 0.24 -1 -1 33444 -1 -1 78 162 0 5 success v8.0.0-12648-g259ceba57-dirty release IPO VTR_ASSERT_LEVEL=2 Clang 18.1.3 on Linux-6.8.0-58-generic x86_64 2025-05-06T12:34:13 betzgrp-wintermute /home/zhan6738/VTR/vtr-verilog-to-routing/vtr_flow/tasks 68020 162 96 1200 1141 1 690 341 14 14 196 clb auto 26.8 MiB 0.11 8696 5304 81261 22686 53433 5142 66.4 MiB 0.09 0.00 46 10726 18 4.32e+06 2.79e+06 735717. 3753.66 1.03
4+
k4_N10_memSize16384_memData64.xml single_wire.v common 0.51 vpr 61.51 MiB -1 -1 0.06 17188 1 0.02 -1 -1 29568 -1 -1 0 1 0 0 success v8.0.0-12648-g259ceba57-dirty release IPO VTR_ASSERT_LEVEL=2 Clang 18.1.3 on Linux-6.8.0-58-generic x86_64 2025-05-06T12:34:13 betzgrp-wintermute /home/zhan6738/VTR/vtr-verilog-to-routing/vtr_flow/tasks 62988 1 1 1 2 0 1 2 3 3 9 -1 auto 22.9 MiB 0.00 2 2 3 0 3 0 61.5 MiB 0.00 0.00 2 1 1 30000 0 1489.46 165.495 0.00
5+
k4_N10_memSize16384_memData64.xml single_ff.v common 0.51 vpr 61.52 MiB -1 -1 0.06 17188 1 0.02 -1 -1 29584 -1 -1 1 2 0 0 success v8.0.0-12648-g259ceba57-dirty release IPO VTR_ASSERT_LEVEL=2 Clang 18.1.3 on Linux-6.8.0-58-generic x86_64 2025-05-06T12:34:13 betzgrp-wintermute /home/zhan6738/VTR/vtr-verilog-to-routing/vtr_flow/tasks 63000 2 1 3 4 1 3 4 3 3 9 -1 auto 22.9 MiB 0.00 6 6 9 6 0 3 61.5 MiB 0.00 0.00 16 5 1 30000 30000 2550.78 283.420 0.00

0 commit comments

Comments
 (0)