@@ -112,6 +112,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
112
112
, placer_opts_(placer_opts) {
113
113
const int num_layers = g_vpr_ctx.device ().grid .get_num_layers ();
114
114
115
+ is_multi_layer_ = num_layers > 1 ;
116
+
115
117
// Either 3D BB or per layer BB data structure are used, not both.
116
118
if (cube_bb_) {
117
119
ts_bb_edge_new_.resize (num_nets, t_bb ());
@@ -145,10 +147,11 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
145
147
* been recomputed. */
146
148
bb_update_status_.resize (num_nets, NetUpdateState::NOT_UPDATED_YET);
147
149
148
- alloc_and_load_chan_w_factors_for_place_cost_ (placer_opts_. place_cost_exp );
150
+ alloc_and_load_chan_w_factors_for_place_cost_ ();
149
151
}
150
152
151
- void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_ (float place_cost_exp) {
153
+ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_ () {
154
+ const double place_cost_exp = static_cast <double >(placer_opts_.place_cost_exp );
152
155
auto & device_ctx = g_vpr_ctx.device ();
153
156
154
157
const int grid_height = device_ctx.grid .height ();
@@ -190,7 +193,7 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c
190
193
}
191
194
192
195
chanx_place_cost_fac_[high][low] = (high - low + 1 .) / chanx_place_cost_fac_[high][low];
193
- chanx_place_cost_fac_[high][low] = pow ((double )chanx_place_cost_fac_[high][low], ( double ) place_cost_exp);
196
+ chanx_place_cost_fac_[high][low] = pow ((double )chanx_place_cost_fac_[high][low], place_cost_exp);
194
197
}
195
198
}
196
199
@@ -220,71 +223,87 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c
220
223
}
221
224
222
225
chany_place_cost_fac_[high][low] = (high - low + 1 .) / chany_place_cost_fac_[high][low];
223
- chany_place_cost_fac_[high][low] = pow ((double )chany_place_cost_fac_[high][low], ( double ) place_cost_exp);
226
+ chany_place_cost_fac_[high][low] = pow ((double )chany_place_cost_fac_[high][low], place_cost_exp);
224
227
}
225
228
}
226
229
227
- if (device_ctx. grid . get_num_layers () > 1 ) {
228
- alloc_and_load_for_fast_vertical_cost_update_ (place_cost_exp );
230
+ if (is_multi_layer_ ) {
231
+ alloc_and_load_for_fast_vertical_cost_update_ ();
229
232
}
230
233
}
231
234
232
- void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_ (float place_cost_exp ) {
235
+ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_ () {
233
236
const auto & device_ctx = g_vpr_ctx.device ();
234
237
const auto & rr_graph = device_ctx.rr_graph ;
235
238
236
239
const size_t grid_height = device_ctx.grid .height ();
237
240
const size_t grid_width = device_ctx.grid .width ();
238
241
239
242
240
- chanz_place_cost_fac_ = vtr::NdMatrix<float , 4 >({grid_width, grid_height, grid_width, grid_height}, 0 .);
243
+ acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int , 2 >({grid_width, grid_height}, 0 .);
244
+
245
+ vtr::NdMatrix<float , 2 > tile_num_inter_die_conn ({grid_width, grid_height}, 0 .);
241
246
242
- vtr::NdMatrix<float , 2 > tile_num_inter_die_conn ({grid_width, grid_height}, 0 .);
247
+ /*
248
+ * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location
249
+ * in the device. We count all these edges, regardless of which layers they connect. Then we divide by
250
+ * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors
251
+ * what we do for the horizontal and vertical channels where we assume the channel width doesn't change
252
+ * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited
253
+ * if someday we have architectures with widely varying connectivity between different layers in a stack.
254
+ */
243
255
256
+ /*
257
+ * To calculate the accumulative number of inter-die connections we first need to get the number of
258
+ * inter-die connection per location. To be able to work for the cases that RR Graph is read instead
259
+ * of being made from the architecture file, we calculate this number by iterating over the RR graph. Once
260
+ * tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. First,
261
+ * we populate the first row and column. Then, we iterate over the rest of blocks and get the number of
262
+ * inter-die connections by adding up the number of inter-die block at that location + the accumulation
263
+ * for the block below and left to it. Then, since the accumulated number of inter-die connection to
264
+ * the block on the lower left connection of the block is added twice, that part needs to be removed.
265
+ */
244
266
for (const auto & src_rr_node : rr_graph.nodes ()) {
245
- for (const auto & rr_edge_idx : rr_graph.configurable_edges (src_rr_node)) {
267
+ for (const auto & rr_edge_idx : rr_graph.edges (src_rr_node)) {
246
268
const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
247
269
if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
248
270
// We assume that the nodes driving the inter-layer connection or being driven by it
249
- // are not streched across multiple tiles
271
+ // are not stretched across multiple tiles
250
272
int src_x = rr_graph.node_xhigh (src_rr_node);
251
273
int src_y = rr_graph.node_yhigh (src_rr_node);
252
274
VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_ylow (src_rr_node) == src_y);
253
275
254
276
tile_num_inter_die_conn[src_x][src_y]++;
255
277
}
256
278
}
279
+ }
257
280
258
- for (const auto & rr_edge_idx : rr_graph.non_configurable_edges (src_rr_node)) {
259
- const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
260
- if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
261
- int src_x = rr_graph.node_xhigh (src_rr_node);
262
- VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_xlow (src_rr_node) == src_x);
263
- int src_y = rr_graph.node_yhigh (src_rr_node);
264
- VTR_ASSERT (rr_graph.node_ylow (src_rr_node) == src_y && rr_graph.node_ylow (src_rr_node) == src_y);
265
- tile_num_inter_die_conn[src_x][src_y]++;
266
- }
281
+ int num_layers = device_ctx.grid .get_num_layers ();
282
+ for (size_t x = 0 ; x < device_ctx.grid .width (); x++) {
283
+ for (size_t y = 0 ; y < device_ctx.grid .height (); y++) {
284
+ tile_num_inter_die_conn[x][y] /= (num_layers-1 );
267
285
}
268
286
}
269
287
270
- for (int x_high = 0 ; x_high < (int )device_ctx.grid .width (); x_high++) {
271
- for (int y_high = 0 ; y_high < (int )device_ctx.grid .height (); y_high++) {
272
- for (int x_low = 0 ; x_low <= x_high; x_low++) {
273
- for (int y_low = 0 ; y_low <= y_high; y_low++) {
274
- int num_inter_die_conn = 0 ;
275
- for (int x = x_low; x <= x_high; x++) {
276
- for (int y = y_low; y <= y_high; y++) {
277
- num_inter_die_conn += tile_num_inter_die_conn[x][y];
278
- }
279
- }
280
- int seen_num_tiles = (x_high - x_low + 1 ) * (y_high - y_low + 1 );
281
- chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast <float >(num_inter_die_conn);
282
-
283
- chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = pow (
284
- (double )chanz_place_cost_fac_[x_high][y_high][x_low][y_low],
285
- (double )place_cost_exp);
286
- }
287
- }
288
+ // Step 2: Calculate prefix sum of the inter-die connectivity up to and including the channel at (x, y).
289
+ acc_tile_num_inter_die_conn_[0 ][0 ] = tile_num_inter_die_conn[0 ][0 ];
290
+ // Initialize the first row and column
291
+ for (size_t x = 1 ; x < device_ctx.grid .width (); x++) {
292
+ acc_tile_num_inter_die_conn_[x][0 ] = acc_tile_num_inter_die_conn_[x-1 ][0 ] +
293
+ tile_num_inter_die_conn[x][0 ];
294
+ }
295
+
296
+ for (size_t y = 1 ; y < device_ctx.grid .height (); y++) {
297
+ acc_tile_num_inter_die_conn_[0 ][y] = acc_tile_num_inter_die_conn_[0 ][y-1 ] +
298
+ tile_num_inter_die_conn[0 ][y];
299
+ }
300
+
301
+ for (size_t x_high = 1 ; x_high < device_ctx.grid .width (); x_high++) {
302
+ for (size_t y_high = 1 ; y_high < device_ctx.grid .height (); y_high++) {
303
+ acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1 ][y_high] +
304
+ acc_tile_num_inter_die_conn_[x_high][y_high-1 ] +
305
+ tile_num_inter_die_conn[x_high][y_high] -
306
+ acc_tile_num_inter_die_conn_[x_high-1 ][y_high-1 ];
288
307
}
289
308
}
290
309
}
@@ -818,7 +837,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
818
837
}
819
838
820
839
/* Now account for the layer motion. */
821
- if (num_layers > 1 ) {
840
+ if (is_multi_layer_ ) {
822
841
/* We need to update it only if multiple layers are available */
823
842
for (int layer_num = 0 ; layer_num < num_layers; layer_num++) {
824
843
num_sink_pin_layer_new[layer_num] = curr_num_sink_pin_layer[layer_num];
@@ -1402,8 +1421,6 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
1402
1421
1403
1422
const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move ().bb_coords [net_id];
1404
1423
1405
- const bool is_multi_layer = (g_vpr_ctx.device ().grid .get_num_layers () > 1 );
1406
-
1407
1424
double crossing = wirelength_crossing_count (cluster_ctx.clb_nlist .net_pins (net_id).size ());
1408
1425
1409
1426
/* Could insert a check for xmin == xmax. In that case, assume *
@@ -1420,12 +1437,14 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
1420
1437
*/
1421
1438
1422
1439
double ncost;
1423
- ncost = (bb.xmax - bb.xmin + 1 ) * crossing * chanx_place_cost_fac_[bb.ymax ][bb.ymin - 1 ];
1424
- ncost += (bb.ymax - bb.ymin + 1 ) * crossing * chany_place_cost_fac_[bb.xmax ][bb.xmin - 1 ];
1425
- if (is_multi_layer ) {
1426
- ncost += (bb.layer_max - bb.layer_min ) * crossing * chanz_place_cost_fac_[bb. xmax ][bb. ymax ][bb. xmin ][bb. ymin ] ;
1440
+ ncost = (bb.xmax - bb.xmin + 1 ) * chanx_place_cost_fac_[bb.ymax ][bb.ymin - 1 ];
1441
+ ncost += (bb.ymax - bb.ymin + 1 ) * chany_place_cost_fac_[bb.xmax ][bb.xmin - 1 ];
1442
+ if (is_multi_layer_ ) {
1443
+ ncost += (bb.layer_max - bb.layer_min ) * get_chanz_cost_factor_ (bb) ;
1427
1444
}
1428
1445
1446
+ ncost *= crossing;
1447
+
1429
1448
return ncost;
1430
1449
}
1431
1450
@@ -1526,6 +1545,39 @@ double NetCostHandler::get_net_wirelength_from_layer_bb_(ClusterNetId net_id) {
1526
1545
return ncost;
1527
1546
}
1528
1547
1548
+ float NetCostHandler::get_chanz_cost_factor_ (const t_bb& bb) {
1549
+ float place_cost_exp = placer_opts_.place_cost_exp ;
1550
+
1551
+ int num_inter_dir_conn;
1552
+
1553
+ if (bb.xmin == 0 && bb.ymin == 0 ) {
1554
+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymax ];
1555
+ } else if (bb.xmin == 0 ) {
1556
+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymax ] -
1557
+ acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymin -1 ];
1558
+ } else if (bb.ymin == 0 ) {
1559
+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymax ] -
1560
+ acc_tile_num_inter_die_conn_[bb.xmin -1 ][bb.ymax ];
1561
+ } else {
1562
+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymax ] -
1563
+ acc_tile_num_inter_die_conn_[bb.xmin -1 ][bb.ymax ] -
1564
+ acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymin -1 ] +
1565
+ acc_tile_num_inter_die_conn_[bb.xmin -1 ][bb.ymin -1 ];
1566
+ }
1567
+
1568
+ float z_cost_factor;
1569
+ if (num_inter_dir_conn == 0 ) {
1570
+ return 1 .0f ;
1571
+ } else {
1572
+ int bb_num_tiles = (bb.xmax - bb.xmin + 1 ) * (bb.ymax - bb.ymin + 1 );
1573
+ z_cost_factor = bb_num_tiles / static_cast <float >(num_inter_dir_conn);
1574
+ z_cost_factor = pow ((double )z_cost_factor, (double )place_cost_exp);
1575
+ }
1576
+
1577
+ return z_cost_factor;
1578
+
1579
+ }
1580
+
1529
1581
double NetCostHandler::recompute_bb_cost_ () {
1530
1582
double cost = 0 ;
1531
1583
0 commit comments