Skip to content

Commit 55e5fdc

Browse files
maj160 authored and shssoichiro committed
Save some bounds checks on me_stats
This is part of a series of commits authored by @maj160 to improve performance of rav1e.
1 parent 3ffd407 commit 55e5fdc

File tree

2 files changed

+65
-54
lines changed

2 files changed

+65
-54
lines changed

src/api/internal.rs

Lines changed: 60 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -912,61 +912,67 @@ impl<T: Pixel> ContextInner<T> {
912912

913913
lookahead_intra_costs_lines
914914
.zip(block_importances_lines)
915+
.zip(me_stats.rows_iter().step_by(2))
915916
.enumerate()
916-
.flat_map(|(y, (lookahead_intra_costs, block_importances))| {
917-
lookahead_intra_costs
918-
.iter()
919-
.zip(block_importances.iter())
920-
.enumerate()
921-
.map(move |(x, (&intra_cost, &future_importance))| {
922-
let mv = me_stats[y * 2][x * 2].mv;
923-
924-
// Coordinates of the top-left corner of the reference block, in MV
925-
// units.
926-
let reference_x =
927-
x as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.col as i64;
928-
let reference_y =
929-
y as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.row as i64;
930-
931-
let region_org = plane_org.region(Area::Rect {
932-
x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
933-
y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
934-
width: IMPORTANCE_BLOCK_SIZE,
935-
height: IMPORTANCE_BLOCK_SIZE,
936-
});
937-
938-
let region_ref = plane_ref.region(Area::Rect {
939-
x: reference_x as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
940-
y: reference_y as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
941-
width: IMPORTANCE_BLOCK_SIZE,
942-
height: IMPORTANCE_BLOCK_SIZE,
943-
});
944-
945-
let inter_cost = get_satd(
946-
&region_org,
947-
&region_ref,
948-
bsize.width(),
949-
bsize.height(),
950-
bit_depth,
951-
fi.cpu_feature_level,
952-
) as f32;
953-
954-
let intra_cost = intra_cost as f32;
955-
// let intra_cost = lookahead_intra_costs[x] as f32;
956-
// let future_importance = block_importances[x];
957-
958-
let propagate_fraction = if intra_cost <= inter_cost {
959-
0.
960-
} else {
961-
1. - inter_cost / intra_cost
962-
};
963-
964-
let propagate_amount = (intra_cost + future_importance)
965-
* propagate_fraction
966-
/ len as f32;
967-
(propagate_amount, reference_x, reference_y)
968-
})
969-
})
917+
.flat_map(
918+
|(y, ((lookahead_intra_costs, block_importances), me_stats_line))| {
919+
lookahead_intra_costs
920+
.iter()
921+
.zip(block_importances.iter())
922+
.zip(me_stats_line.iter().step_by(2))
923+
.enumerate()
924+
.map(move |(x, ((&intra_cost, &future_importance), &me_stat))| {
925+
let mv = me_stat.mv;
926+
927+
// Coordinates of the top-left corner of the reference block, in MV
928+
// units.
929+
let reference_x =
930+
x as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.col as i64;
931+
let reference_y =
932+
y as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.row as i64;
933+
934+
let region_org = plane_org.region(Area::Rect {
935+
x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
936+
y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
937+
width: IMPORTANCE_BLOCK_SIZE,
938+
height: IMPORTANCE_BLOCK_SIZE,
939+
});
940+
941+
let region_ref = plane_ref.region(Area::Rect {
942+
x: reference_x as isize
943+
/ IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
944+
y: reference_y as isize
945+
/ IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
946+
width: IMPORTANCE_BLOCK_SIZE,
947+
height: IMPORTANCE_BLOCK_SIZE,
948+
});
949+
950+
let inter_cost = get_satd(
951+
&region_org,
952+
&region_ref,
953+
bsize.width(),
954+
bsize.height(),
955+
bit_depth,
956+
fi.cpu_feature_level,
957+
) as f32;
958+
959+
let intra_cost = intra_cost as f32;
960+
// let intra_cost = lookahead_intra_costs[x] as f32;
961+
// let future_importance = block_importances[x];
962+
963+
let propagate_fraction = if intra_cost <= inter_cost {
964+
0.
965+
} else {
966+
1. - inter_cost / intra_cost
967+
};
968+
969+
let propagate_amount = (intra_cost + future_importance)
970+
* propagate_fraction
971+
/ len as f32;
972+
(propagate_amount, reference_x, reference_y)
973+
})
974+
},
975+
)
970976
.for_each(|(propagate_amount, reference_x, reference_y)| {
971977
let mut propagate =
972978
|block_x_in_mv_units, block_y_in_mv_units, fraction| {

src/me.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,11 @@ pub type WriteGuardMEStats<'a> =
5454
RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>;
5555

5656
impl FrameMEStats {
57+
#[inline]
58+
pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> {
59+
self.stats.chunks_exact(self.cols)
60+
}
61+
5762
pub fn new(cols: usize, rows: usize) -> Self {
5863
Self {
5964
// dynamic allocation: once per frame

0 commit comments

Comments
 (0)