Skip to content

Commit c2dfb35

Browse files
maj160 authored and shssoichiro committed
Optimise sad_plane_internal
This is part of a series of commits authored by @maj160 to improve the performance of rav1e.

This commit results in an overall performance improvement of about 1% at speed 2 with 10-bit input:

```
Benchmark 1: ~/Downloads/rav1e_master -s 2 --quantizer 64 ~/xiph-media-files/objective-1-fast-10bit/speed_bag_640x360_60f.y4m -y -o /dev/null
  Time (mean ± σ):     43.492 s ± 0.088 s    [User: 43.514 s, System: 0.167 s]
  Range (min … max):   43.329 s … 43.603 s    10 runs

Benchmark 2: ~/Downloads/rav1e_mod -s 2 --quantizer 64 ~/xiph-media-files/objective-1-fast-10bit/speed_bag_640x360_60f.y4m -y -o /dev/null
  Time (mean ± σ):     43.062 s ± 0.086 s    [User: 43.068 s, System: 0.173 s]
  Range (min … max):   42.947 s … 43.224 s    10 runs
```
1 parent 55e5fdc commit c2dfb35

File tree

3 files changed: +5 -11 lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ dav1d-sys = { version = "0.7.0", optional = true }
88 | 88 |  aom-sys = { version = "0.3.2", optional = true }
89 | 89 |  scan_fmt = { version = "0.2.3", optional = true, default-features = false }
90 | 90 |  ivf = { version = "0.1", path = "ivf/", optional = true }
91 |    | -v_frame = "0.3.0"
   | 91 | +v_frame = "0.3.3"
92 | 92 |  av-metrics = { version = "0.9.0", optional = true, default-features = false }
93 | 93 |  rayon = { package = "maybe-rayon", version = "0.1", default-features = false }
94 | 94 |  crossbeam = { version = "0.8", optional = true }

src/sad_plane.rs

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -29,22 +29,16 @@ pub(crate) mod rust {
29 | 29 |      src: &Plane<T>, dst: &Plane<T>, _cpu: CpuFeatureLevel,
30 | 30 |    ) -> u64 {
31 | 31 |      debug_assert!(src.cfg.width == dst.cfg.width);
32 |    | -
33 |    | -    let width = src.cfg.width;
   | 32 | +    debug_assert!(src.cfg.height == dst.cfg.height);
34 | 33 |
35 | 34 |      src
36 | 35 |        .rows_iter()
37 | 36 |        .zip(dst.rows_iter())
38 | 37 |        .map(|(src, dst)| {
39 |    | -        let src = src.get(..width).unwrap_or(src);
40 |    | -        let dst = dst.get(..width).unwrap_or(dst);
41 |    | -
42 | 38 |          src
43 | 39 |            .iter()
44 | 40 |            .zip(dst.iter())
45 |    | -          .map(|(&p1, &p2)| {
46 |    | -            (i16::cast_from(p1) - i16::cast_from(p2)).unsigned_abs() as u32
47 |    | -          })
   | 41 | +          .map(|(&p1, &p2)| i32::cast_from(p1).abs_diff(i32::cast_from(p2)))
48 | 42 |            .sum::<u32>() as u64
49 | 43 |        })
50 | 44 |        .sum()

0 commit comments

Comments
 (0)