Skip to content

Commit 1a42ef8

Browse files
committed
ascon: improve performance of permutation
Use a slightly different substitution implementation that is slightly more efficient. This does require that the function is now strictly separated into the three layers. The assembly with `-C opt-level=2` shows no weird instructions: `permute_12` without `soft-compat` is fully unrolled, and the conversion from and to arrays is skipped. With `soft-compat` it is not unrolled, but the conversion from and to arrays between the iterations is also skipped. Benchmarks with and without `soft-compat` are quite similar (to the point that I'm guessing I'm not even running it correctly). Permutation/1 round time: [4.6049 ns 4.6597 ns 4.7094 ns] change: [-17.040% -15.975% -14.976%] (p = 0.00 < 0.05) Permutation/6 rounds time: [17.179 ns 17.301 ns 17.452 ns] change: [-27.748% -26.520% -25.229%] (p = 0.00 < 0.05) Permutation/8 rounds time: [23.835 ns 23.979 ns 24.109 ns] change: [-24.968% -24.238% -23.588%] (p = 0.00 < 0.05) Permutation/12 rounds time: [33.011 ns 33.458 ns 33.882 ns] change: [-25.656% -24.646% -23.615%] (p = 0.00 < 0.05)
1 parent 8b2c41b commit 1a42ef8

File tree

2 files changed

+32
-28
lines changed

2 files changed

+32
-28
lines changed

ascon/src/lib.rs

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -34,32 +34,39 @@ pub struct State {
3434

3535
/// Ascon's round function
3636
const fn round(x: [u64; 5], c: u64) -> [u64; 5] {
37-
// S-box layer
38-
let x0 = x[0] ^ x[4];
39-
let x2 = x[2] ^ x[1] ^ c; // with round constant
40-
let x4 = x[4] ^ x[3];
41-
42-
let tx0 = x0 ^ (!x[1] & x2);
43-
let tx1 = x[1] ^ (!x2 & x[3]);
44-
let tx2 = x2 ^ (!x[3] & x4);
45-
let tx3 = x[3] ^ (!x4 & x0);
46-
let tx4 = x4 ^ (!x0 & x[1]);
47-
let tx1 = tx1 ^ tx0;
48-
let tx3 = tx3 ^ tx2;
49-
let tx0 = tx0 ^ tx4;
50-
51-
// linear layer
52-
let x0 = tx0 ^ tx0.rotate_right(9);
53-
let x1 = tx1 ^ tx1.rotate_right(22);
54-
let x2 = tx2 ^ tx2.rotate_right(5);
55-
let x3 = tx3 ^ tx3.rotate_right(7);
56-
let x4 = tx4 ^ tx4.rotate_right(34);
37+
let (mut x0, mut x1, mut x3, mut x4) = (x[0], x[1], x[3], x[4]);
38+
39+
// Addition of Constants
40+
let mut x2 = x[2] ^ c;
41+
42+
// Substitution Layer.
43+
// BGC Optimized Implementations from:
44+
// Optimizing S-box Implementations Using SAT Solvers: Revisited
45+
// https://eprint.iacr.org/2023/1721.pdf
46+
let t0 = x0 ^ x4;
47+
let t1 = !x4;
48+
let t2 = t1 | x3;
49+
let t3 = x1 ^ x2;
50+
let t4 = x3 ^ x2;
51+
let t5 = x3 ^ x4;
52+
let t6 = t0 | x1;
53+
let t7 = x0 | t5;
54+
let t8 = t4 | t3;
55+
x1 = t0 ^ t8;
56+
x3 = t3 ^ t7;
57+
let t11 = x2 & t3;
58+
let t12 = t6 ^ t5;
59+
x2 = t3 ^ t2;
60+
x0 = t12 ^ t11;
61+
x4 = t0 ^ t12;
62+
63+
// Linear Diffusion Layer
5764
[
58-
tx0 ^ x0.rotate_right(19),
59-
tx1 ^ x1.rotate_right(39),
60-
!(tx2 ^ x2.rotate_right(1)),
61-
tx3 ^ x3.rotate_right(10),
62-
tx4 ^ x4.rotate_right(7),
65+
x0 ^ x0.rotate_right(19) ^ x0.rotate_right(28),
66+
x1 ^ x1.rotate_right(61) ^ x1.rotate_right(39),
67+
x2 ^ x2.rotate_right(1) ^ x2.rotate_right(6),
68+
x3 ^ x3.rotate_right(10) ^ x3.rotate_right(17),
69+
x4 ^ x4.rotate_right(7) ^ x4.rotate_right(41),
6370
]
6471
}
6572

benches/Cargo.toml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,6 @@ rand = { version = "0.8", default-features = false, features = [
2020
"getrandom",
2121
] }
2222

23-
[features]
24-
no_unroll = ["ascon/no_unroll"]
25-
2623
[[bench]]
2724
name = "ascon"
2825
path = "src/ascon.rs"

0 commit comments

Comments
 (0)