Skip to content

Commit 1bb94b3

Browse files
committed
ec/suite_b: Thread cpu through scalar inverse.
1 parent 3a076df commit 1bb94b3

File tree

2 files changed

+92
-69
lines changed

2 files changed

+92
-69
lines changed

src/ec/suite_b/ops/p256.rs

Lines changed: 57 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps {
178178
};
179179

180180
#[allow(clippy::just_underscores_and_digits)]
181-
fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
181+
fn p256_scalar_inv_to_mont(a: Scalar<R>, cpu: cpu::Features) -> Scalar<R> {
182182
// Calculate the modular inverse of scalar |a| using Fermat's Little
183183
// Theorem:
184184
//
@@ -189,27 +189,34 @@ fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
189189
// 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f
190190

191191
#[inline]
192-
fn mul(a: &Scalar<R>, b: &Scalar<R>) -> Scalar<R> {
192+
fn mul(a: &Scalar<R>, b: &Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
193193
binary_op(p256_scalar_mul_mont, a, b)
194194
}
195195

196196
#[inline]
197-
fn sqr(a: &Scalar<R>) -> Scalar<R> {
197+
fn sqr(a: &Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
198198
let mut tmp = Scalar::zero();
199199
unsafe { p256_scalar_sqr_rep_mont(tmp.limbs.as_mut_ptr(), a.limbs.as_ptr(), 1) }
200200
tmp
201201
}
202202

203203
// Returns (`a` squared `squarings` times) * `b`.
204-
fn sqr_mul(a: &Scalar<R>, squarings: LeakyWord, b: &Scalar<R>) -> Scalar<R> {
204+
#[inline]
205+
fn sqr_mul(
206+
a: &Scalar<R>,
207+
squarings: LeakyWord,
208+
b: &Scalar<R>,
209+
cpu: cpu::Features,
210+
) -> Scalar<R> {
205211
debug_assert!(squarings >= 1);
206212
let mut tmp = Scalar::zero();
207213
unsafe { p256_scalar_sqr_rep_mont(tmp.limbs.as_mut_ptr(), a.limbs.as_ptr(), squarings) }
208-
mul(&tmp, b)
214+
mul(&tmp, b, cpu)
209215
}
210216

211217
// Sets `acc` = (`acc` squared `squarings` times) * `b`.
212-
fn sqr_mul_acc(acc: &mut Scalar<R>, squarings: LeakyWord, b: &Scalar<R>) {
218+
#[inline]
219+
fn sqr_mul_acc(acc: &mut Scalar<R>, squarings: LeakyWord, b: &Scalar<R>, _cpu: cpu::Features) {
213220
debug_assert!(squarings >= 1);
214221
{
215222
let acc = acc.limbs.as_mut_ptr();
@@ -220,60 +227,60 @@ fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
220227

221228
let _1 = &a;
222229

223-
let _10 = sqr(_1); // 2
224-
let _100 = sqr(&_10); // 4
225-
let _101 = mul(&_100, _1); // 5
226-
let _111 = mul(&_101, &_10); // 7
227-
228-
let _1000 = sqr(&_100); // 8
229-
let _10000 = sqr(&_1000); // 16
230-
let _100000 = sqr(&_10000); // 32
231-
232-
let _100111 = mul(&_111, &_100000); // 39 = 7 + 32
233-
let _101011 = mul(&_100, &_100111); // 43 = 4 + 39
234-
let _101111 = mul(&_100, &_101011); // 47 = 4 + 43
235-
let _1001111 = mul(&_100000, &_101111); // 79 = 32 + 47
236-
let _86 = sqr(&_101011); // 86 = 43 * 2
237-
let _1011011 = mul(&_101, &_86); // 91 = 5 + 86
238-
let _92 = mul(_1, &_1011011); // 92 = 1 + 91
239-
let _1100011 = mul(&_111, &_92); // 99 = 7 + 92
240-
let _10111111 = mul(&_92, &_1100011); // 191 = 92 + 99
241-
let _11011111 = mul(&_100000, &_10111111); // 223 = 32 + 191
242-
243-
let ff = mul(&_100000, &_11011111); // 255 = 32 + 223
244-
let ffff = sqr_mul(&ff, 0 + 8, &ff);
245-
let ffffffff = sqr_mul(&ffff, 0 + 16, &ffff);
230+
let _10 = sqr(_1, cpu); // 2
231+
let _100 = sqr(&_10, cpu); // 4
232+
let _101 = mul(&_100, _1, cpu); // 5
233+
let _111 = mul(&_101, &_10, cpu); // 7
234+
235+
let _1000 = sqr(&_100, cpu); // 8
236+
let _10000 = sqr(&_1000, cpu); // 16
237+
let _100000 = sqr(&_10000, cpu); // 32
238+
239+
let _100111 = mul(&_111, &_100000, cpu); // 39 = 7 + 32
240+
let _101011 = mul(&_100, &_100111, cpu); // 43 = 4 + 39
241+
let _101111 = mul(&_100, &_101011, cpu); // 47 = 4 + 43
242+
let _1001111 = mul(&_100000, &_101111, cpu); // 79 = 32 + 47
243+
let _86 = sqr(&_101011, cpu); // 86 = 43 * 2
244+
let _1011011 = mul(&_101, &_86, cpu); // 91 = 5 + 86
245+
let _92 = mul(_1, &_1011011, cpu); // 92 = 1 + 91
246+
let _1100011 = mul(&_111, &_92, cpu); // 99 = 7 + 92
247+
let _10111111 = mul(&_92, &_1100011, cpu); // 191 = 92 + 99
248+
let _11011111 = mul(&_100000, &_10111111, cpu); // 223 = 32 + 191
249+
250+
let ff = mul(&_100000, &_11011111, cpu); // 255 = 32 + 223
251+
let ffff = sqr_mul(&ff, 0 + 8, &ff, cpu);
252+
let ffffffff = sqr_mul(&ffff, 0 + 16, &ffff, cpu);
246253

247254
// ffffffff00000000ffffffff
248-
let mut acc = sqr_mul(&ffffffff, 32 + 32, &ffffffff);
255+
let mut acc = sqr_mul(&ffffffff, 32 + 32, &ffffffff, cpu);
249256

250257
// ffffffff00000000ffffffffffffffff
251-
sqr_mul_acc(&mut acc, 0 + 32, &ffffffff);
258+
sqr_mul_acc(&mut acc, 0 + 32, &ffffffff, cpu);
252259

253260
// The rest of the exponent, in binary, is:
254261
//
255262
// 1011110011100110111110101010110110100111000101111001111010000100
256263
// 1111001110111001110010101100001011111100011000110010010101001111
257264

258-
sqr_mul_acc(&mut acc, 6, &_101111);
259-
sqr_mul_acc(&mut acc, 2 + 3, &_111);
260-
sqr_mul_acc(&mut acc, 2 + 8, &_11011111);
261-
sqr_mul_acc(&mut acc, 1 + 3, &_101);
262-
sqr_mul_acc(&mut acc, 1 + 7, &_1011011);
263-
sqr_mul_acc(&mut acc, 1 + 6, &_100111);
264-
sqr_mul_acc(&mut acc, 3 + 6, &_101111);
265-
sqr_mul_acc(&mut acc, 2 + 3, &_111);
266-
sqr_mul_acc(&mut acc, 3, &_101);
267-
sqr_mul_acc(&mut acc, 4 + 7, &_1001111);
268-
sqr_mul_acc(&mut acc, 2 + 3, &_111);
269-
sqr_mul_acc(&mut acc, 1 + 3, &_111);
270-
sqr_mul_acc(&mut acc, 2 + 3, &_111);
271-
sqr_mul_acc(&mut acc, 2 + 6, &_101011);
272-
sqr_mul_acc(&mut acc, 4 + 8, &_10111111);
273-
sqr_mul_acc(&mut acc, 3 + 7, &_1100011);
274-
sqr_mul_acc(&mut acc, 2 + 1, _1);
275-
sqr_mul_acc(&mut acc, 2 + 3, &_101);
276-
sqr_mul_acc(&mut acc, 1 + 7, &_1001111);
265+
sqr_mul_acc(&mut acc, 6, &_101111, cpu);
266+
sqr_mul_acc(&mut acc, 2 + 3, &_111, cpu);
267+
sqr_mul_acc(&mut acc, 2 + 8, &_11011111, cpu);
268+
sqr_mul_acc(&mut acc, 1 + 3, &_101, cpu);
269+
sqr_mul_acc(&mut acc, 1 + 7, &_1011011, cpu);
270+
sqr_mul_acc(&mut acc, 1 + 6, &_100111, cpu);
271+
sqr_mul_acc(&mut acc, 3 + 6, &_101111, cpu);
272+
sqr_mul_acc(&mut acc, 2 + 3, &_111, cpu);
273+
sqr_mul_acc(&mut acc, 3, &_101, cpu);
274+
sqr_mul_acc(&mut acc, 4 + 7, &_1001111, cpu);
275+
sqr_mul_acc(&mut acc, 2 + 3, &_111, cpu);
276+
sqr_mul_acc(&mut acc, 1 + 3, &_111, cpu);
277+
sqr_mul_acc(&mut acc, 2 + 3, &_111, cpu);
278+
sqr_mul_acc(&mut acc, 2 + 6, &_101011, cpu);
279+
sqr_mul_acc(&mut acc, 4 + 8, &_10111111, cpu);
280+
sqr_mul_acc(&mut acc, 3 + 7, &_1100011, cpu);
281+
sqr_mul_acc(&mut acc, 2 + 1, _1, cpu);
282+
sqr_mul_acc(&mut acc, 2 + 3, &_101, cpu);
283+
sqr_mul_acc(&mut acc, 1 + 7, &_1001111, cpu);
277284

278285
acc
279286
}

src/ec/suite_b/ops/p384.rs

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps {
140140
scalar_inv_to_mont: p384_scalar_inv_to_mont,
141141
};
142142

143-
fn p384_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
143+
fn p384_scalar_inv_to_mont(a: Scalar<R>, cpu: cpu::Features) -> Scalar<R> {
144144
// Calculate the modular inverse of scalar |a| using Fermat's Little
145145
// Theorem:
146146
//
@@ -151,34 +151,40 @@ fn p384_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
151151
// 0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf\
152152
// 581a0db248b0a77aecec196accc52971
153153

154-
fn mul(a: &Scalar<R>, b: &Scalar<R>) -> Scalar<R> {
154+
fn mul(a: &Scalar<R>, b: &Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
155155
binary_op(p384_scalar_mul_mont, a, b)
156156
}
157157

158-
fn sqr(a: &Scalar<R>) -> Scalar<R> {
158+
fn sqr(a: &Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
159159
binary_op(p384_scalar_mul_mont, a, a)
160160
}
161161

162-
fn sqr_mut(a: &mut Scalar<R>) {
162+
fn sqr_mut(a: &mut Scalar<R>, _cpu: cpu::Features) {
163163
unary_op_from_binary_op_assign(p384_scalar_mul_mont, a);
164164
}
165165

166166
// Returns (`a` squared `squarings` times) * `b`.
167-
fn sqr_mul(a: &Scalar<R>, squarings: LeakyWord, b: &Scalar<R>) -> Scalar<R> {
167+
fn sqr_mul(
168+
a: &Scalar<R>,
169+
squarings: LeakyWord,
170+
b: &Scalar<R>,
171+
cpu: cpu::Features,
172+
) -> Scalar<R> {
168173
debug_assert!(squarings >= 1);
169-
let mut tmp = sqr(a);
174+
let mut tmp = sqr(a, cpu);
170175
for _ in 1..squarings {
171-
sqr_mut(&mut tmp);
176+
sqr_mut(&mut tmp, cpu);
172177
}
173-
mul(&tmp, b)
178+
mul(&tmp, b, cpu)
174179
}
175180

176181
// Sets `acc` = (`acc` squared `squarings` times) * `b`.
177-
fn sqr_mul_acc(acc: &mut Scalar<R>, squarings: LeakyWord, b: &Scalar<R>) {
182+
fn sqr_mul_acc(acc: &mut Scalar<R>, squarings: LeakyWord, b: &Scalar<R>, cpu: cpu::Features) {
178183
debug_assert!(squarings >= 1);
179184
for _ in 0..squarings {
180-
sqr_mut(acc);
185+
sqr_mut(acc, cpu);
181186
}
187+
let _: cpu::Features = cpu;
182188
binary_op_assign(p384_scalar_mul_mont, acc, b)
183189
}
184190

@@ -195,21 +201,26 @@ fn p384_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
195201

196202
let mut d = [Scalar::zero(); DIGIT_COUNT];
197203
d[B_1] = a;
198-
let b_10 = sqr(&d[B_1]);
204+
let b_10 = sqr(&d[B_1], cpu);
199205
for i in B_11..DIGIT_COUNT {
200-
d[i] = mul(&d[i - 1], &b_10);
206+
d[i] = mul(&d[i - 1], &b_10, cpu);
201207
}
202208

203-
let ff = sqr_mul(&d[B_1111], 0 + 4, &d[B_1111]);
204-
let ffff = sqr_mul(&ff, 0 + 8, &ff);
205-
let ffffffff = sqr_mul(&ffff, 0 + 16, &ffff);
209+
let ff = sqr_mul(&d[B_1111], 0 + 4, &d[B_1111], cpu);
210+
let ffff = sqr_mul(&ff, 0 + 8, &ff, cpu);
211+
let ffffffff = sqr_mul(&ffff, 0 + 16, &ffff, cpu);
206212

207-
let ffffffffffffffff = sqr_mul(&ffffffff, 0 + 32, &ffffffff);
213+
let ffffffffffffffff = sqr_mul(&ffffffff, 0 + 32, &ffffffff, cpu);
208214

209-
let ffffffffffffffffffffffff = sqr_mul(&ffffffffffffffff, 0 + 32, &ffffffff);
215+
let ffffffffffffffffffffffff = sqr_mul(&ffffffffffffffff, 0 + 32, &ffffffff, cpu);
210216

211217
// ffffffffffffffffffffffffffffffffffffffffffffffff
212-
let mut acc = sqr_mul(&ffffffffffffffffffffffff, 0 + 96, &ffffffffffffffffffffffff);
218+
let mut acc = sqr_mul(
219+
&ffffffffffffffffffffffff,
220+
0 + 96,
221+
&ffffffffffffffffffffffff,
222+
cpu,
223+
);
213224

214225
// The rest of the exponent, in binary, is:
215226
//
@@ -261,7 +272,12 @@ fn p384_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
261272
];
262273

263274
for &(squarings, digit) in &REMAINING_WINDOWS[..] {
264-
sqr_mul_acc(&mut acc, LeakyWord::from(squarings), &d[usize::from(digit)]);
275+
sqr_mul_acc(
276+
&mut acc,
277+
LeakyWord::from(squarings),
278+
&d[usize::from(digit)],
279+
cpu,
280+
);
265281
}
266282

267283
acc

0 commit comments

Comments
 (0)