@@ -178,7 +178,7 @@ pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps {
178178} ;
179179
180180#[ allow( clippy:: just_underscores_and_digits) ]
181- fn p256_scalar_inv_to_mont ( a : Scalar < R > , _cpu : cpu:: Features ) -> Scalar < R > {
181+ fn p256_scalar_inv_to_mont ( a : Scalar < R > , cpu : cpu:: Features ) -> Scalar < R > {
182182 // Calculate the modular inverse of scalar |a| using Fermat's Little
183183 // Theorem:
184184 //
@@ -189,27 +189,34 @@ fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
189189 // 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f
190190
191191 #[ inline]
192- fn mul ( a : & Scalar < R > , b : & Scalar < R > ) -> Scalar < R > {
192+ fn mul ( a : & Scalar < R > , b : & Scalar < R > , _cpu : cpu :: Features ) -> Scalar < R > {
193193 binary_op ( p256_scalar_mul_mont, a, b)
194194 }
195195
196196 #[ inline]
197- fn sqr ( a : & Scalar < R > ) -> Scalar < R > {
197+ fn sqr ( a : & Scalar < R > , _cpu : cpu :: Features ) -> Scalar < R > {
198198 let mut tmp = Scalar :: zero ( ) ;
199199 unsafe { p256_scalar_sqr_rep_mont ( tmp. limbs . as_mut_ptr ( ) , a. limbs . as_ptr ( ) , 1 ) }
200200 tmp
201201 }
202202
203203 // Returns (`a` squared `squarings` times) * `b`.
204- fn sqr_mul ( a : & Scalar < R > , squarings : LeakyWord , b : & Scalar < R > ) -> Scalar < R > {
204+ #[ inline]
205+ fn sqr_mul (
206+ a : & Scalar < R > ,
207+ squarings : LeakyWord ,
208+ b : & Scalar < R > ,
209+ cpu : cpu:: Features ,
210+ ) -> Scalar < R > {
205211 debug_assert ! ( squarings >= 1 ) ;
206212 let mut tmp = Scalar :: zero ( ) ;
207213 unsafe { p256_scalar_sqr_rep_mont ( tmp. limbs . as_mut_ptr ( ) , a. limbs . as_ptr ( ) , squarings) }
208- mul ( & tmp, b)
214+ mul ( & tmp, b, cpu )
209215 }
210216
211217 // Sets `acc` = (`acc` squared `squarings` times) * `b`.
212- fn sqr_mul_acc ( acc : & mut Scalar < R > , squarings : LeakyWord , b : & Scalar < R > ) {
218+ #[ inline]
219+ fn sqr_mul_acc ( acc : & mut Scalar < R > , squarings : LeakyWord , b : & Scalar < R > , _cpu : cpu:: Features ) {
213220 debug_assert ! ( squarings >= 1 ) ;
214221 {
215222 let acc = acc. limbs . as_mut_ptr ( ) ;
@@ -220,60 +227,60 @@ fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
220227
221228 let _1 = & a;
222229
223- let _10 = sqr ( _1) ; // 2
224- let _100 = sqr ( & _10) ; // 4
225- let _101 = mul ( & _100, _1) ; // 5
226- let _111 = mul ( & _101, & _10) ; // 7
227-
228- let _1000 = sqr ( & _100) ; // 8
229- let _10000 = sqr ( & _1000) ; // 16
230- let _100000 = sqr ( & _10000) ; // 32
231-
232- let _100111 = mul ( & _111, & _100000) ; // 39 = 7 + 32
233- let _101011 = mul ( & _100, & _100111) ; // 43 = 4 + 39
234- let _101111 = mul ( & _100, & _101011) ; // 47 = 4 + 43
235- let _1001111 = mul ( & _100000, & _101111) ; // 79 = 32 + 47
236- let _86 = sqr ( & _101011) ; // 86 = 43 * 2
237- let _1011011 = mul ( & _101, & _86) ; // 91 = 5 + 86
238- let _92 = mul ( _1, & _1011011) ; // 92 = 1 + 91
239- let _1100011 = mul ( & _111, & _92) ; // 99 = 7 + 92
240- let _10111111 = mul ( & _92, & _1100011) ; // 191 = 92 + 99
241- let _11011111 = mul ( & _100000, & _10111111) ; // 223 = 32 + 191
242-
243- let ff = mul ( & _100000, & _11011111) ; // 255 = 32 + 223
244- let ffff = sqr_mul ( & ff, 0 + 8 , & ff) ;
245- let ffffffff = sqr_mul ( & ffff, 0 + 16 , & ffff) ;
230+ let _10 = sqr ( _1, cpu ) ; // 2
231+ let _100 = sqr ( & _10, cpu ) ; // 4
232+ let _101 = mul ( & _100, _1, cpu ) ; // 5
233+ let _111 = mul ( & _101, & _10, cpu ) ; // 7
234+
235+ let _1000 = sqr ( & _100, cpu ) ; // 8
236+ let _10000 = sqr ( & _1000, cpu ) ; // 16
237+ let _100000 = sqr ( & _10000, cpu ) ; // 32
238+
239+ let _100111 = mul ( & _111, & _100000, cpu ) ; // 39 = 7 + 32
240+ let _101011 = mul ( & _100, & _100111, cpu ) ; // 43 = 4 + 39
241+ let _101111 = mul ( & _100, & _101011, cpu ) ; // 47 = 4 + 43
242+ let _1001111 = mul ( & _100000, & _101111, cpu ) ; // 79 = 32 + 47
243+ let _86 = sqr ( & _101011, cpu ) ; // 86 = 43 * 2
244+ let _1011011 = mul ( & _101, & _86, cpu ) ; // 91 = 5 + 86
245+ let _92 = mul ( _1, & _1011011, cpu ) ; // 92 = 1 + 91
246+ let _1100011 = mul ( & _111, & _92, cpu ) ; // 99 = 7 + 92
247+ let _10111111 = mul ( & _92, & _1100011, cpu ) ; // 191 = 92 + 99
248+ let _11011111 = mul ( & _100000, & _10111111, cpu ) ; // 223 = 32 + 191
249+
250+ let ff = mul ( & _100000, & _11011111, cpu ) ; // 255 = 32 + 223
251+ let ffff = sqr_mul ( & ff, 0 + 8 , & ff, cpu ) ;
252+ let ffffffff = sqr_mul ( & ffff, 0 + 16 , & ffff, cpu ) ;
246253
247254 // ffffffff00000000ffffffff
248- let mut acc = sqr_mul ( & ffffffff, 32 + 32 , & ffffffff) ;
255+ let mut acc = sqr_mul ( & ffffffff, 32 + 32 , & ffffffff, cpu ) ;
249256
250257 // ffffffff00000000ffffffffffffffff
251- sqr_mul_acc ( & mut acc, 0 + 32 , & ffffffff) ;
258+ sqr_mul_acc ( & mut acc, 0 + 32 , & ffffffff, cpu ) ;
252259
253260 // The rest of the exponent, in binary, is:
254261 //
255262 // 1011110011100110111110101010110110100111000101111001111010000100
256263 // 1111001110111001110010101100001011111100011000110010010101001111
257264
258- sqr_mul_acc ( & mut acc, 6 , & _101111) ;
259- sqr_mul_acc ( & mut acc, 2 + 3 , & _111) ;
260- sqr_mul_acc ( & mut acc, 2 + 8 , & _11011111) ;
261- sqr_mul_acc ( & mut acc, 1 + 3 , & _101) ;
262- sqr_mul_acc ( & mut acc, 1 + 7 , & _1011011) ;
263- sqr_mul_acc ( & mut acc, 1 + 6 , & _100111) ;
264- sqr_mul_acc ( & mut acc, 3 + 6 , & _101111) ;
265- sqr_mul_acc ( & mut acc, 2 + 3 , & _111) ;
266- sqr_mul_acc ( & mut acc, 3 , & _101) ;
267- sqr_mul_acc ( & mut acc, 4 + 7 , & _1001111) ;
268- sqr_mul_acc ( & mut acc, 2 + 3 , & _111) ;
269- sqr_mul_acc ( & mut acc, 1 + 3 , & _111) ;
270- sqr_mul_acc ( & mut acc, 2 + 3 , & _111) ;
271- sqr_mul_acc ( & mut acc, 2 + 6 , & _101011) ;
272- sqr_mul_acc ( & mut acc, 4 + 8 , & _10111111) ;
273- sqr_mul_acc ( & mut acc, 3 + 7 , & _1100011) ;
274- sqr_mul_acc ( & mut acc, 2 + 1 , _1) ;
275- sqr_mul_acc ( & mut acc, 2 + 3 , & _101) ;
276- sqr_mul_acc ( & mut acc, 1 + 7 , & _1001111) ;
265+ sqr_mul_acc ( & mut acc, 6 , & _101111, cpu ) ;
266+ sqr_mul_acc ( & mut acc, 2 + 3 , & _111, cpu ) ;
267+ sqr_mul_acc ( & mut acc, 2 + 8 , & _11011111, cpu ) ;
268+ sqr_mul_acc ( & mut acc, 1 + 3 , & _101, cpu ) ;
269+ sqr_mul_acc ( & mut acc, 1 + 7 , & _1011011, cpu ) ;
270+ sqr_mul_acc ( & mut acc, 1 + 6 , & _100111, cpu ) ;
271+ sqr_mul_acc ( & mut acc, 3 + 6 , & _101111, cpu ) ;
272+ sqr_mul_acc ( & mut acc, 2 + 3 , & _111, cpu ) ;
273+ sqr_mul_acc ( & mut acc, 3 , & _101, cpu ) ;
274+ sqr_mul_acc ( & mut acc, 4 + 7 , & _1001111, cpu ) ;
275+ sqr_mul_acc ( & mut acc, 2 + 3 , & _111, cpu ) ;
276+ sqr_mul_acc ( & mut acc, 1 + 3 , & _111, cpu ) ;
277+ sqr_mul_acc ( & mut acc, 2 + 3 , & _111, cpu ) ;
278+ sqr_mul_acc ( & mut acc, 2 + 6 , & _101011, cpu ) ;
279+ sqr_mul_acc ( & mut acc, 4 + 8 , & _10111111, cpu ) ;
280+ sqr_mul_acc ( & mut acc, 3 + 7 , & _1100011, cpu ) ;
281+ sqr_mul_acc ( & mut acc, 2 + 1 , _1, cpu ) ;
282+ sqr_mul_acc ( & mut acc, 2 + 3 , & _101, cpu ) ;
283+ sqr_mul_acc ( & mut acc, 1 + 7 , & _1001111, cpu ) ;
277284
278285 acc
279286}
0 commit comments