2525//! In other words, if we bit-reverse (over 32-bits) the operands then we bit-reverse (over 64-bits)
2626//! the result.
2727
28+ use super :: bmul;
2829use crate :: field_element:: FieldElement ;
30+ use core:: { array, ops:: BitXor } ;
2931
3032impl FieldElement {
33+ /// Compute the unreduced 256-bit carryless product of two 128-bit field elements using 32-bit
34+ /// limbs.
35+ ///
36+ /// Uses a Karatsuba decomposition in which the 128x128 multiplication is reduced to three 64x64
37+ /// multiplications, hence nine 32x32 multiplications. With the bit-reversal trick, we have to
38+ /// perform 18 32x32 multiplications.
39+ #[ inline]
40+ pub ( crate ) fn karatsuba_mul ( self , rhs : FieldElement ) -> Product {
41+ let hw = self . to_u32x4 ( ) ;
42+ let yw = rhs. to_u32x4 ( ) ;
43+ let hwr = [
44+ hw[ 0 ] . reverse_bits ( ) ,
45+ hw[ 1 ] . reverse_bits ( ) ,
46+ hw[ 2 ] . reverse_bits ( ) ,
47+ hw[ 3 ] . reverse_bits ( ) ,
48+ ] ;
49+
50+ // Karatsuba input decomposition for H
51+ let mut a = [ 0u32 ; 18 ] ;
52+ a[ 0 ] = yw[ 0 ] ;
53+ a[ 1 ] = yw[ 1 ] ;
54+ a[ 2 ] = yw[ 2 ] ;
55+ a[ 3 ] = yw[ 3 ] ;
56+ a[ 4 ] = a[ 0 ] ^ a[ 1 ] ;
57+ a[ 5 ] = a[ 2 ] ^ a[ 3 ] ;
58+ a[ 6 ] = a[ 0 ] ^ a[ 2 ] ;
59+ a[ 7 ] = a[ 1 ] ^ a[ 3 ] ;
60+ a[ 8 ] = a[ 6 ] ^ a[ 7 ] ;
61+ a[ 9 ] = yw[ 0 ] . reverse_bits ( ) ;
62+ a[ 10 ] = yw[ 1 ] . reverse_bits ( ) ;
63+ a[ 11 ] = yw[ 2 ] . reverse_bits ( ) ;
64+ a[ 12 ] = yw[ 3 ] . reverse_bits ( ) ;
65+ a[ 13 ] = a[ 9 ] ^ a[ 10 ] ;
66+ a[ 14 ] = a[ 11 ] ^ a[ 12 ] ;
67+ a[ 15 ] = a[ 9 ] ^ a[ 11 ] ;
68+ a[ 16 ] = a[ 10 ] ^ a[ 12 ] ;
69+ a[ 17 ] = a[ 15 ] ^ a[ 16 ] ;
70+
71+ // Karatsuba input decomposition for Y
72+ let mut b = [ 0u32 ; 18 ] ;
73+ b[ 0 ] = hw[ 0 ] ;
74+ b[ 1 ] = hw[ 1 ] ;
75+ b[ 2 ] = hw[ 2 ] ;
76+ b[ 3 ] = hw[ 3 ] ;
77+ b[ 4 ] = b[ 0 ] ^ b[ 1 ] ;
78+ b[ 5 ] = b[ 2 ] ^ b[ 3 ] ;
79+ b[ 6 ] = b[ 0 ] ^ b[ 2 ] ;
80+ b[ 7 ] = b[ 1 ] ^ b[ 3 ] ;
81+ b[ 8 ] = b[ 6 ] ^ b[ 7 ] ;
82+ b[ 9 ] = hwr[ 0 ] ;
83+ b[ 10 ] = hwr[ 1 ] ;
84+ b[ 11 ] = hwr[ 2 ] ;
85+ b[ 12 ] = hwr[ 3 ] ;
86+ b[ 13 ] = b[ 9 ] ^ b[ 10 ] ;
87+ b[ 14 ] = b[ 11 ] ^ b[ 12 ] ;
88+ b[ 15 ] = b[ 9 ] ^ b[ 11 ] ;
89+ b[ 16 ] = b[ 10 ] ^ b[ 12 ] ;
90+ b[ 17 ] = b[ 15 ] ^ b[ 16 ] ;
91+
92+ // 18 carryless 32x32 multiplications
93+ let mut c = [ 0u32 ; 18 ] ;
94+ for i in 0 ..18 {
95+ c[ i] = bmul ( a[ i] , b[ i] , 0x1111_1111 ) ;
96+ }
97+
98+ // Karatsuba recombination (normal)
99+ c[ 4 ] ^= c[ 0 ] ^ c[ 1 ] ;
100+ c[ 5 ] ^= c[ 2 ] ^ c[ 3 ] ;
101+ c[ 8 ] ^= c[ 6 ] ^ c[ 7 ] ;
102+
103+ // Karatsuba recombination (bit-reversed)
104+ c[ 13 ] ^= c[ 9 ] ^ c[ 10 ] ;
105+ c[ 14 ] ^= c[ 11 ] ^ c[ 12 ] ;
106+ c[ 17 ] ^= c[ 15 ] ^ c[ 16 ] ;
107+
108+ // Assemble the final 256-bit product as `u32` x 8.
109+ let zw0 = c[ 0 ] ;
110+ let zw1 = c[ 4 ] ^ c[ 9 ] . reverse_bits ( ) >> 1 ;
111+ let zw2 = c[ 1 ] ^ c[ 0 ] ^ c[ 2 ] ^ c[ 6 ] ^ c[ 13 ] . reverse_bits ( ) >> 1 ;
112+ let zw3 = c[ 4 ] ^ c[ 5 ] ^ c[ 8 ] ^ ( c[ 10 ] ^ c[ 9 ] ^ c[ 11 ] ^ c[ 15 ] ) . reverse_bits ( ) >> 1 ;
113+ let zw4 = c[ 2 ] ^ c[ 1 ] ^ c[ 3 ] ^ c[ 7 ] ^ ( c[ 13 ] ^ c[ 14 ] ^ c[ 17 ] ) . reverse_bits ( ) >> 1 ;
114+ let zw5 = c[ 5 ] ^ ( c[ 11 ] ^ c[ 10 ] ^ c[ 12 ] ^ c[ 16 ] ) . reverse_bits ( ) >> 1 ;
115+ let zw6 = c[ 3 ] ^ c[ 14 ] . reverse_bits ( ) >> 1 ;
116+ let zw7 = c[ 12 ] . reverse_bits ( ) >> 1 ;
117+ Product ( [ zw0, zw1, zw2, zw3, zw4, zw5, zw6, zw7] )
118+ }
119+
31120 #[ inline]
32121 fn from_u32x4 ( v : [ u32 ; 4 ] ) -> FieldElement {
33122 let mut bytes = [ 0u8 ; 16 ] ;
@@ -49,112 +138,35 @@ impl FieldElement {
49138 }
50139}
51140
52- /// Compute the unreduced 256-bit carryless product of two 128-bit field elements using 32-bit
53- /// limbs.
54- ///
55- /// Uses a Karatsuba decomposition in which the 128x128 multiplication is reduced to three 64x64
56- /// multiplications, hence nine 32x32 multiplications. With the bit-reversal trick, we have to
57- /// perform 18 32x32 multiplications.
58- #[ inline]
59- pub ( super ) fn karatsuba ( h : FieldElement , y : FieldElement ) -> [ u32 ; 8 ] {
60- let hw = h. to_u32x4 ( ) ;
61- let yw = y. to_u32x4 ( ) ;
62- let hwr = [
63- hw[ 0 ] . reverse_bits ( ) ,
64- hw[ 1 ] . reverse_bits ( ) ,
65- hw[ 2 ] . reverse_bits ( ) ,
66- hw[ 3 ] . reverse_bits ( ) ,
67- ] ;
68-
69- // Karatsuba input decomposition for H
70- let mut a = [ 0u32 ; 18 ] ;
71- a[ 0 ] = yw[ 0 ] ;
72- a[ 1 ] = yw[ 1 ] ;
73- a[ 2 ] = yw[ 2 ] ;
74- a[ 3 ] = yw[ 3 ] ;
75- a[ 4 ] = a[ 0 ] ^ a[ 1 ] ;
76- a[ 5 ] = a[ 2 ] ^ a[ 3 ] ;
77- a[ 6 ] = a[ 0 ] ^ a[ 2 ] ;
78- a[ 7 ] = a[ 1 ] ^ a[ 3 ] ;
79- a[ 8 ] = a[ 6 ] ^ a[ 7 ] ;
80- a[ 9 ] = yw[ 0 ] . reverse_bits ( ) ;
81- a[ 10 ] = yw[ 1 ] . reverse_bits ( ) ;
82- a[ 11 ] = yw[ 2 ] . reverse_bits ( ) ;
83- a[ 12 ] = yw[ 3 ] . reverse_bits ( ) ;
84- a[ 13 ] = a[ 9 ] ^ a[ 10 ] ;
85- a[ 14 ] = a[ 11 ] ^ a[ 12 ] ;
86- a[ 15 ] = a[ 9 ] ^ a[ 11 ] ;
87- a[ 16 ] = a[ 10 ] ^ a[ 12 ] ;
88- a[ 17 ] = a[ 15 ] ^ a[ 16 ] ;
89-
90- // Karatsuba input decomposition for Y
91- let mut b = [ 0u32 ; 18 ] ;
92- b[ 0 ] = hw[ 0 ] ;
93- b[ 1 ] = hw[ 1 ] ;
94- b[ 2 ] = hw[ 2 ] ;
95- b[ 3 ] = hw[ 3 ] ;
96- b[ 4 ] = b[ 0 ] ^ b[ 1 ] ;
97- b[ 5 ] = b[ 2 ] ^ b[ 3 ] ;
98- b[ 6 ] = b[ 0 ] ^ b[ 2 ] ;
99- b[ 7 ] = b[ 1 ] ^ b[ 3 ] ;
100- b[ 8 ] = b[ 6 ] ^ b[ 7 ] ;
101- b[ 9 ] = hwr[ 0 ] ;
102- b[ 10 ] = hwr[ 1 ] ;
103- b[ 11 ] = hwr[ 2 ] ;
104- b[ 12 ] = hwr[ 3 ] ;
105- b[ 13 ] = b[ 9 ] ^ b[ 10 ] ;
106- b[ 14 ] = b[ 11 ] ^ b[ 12 ] ;
107- b[ 15 ] = b[ 9 ] ^ b[ 11 ] ;
108- b[ 16 ] = b[ 10 ] ^ b[ 12 ] ;
109- b[ 17 ] = b[ 15 ] ^ b[ 16 ] ;
110-
111- // 18 carryless 32x32 multiplications
112- let mut c = [ 0u32 ; 18 ] ;
113- for i in 0 ..18 {
114- c[ i] = bmul32 ( a[ i] , b[ i] ) ;
115- }
141+ /// Unreduced 256-bit carryless product, stored as eight `u32` limbs ordered from the lowest-order limb (index 0) to the highest (index 7).
142+ pub ( crate ) struct Product ( [ u32 ; 8 ] ) ;
116143
117- // Karatsuba recombination (normal)
118- c[ 4 ] ^= c[ 0 ] ^ c[ 1 ] ;
119- c[ 5 ] ^= c[ 2 ] ^ c[ 3 ] ;
120- c[ 8 ] ^= c[ 6 ] ^ c[ 7 ] ;
121-
122- // Karatsuba recombination (bit-reversed)
123- c[ 13 ] ^= c[ 9 ] ^ c[ 10 ] ;
124- c[ 14 ] ^= c[ 11 ] ^ c[ 12 ] ;
125- c[ 17 ] ^= c[ 15 ] ^ c[ 16 ] ;
126-
127- // Assemble the final 256-bit product as `u32` x 8.
128- let zw0 = c[ 0 ] ;
129- let zw1 = c[ 4 ] ^ c[ 9 ] . reverse_bits ( ) >> 1 ;
130- let zw2 = c[ 1 ] ^ c[ 0 ] ^ c[ 2 ] ^ c[ 6 ] ^ c[ 13 ] . reverse_bits ( ) >> 1 ;
131- let zw3 = c[ 4 ] ^ c[ 5 ] ^ c[ 8 ] ^ ( c[ 10 ] ^ c[ 9 ] ^ c[ 11 ] ^ c[ 15 ] ) . reverse_bits ( ) >> 1 ;
132- let zw4 = c[ 2 ] ^ c[ 1 ] ^ c[ 3 ] ^ c[ 7 ] ^ ( c[ 13 ] ^ c[ 14 ] ^ c[ 17 ] ) . reverse_bits ( ) >> 1 ;
133- let zw5 = c[ 5 ] ^ ( c[ 11 ] ^ c[ 10 ] ^ c[ 12 ] ^ c[ 16 ] ) . reverse_bits ( ) >> 1 ;
134- let zw6 = c[ 3 ] ^ c[ 14 ] . reverse_bits ( ) >> 1 ;
135- let zw7 = c[ 12 ] . reverse_bits ( ) >> 1 ;
136- [ zw0, zw1, zw2, zw3, zw4, zw5, zw6, zw7]
137- }
144+ impl Product {
145+ /// Reduce the 256-bit carryless product of Karatsuba modulo the POLYVAL polynomial.
146+ ///
147+ /// This performs constant-time folding using shifts and XORs corresponding to the irreducible
148+ /// polynomial `x^128 + x^127 + x^126 + x^121 + 1`.
149+ ///
150+ /// This is closely related to GHASH reduction but the bit order is reversed in POLYVAL.
151+ #[ inline]
152+ pub ( crate ) fn mont_reduce ( self ) -> FieldElement {
153+ let mut zw = self . 0 ;
138154
139- /// Carryless multiplication in GF(2)[X], truncated to the low 32-bits.
140- # [ inline ]
141- fn bmul32 ( x : u32 , y : u32 ) -> u32 {
142- super :: bmul ( x , y , 0x1111_1111 )
143- }
155+ for i in 0 .. 4 {
156+ let lw = zw [ i ] ;
157+ zw [ i + 4 ] ^= lw ^ ( lw >> 1 ) ^ ( lw >> 2 ) ^ ( lw >> 7 ) ;
158+ zw [ i + 3 ] ^= ( lw << 31 ) ^ ( lw << 30 ) ^ ( lw << 25 ) ;
159+ }
144160
145- /// Reduce the 256-bit carryless product of Karatsuba modulo the POLYVAL polynomial.
146- ///
147- /// This performs constant-time folding using shifts and XORs corresponding to the irreducible
148- /// polynomial `x^128 + x^127 + x^126 + x^121 + 1`.
149- ///
150- /// This is closely related to GHASH reduction but the bit order is reversed in POLYVAL.
151- #[ inline]
152- pub ( super ) fn mont_reduce ( mut zw : [ u32 ; 8 ] ) -> FieldElement {
153- for i in 0 ..4 {
154- let lw = zw[ i] ;
155- zw[ i + 4 ] ^= lw ^ ( lw >> 1 ) ^ ( lw >> 2 ) ^ ( lw >> 7 ) ;
156- zw[ i + 3 ] ^= ( lw << 31 ) ^ ( lw << 30 ) ^ ( lw << 25 ) ;
161+ FieldElement :: from_u32x4 ( [ zw[ 4 ] , zw[ 5 ] , zw[ 6 ] , zw[ 7 ] ] )
157162 }
163+ }
164+
165+ impl BitXor for Product {
166+ type Output = Self ;
158167
159- FieldElement :: from_u32x4 ( [ zw[ 4 ] , zw[ 5 ] , zw[ 6 ] , zw[ 7 ] ] )
168+ #[ inline]
169+ fn bitxor ( self , rhs : Self ) -> Self :: Output {
170+ Self ( array:: from_fn ( |n| self . 0 [ n] ^ rhs. 0 [ n] ) )
171+ }
160172}
0 commit comments