@@ -25,11 +25,8 @@ macro_rules! assert_lengths_eq {
2525 } ;
2626}
2727
28- /// Optimized implementation for byte slices which coalesces them into word-sized chunks first,
29- /// then performs [`Cmov`] at the word-level to cut down on the total number of instructions.
30- ///
31- /// # Panics
32- /// - if slices have unequal lengths
28+ // Optimized implementation for byte slices which coalesces them into word-sized chunks first,
29+ // then performs [`Cmov`] at the word-level to cut down on the total number of instructions.
3330impl Cmov for [ u8 ] {
3431 #[ inline]
3532 #[ track_caller]
@@ -50,11 +47,8 @@ impl Cmov for [u8] {
5047 }
5148}
5249
53- /// Optimized implementation for slices of `u16` which coalesces them into word-sized chunks first,
54- /// then performs [`Cmov`] at the word-level to cut down on the total number of instructions.
55- ///
56- /// # Panics
57- /// - if slices have unequal lengths
50+ // Optimized implementation for slices of `u16` which coalesces them into word-sized chunks first,
51+ // then performs [`Cmov`] at the word-level to cut down on the total number of instructions.
5852#[ cfg( not( target_pointer_width = "64" ) ) ]
5953#[ cfg_attr( docsrs, doc( cfg( true ) ) ) ]
6054impl Cmov for [ u16 ] {
@@ -78,11 +72,8 @@ impl Cmov for [u16] {
7872 }
7973}
8074
81- /// Optimized implementation for slices of `u16` which coalesces them into word-sized chunks first,
82- /// then performs [`Cmov`] at the word-level to cut down on the total number of instructions.
83- ///
84- /// # Panics
85- /// - if slices have unequal lengths
75+ // Optimized implementation for slices of `u16` which coalesces them into word-sized chunks first,
76+ // then performs [`Cmov`] at the word-level to cut down on the total number of instructions.
8677#[ cfg( target_pointer_width = "64" ) ]
8778#[ cfg_attr( docsrs, doc( cfg( true ) ) ) ]
8879impl Cmov for [ u16 ] {
@@ -116,90 +107,36 @@ impl Cmov for [u16] {
116107 }
117108}
118109
119- /// Implementation for slices of `u32` on 32-bit platforms, where we can just loop.
120- ///
121- /// # Panics
122- /// - if slices have unequal lengths
123- #[ cfg( not( target_pointer_width = "64" ) ) ]
124- #[ cfg_attr( docsrs, doc( cfg( true ) ) ) ]
125- impl Cmov for [ u32 ] {
126- #[ inline]
127- #[ track_caller]
128- fn cmovnz ( & mut self , value : & Self , condition : Condition ) {
129- assert_lengths_eq ! ( self . len( ) , value. len( ) ) ;
130-
131- for ( a, b) in self . iter_mut ( ) . zip ( value. iter ( ) ) {
132- a. cmovnz ( b, condition) ;
133- }
134- }
135- }
136-
137- /// Optimized implementation for slices of `u32` which coalesces them into word-sized chunks first,
138- /// then performs [`Cmov`] at the word-level to cut down on the total number of instructions.
139- ///
140- /// # Panics
141- /// - if slices have unequal lengths
142- #[ cfg( target_pointer_width = "64" ) ]
143- #[ cfg_attr( docsrs, doc( cfg( true ) ) ) ]
144- impl Cmov for [ u32 ] {
145- #[ inline]
146- #[ track_caller]
147- fn cmovnz ( & mut self , value : & Self , condition : Condition ) {
148- assert_lengths_eq ! ( self . len( ) , value. len( ) ) ;
149-
150- let ( dst_chunks, dst_remainder) = slice_as_chunks_mut :: < u32 , 2 > ( self ) ;
151- let ( src_chunks, src_remainder) = slice_as_chunks :: < u32 , 2 > ( value) ;
152-
153- for ( dst_chunk, src_chunk) in dst_chunks. iter_mut ( ) . zip ( src_chunks. iter ( ) ) {
154- let mut a = Word :: from ( dst_chunk[ 0 ] ) | ( Word :: from ( dst_chunk[ 1 ] ) << 32 ) ;
155- let b = Word :: from ( src_chunk[ 0 ] ) | ( Word :: from ( src_chunk[ 1 ] ) << 32 ) ;
156- a. cmovnz ( & b, condition) ;
157- dst_chunk[ 0 ] = ( a & 0xFFFF_FFFF ) as u32 ;
158- dst_chunk[ 1 ] = ( a >> 32 ) as u32 ;
159- }
160-
161- cmovnz_remainder ( dst_remainder, src_remainder, condition) ;
162- }
163- }
164-
165110/// Implement [`Cmov`] using a simple loop.
166111macro_rules! impl_cmov_with_loop {
167- ( $int: ty, $doc: expr) => {
168- #[ doc = $doc]
169- #[ doc = "# Panics" ]
170- #[ doc = "- if slices have unequal lengths" ]
171- impl Cmov for [ $int] {
172- #[ inline]
173- #[ track_caller]
174- fn cmovnz( & mut self , value: & Self , condition: Condition ) {
175- assert_lengths_eq!( self . len( ) , value. len( ) ) ;
176- for ( a, b) in self . iter_mut( ) . zip( value. iter( ) ) {
177- a. cmovnz( b, condition) ;
112+ ( $( $int: ty) ,+ ) => {
113+ $(
114+ impl Cmov for [ $int] {
115+ #[ inline]
116+ #[ track_caller]
117+ fn cmovnz( & mut self , value: & Self , condition: Condition ) {
118+ assert_lengths_eq!( self . len( ) , value. len( ) ) ;
119+ for ( a, b) in self . iter_mut( ) . zip( value. iter( ) ) {
120+ a. cmovnz( b, condition) ;
121+ }
178122 }
179123 }
180- }
124+ ) +
181125 } ;
182126}
183127
184- impl_cmov_with_loop ! (
185- u64 ,
186- "Implementation for `u64` slices where we can just loop."
187- ) ;
188- impl_cmov_with_loop ! (
189- u128 ,
190- "Implementation for `u128` slices where we can just loop."
191- ) ;
128+ impl_cmov_with_loop ! ( u32 , u64 , u128 ) ;
192129
193130macro_rules! assert_size_and_alignment_eq {
194- ( $signed : ty, $unsigned : ty) => {
131+ ( $int : ty, $uint : ty) => {
195132 const {
196133 assert!(
197- size_of:: <$signed >( ) == size_of:: <$unsigned >( ) ,
134+ size_of:: <$int >( ) == size_of:: <$uint >( ) ,
198135 "integers are of unequal size"
199136 ) ;
200137
201138 assert!(
202- align_of:: <$signed >( ) == align_of:: <$unsigned >( ) ,
139+ align_of:: <$int >( ) == align_of:: <$uint >( ) ,
203140 "integers have unequal alignment"
204141 ) ;
205142 }
@@ -208,34 +145,20 @@ macro_rules! assert_size_and_alignment_eq {
208145
209146/// Implement [`Cmov`] for a signed type by invoking the corresponding unsigned impl.
210147macro_rules! impl_cmov_for_signed_with_unsigned {
211- ( $signed: ty, $unsigned: ty) => {
212- impl_cmov_for_signed_with_unsigned!(
213- $signed,
214- $unsigned,
215- "Delegating implementation of `Cmov` for signed type which delegates to unsigned."
216- ) ;
217- } ;
218- ( $signed: ty, $unsigned: ty, $doc: expr) => {
219- #[ doc = $doc]
220- #[ doc = "# Panics" ]
221- #[ doc = "- if slices have unequal lengths" ]
222- impl Cmov for [ $signed] {
148+ ( $int: ty, $uint: ty) => {
149+ impl Cmov for [ $int] {
223150 #[ inline]
224151 #[ track_caller]
152+ #[ allow( unsafe_code) ]
225153 fn cmovnz( & mut self , value: & Self , condition: Condition ) {
226- assert_size_and_alignment_eq!( $signed , $unsigned ) ;
154+ assert_size_and_alignment_eq!( $int , $uint ) ;
227155
228156 // SAFETY:
229157 // - Slices being constructed are of same-sized integers as asserted above.
230158 // - We source the slice length directly from the other valid slice.
231- #[ allow( unsafe_code) ]
232- let ( self_unsigned, value_unsigned) = unsafe {
233- (
234- slice:: from_raw_parts_mut( self . as_mut_ptr( ) as * mut $unsigned, self . len( ) ) ,
235- slice:: from_raw_parts( value. as_ptr( ) as * const $unsigned, value. len( ) ) ,
236- )
237- } ;
238159
160+ let self_unsigned = unsafe { cast_slice_mut:: <$int, $uint>( self ) } ;
161+ let value_unsigned = unsafe { cast_slice:: <$int, $uint>( value) } ;
239162 self_unsigned. cmovnz( value_unsigned, condition) ;
240163 }
241164 }
@@ -244,58 +167,43 @@ macro_rules! impl_cmov_for_signed_with_unsigned {
244167
245168/// Implement [`CmovEq`] for a signed type by invoking the corresponding unsigned impl.
246169macro_rules! impl_cmoveq_for_signed_with_unsigned {
247- ( $signed: ty, $unsigned: ty) => {
248- impl_cmoveq_for_signed_with_unsigned!(
249- $signed,
250- $unsigned,
251- "Delegating implementation of `CmovEq` for signed type which delegates to unsigned."
252- ) ;
253- } ;
254- ( $signed: ty, $unsigned: ty, $doc: expr) => {
255- #[ doc = $doc]
256- #[ doc = "# Panics" ]
257- #[ doc = "- if slices have unequal lengths" ]
258- impl CmovEq for [ $signed] {
170+ ( $int: ty, $uint: ty) => {
171+ impl CmovEq for [ $int] {
259172 #[ inline]
173+ #[ allow( unsafe_code) ]
260174 fn cmovne( & self , rhs: & Self , input: Condition , output: & mut Condition ) {
261- assert_size_and_alignment_eq!( $signed, $unsigned) ;
262-
263175 // SAFETY:
 264- // - Slices being constructed are of same-sized integers as asserted above.
 176+ // - Slices being constructed are of same-sized integers, as asserted inside `cast_slice`.
265177 // - We source the slice length directly from the other valid slice.
266- #[ allow( unsafe_code) ]
267- let ( self_unsigned, rhs_unsigned) = unsafe {
268- (
269- slice:: from_raw_parts( self . as_ptr( ) as * const $unsigned, self . len( ) ) ,
270- slice:: from_raw_parts( rhs. as_ptr( ) as * const $unsigned, rhs. len( ) ) ,
271- )
272- } ;
273-
178+ let self_unsigned = unsafe { cast_slice:: <$int, $uint>( self ) } ;
179+ let rhs_unsigned = unsafe { cast_slice:: <$int, $uint>( rhs) } ;
274180 self_unsigned. cmovne( rhs_unsigned, input, output) ;
275181 }
276182 }
277183 } ;
278184}
279185
280186/// Implement [`Cmov`] and [`CmovEq`] for the given signed/unsigned type pair.
187+ // TODO(tarcieri): use `cast_unsigned`/`cast_signed` to get rid of the `=> u*`
281188macro_rules! impl_cmov_traits_for_signed_with_unsigned {
282- ( $signed: ty, $unsigned: ty) => {
283- impl_cmov_for_signed_with_unsigned!( $signed, $unsigned) ;
284- impl_cmoveq_for_signed_with_unsigned!( $signed, $unsigned) ;
189+ ( $( $int: ty => $uint: ty) ,+ ) => {
190+ $(
191+ impl_cmov_for_signed_with_unsigned!( $int, $uint) ;
192+ impl_cmoveq_for_signed_with_unsigned!( $int, $uint) ;
193+ ) +
285194 } ;
286195}
287196
288- impl_cmov_traits_for_signed_with_unsigned ! ( i8 , u8 ) ;
289- impl_cmov_traits_for_signed_with_unsigned ! ( i16 , u16 ) ;
290- impl_cmov_traits_for_signed_with_unsigned ! ( i32 , u32 ) ;
291- impl_cmov_traits_for_signed_with_unsigned ! ( i64 , u64 ) ;
292- impl_cmov_traits_for_signed_with_unsigned ! ( i128 , u128 ) ;
197+ impl_cmov_traits_for_signed_with_unsigned ! (
198+ i8 => u8 ,
199+ i16 => u16 ,
200+ i32 => u32 ,
201+ i64 => u64 ,
202+ i128 => u128
203+ ) ;
293204
294- /// Optimized implementation for byte slices which coalesces them into word-sized chunks first,
295- /// then performs [`CmovEq`] at the word-level to cut down on the total number of instructions.
296- ///
297- /// This is only constant-time for equal-length slices, and will short-circuit and set `output`
298- /// in the event the slices are of unequal length.
205+ // Optimized implementation for byte slices which coalesces them into word-sized chunks first,
206+ // then performs [`CmovEq`] at the word-level to cut down on the total number of instructions.
299207impl CmovEq for [ u8 ] {
300208 #[ inline]
301209 fn cmovne ( & self , rhs : & Self , input : Condition , output : & mut Condition ) {
@@ -321,43 +229,67 @@ impl CmovEq for [u8] {
321229
322230/// Implement [`CmovEq`] using a simple loop.
323231macro_rules! impl_cmoveq_with_loop {
324- ( $int: ty, $doc: expr) => {
325- #[ doc = $doc]
326- impl CmovEq for [ $int] {
327- #[ inline]
328- fn cmovne( & self , rhs: & Self , input: Condition , output: & mut Condition ) {
329- // Short-circuit the comparison if the slices are of different lengths, and set the output
330- // condition to the input condition.
331- if self . len( ) != rhs. len( ) {
332- * output = input;
333- return ;
334- }
335-
336- for ( a, b) in self . iter( ) . zip( rhs. iter( ) ) {
337- a. cmovne( b, input, output) ;
232+ ( $( $int: ty) ,+ ) => {
233+ $(
234+ impl CmovEq for [ $int] {
235+ #[ inline]
236+ fn cmovne( & self , rhs: & Self , input: Condition , output: & mut Condition ) {
237+ // Short-circuit the comparison if the slices are of different lengths, and set the output
238+ // condition to the input condition.
239+ if self . len( ) != rhs. len( ) {
240+ * output = input;
241+ return ;
242+ }
243+
244+ for ( a, b) in self . iter( ) . zip( rhs. iter( ) ) {
245+ a. cmovne( b, input, output) ;
246+ }
338247 }
339248 }
340- }
249+ ) +
341250 } ;
342251}
343252
344- // TODO(tarcieri): optimized coalescing implementations of `CmovEq` for `u16` and `u32`
345- impl_cmoveq_with_loop ! (
346- u16 ,
347- "Implementation for `u16` slices where we can just loop."
348- ) ;
349- impl_cmoveq_with_loop ! (
350- u32 ,
351- "Implementation for `u32` slices where we can just loop."
352- ) ;
353- impl_cmoveq_with_loop ! (
354- u64 ,
355- "Implementation for `u64` slices where we can just loop."
356- ) ;
357- impl_cmoveq_with_loop ! (
358- u128 ,
359- "Implementation for `u128` slices where we can just loop."
360- ) ;
253+ // TODO(tarcieri): investigate word-coalescing impls
254+ impl_cmoveq_with_loop ! ( u16 , u32 , u64 , u128 ) ;
255+
256+ /// Performs an unsafe pointer cast from one slice type to the other.
257+ ///
 258+ /// # Safety
 259+ /// - Caller must ensure every bit pattern of `T` is a valid value of `U`.
 260+ /// - Size/alignment mismatches between `T` and `U` are rejected at compile time (not a runtime panic).
261+ #[ allow( unsafe_code) ]
262+ unsafe fn cast_slice < T , U > ( slice : & [ T ] ) -> & [ U ] {
263+ const {
264+ assert ! ( size_of:: <T >( ) == size_of:: <U >( ) , "T/U size differs" ) ;
265+ assert ! ( align_of:: <T >( ) == align_of:: <U >( ) , "T/U alignment differs" ) ;
266+ }
267+
268+ // SAFETY:
269+ // - Slices being constructed are of same-sized/aligned types as asserted above.
 270+ // - We source the slice length directly from the valid input slice.
271+ // - It's up to the caller to ensure the pointer cast itself is valid.
272+ unsafe { slice:: from_raw_parts ( slice. as_ptr ( ) as * const U , slice. len ( ) ) }
273+ }
274+
275+ /// Performs an unsafe pointer cast from one mutable slice type to the other.
276+ ///
 277+ /// # Safety
 278+ /// - Caller must ensure every bit pattern of `T` is a valid value of `U` (and vice versa, since the slice is mutable).
 279+ /// - Size/alignment mismatches between `T` and `U` are rejected at compile time (not a runtime panic).
280+ #[ allow( unsafe_code) ]
281+ unsafe fn cast_slice_mut < T , U > ( slice : & mut [ T ] ) -> & mut [ U ] {
282+ const {
283+ assert ! ( size_of:: <T >( ) == size_of:: <U >( ) , "T/U size differs" ) ;
284+ assert ! ( align_of:: <T >( ) == align_of:: <U >( ) , "T/U alignment differs" ) ;
285+ }
286+
287+ // SAFETY:
288+ // - Slices being constructed are of same-sized/aligned types as asserted above.
 289+ // - We source the slice length directly from the valid input slice.
290+ // - It's up to the caller to ensure the pointer cast itself is valid.
291+ unsafe { slice:: from_raw_parts_mut ( slice. as_mut_ptr ( ) as * mut U , slice. len ( ) ) }
292+ }
361293
362294/// Compare the two remainder slices by loading a `Word` then performing `cmovne`.
363295#[ inline]
0 commit comments