@@ -279,7 +279,7 @@ macro_rules! impl_Display {
279
279
// Format per two digits from the lookup table.
280
280
if remain > 9 {
281
281
// SAFETY: All of the decimals fit in buf due to MAX_DEC_N
282
- // and the while condition ensures at least 2 more decimals.
282
+ // and the if condition ensures at least 2 more decimals.
283
283
unsafe { core:: hint:: assert_unchecked( offset >= 2 ) }
284
284
// SAFETY: The offset counts down from its initial buf.len()
285
285
// without underflow due to the previous precondition.
@@ -565,93 +565,6 @@ mod imp {
565
565
}
566
566
impl_Exp ! ( i128 , u128 as u128 via to_u128 named exp_u128) ;
567
567
568
- /// Helper function for writing a u64 into `buf` going from last to first, with `curr`.
569
- fn parse_u64_into < const N : usize > ( mut n : u64 , buf : & mut [ MaybeUninit < u8 > ; N ] , curr : & mut usize ) {
570
- let buf_ptr = MaybeUninit :: slice_as_mut_ptr ( buf) ;
571
- let lut_ptr = DEC_DIGITS_LUT . as_ptr ( ) ;
572
- assert ! ( * curr > 19 ) ;
573
-
574
- // SAFETY:
575
- // Writes at most 19 characters into the buffer. Guaranteed that any ptr into LUT is at most
576
- // 198, so will never OOB. There is a check above that there are at least 19 characters
577
- // remaining.
578
- unsafe {
579
- if n >= 1e16 as u64 {
580
- let to_parse = n % 1e16 as u64 ;
581
- n /= 1e16 as u64 ;
582
-
583
- // Some of these are nops but it looks more elegant this way.
584
- let d1 = ( ( to_parse / 1e14 as u64 ) % 100 ) << 1 ;
585
- let d2 = ( ( to_parse / 1e12 as u64 ) % 100 ) << 1 ;
586
- let d3 = ( ( to_parse / 1e10 as u64 ) % 100 ) << 1 ;
587
- let d4 = ( ( to_parse / 1e8 as u64 ) % 100 ) << 1 ;
588
- let d5 = ( ( to_parse / 1e6 as u64 ) % 100 ) << 1 ;
589
- let d6 = ( ( to_parse / 1e4 as u64 ) % 100 ) << 1 ;
590
- let d7 = ( ( to_parse / 1e2 as u64 ) % 100 ) << 1 ;
591
- let d8 = ( ( to_parse / 1e0 as u64 ) % 100 ) << 1 ;
592
-
593
- * curr -= 16 ;
594
-
595
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
596
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
597
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d3 as usize ) , buf_ptr. add ( * curr + 4 ) , 2 ) ;
598
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d4 as usize ) , buf_ptr. add ( * curr + 6 ) , 2 ) ;
599
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d5 as usize ) , buf_ptr. add ( * curr + 8 ) , 2 ) ;
600
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d6 as usize ) , buf_ptr. add ( * curr + 10 ) , 2 ) ;
601
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d7 as usize ) , buf_ptr. add ( * curr + 12 ) , 2 ) ;
602
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d8 as usize ) , buf_ptr. add ( * curr + 14 ) , 2 ) ;
603
- }
604
- if n >= 1e8 as u64 {
605
- let to_parse = n % 1e8 as u64 ;
606
- n /= 1e8 as u64 ;
607
-
608
- // Some of these are nops but it looks more elegant this way.
609
- let d1 = ( ( to_parse / 1e6 as u64 ) % 100 ) << 1 ;
610
- let d2 = ( ( to_parse / 1e4 as u64 ) % 100 ) << 1 ;
611
- let d3 = ( ( to_parse / 1e2 as u64 ) % 100 ) << 1 ;
612
- let d4 = ( ( to_parse / 1e0 as u64 ) % 100 ) << 1 ;
613
- * curr -= 8 ;
614
-
615
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
616
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
617
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d3 as usize ) , buf_ptr. add ( * curr + 4 ) , 2 ) ;
618
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d4 as usize ) , buf_ptr. add ( * curr + 6 ) , 2 ) ;
619
- }
620
- // `n` < 1e8 < (1 << 32)
621
- let mut n = n as u32 ;
622
- if n >= 1e4 as u32 {
623
- let to_parse = n % 1e4 as u32 ;
624
- n /= 1e4 as u32 ;
625
-
626
- let d1 = ( to_parse / 100 ) << 1 ;
627
- let d2 = ( to_parse % 100 ) << 1 ;
628
- * curr -= 4 ;
629
-
630
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
631
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
632
- }
633
-
634
- // `n` < 1e4 < (1 << 16)
635
- let mut n = n as u16 ;
636
- if n >= 100 {
637
- let d1 = ( n % 100 ) << 1 ;
638
- n /= 100 ;
639
- * curr -= 2 ;
640
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr) , 2 ) ;
641
- }
642
-
643
- // decode last 1 or 2 chars
644
- if n < 10 {
645
- * curr -= 1 ;
646
- * buf_ptr. add ( * curr) = ( n as u8 ) + b'0' ;
647
- } else {
648
- let d1 = n << 1 ;
649
- * curr -= 2 ;
650
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr) , 2 ) ;
651
- }
652
- }
653
- }
654
-
655
568
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
656
569
impl fmt:: Display for u128 {
657
570
fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
@@ -662,90 +575,152 @@ impl fmt::Display for u128 {
662
575
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
663
576
impl fmt:: Display for i128 {
664
577
fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
665
- let is_nonnegative = * self >= 0 ;
666
- let n = if is_nonnegative {
667
- self . to_u128 ( )
668
- } else {
669
- // convert the negative num to positive by summing 1 to its 2s complement
670
- ( !self . to_u128 ( ) ) . wrapping_add ( 1 )
671
- } ;
672
- fmt_u128 ( n, is_nonnegative, f)
578
+ fmt_u128 ( self . unsigned_abs ( ) , * self >= 0 , f)
673
579
}
674
580
}
675
581
676
- /// Specialized optimization for u128. Instead of taking two items at a time, it splits
677
- /// into at most 2 u64s, and then chunks by 10e16, 10e8, 10e4, 10e2, and then 10e1.
678
- /// It also has to handle 1 last item, as 10^40 > 2^128 > 10^39, whereas
679
- /// 10^20 > 2^64 > 10^19.
582
+ /// Format optimized for u128. Computation of 128 bits is limited by processing
583
+ /// in batches of 16 decimals at a time.
680
584
fn fmt_u128 ( n : u128 , is_nonnegative : bool , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
585
+ // Optimize common-case zero, which would also need special treatment due to
586
+ // its "leading" zero.
587
+ if n == 0 {
588
+ return f. pad_integral ( true , "" , "0" ) ;
589
+ }
590
+
591
+ // u128::MAX has 39 significant decimals.
681
592
const MAX_DEC_N : usize = u128:: MAX . ilog ( 10 ) as usize + 1 ;
593
+ // Buffer decimals with right alignment.
682
594
let mut buf = [ MaybeUninit :: < u8 > :: uninit ( ) ; MAX_DEC_N ] ;
683
- let mut curr = buf. len ( ) ;
684
-
685
- let ( n, rem) = udiv_1e19 ( n) ;
686
- parse_u64_into ( rem, & mut buf, & mut curr) ;
687
-
688
- if n != 0 {
689
- // 0 pad up to point
690
- let target = buf. len ( ) - 19 ;
691
- // SAFETY: Guaranteed that we wrote at most 19 bytes, and there must be space
692
- // remaining since it has length 39
693
- unsafe {
694
- ptr:: write_bytes (
695
- MaybeUninit :: slice_as_mut_ptr ( & mut buf) . add ( target) ,
696
- b'0' ,
697
- curr - target,
698
- ) ;
699
- }
700
- curr = target;
701
-
702
- let ( n, rem) = udiv_1e19 ( n) ;
703
- parse_u64_into ( rem, & mut buf, & mut curr) ;
704
- // Should this following branch be annotated with unlikely?
705
- if n != 0 {
706
- let target = buf. len ( ) - 38 ;
707
- // The raw `buf_ptr` pointer is only valid until `buf` is used the next time,
708
- // buf `buf` is not used in this scope so we are good.
709
- let buf_ptr = MaybeUninit :: slice_as_mut_ptr ( & mut buf) ;
710
- // SAFETY: At this point we wrote at most 38 bytes, pad up to that point,
711
- // There can only be at most 1 digit remaining.
712
- unsafe {
713
- ptr:: write_bytes ( buf_ptr. add ( target) , b'0' , curr - target) ;
714
- curr = target - 1 ;
715
- * buf_ptr. add ( curr) = ( n as u8 ) + b'0' ;
716
- }
595
+
596
+ // Take the 16 least-significant decimals.
597
+ let ( quot_1e16, mod_1e16) = div_rem_1e16 ( n) ;
598
+ let ( mut remain, mut offset) = if quot_1e16 == 0 {
599
+ ( mod_1e16, MAX_DEC_N )
600
+ } else {
601
+ // Write digits at buf[23..39].
602
+ enc_16lsd :: < { MAX_DEC_N - 16 } > ( & mut buf, mod_1e16) ;
603
+
604
+ // Take another 16 decimals.
605
+ let ( quot2, mod2) = div_rem_1e16 ( quot_1e16) ;
606
+ if quot2 == 0 {
607
+ ( mod2, MAX_DEC_N - 16 )
608
+ } else {
609
+ // Write digits at buf[7..23].
610
+ enc_16lsd :: < { MAX_DEC_N - 32 } > ( & mut buf, mod2) ;
611
+ // `quot2` has at most 7 decimals remaining after two 1e16 divisions.
612
+ ( quot2 as u64 , MAX_DEC_N - 32 )
717
613
}
614
+ } ;
615
+
616
+ // Format per four digits from the lookup table.
617
+ while remain > 999 {
618
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
619
+ // and the while condition ensures at least 4 more decimals.
620
+ unsafe { core:: hint:: assert_unchecked ( offset >= 4 ) }
621
+ // SAFETY: The offset counts down from its initial buf.len()
622
+ // without underflow due to the previous precondition.
623
+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
624
+ offset -= 4 ;
625
+
626
+ // pull two pairs
627
+ let quad = remain % 1_00_00 ;
628
+ remain /= 1_00_00 ;
629
+ let pair1 = ( quad / 100 ) as usize ;
630
+ let pair2 = ( quad % 100 ) as usize ;
631
+ buf[ offset + 0 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 0 ] ) ;
632
+ buf[ offset + 1 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 1 ] ) ;
633
+ buf[ offset + 2 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 0 ] ) ;
634
+ buf[ offset + 3 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 1 ] ) ;
635
+ }
636
+
637
+ // Format per two digits from the lookup table.
638
+ if remain > 9 {
639
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
640
+ // and the if condition ensures at least 2 more decimals.
641
+ unsafe { core:: hint:: assert_unchecked ( offset >= 2 ) }
642
+ // SAFETY: The offset counts down from its initial buf.len()
643
+ // without underflow due to the previous precondition.
644
+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
645
+ offset -= 2 ;
646
+
647
+ let pair = ( remain % 100 ) as usize ;
648
+ remain /= 100 ;
649
+ buf[ offset + 0 ] . write ( DEC_DIGITS_LUT [ pair * 2 + 0 ] ) ;
650
+ buf[ offset + 1 ] . write ( DEC_DIGITS_LUT [ pair * 2 + 1 ] ) ;
651
+ }
652
+
653
+ // Format the last remaining digit, if any.
654
+ if remain != 0 {
655
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
656
+ // and the if condition ensures (at least) 1 more decimal.
657
+ unsafe { core:: hint:: assert_unchecked ( offset >= 1 ) }
658
+ // SAFETY: The offset counts down from its initial buf.len()
659
+ // without underflow due to the previous precondition.
660
+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
661
+ offset -= 1 ;
662
+
663
+ // Either the compiler sees that remain < 10, or it prevents
664
+ // a boundary check up next.
665
+ let last = ( remain & 15 ) as usize ;
666
+ buf[ offset] . write ( DEC_DIGITS_LUT [ last * 2 + 1 ] ) ;
667
+ // not used: remain = 0;
718
668
}
719
669
720
- // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
721
- // UTF-8 since `DEC_DIGITS_LUT` is
722
- let buf_slice = unsafe {
670
+ // SAFETY: offset never exceeds buf.len(), and all buf content from offset onward is set.
671
+ let written = unsafe { buf. get_unchecked ( offset..) } ;
672
+ // SAFETY: Writes use ASCII from the lookup table exclusively.
673
+ let as_str = unsafe {
723
674
str:: from_utf8_unchecked ( slice:: from_raw_parts (
724
- MaybeUninit :: slice_as_mut_ptr ( & mut buf ) . add ( curr ) ,
725
- buf . len ( ) - curr ,
675
+ MaybeUninit :: slice_as_ptr ( written ) ,
676
+ written . len ( ) ,
726
677
) )
727
678
} ;
728
- f. pad_integral ( is_nonnegative, "" , buf_slice )
679
+ f. pad_integral ( is_nonnegative, "" , as_str )
729
680
}
730
681
731
- /// Partition of `n` into n > 1e19 and rem <= 1e19
682
+ /// Encodes the 16 least-significant decimals of n into `buf[OFFSET .. OFFSET +
683
+ /// 16 ]`.
684
+ fn enc_16lsd < const OFFSET : usize > ( buf : & mut [ MaybeUninit < u8 > ; 39 ] , n : u64 ) {
685
+ // Consume the least-significant decimals from a working copy.
686
+ let mut remain = n;
687
+
688
+ // Format per four digits from the lookup table.
689
+ for quad_index in ( 0 ..4 ) . rev ( ) {
690
+ // pull two pairs
691
+ let quad = remain % 1_00_00 ;
692
+ remain /= 1_00_00 ;
693
+ let pair1 = ( quad / 100 ) as usize ;
694
+ let pair2 = ( quad % 100 ) as usize ;
695
+ buf[ quad_index * 4 + OFFSET + 0 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 0 ] ) ;
696
+ buf[ quad_index * 4 + OFFSET + 1 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 1 ] ) ;
697
+ buf[ quad_index * 4 + OFFSET + 2 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 0 ] ) ;
698
+ buf[ quad_index * 4 + OFFSET + 3 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 1 ] ) ;
699
+ }
700
+ }
701
+
702
+ /// Euclidean division with remainder by the constant 1E16, which splits off the 16
703
+ /// least-significant decimals of n.
732
704
///
733
- /// Integer division algorithm is based on the following paper:
705
+ /// The integer division algorithm is based on the following paper:
734
706
///
735
707
/// T. Granlund and P. Montgomery, “Division by Invariant Integers Using Multiplication”
736
708
/// in Proc. of the SIGPLAN94 Conference on Programming Language Design and
737
709
/// Implementation, 1994, pp. 61–72
738
710
///
739
- fn udiv_1e19 ( n : u128 ) -> ( u128 , u64 ) {
740
- const DIV : u64 = 1e19 as u64 ;
741
- const FACTOR : u128 = 156927543384667019095894735580191660403 ;
711
+ #[ inline]
712
+ fn div_rem_1e16 ( n : u128 ) -> ( u128 , u64 ) {
713
+ const D : u128 = 1_0000_0000_0000_0000 ;
714
+ // The check inlines well with the caller flow.
715
+ if n < D {
716
+ return ( 0 , n as u64 ) ;
717
+ }
742
718
743
- let quot = if n < 1 << 83 {
744
- ( ( n >> 19 ) as u64 / ( DIV >> 19 ) ) as u128
745
- } else {
746
- n. widening_mul ( FACTOR ) . 1 >> 62
747
- } ;
719
+ // These constant values are computed with the CHOOSE_MULTIPLIER procedure.
720
+ const M_HIGH : u128 = 76624777043294442917917351357515459181 ;
721
+ const SH_POST : u8 = 51 ;
748
722
749
- let rem = ( n - quot * DIV as u128 ) as u64 ;
750
- ( quot, rem)
723
+ let quot = n. widening_mul ( M_HIGH ) . 1 >> SH_POST ;
724
+ let rem = n - quot * D ;
725
+ ( quot, rem as u64 )
751
726
}
0 commit comments