Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions src/primitives.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,5 @@
use crate::{Choice, WideWord, Word};

/// Adds wide numbers represented by pairs of (least significant word, most significant word)
/// and returns the result in the same format `(lo, hi)`.
#[inline(always)]
#[allow(clippy::cast_possible_truncation)]
pub(crate) const fn addhilo(x_lo: Word, x_hi: Word, y_lo: Word, y_hi: Word) -> (Word, Word) {
let res = (((x_hi as WideWord) << Word::BITS) | (x_lo as WideWord))
+ (((y_hi as WideWord) << Word::BITS) | (y_lo as WideWord));
(res as Word, (res >> Word::BITS) as Word)
}

/// Computes `lhs + rhs + carry`, returning the result along with the new carry (0, 1, or 2).
#[inline(always)]
#[allow(clippy::cast_possible_truncation)]
Expand Down
176 changes: 124 additions & 52 deletions src/uint/div_limb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
//! (DOI: 10.1109/TC.2010.143, <https://gmplib.org/~tege/division-paper.pdf>).

use crate::{
Choice, CtSelect, Limb, NonZero, Uint, WideWord, Word,
primitives::{addhilo, widening_mul},
word,
Choice, CtSelect, Limb, NonZero, Uint, WideWord, Word, primitives::widening_mul, word,
};

cpubits::cpubits! {
32 => {
/// Calculates the reciprocal of the given 32-bit divisor with the highmost bit set.
///
/// This method corresponds to Algorithm 3
pub const fn reciprocal(d: Word) -> Word {
debug_assert!(d >= (1 << (Word::BITS - 1)));

Expand All @@ -34,7 +34,7 @@ cpubits::cpubits! {

// The paper has `(v2 + 1) * d / 2^32` (there's another 2^32, but it's accounted for later).
// If `v2 == 2^32-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
// Hence the `ct_select()`.
// Hence the `word::select()`.
let x = v2.wrapping_add(1);
let (_lo, hi) = widening_mul(x, d);
let hi = word::select(d, hi, Choice::from_u32_nz(x));
Expand All @@ -44,6 +44,8 @@ cpubits::cpubits! {
}
64 => {
/// Calculates the reciprocal of the given 64-bit divisor with the highmost bit set.
///
/// This method corresponds to Algorithm 2
pub const fn reciprocal(d: Word) -> Word {
debug_assert!(d >= (1 << (Word::BITS - 1)));

Expand All @@ -64,7 +66,7 @@ cpubits::cpubits! {

// The paper has `(v3 + 1) * d / 2^64` (there's another 2^64, but it's accounted for later).
// If `v3 == 2^64-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
// Hence the `ct_select()`.
// Hence the `word::select()`.
let x = v3.wrapping_add(1);
let (_lo, hi) = widening_mul(x, d);
let hi = word::select(d, hi, word::choice_from_nz(x));
Expand Down Expand Up @@ -104,15 +106,18 @@ const fn short_div(mut dividend: u32, dividend_bits: u32, divisor: u32, divisor_

/// Calculate the quotient and the remainder of the division of a wide word
/// (supplied as high and low words) by `d`, with a precalculated reciprocal `v`.
///
/// This method corresponds to Algorithm 4
#[inline(always)]
pub(crate) const fn div2by1(u1: Word, u0: Word, reciprocal: &Reciprocal) -> (Word, Word) {
pub(crate) const fn div2by1(u0: Word, u1: Word, reciprocal: &Reciprocal) -> (Word, Word) {
let d = reciprocal.divisor_normalized;
let v = reciprocal.reciprocal;

debug_assert!(d >= (1 << (Word::BITS - 1)));
debug_assert!(u1 < d);
debug_assert!(d >= (1 << (Word::BITS - 1)), "divisor top bit unset");
debug_assert!(u1 < d, "dividend >= divisor");

let (q0, q1) = widening_mul(reciprocal.reciprocal, u1);
let (q0, q1) = addhilo(q0, q1, u0, u1);
let q = (v as WideWord * u1 as WideWord) + word::join(u0, u1);
let (q0, q1) = word::split_wide(q);
let q1 = q1.wrapping_add(1);
let r = u0.wrapping_sub(q1.wrapping_mul(d));

Expand All @@ -136,48 +141,44 @@ pub(crate) const fn div2by1(u1: Word, u0: Word, reciprocal: &Reciprocal) -> (Wor
/// calculates `q` such that `q - 1 <= floor(u / v) <= q`.
/// In place of `v1` takes its reciprocal, and assumes that `v` was already pre-shifted
/// so that v1 has its most significant bit set (that is, the reciprocal's `shift` is 0).
///
// This method corresponds to Algorithm 5
#[inline(always)]
#[allow(clippy::cast_possible_truncation)]
pub(crate) const fn div3by2(
u2: Word,
u1: Word,
u0: Word,
v1_reciprocal: &Reciprocal,
v0: Word,
) -> Word {
debug_assert!(v1_reciprocal.shift == 0);
debug_assert!(u2 <= v1_reciprocal.divisor_normalized);

// This method corresponds to Algorithm Q:
// https://janmr.com/blog/2014/04/basic-multiple-precision-long-division/

let q_maxed = word::choice_from_eq(u2, v1_reciprocal.divisor_normalized);
let (mut quo, rem) = div2by1(word::select(u2, 0, q_maxed), u1, v1_reciprocal);
(u0, u1, u2): (Word, Word, Word),
(d0, d1): (Word, Word),
v: Word,
) -> (Word, WideWord) {
let d = word::join(d0, d1);
let u_hi = word::join(u1, u2);

debug_assert!(d >= (1 << (WideWord::BITS - 1)), "divisor top bit unset");
debug_assert!(u_hi <= d, "dividend > divisor");

let q = (v as WideWord * u2 as WideWord) + u_hi;
let q1w = q >> Word::BITS;
let r1 = u1.wrapping_sub((q1w as Word).wrapping_mul(d1));
let t = d0 as WideWord * q1w;
let r = word::join(u0, r1).wrapping_sub(t).wrapping_sub(d);

let r1_ge_q0 = word::choice_from_le(q as Word, (r >> Word::BITS) as Word);
let q1 = q1w as Word;
let q1 = word::select(q1.wrapping_add(1), q1, r1_ge_q0);
let r = word::select_wide(r, r.wrapping_add(d), r1_ge_q0);

let r_ge_d = word::choice_from_wide_le(d, r);
let q1 = word::select(q1, q1.wrapping_add(1), r_ge_d);
let r = word::select_wide(r, r.wrapping_sub(d), r_ge_d);

// When the leading dividend word equals the leading divisor word, cap the quotient
// at Word::MAX and set the remainder to the sum of the top dividend words.
quo = word::select(quo, Word::MAX, q_maxed);
let mut rem = word::select_wide(
rem as WideWord,
(u2 as WideWord) + (u1 as WideWord),
q_maxed,
);

let mut i = 0;
while i < 2 {
let qy = (quo as WideWord) * (v0 as WideWord);
let rx = (rem << Word::BITS) | (u0 as WideWord);
// If r < b and q*y[-2] > r*x[-1], then set q = q - 1 and r = r + v1
let done =
word::choice_from_nz((rem >> Word::BITS) as Word).or(word::choice_from_wide_le(qy, rx));
quo = word::select(quo.wrapping_sub(1), quo, done);
rem = word::select_wide(
rem + (v1_reciprocal.divisor_normalized as WideWord),
rem,
done,
);
i += 1;
}
// at WideWord::MAX and update the remainder. This differs from the original algorithm
// but is required for multi-word division.
let maxed = word::choice_from_wide_eq(u_hi, d);
let q1 = word::select(q1, Word::MAX, maxed);
let r = word::select_wide(r, d.saturating_add(u0 as WideWord), maxed);

quo
(q1, r)
}

/// A pre-calculated reciprocal for division by a single limb.
Expand Down Expand Up @@ -229,6 +230,35 @@ impl Reciprocal {
pub const fn shift(&self) -> u32 {
self.shift
}

/// Adjusted reciprocal for 3x2 division
///
/// This method corresponds to Algorithm 6
#[must_use]
pub const fn reciprocal_3by2(&self, d0: Word, d1: Word) -> Word {
debug_assert!(self.shift == 0 && d1 == self.divisor_normalized);

let v = self.reciprocal;
let p = d1.wrapping_mul(v).wrapping_add(d0);

let p_lt_d0 = word::choice_from_lt(p, d0);
let v = word::select(v, v.wrapping_sub(1), p_lt_d0);

let p_ge_d1 = word::choice_from_le(d1, p).and(p_lt_d0);
let v = word::select(v, v.wrapping_sub(1), p_ge_d1);
let p = word::select(p, p.wrapping_sub(d1), p_ge_d1);
let p = word::select(p, p.wrapping_sub(d1), p_lt_d0);

let (t0, t1) = widening_mul(v, d0);
let p = p.wrapping_add(t1);

let p_lt_t1 = word::choice_from_lt(p, t1);
let v = word::select(v, v.wrapping_sub(1), p_lt_t1);
let d = word::join(d0, d1);
let t0p = word::join(t0, p);
let t0p_ge_d = word::choice_from_wide_le(d, t0p).and(p_lt_t1);
word::select(v, v.wrapping_sub(1), t0p_ge_d)
}
}

impl CtSelect for Reciprocal {
Expand Down Expand Up @@ -265,7 +295,7 @@ pub(crate) const fn rem_limb_with_reciprocal<const L: usize>(
let mut j = L;
while j > 0 {
j -= 1;
let (_, rj) = div2by1(r, u_shifted.as_limbs()[j].0, reciprocal);
let (_, rj) = div2by1(u_shifted.as_limbs()[j].0, r, reciprocal);
r = rj;
}
Limb(r >> reciprocal.shift)
Expand All @@ -281,16 +311,58 @@ pub(crate) const fn mul_rem(a: Limb, b: Limb, d: NonZero<Limb>) -> Limb {

#[cfg(test)]
mod tests {
use super::{Reciprocal, div2by1};
use crate::{Limb, NonZero, Word};
use super::{Reciprocal, div2by1, reciprocal};
use crate::{Limb, NonZero, Uint, WideWord, Word, word};

#[test]
fn reciprocal_valid() {
#![allow(clippy::integer_division_remainder_used, reason = "test")]
fn test(d: Word) {
let v = reciprocal(d);

// the reciprocal must be equal to floor((β^2 - 1) / d) - β
// v = floor((β^2 - 1) / d) - β = floor((β - 1 - d)*β + β - 1>/d)
let expected = WideWord::MAX / WideWord::from(d) - WideWord::from(Word::MAX) - 1;
assert_eq!(WideWord::from(v), expected);
}

test(Word::MAX);
test(1 << (Word::BITS - 1));
test((1 << (Word::BITS - 1)) | 1);
}

#[test]
fn reciprocal_3by2_valid() {
fn test(d: WideWord) {
let (d0, d1) = word::split_wide(d);
let v0 = Reciprocal::new(NonZero::<Limb>::new_unwrap(Limb(d1)));
let v = v0.reciprocal_3by2(d0, d1);

// the reciprocal must be equal to v = floor((β^3 − 1)/d) − β
// (β^3 − βd - 1)/d - 1 < v <= (β^3 − βd - 1)/d
// β^3 − βd - 1 - d < v*d <= β^3 − βd - 1
// β^3-1 - d < (v+β)d <= β^3-1
let actual = (Uint::<3>::from_word(v)
+ Uint::<3>::ZERO.set_bit_vartime(Word::BITS, true))
.checked_mul(&Uint::<3>::from_wide_word(d))
.expect("overflow");
let min = Uint::<3>::MAX - Uint::<3>::from_wide_word(d);
assert!(actual > min, "{actual} <= {min}");
}

test(WideWord::MAX);
test(1 << (WideWord::BITS - 1));
test((1 << (WideWord::BITS - 1)) | 1);
}

#[test]
fn div2by1_overflow() {
// A regression test for a situation when in div2by1() an operation (`q1 + 1`)
// that is protected from overflowing by a condition in the original paper (`r >= d`)
// still overflows because we're calculating the results for both branches.
let r = Reciprocal::new(NonZero::new(Limb(Word::MAX - 1)).unwrap());
assert_eq!(
div2by1(Word::MAX - 2, Word::MAX - 63, &r),
div2by1(Word::MAX - 63, Word::MAX - 2, &r),
(Word::MAX, Word::MAX - 65)
);
}
Expand Down
36 changes: 19 additions & 17 deletions src/uint/ref_type/div.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ impl UintRef {
// but this can only be the case if `limb_div` is falsy, in which case we discard
// the result anyway, so we conditionally set `x_hi` to zero for this branch.
let x_hi_adjusted = Limb::select(Limb::ZERO, x_hi, limb_div);
let (quo2, rem2) = div2by1(x_hi_adjusted.0, x.limbs[0].0, &reciprocal);
let (quo2, rem2) = div2by1(x.limbs[0].0, x_hi_adjusted.0, &reciprocal);

// Adjust the quotient for single limb division
x.limbs[0] = Limb::select(x.limbs[0], Limb(quo2), limb_div);
Expand Down Expand Up @@ -293,14 +293,15 @@ impl UintRef {
let mut i;
let mut carry;

// Compute the adjusted reciprocal
let v = reciprocal.reciprocal_3by2(y.limbs[ysize - 2].0, y.limbs[ysize - 1].0);

while xi > 0 {
// Divide high dividend words by the high divisor word to estimate the quotient word
let mut quo = div3by2(
x_hi.0,
x_xi.0,
x.limbs[xi - 1].0,
&reciprocal,
y.limbs[ysize - 2].0,
let (mut quo, _) = div3by2(
(x.limbs[xi - 1].0, x_xi.0, x_hi.0),
(y.limbs[ysize - 2].0, y.limbs[ysize - 1].0),
v,
);

// This loop is a no-op once xi is smaller than the number of words in the divisor
Expand Down Expand Up @@ -415,7 +416,7 @@ impl UintRef {
// but this can only be the case if `limb_div` is falsy, in which case we discard
// the result anyway, so we conditionally set `x_hi` to zero for this branch.
let x_hi_adjusted = Limb::select(Limb::ZERO, x_hi, limb_div);
let (_, rem2) = div2by1(x_hi_adjusted.0, x.limbs[0].0, &reciprocal);
let (_, rem2) = div2by1(x.limbs[0].0, x_hi_adjusted.0, &reciprocal);

// Copy out the low limb of the remainder
y.limbs[0] = Limb::select(x.limbs[0], Limb(rem2), limb_div);
Expand Down Expand Up @@ -465,17 +466,18 @@ impl UintRef {
let mut i;
let mut carry;

// Compute the adjusted reciprocal
let v = reciprocal.reciprocal_3by2(y.limbs[ysize - 2].0, y.limbs[ysize - 1].0);

// We proceed similarly to `div_rem_large_shifted()` applied to the high half of
// the dividend, fetching the limbs from the lower part as we go.

while xi > 0 {
// Divide high dividend words by the high divisor word to estimate the quotient word
let mut quo = div3by2(
x_hi.0,
x_xi.0,
x.limbs[xi - 1].0,
&reciprocal,
y.limbs[ysize - 2].0,
let (mut quo, _) = div3by2(
(x.limbs[xi - 1].0, x_xi.0, x_hi.0),
(y.limbs[ysize - 2].0, y.limbs[ysize - 1].0),
v,
);

// This loop is a no-op once xi is smaller than the number of words in the divisor
Expand Down Expand Up @@ -563,7 +565,7 @@ impl UintRef {
let mut j = self.limbs.len();
while j > 0 {
j -= 1;
(self.limbs[j].0, hi.0) = div2by1(hi.0, self.limbs[j].0, reciprocal);
(self.limbs[j].0, hi.0) = div2by1(self.limbs[j].0, hi.0, reciprocal);
}
hi.shr(reciprocal.shift())
}
Expand Down Expand Up @@ -594,9 +596,9 @@ impl UintRef {
j -= 1;
lo = self.limbs[j].0 << lshift;
lo |= word::select(0, self.limbs[j - 1].0 >> rshift, nz);
(_, hi) = div2by1(hi, lo, reciprocal);
(_, hi) = div2by1(lo, hi, reciprocal);
}
(_, hi) = div2by1(hi, self.limbs[0].0 << lshift, reciprocal);
(_, hi) = div2by1(self.limbs[0].0 << lshift, hi, reciprocal);
Limb(hi >> lshift)
}
}
Loading
Loading