Merge pull request #132 from saxbophone/josh/43-bit-shift-divmod-opt

saxbophone · web-flow · commit ee8ef3494c81 · 2023-02-18T16:56:12.000Z
Optimise multiplication and division by powers of two
diff --git a/arby/include/arby/Nat.hpp b/arby/include/arby/Nat.hpp
@@ -552,6 +552,11 @@ namespace com::saxbophone::arby {
             _digits = product._digits;
             return *this; // return the result by reference
         }
+    private: // private helper methods for the multiplication operator and divmod()
+        constexpr bool is_power_of_2() const {
+            return *this == Nat(1) << (bit_length() - 1);
+        }
+    public:
         /**
          * @brief Multiplication operator for Nat
          * @param lhs,rhs operands for the multiplication
@@ -562,35 +567,43 @@ namespace com::saxbophone::arby {
             // init product to zero
             Nat product;
             // either operand being zero always results in zero, so only run the algorithm if they're both non-zero
-            if (not (lhs._digits.front() == 0 or rhs._digits.front() == 0)) {
-                // multiply each digit from lhs with each digit from rhs
-                std::size_t l = 0; // manual indices to track which digit we are on,
-                std::size_t r = 0; // as codlili's iterators are not random-access
-                for (auto lhs_digit : lhs._digits) {
-                    // reset r index as it cycles through multiple times
-                    r = 0;
-                    for (auto rhs_digit : rhs._digits) {
-                        // cast lhs to OverflowType to make sure both operands get promoted to avoid wrap-around overflow
-                        OverflowType multiplication = (OverflowType)lhs_digit * rhs_digit;
-                        // create a new Nat with this intermediate result and add trailing places as needed
-                        Nat intermediate = multiplication;
-                        // we need to remap the indices as the digits are stored big-endian
-                        std::size_t shift_amount = (lhs._digits.size() - 1 - l) + (rhs._digits.size() - 1 - r);
-                        // add that many trailing zeroes to intermediate's digits
-                        intermediate._digits.push_back(shift_amount, 0);
-                        // finally, add it to lhs as an accumulator
-                        product += intermediate;
-                        // increment manual indices
-                        r++;
-                    }
-                    l++;
+            if (lhs._digits.front() == 0 or rhs._digits.front() == 0) {
+                return product;
+            }
+            // optimisation using bitshifting when multiplying by binary powers
+            if (rhs.is_power_of_2()) {
+                return lhs << (rhs.bit_length() - 1);
+            } else if (lhs.is_power_of_2()) {
+                return rhs * lhs;
+            }
+            // multiply each digit from lhs with each digit from rhs
+            std::size_t l = 0; // manual indices to track which digit we are on,
+            std::size_t r = 0; // as codlili's iterators are not random-access
+            for (auto lhs_digit : lhs._digits) {
+                // reset r index as it cycles through multiple times
+                r = 0;
+                for (auto rhs_digit : rhs._digits) {
+                    // cast lhs to OverflowType to make sure both operands get promoted to avoid wrap-around overflow
+                    OverflowType multiplication = (OverflowType)lhs_digit * rhs_digit;
+                    // create a new Nat with this intermediate result and add trailing places as needed
+                    Nat intermediate = multiplication;
+                    // we need to remap the indices as the digits are stored big-endian
+                    std::size_t shift_amount = (lhs._digits.size() - 1 - l) + (rhs._digits.size() - 1 - r);
+                    // add that many trailing zeroes to intermediate's digits
+                    intermediate._digits.push_back(shift_amount, 0);
+                    // finally, add it to product, which serves as the accumulator
+                    product += intermediate;
+                    // increment manual indices
+                    r++;
                 }
+                l++;
             }
             product._validate_digits();
             return product;
         }
     private: // private helper methods for Nat::divmod()
         // function that shifts up rhs to be just big enough to be smaller than lhs
+        // TODO: rewrite this to use bit-shifting for speed
         static constexpr Nat get_max_shift(const Nat& lhs, const Nat& rhs) {
             // how many places can we shift rhs left until it's the same width as lhs?
             std::size_t wiggle_room = lhs._digits.size() - rhs._digits.size();
@@ -638,6 +651,18 @@ namespace com::saxbophone::arby {
             if (rhs._digits.front() == 0) {
                 throw std::domain_error("division by zero");
             }
+            if (lhs._digits.front() == 0) { return {lhs, lhs}; } // zero shortcut
+            // optimisation using bitshifting when dividing by binary powers
+            if (rhs.is_power_of_2()) {
+                auto width = rhs.bit_length();
+                // the remainder is the digits that are shifted out, so bitmask for them
+                auto bitmask = (Nat(1) << (width - 1)) - 1;
+                Nat quotient = lhs >> (width - 1);
+                Nat remainder = lhs & bitmask;
+                quotient._validate_digits();
+                remainder._validate_digits();
+                return {quotient, remainder};
+            }
             // this will gradually accumulate the calculated quotient
             Nat quotient;
             // this will gradually decrement with each subtraction
@@ -912,7 +937,9 @@ namespace com::saxbophone::arby {
             if (_digits.empty()) {
                 _digits = {0};
             }
-            _validate_digits(); // TODO: remove when satisfied not required
+            // needed in some cases, probably when the initial whole-digit shift leaves a small value which then turns 0
+            _remove_leading_zeroes();
+            _validate_digits();
             return *this;
         }
         /**
diff --git a/tests/Nat/bit_shifting.cpp b/tests/Nat/bit_shifting.cpp
@@ -19,6 +19,7 @@ TEST_CASE("arby::Nat left bit-shift", "[bit-shifting]") {
                 {0b10001011_nat, 0, 0b10001011_nat},
                 {0b10101110001_nat, 4, 0b101011100010000_nat},
                 {0b1_nat, 70, 0b10000000000000000000000000000000000000000000000000000000000000000000000_nat},
+                {0xfeed_nat, 32, 0xfeed00000000_nat},
             }
         )
     );
@@ -38,6 +39,7 @@ TEST_CASE("arby::Nat left bit-shift assignment", "[bit-shifting]") {
                 {0b10001011_nat, 0, 0b10001011_nat},
                 {0b10101110001_nat, 4, 0b101011100010000_nat},
                 {0b1_nat, 70, 0b10000000000000000000000000000000000000000000000000000000000000000000000_nat},
+                {0xfeed_nat, 32, 0xfeed00000000_nat},
             }
         )
     );
@@ -56,7 +58,8 @@ TEST_CASE("arby::Nat right bit-shift", "[bit-shifting]") {
                 {0b10000000110100000000011101101000_nat, 54, 0b0_nat},
                 {0b10011001010_nat, 0, 0b10011001010_nat},
                 {0b1101011000011000_nat, 8, 0b11010110_nat},
-                {0b11111111111111111111111111111111111111111111111111111111111111111111111111111111_nat, 70, 0b1111111111_nat}
+                {0b11111111111111111111111111111111111111111111111111111111111111111111111111111111_nat, 70, 0b1111111111_nat},
+                {0xfeedface1_nat, 32, 0xf_nat},
             }
         )
     );
@@ -76,7 +79,8 @@ TEST_CASE("arby::Nat right bit-shift assignment", "[bit-shifting]") {
                 {0b10000000110100000000011101101000_nat, 54, 0b0_nat},
                 {0b10011001010_nat, 0, 0b10011001010_nat},
                 {0b1101011000011000_nat, 8, 0b11010110_nat},
-                {0b11111111111111111111111111111111111111111111111111111111111111111111111111111111_nat, 70, 0b1111111111_nat}
+                {0b11111111111111111111111111111111111111111111111111111111111111111111111111111111_nat, 70, 0b1111111111_nat},
+                {0xfeedface1_nat, 32, 0xf_nat},
             }
         )
     );
diff --git a/tests/Nat/divmod.cpp b/tests/Nat/divmod.cpp
@@ -4,6 +4,7 @@
 
 #include <catch2/catch.hpp>
 
+#include <arby/math.hpp>
 #include <arby/Nat.hpp>
 
 using namespace com::saxbophone;
@@ -207,3 +208,34 @@ TEST_CASE("Failing division", "[divmod]") {
     CHECK(quotient == 8123889139_nat);
     CHECK(remainder == 1892371893_nat);
 }
+
+// regression tests for dividing by powers of two
+
+// std::pow() is not accurate for large powers and we need exactness
+// TODO: put this in a helper function accessible to all tests
+static uintmax_t integer_pow(uintmax_t base, uintmax_t exponent) {
+    // 1 to the power of anything is always 1
+    if (base == 1) {
+        return 1;
+    }
+    uintmax_t power = 1;
+    for (uintmax_t i = 0; i < exponent; i++) {
+        power *= base;
+    }
+    return power;
+}
+
+TEST_CASE("divmod of arby::Nat by a power of two", "[divmod]") {
+    uintmax_t power = GENERATE(range((uintmax_t)0, (uintmax_t)std::numeric_limits<uintmax_t>::digits));
+    uintmax_t denominator = integer_pow(2, power);
+    uintmax_t numerator = GENERATE_COPY(take(100, random(denominator, std::numeric_limits<uintmax_t>::max())));
+
+    CAPTURE(numerator, denominator);
+
+    auto [quotient, remainder] = arby::Nat::divmod(numerator, denominator);
+
+    CAPTURE(numerator, denominator, quotient, remainder);
+
+    CHECK(quotient == numerator / denominator);
+    CHECK(remainder == numerator % denominator);
+}
diff --git a/tests/Nat/multiplication.cpp b/tests/Nat/multiplication.cpp
@@ -88,3 +88,29 @@ TEST_CASE("arby::Nat multiplication by arby::Nat", "[multiplication]") {
 
     CHECK((uintmax_t)(lhs * rhs) == product);
 }
+
+// regression tests for multiplying by powers of two
+
+// std::pow() is not accurate for large powers and we need exactness
+// TODO: put this in a helper function accessible to all tests
+static uintmax_t integer_pow(uintmax_t base, uintmax_t exponent) {
+    // 1 to the power of anything is always 1
+    if (base == 1) {
+        return 1;
+    }
+    uintmax_t power = 1;
+    for (uintmax_t i = 0; i < exponent; i++) {
+        power *= base;
+    }
+    return power;
+}
+
+TEST_CASE("multiply arby::Nat by a power of two", "[multiplication]") {
+    uintmax_t power = GENERATE(range((uintmax_t)0, (uintmax_t)std::numeric_limits<uintmax_t>::digits / 2));
+    uintmax_t rhs = integer_pow(2, power);
+    uintmax_t lhs = GENERATE_COPY(take(100, random((uintmax_t)0, rhs)));
+
+    auto product = arby::Nat(lhs) * arby::Nat(rhs);
+
+    CHECK(product == lhs * rhs);
+}
diff --git a/tests/math_support/pow.cpp b/tests/math_support/pow.cpp
@@ -23,6 +23,7 @@ TEST_CASE("Zero raised to the power of any non-zero arby::Nat returns 0", "[math
 }
 
 // std::pow() is not accurate for large powers and we need exactness
+// TODO: put this in a helper function accessible to all tests
 static uintmax_t integer_pow(uintmax_t base, uintmax_t exponent) {
     // 1 to the power of anything is always 1
     if (base == 1) {

Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,7 @@ TEST_CASE("arby::Nat left bit-shift", "[bit-shifting]") {`
`19`	`19`	`{0b10001011_nat, 0, 0b10001011_nat},`
`20`	`20`	`{0b10101110001_nat, 4, 0b101011100010000_nat},`
`21`	`21`	`{0b1_nat, 70, 0b10000000000000000000000000000000000000000000000000000000000000000000000_nat},`
	`22`	`+ {0xfeed_nat, 32, 0xfeed00000000_nat},`
`22`	`23`	`}`
`23`	`24`	`)`
`24`	`25`	`);`
`@@ -38,6 +39,7 @@ TEST_CASE("arby::Nat left bit-shift assignment", "[bit-shifting]") {`
`38`	`39`	`{0b10001011_nat, 0, 0b10001011_nat},`
`39`	`40`	`{0b10101110001_nat, 4, 0b101011100010000_nat},`
`40`	`41`	`{0b1_nat, 70, 0b10000000000000000000000000000000000000000000000000000000000000000000000_nat},`
	`42`	`+ {0xfeed_nat, 32, 0xfeed00000000_nat},`
`41`	`43`	`}`
`42`	`44`	`)`
`43`	`45`	`);`
`@@ -56,7 +58,8 @@ TEST_CASE("arby::Nat right bit-shift", "[bit-shifting]") {`
`56`	`58`	`{0b10000000110100000000011101101000_nat, 54, 0b0_nat},`
`57`	`59`	`{0b10011001010_nat, 0, 0b10011001010_nat},`
`58`	`60`	`{0b1101011000011000_nat, 8, 0b11010110_nat},`
`59`		`- {0b11111111111111111111111111111111111111111111111111111111111111111111111111111111_nat, 70, 0b1111111111_nat}`
	`61`	`+ {0b11111111111111111111111111111111111111111111111111111111111111111111111111111111_nat, 70, 0b1111111111_nat},`
	`62`	`+ {0xfeedface1_nat, 32, 0xf_nat},`
`60`	`63`	`}`
`61`	`64`	`)`
`62`	`65`	`);`
`@@ -76,7 +79,8 @@ TEST_CASE("arby::Nat right bit-shift assignment", "[bit-shifting]") {`
`76`	`79`	`{0b10000000110100000000011101101000_nat, 54, 0b0_nat},`
`77`	`80`	`{0b10011001010_nat, 0, 0b10011001010_nat},`
`78`	`81`	`{0b1101011000011000_nat, 8, 0b11010110_nat},`
`79`		`- {0b11111111111111111111111111111111111111111111111111111111111111111111111111111111_nat, 70, 0b1111111111_nat}`
	`82`	`+ {0b11111111111111111111111111111111111111111111111111111111111111111111111111111111_nat, 70, 0b1111111111_nat},`
	`83`	`+ {0xfeedface1_nat, 32, 0xf_nat},`
`80`	`84`	`}`
`81`	`85`	`)`
`82`	`86`	`);`
Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@ TEST_CASE("Zero raised to the power of any non-zero arby::Nat returns 0", "[math`
`23`	`23`	`}`
`24`	`24`
`25`	`25`	`// std::pow() is not accurate for large powers and we need exactness`
	`26`	`+// TODO: put this in a helper function accessible to all tests`
`26`	`27`	`static uintmax_t integer_pow(uintmax_t base, uintmax_t exponent) {`
`27`	`28`	`// 1 to the power of anything is always 1`
`28`	`29`	`if (base == 1) {`