First-principles construction of double-double and quad-double values

Ravenwater · Ravenwater · commit c49c54857e00 · 2024-08-21T10:47:43.000-04:00
diff --git a/include/universal/number/dd/dd_impl.hpp b/include/universal/number/dd/dd_impl.hpp
@@ -30,6 +30,7 @@
 
 namespace sw { namespace universal {
 
+	// debug infrastructure. TODO: remove once decimal conversion works reliably
 	constexpr bool bTraceDecimalConversion = false;
 	constexpr bool bTraceDecimalRounding = false;
 	std::ostream& operator<<(std::ostream& ostr, const std::vector<char>& s) {
@@ -668,7 +669,6 @@ class dd {
 		if (iszero()) {
 			exponent = 0;
 			for (int i = 0; i < precision; ++i) s[i] = '0';
-			s[precision] = 0; // termination null
 			return;
 		}
 
diff --git a/include/universal/number/qd/qd_impl.hpp b/include/universal/number/qd/qd_impl.hpp
@@ -17,6 +17,7 @@
 #include <iomanip>
 #include <limits>
 #include <cmath>
+#include <vector>
 
 // supporting types and functions
 #include <universal/native/ieee754.hpp>
@@ -672,7 +673,7 @@ class qd {
 
 				int nrDigitsForFixedFormat = nrDigits;
 				if (fixed)
-					nrDigitsForFixedFormat = std::max(60, nrDigits); // can be much longer than the max accuracy for double-double
+					nrDigitsForFixedFormat = std::max(120, nrDigits); // can be much longer than the max accuracy for quad-double
 
 				// a number in the range of [0.5, 1.0) to be printed with zero precision 
 				// must be rounded up to 1 to print correctly
@@ -682,28 +683,28 @@ class qd {
 				}
 
 				if (fixed && nrDigits <= 0) {
-					// process values with negative exponents (powerOfTenScale < 0)
+					// process values that are near zero
 					s += '0';
 					if (precision > 0) {
 						s += '.';
 						s.append(static_cast<unsigned int>(precision), '0');
 					}
 				}
 				else {
-					char* t;
+					std::vector<char> t;
 
 					if (fixed) {
-						t = new char[static_cast<size_t>(nrDigitsForFixedFormat + 1)];
+						t.resize(nrDigitsForFixedFormat + 1);
 						to_digits(t, e, nrDigitsForFixedFormat);
 					}
 					else {
-						t = new char[static_cast<size_t>(nrDigits + 1)];
+						t.resize(nrDigits + 1);
 						to_digits(t, e, nrDigits);
 					}
 
 					if (fixed) {
 						// round the decimal string
-						round_string(t, nrDigits, &integerDigits);
+						round_string(t, nrDigits+1, &integerDigits);
 
 						if (integerDigits > 0) {
 							int i;
@@ -727,7 +728,6 @@ class qd {
 							s += t[i];
 
 					}
-					delete[] t;
 				}
 			}
 
@@ -869,7 +869,7 @@ class qd {
 	/// functional helpers
 
 	// precondition: string s must be all digits
-	void round_string(char* s, int precision, int* decimalPoint) const {
+	void round_string(std::vector<char>& s, int precision, int* decimalPoint) const {
 		int nrDigits = precision;
 		// round decimal string and propagate carry
 		int lastDigit = nrDigits - 1;
@@ -891,8 +891,6 @@ class qd {
 			(*decimalPoint)++; // increment decimal point
 			++precision;
 		}
-
-		s[precision] = 0; // aqd termination null
 	}
 
 	void append_exponent(std::string& str, int e) const {
@@ -923,17 +921,15 @@ class qd {
 	/// <param name="s"></param>
 	/// <param name="exponent"></param>
 	/// <param name="precision"></param>
-	void to_digits(char* s, int& exponent, int precision) const {
+	void to_digits(std::vector<char>& s, int& exponent, int precision) const {
 		constexpr qd _one(1.0), _ten(10.0);
 		constexpr double _log2(0.301029995663981);
 		double hi = x[0];
 		//double lo = x[1];
 
 		if (iszero()) {
-			std::cout << "I am zero\n";
 			exponent = 0;
 			for (int i = 0; i < precision; ++i) s[i] = '0';
-			s[precision] = 0; // termination null
 			return;
 		}
 
diff --git a/static/dd/api/experiments.cpp b/static/dd/api/experiments.cpp
@@ -103,6 +103,7 @@ try {
 		double exponent = -std::ceil(std::abs(std::log10(low)));
 		std::cout << "exponent : " << exponent << '\n';
 
+		// now let's walk that bit down to the ULP
 		std::cout << std::setprecision(32);
 		for (int i = 0; i < 54; ++i) {
 			low = (std::pow(2.0, -53.0 - double(i)));
diff --git a/static/qd/api/experiments.cpp b/static/qd/api/experiments.cpp
@@ -75,6 +75,16 @@ namespace sw {
 			return ostr << v.v;
 		}
 
+
+		std::string centered(const std::string& label, unsigned columnWidth) {
+			const unsigned labelWidth = static_cast<unsigned>(label.length());
+			if (labelWidth > columnWidth) return label; // too wide to pad: hand it back as is
+			// pad both sides with spaces; an odd leftover column goes to the right
+			const unsigned slack = columnWidth - labelWidth;
+			std::string field(slack / 2, ' ');
+			field.append(label).append(slack - slack / 2, ' ');
+			return field;
+		}
 	}
 }
 
@@ -96,13 +106,75 @@ try {
 		// dd = high + lo
 		//    = 1*2^0 + 1*2^-53
 		//    = 1.0e00 + 1.0elog10(2^-53)
-		ReportValue(std::pow(2.0, 0.0), "2^0");
-		ReportValue(std::pow(2.0, -53.0), "2^-53");
-		std::cout << std::log10(std::pow(2.0, -53.0)) << '\n';
-		double exponent = std::ceil(std::log10(std::pow(2.0, -53.0)));
+		double high{ std::pow(2.0, 0.0) };
+		ReportValue(high, "2^0");
+		double low{ std::pow(2.0, -53.0) };
+		ReportValue(low, "2^-53");
+		std::cout << std::log10(low) << '\n';
+		double exponent = -std::ceil(std::abs(std::log10(low)));
 		std::cout << "exponent : " << exponent << '\n';
+
+		// now let's walk that bit down to the ULP
+		double x0{ 1.0 };
+		double x1{ 0.0 };
+		double x2{ 0.0 };
+		double x3{ 0.0 };
+		int precisionForRange = 16;
+		std::cout << std::setprecision(precisionForRange);
+		x0 = 1.0;
+		qd a(x0, x1, x2, x3);
+		std::cout << centered("quad-double", precisionForRange + 6) << " : ";
+		std::cout << centered("binary form of x0", 68) << " : ";
+		std::cout << centered("real value of x0", 15) << '\n';
+		std::cout << a << " : " << to_binary(x0) << " : " << x0 << '\n';
+		for (int i = 1; i < 53; ++i) {
+			x0 = 1.0 + (std::pow(2.0, - double(i)));
+			qd a(x0, x1, x2, x3);
+			std::cout << a << " : " << to_binary(x0) << " : " << std::setprecision(7) << x0 << std::setprecision(precisionForRange) << '\n';
+		}
+		// x0 is 1.0 + eps() at this point
+		// std::cout << to_binary(x0) << '\n';
+		std::cout << to_binary(qd(x0, x1, x2, x3)) << '\n';
+		x0 = 1.0;
+		precisionForRange = 32;
+		std::cout << std::setprecision(precisionForRange);
+		std::cout << centered("quad-double", precisionForRange + 6) << " : ";
+		std::cout << centered("binary form of x1", 68) << " : ";
+		std::cout << centered("real value of x1", 15) << '\n';
+		for (int i = 0; i < 54; ++i) {
+			x1 = (std::pow(2.0, -53.0 - double(i)));
+			qd a(x0, x1, x2, x3);
+			std::cout << a << " : " << to_binary(x1) << " : " << std::setprecision(7) << x1 << std::setprecision(precisionForRange) << '\n';
+		}
+		std::cout << to_binary(qd(x0, x1, x2, x3)) << '\n';
+		x1 = 0.0;
+		precisionForRange = 48;
+		std::cout << std::setprecision(precisionForRange);
+		std::cout << centered("quad-double", precisionForRange + 6) << " : ";
+		std::cout << centered("binary form of x2", 68) << " : ";
+		std::cout << centered("real value of x2", 15) << '\n';
+		for (int i = 0; i < 54; ++i) {
+			x2 = (std::pow(2.0, -106.0 - double(i)));
+			qd a(x0, x1, x2, x3);
+			std::cout << a << " : " << to_binary(x2) << " : " << std::setprecision(7) << x2 << std::setprecision(precisionForRange) << '\n';
+		}
+		std::cout << to_binary(qd(x0, x1, x2, x3)) << '\n';
+		x2 = 0.0;
+		precisionForRange = 64;
+		std::cout << std::setprecision(precisionForRange);
+		std::cout << centered("quad-double", precisionForRange + 6) << " : ";
+		std::cout << centered("binary form of x3", 68) << " : ";
+		std::cout << centered("real value of x3", 15) << '\n';
+		for (int i = 0; i < 54; ++i) {
+			x3 = (std::pow(2.0, -159.0 - double(i)));
+			qd a(x0, x1, x2, x3);
+			std::cout << a << " : " << to_binary(x3) << " : " << std::setprecision(7) << x3 << std::setprecision(precisionForRange) << '\n';
+		}
+		std::cout << to_binary(qd(x0, x1, x2, x3)) << '\n';
+		std::cout << std::setprecision(defaultPrecision);
 	}
 
+	return 0;
 	{
 		// what is the difference between ostream fmt scientific/fixed