adding a double-double command line utility

Ravenwater · Ravenwater · commit b122cb2f2dff · 2024-08-16T14:37:33.000-04:00
diff --git a/include/universal/number/dd/dd_impl.hpp b/include/universal/number/dd/dd_impl.hpp
@@ -31,7 +31,9 @@ namespace sw { namespace universal {
 
 // fwd references to free functions used in to_digits()
 dd operator*(const dd& lhs, const dd&);
-dd pown(dd const&, int);
+std::ostream& operator<<(std::ostream&, const dd&);
+dd pown(const dd&, int);
+dd frexp(const dd&, int*);
 
 // dd is an unevaluated pair of IEEE-754 doubles that provides a (1,11,106) floating-point triple
 class dd {
@@ -811,6 +813,16 @@ inline std::string to_pair(const dd& v, int precision = 17) {
 	return s.str();
 }
 
+inline std::string to_triple(const dd& v, int precision = 17) { // formats v as the text "(sign, scale, fraction)"
+	std::stringstream s;
+	bool isneg = v.isneg(); // sign field: printed as '1' when negative, '0' otherwise
+	int scale = v.scale();
+	int exponent; // out-parameter required by frexp(); its value is never printed
+	dd fraction = frexp(v, &exponent); // NOTE(review): fraction is paired with exponent, yet scale is what gets printed -- confirm they agree
+	s << '(' << (isneg ? '1' : '0') << ", " << scale << ", " << std::setprecision(precision) << fraction << ')'; // precision only affects the fraction field
+	return s.str();
+}
+
 inline std::string to_binary(const dd& number, bool bNibbleMarker = false) {
 	std::stringstream s;
 	constexpr int nrLimbs = 2;
@@ -863,7 +875,7 @@ inline dd abs(dd a) {
 	return dd(hi, lo);
 }
 
-inline dd ceil(dd const& a)
+inline dd ceil(const dd& a)
 {
 	if (a.isnan()) return a;
 
@@ -878,7 +890,7 @@ inline dd ceil(dd const& a)
 	return dd(hi, lo);
 }
 
-inline dd floor(dd const& a) {
+inline dd floor(const dd& a) {
 	if (a.isnan()) return a;
 
 	double hi = std::floor(a.high());
@@ -974,7 +986,7 @@ inline dd mul_pwr2(const dd& a, double b) {
 // quad-double operators
 
 // quad-double + double-double
-void qd_add(double const a[4], dd const& b, double s[4]) {
+void qd_add(double const a[4], const dd& b, double s[4]) {
 	double t[5];
 	s[0] = two_sum(a[0], b.high(), t[0]);		//	s0 - O( 1 ); t0 - O( e )
 	s[1] = two_sum(a[1], b.low(), t[1]);		//	s1 - O( e ); t1 - O( e^2 )
@@ -991,7 +1003,7 @@ void qd_add(double const a[4], dd const& b, double s[4]) {
 }
 
 // quad-double = double-double * double-double
-void qd_mul(dd const& a, dd const& b, double p[4]) {
+void qd_mul(const dd& a, const dd& b, double p[4]) {
 	double p4, p5, p6, p7;
 
 	//	powers of e - 0, 1, 1, 1, 2, 2, 2, 3
@@ -1025,15 +1037,15 @@ void qd_mul(dd const& a, dd const& b, double p[4]) {
 	}
 }
 
-inline dd fma(dd const& a, dd const& b, dd const& c) {
+inline dd fma(const dd& a, const dd& b, const dd& c) {
 	double p[4];
 	qd_mul(a, b, p);
 	qd_add(p, c, p);
 	p[0] = two_sum(p[0], p[1] + p[2] + p[3], p[1]);
 	return dd(p[0], p[1]);
 }
 
-inline dd sqr(dd const& a) {
+inline dd sqr(const dd& a) {
 	if (a.isnan()) return a;
 
 	double p2, p1 = two_sqr(a.high(), p2);
@@ -1044,7 +1056,7 @@ inline dd sqr(dd const& a) {
 	return dd(s1, s2);
 }
 
-inline dd reciprocal(dd const& a) {
+inline dd reciprocal(const dd& a) {
 	if (a.iszero()) return dd(SpecificValue::infpos);
 
 	if (a.isinf()) return dd(0.0);
@@ -1065,7 +1077,7 @@ inline dd reciprocal(dd const& a) {
 	}
 }
 
-inline dd pown(dd const& a, int n) {
+inline dd pown(const dd& a, int n) {
 	if (a.isnan()) return a;
 
 	int N = (n < 0) ? -n : n;
diff --git a/include/universal/number/dd/numeric_limits.hpp b/include/universal/number/dd/numeric_limits.hpp
@@ -15,7 +15,6 @@ class numeric_limits< sw::universal::dd > {
 	using DoubleDouble = sw::universal::dd;
 	static constexpr bool is_specialized = true;
 	static constexpr DoubleDouble min() { // return minimum value
-		// return DoubleDouble(sw::universal::SpecificValue::minpos);
 		return DoubleDouble(radix * (numeric_limits< double >::min() / numeric_limits< double >::epsilon()));
 	} 
 	static constexpr DoubleDouble max() { // return maximum value
@@ -27,26 +26,23 @@ class numeric_limits< sw::universal::dd > {
 		return (-(max)());
 	} 
 	static constexpr DoubleDouble epsilon() { // return smallest effective increment from 1.0
-		return numeric_limits< double >::epsilon() * numeric_limits< double >::epsilon() / radix;
+		constexpr double epsilon{ std::numeric_limits< double >::epsilon() };
+		return (epsilon * epsilon) * 0.5;
 	}
 	static constexpr DoubleDouble round_error() { // return largest rounding error
 		return DoubleDouble(1.0 / radix);
 	}
 	static constexpr DoubleDouble denorm_min() {  // return minimum denormalized value
-		// return DoubleDouble(sw::universal::SpecificValue::minpos);
-		return 0.0;
+		return DoubleDouble(std::numeric_limits<double>::denorm_min());
 	}
 	static constexpr DoubleDouble infinity() { // return positive infinity
 		return DoubleDouble(sw::universal::SpecificValue::infpos);
-		//return numeric_limits< double >::infinity();
 	}
 	static constexpr DoubleDouble quiet_NaN() { // return non-signaling NaN
 		return DoubleDouble(sw::universal::SpecificValue::qnan);
-		//return numeric_limits< double >::quiet_NaN();
 	}
 	static constexpr DoubleDouble signaling_NaN() { // return signaling NaN
 		return DoubleDouble(sw::universal::SpecificValue::snan);
-		//return numeric_limits< double >::signaling_NaN();
 	}
 
 	static constexpr int  digits                   = 2 * std::numeric_limits<double>::digits;
diff --git a/static/dd/api/api.cpp b/static/dd/api/api.cpp
@@ -61,6 +61,34 @@ namespace sw {
 			ostr << str << '\n';
 		}
 
+		void construct_largest_doubledouble() { // demo: prints numeric_limits<dd>::max() and a dd rebuilt from two explicit limbs
+			using Scalar = dd;
+
+			double firstLimb = std::numeric_limits<double>::max();
+			dd a = std::numeric_limits<Scalar>::max();
+			std::cout << std::setprecision(32) << a << '\n';
+			int expOfFirstLimb = scale(a);
+			std::cout << to_binary(expOfFirstLimb) << " : " << expOfFirstLimb << '\n';
+			// second limb exponent: one double significand (53 bits) below the first limb
+			int expOfSecondLimb = expOfFirstLimb - std::numeric_limits<double>::digits; // binary offset; log10(2^53) ~= 15.95 is a decimal digit count
+			std::cout << "exponent of the first  limb : " << expOfFirstLimb << '\n';
+			std::cout << "exponent of the second limb : " << expOfSecondLimb << '\n';
+			// construct the second limb as the single power of two 2^expOfSecondLimb
+			double secondLimb = std::ldexp(1.0, expOfSecondLimb);
+			std::cout << "1.0         " << to_binary(1.0) << '\n';
+			std::cout << "first  limb " << to_binary(firstLimb) << '\n';
+			std::cout << "second limb " << to_binary(secondLimb) << '\n';
+
+			dd aa(firstLimb, secondLimb); // hand-assembled double-double from the two limbs above
+			std::cout << std::setprecision(16) << firstLimb << '\n';
+			std::cout << std::setprecision(16) << aa << '\n';
+			std::cout << std::setprecision(32) << aa << '\n';
+
+			dd b = ulp(std::numeric_limits<double>::max());
+			dd c = a + b; // a plus ulp(largest double); the sum is printed at the stream's current precision
+			std::cout << c << '\n';
+		}
+
 	}
 }
 
diff --git a/tools/cmd/doubledouble.cpp b/tools/cmd/doubledouble.cpp
@@ -0,0 +1,74 @@
+// doubledouble.cpp: components of a double-double: cli to show the sign/scale/limb components of a double-double floating-point
+//
+// Copyright (C) 2017 Stillwater Supercomputing, Inc.
+// SPDX-License-Identifier: MIT
+//
+// This file is part of the universal numbers project, which is released under an MIT Open Source license.
+#include <universal/utility/directives.hpp>
+#include <universal/utility/bit_cast.hpp>
+#include <limits>
+#include <universal/number/dd/dd.hpp>
+#include <universal/common/number_traits_reports.hpp>
+
+// ShowRepresentations prints the scientific, triple, binary, and color-coded forms of a Scalar value to ostr
+template<typename Scalar>
+void ShowRepresentations(std::ostream& ostr, const Scalar& f) {
+	using namespace sw::universal;
+	auto oldprec = ostr.precision(); // save stream state
+
+	constexpr int max_digits10 = std::numeric_limits<Scalar>::max_digits10; 	// floating-point attribute for printing scientific format
+
+	Scalar v(f); // local copy of the argument in the target Scalar type
+	ostr << "scientific   : " << std::setprecision(max_digits10) << v << '\n';
+	ostr << "triple form  : " << to_triple(v) << '\n';
+	ostr << "binary form  : " << to_binary(v, true) << '\n';
+	ostr << "color coded  : " << color_print(v) << '\n';
+
+	ostr << std::setprecision(oldprec); // restore the precision saved on entry
+}
+
+// receive a floating-point value string and print the components of its double-double representation
+int main(int argc, char** argv)
+try {
+	using namespace sw::universal;
+	using Scalar = dd;
+
+	if (argc != 2) { // no single value argument: print usage, a sample, and the dd number traits
+		std::cerr << "doubledouble: components of a double-double floating-point\n";
+		std::cerr << "Show the sign/scale/fraction components of an double-double.\n";
+		std::cerr << "Usage: doubledouble fp_value_string\n";
+		std::cerr << "Example: doubledouble 0.03124999\n";
+		ShowRepresentations<Scalar>(std::cerr, 0.03124999);
+
+		std::cout << "Number Traits of a double-double\n";
+		numberTraits<Scalar>(std::cout);
+
+		std::cout << "largest normal number\n";
+		std::cout << to_binary(std::numeric_limits<Scalar>::max()) << '\n';
+		std::cout << "smallest normal number\n";
+		std::cout << to_binary(std::numeric_limits<Scalar>::min()) << '\n';
+		std::cout << "smallest denormalized number\n";
+		std::cout << to_binary(std::numeric_limits<Scalar>::denorm_min()) << '\n';
+
+		constexpr Scalar epsilon{ std::numeric_limits< Scalar >::epsilon() };
+		std::cout << "epsilon : " << epsilon << '\n';
+		std::cout << to_binary(epsilon) << '\n';
+
+		std::cout.flush();
+		return EXIT_SUCCESS;   // signal successful completion for ctest
+	}
+
+	dd doubledouble(argv[1]); // construct the dd directly from the command-line string
+	ShowRepresentations<Scalar>(std::cout, doubledouble);
+
+	std::cout.flush();
+	return EXIT_SUCCESS;
+}
+catch (const char* const msg) { // exceptions thrown as C strings: report the message and fail
+	std::cerr << msg << std::endl;
+	return EXIT_FAILURE;
+}
+catch (...) { // anything else: report generically and fail
+	std::cerr << "Caught unknown exception" << std::endl;
+	return EXIT_FAILURE;
+}
diff --git a/tools/cmd/quaddouble.cpp b/tools/cmd/quaddouble.cpp
@@ -12,7 +12,7 @@
 
 // ShowRepresentations prints the different output formats for the quad-double type
 template<typename Scalar>
-void ShowRepresentations(std::ostream& ostr, sw::universal::qd f) {
+void ShowRepresentations(std::ostream& ostr, const Scalar& f) {
 	using namespace sw::universal;
 	auto defaultPrecision = ostr.precision(); // save stream state
 
@@ -43,12 +43,12 @@ try {
 
 	if (argc != 2) {
 		std::cerr << "quaddouble: components of a quad-double floating-point\n";
-		std::cerr << "Show the sign/scale/limbs components of a quad-double.\n";
+		std::cerr << "Show the sign/scale/fraction components of a quad-double.\n";
 		std::cerr << "Usage: quaddouble fp_value_string\n";
 		std::cerr << "Example: quaddouble 0.03124999\n";
 		ShowRepresentations<Scalar>(std::cerr, 0.03124999);
 
-		std::cout << "Number Traits of quad-double\n";
+		std::cout << "Number Traits of a quad-double\n";
 		numberTraits<Scalar>(std::cout);
 
 		std::cout << "largest normal number\n";
@@ -58,9 +58,10 @@ try {
 		std::cout << "smallest denormalized number\n";
 		std::cout << to_binary(std::numeric_limits<Scalar>::denorm_min()) << '\n';
 
-		Scalar epsilon{ std::numeric_limits< Scalar >::epsilon() };
+		constexpr Scalar epsilon{ std::numeric_limits< Scalar >::epsilon() };
 		std::cout << "epsilon : " << epsilon << '\n';
 		std::cout << to_binary(epsilon) << '\n';
+
 		std::cout.flush();
 		return EXIT_SUCCESS;   // signal successful completion for ctest
 	}

Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,6 @@ class numeric_limits< sw::universal::dd > {`
`15`	`15`	`using DoubleDouble = sw::universal::dd;`
`16`	`16`	`static constexpr bool is_specialized = true;`
`17`	`17`	`static constexpr DoubleDouble min() { // return minimum value`
`18`		`- // return DoubleDouble(sw::universal::SpecificValue::minpos);`
`19`	`18`	`return DoubleDouble(radix * (numeric_limits< double >::min() / numeric_limits< double >::epsilon()));`
`20`	`19`	`}`
`21`	`20`	`static constexpr DoubleDouble max() { // return maximum value`
`@@ -27,26 +26,23 @@ class numeric_limits< sw::universal::dd > {`
`27`	`26`	`return (-(max)());`
`28`	`27`	`}`
`29`	`28`	`static constexpr DoubleDouble epsilon() { // return smallest effective increment from 1.0`
`30`		`- return numeric_limits< double >::epsilon() * numeric_limits< double >::epsilon() / radix;`
	`29`	`+ constexpr double epsilon{ std::numeric_limits< double >::epsilon() };`
	`30`	`+ return (epsilon * epsilon) * 0.5;`
`31`	`31`	`}`
`32`	`32`	`static constexpr DoubleDouble round_error() { // return largest rounding error`
`33`	`33`	`return DoubleDouble(1.0 / radix);`
`34`	`34`	`}`
`35`	`35`	`static constexpr DoubleDouble denorm_min() { // return minimum denormalized value`
`36`		`- // return DoubleDouble(sw::universal::SpecificValue::minpos);`
`37`		`- return 0.0;`
	`36`	`+ return DoubleDouble(std::numeric_limits<double>::denorm_min());`
`38`	`37`	`}`
`39`	`38`	`static constexpr DoubleDouble infinity() { // return positive infinity`
`40`	`39`	`return DoubleDouble(sw::universal::SpecificValue::infpos);`
`41`		`- //return numeric_limits< double >::infinity();`
`42`	`40`	`}`
`43`	`41`	`static constexpr DoubleDouble quiet_NaN() { // return non-signaling NaN`
`44`	`42`	`return DoubleDouble(sw::universal::SpecificValue::qnan);`
`45`		`- //return numeric_limits< double >::quiet_NaN();`
`46`	`43`	`}`
`47`	`44`	`static constexpr DoubleDouble signaling_NaN() { // return signaling NaN`
`48`	`45`	`return DoubleDouble(sw::universal::SpecificValue::snan);`
`49`		`- //return numeric_limits< double >::signaling_NaN();`
`50`	`46`	`}`
`51`	`47`
`52`	`48`	`static constexpr int digits = 2 * std::numeric_limits<double>::digits;`