fix(cfloat): frexp returns a [0.5,1) fraction (std::frexp semantics) (#1029)

Ravenwater · claude · web-flow · commit d8a88d7bce82 · 2026-05-28T13:09:00.000-04:00
cfloat's frexp returned a fraction in [1,2) with *exp = scale() (floor(log2|x|)), which does not match std::frexp (fraction in [0.5,1), *exp = floor(log2|x|)+1) -- issue #1027. Generic code written to the std contract (and every other Universal type's frexp: dd, qd, ereal, bfloat16 all use [0.5,1)) saw the wrong exponent. Now: place the fraction at scale -1 (*exp = scale()+1). Extreme low-range configs (es <= 2, minimum normal exponent >= 0) cannot represent any value below 1.0 as a normal, so [0.5,1) is unachievable there; those fall back to the [1,2) fraction. ldexp is UNCHANGED -- it rebuilds the exponent from scale(), so the round-trip ldexp(frexp(x,&e),e) == x holds in every case. Also added special-case handling: +-0/inf/nan return unchanged with *exp = 0 (std::frexp mandates *exp = 0 only for +-0 and leaves it unspecified for inf/nan; 0 is stored for all three here; the old code corrupted them). Dependency sweep (the reason this is safe): the only callers of cfloat's frexp are round-trip tests, which are convention-agnostic; no generic/templated code instantiates cfloat's frexp expecting [1,2); manipulators/attributes/conversions do not use it. So the code change is confined to frexp + its test (exact_value_oracle.cpp receives a comment-only update). Test (fractional.cpp): assert the [0.5,1) fraction range for normal inputs where representable, plus frexp(0)/inf/nan special cases; added half and cfloat<16,8> coverage. (Note: a separate cfloat quirk -- isnormal() reports true for +-0 -- required excluding zero from the range assertion explicitly.) Verified gcc + clang: cfloat_fractional passes; the #1022 elreal oracle (which reads cfloat bits directly) is regression-clean. Resolves #1027 Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/elastic/elreal/arithmetic/exact_value_oracle.cpp b/elastic/elreal/arithmetic/exact_value_oracle.cpp
@@ -86,14 +86,15 @@ using namespace sw::universal;
 //   * p > 53 with the Universal FP API (cfloat quad and up): read the encoding
 //     DIRECTLY -- sign, scale, and the fbits stored fraction bits -- and assemble
 //         value = (-1)^sign * (2^fbits + F) * 2^(scale - fbits),  fbits = p - 1.
-//     This deliberately avoids cfloat's wide-precision frexp and floor: cfloat
-//     frexp normalises the mantissa to [1,2) (not std's [0.5,1)), and cfloat
-//     floor mis-handles large integers (floor(2^100+8) != 2^100+8, it routes
-//     through double) -- both filed separately. The earlier frexp/floor-based
-//     extraction passed #1023's quad sweeps only because those fed double-derived
-//     (<= 53-bit) q128 values; it silently mis-extracted genuine 113-bit values.
-//     The bit-based path is verified consistent across widths and against an
-//     independent 2x-wider cfloat product.
+//     Reading the encoding directly keeps this oracle self-contained: it depends
+//     on no cfloat math function, only on the bit layout. (Historically it also
+//     side-stepped two wide-precision cfloat bugs that an earlier frexp/floor
+//     extraction tripped over -- cfloat floor mis-handling large integers, #1026,
+//     and cfloat frexp's non-std [1,2) fraction, #1027; both now fixed. That
+//     earlier extraction had passed #1023's quad sweeps only because they fed
+//     double-derived <= 53-bit q128 values, silently mis-extracting genuine
+//     113-bit ones.) The bit-based path is verified consistent across widths and
+//     against an independent 2x-wider cfloat product.
 template <typename T>
 dyadic exact_real(T v) {
     if (v == T(0)) return dyadic();
diff --git a/include/sw/universal/number/cfloat/cfloat_impl.hpp b/include/sw/universal/number/cfloat/cfloat_impl.hpp
@@ -4352,11 +4352,25 @@ constexpr inline bool operator>=(const cfloat<nbits, es, bt, hasSubnormals, hasM
 
 // standard library functions for floating point
 
+// frexp: decompose x into a normalized fraction and an integer power of two, so
+// that x == fraction * 2^(*exp), following std::frexp semantics with the fraction
+// in [0.5, 1). (Earlier this returned a [1,2) fraction with *exp = scale(), which
+// did not match std::frexp -- issue #1027.)
 template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
 constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> frexp(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& x, int* exp) {
-	*exp = x.scale();
-	cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> fraction(x);
-	fraction.setexponent(0);
+	using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
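+	// Special cases: +-0, inf, and nan are returned unchanged with *exp = 0 (std::frexp requires 0 only for +-0 and leaves *exp unspecified for inf/nan; this implementation stores 0 for all three).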
+	if (x.iszero() || x.isinf() || x.isnan()) { *exp = 0; return x; }
+	// Place the fraction at scale -1 so |fraction| lands in [0.5, 1) (std::frexp).
+	// A few extreme low-range configs (es <= 2, where the minimum normal exponent
+	// is >= 0) cannot represent any value below 1.0 as a normal, so [0.5,1) is not
+	// achievable; those fall back to the [1,2) fraction (scale 0). Either way the
+	// round-trip ldexp(frexp(x,&e),e) == x holds, since ldexp rebuilds the
+	// exponent from scale().
+	constexpr int targetScale = (std::numeric_limits<Cfloat>::min_exponent <= 0) ? -1 : 0;
+	*exp = x.scale() - targetScale;       // scale() is floor(log2|x|); +1 for the [0.5,1) case
+	Cfloat fraction(x);
+	fraction.setexponent(targetScale);
 	return fraction;
 }
 
diff --git a/static/float/cfloat/math/fractional.cpp b/static/float/cfloat/math/fractional.cpp
@@ -19,23 +19,43 @@ int VerifyCfloatFractionExponent(bool reportTestCases) {
 	TestType a, b, c;
 	int exp;
 
+	// std::frexp returns the fraction in [0.5, 1). cfloat can represent that range
+	// only when its minimum normal exponent reaches at most -1; the extreme es<=2
+	// configs cannot, and frexp falls back to a [1,2) fraction there (still a valid
+	// round-trip). Assert the [0.5,1) range only where it is representable.
+	constexpr bool checkRange = (std::numeric_limits<TestType>::min_exponent <= 0);
 	for (size_t i = 1; i < NR_TEST_CASES; ++i) {
 		a.setbits(i);
+		if (a.isnan() || a.isinf()) continue;
 		b = frexp(a, &exp);
 		c = ldexp(b, exp);
-//		std::cout << "input : " << to_binary(a) << " : " << a << '\n';
-//		std::cout << "frexp : " << to_binary(b) << " : " << b << '\n';
-//		std::cout << "ldexp : " << to_binary(c) << " : " << c << '\n';
-		if (a != c) {
-			if (a.isnan() && c.isnan()) continue; // (s)nan != (s)nan, so the regular equivalance test fails
+		if (a != c) {                       // round-trip must always hold
 			nrOfFailedTests++;
 			if (reportTestCases)	ReportOneInputFunctionError("FAIL", "frexp/ldexp", a, b, c);
 		}
-		else {
-			// if (reportTestCases) ReportOneInputFunctionError("PASS", "frexp/ldexp", a, b, c);
+		// (cfloat isnormal() also reports true for +-0, so exclude zero explicitly)
+		if (checkRange && a.isnormal() && !a.iszero()) {   // std::frexp fraction range, where representable
+			double fb = std::abs(double(b));
+			if (!(fb >= 0.5 && fb < 1.0)) {
+				nrOfFailedTests++;
+				if (reportTestCases) std::cout << "frexp range FAIL: " << to_binary(a) << " -> fraction " << b << " (|f|=" << fb << ")\n";
+			}
 		}
 		if (nrOfFailedTests > 24) return 25;
 	}
+	// special cases: 0, inf, nan must come back unchanged; exp == 0 is asserted for zero only (the inf/nan checks below test the value class, not exp)
+	{
+		TestType z(0); int e = -99; TestType f = frexp(z, &e);
+		if (!f.iszero() || e != 0) { ++nrOfFailedTests; if (reportTestCases) std::cout << "frexp(0) FAIL: f=" << f << " e=" << e << '\n'; }
+	}
+	if (std::numeric_limits<TestType>::has_infinity) {
+		TestType inf; inf.setinf(false); int e = 0; TestType f = frexp(inf, &e);
+		if (!f.isinf()) { ++nrOfFailedTests; if (reportTestCases) std::cout << "frexp(inf) FAIL\n"; }
+	}
+	if (std::numeric_limits<TestType>::has_quiet_NaN) {
+		TestType nan; nan.setnan(); int e = 0; TestType f = frexp(nan, &e);
+		if (!f.isnan()) { ++nrOfFailedTests; if (reportTestCases) std::cout << "frexp(nan) FAIL\n"; }
+	}
 	return nrOfFailedTests;
 }
 
@@ -470,6 +490,12 @@ try {
 	nrOfFailedTestCases += ReportTestResult(
 		VerifyCfloatFractionExponent < cfloat<8, 4, uint8_t, true, true, false> >(reportTestCases), type_tag(cfloat<8, 4, uint8_t, true, true, false>()), "frexp/ldexp");
 
+	// wider configs exercise the std::frexp [0.5,1) fraction range (issue #1027)
+	nrOfFailedTestCases += ReportTestResult(
+		VerifyCfloatFractionExponent < half >(reportTestCases), type_tag(half()), "frexp/ldexp");
+	nrOfFailedTestCases += ReportTestResult(
+		VerifyCfloatFractionExponent < cfloat<16, 8, uint16_t, true, false, false> >(reportTestCases), type_tag(cfloat<16, 8, uint16_t, true, false, false>()), "frexp/ldexp");
+
 	nrOfFailedTestCases += ReportTestResult(
 		VerifyCfloatFmod < cfloat<8, 4, uint8_t, true, false, false> >(reportTestCases), type_tag(cfloat<8, 4, uint8_t, true, false, false>()), "fmod");