From 5d822c7b280dc7e9a4c753458657604a2c2926b8 Mon Sep 17 00:00:00 2001 From: Theodore Omtzigt Date: Wed, 20 May 2026 19:29:19 -0400 Subject: [PATCH 1/4] feat(elreal): Phase B -- rational + string construction with lazy refinement Phase B of epic #873 (resolves #875): adds the construction-from-source machinery and activates the lazy refinement protocol for rational inputs. Generator slot promoted from reserved-comment to real member: - mutable std::function _generator - at(k) walks the generator on cache miss, appending to _components - refine_to(precision_bits) iterates at(0..ceil(p/53)) - An absent generator (e.g. for elreal(double) values) leaves at(k>=depth) returning the implicit-zero extension, same as Phase A Rational constructor elreal(long long p, long long q): - Component 0: double(p) / double(q) (IEEE-754 rounded) - Component 1 (via generator): exact residual p - c0 * q computed with two_prod + two_sum, divided by q. Exact for |p| < 2^53 (i.e. any long long whose absolute value is within the 53-bit double-significand envelope); incurs one extra rounding for larger magnitudes. - Components 2+: 0.0 with a documented limitation. Arbitrary-depth refinement via McCleeary long division is deferred to Phase E/F. - elreal(p, 0) -> NaN; elreal(0, q) -> canonical zero. String constructor elreal(const std::string&): - Accepts decimal ("3.14"), scientific ("6.022e23"), and rational ("1/3") forms, plus the special tokens nan / inf / infinity (case-insensitive, with optional sign). - The parser normalises decimal / scientific to (p, q) form and delegates to the rational constructor. The acceptance criterion of #875 -- elreal("1/3").at(1) != 0 -- is met because the rational path returns a non-trivial correction. - Overflow fallback: scientific-notation values whose normalised numerator or denominator exceeds long long range fall back to a double-precision reconstruction (mantissa * pow(10, exp)). The leading component is faithful; exact-rational refinement past component 0 is unavailable. This handles e.g. "6.022e23" gracefully. - Silent-failure pattern matching dfloat / ereal: the std::string ctor swallows parse errors and returns canonical zero. parse(str, elreal&) is the bool-returning variant for callers that want to observe errors. operator double() fix: start the sum at _components[0] instead of 0.0 so that single-component -0.0 round-trips with its IEEE sign bit preserved. Tests under elastic/elreal/conversion/: - native_types.cpp: round-trip int/float/double/subnormal/inf/nan/-0.0 - rational.cpp: elreal(1, 3) delivers a non-zero correction; refinement is observably distinct from elreal(1.0/3.0); 1/2 and 1/4 are exact in a single component; -3/4 is exact; 1/0 -> NaN; 0/7 -> zero - string.cpp: decimal, scientific, rational, special-token, and parse- failure cases Cross-construction from dd / qd / ereal is left as future work; #875 marks it optional and the include-graph cost is non-trivial. Verified clean under gcc 13 and clang on dual-build-dir setup. Co-Authored-By: Claude Opus 4.7 (1M context) --- elastic/elreal/conversion/native_types.cpp | 106 ++++++ elastic/elreal/conversion/rational.cpp | 149 ++++++++ elastic/elreal/conversion/string.cpp | 135 +++++++ .../universal/number/elreal/elreal_impl.hpp | 347 ++++++++++++++++-- 4 files changed, 707 insertions(+), 30 deletions(-) create mode 100644 elastic/elreal/conversion/native_types.cpp create mode 100644 elastic/elreal/conversion/rational.cpp create mode 100644 elastic/elreal/conversion/string.cpp diff --git a/elastic/elreal/conversion/native_types.cpp b/elastic/elreal/conversion/native_types.cpp new file mode 100644 index 000000000..0928d7754 --- /dev/null +++ b/elastic/elreal/conversion/native_types.cpp @@ -0,0 +1,106 @@ +// native_types.cpp: round-trip conversion tests for elreal native-type ctors (Phase B) +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +#include + +#define ELREAL_THROW_ARITHMETIC_EXCEPTION 0 +#include +#include +#include + +template +static int check_int_round_trip(const char* label, Int v) { + sw::universal::elreal x(v); + double back = double(x); + if (back != static_cast(v)) { + std::cerr << "FAIL: " << label << " (" << v << ") round-trip != double(v): got " << back << "\n"; + return 1; + } + return 0; +} + +static int check_double_round_trip(const char* label, double v) { + sw::universal::elreal x(v); + double back = double(x); + if (back != v) { + // Treat NaN match as success + if (std::isnan(v) && std::isnan(back)) return 0; + std::cerr << "FAIL: " << label << " (" << v << ") round-trip != input: got " << back << "\n"; + return 1; + } + return 0; +} + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "elreal Phase B native-type conversion"; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, false); + + // Standard floating-point round-trip + nrOfFailedTestCases += check_double_round_trip("0.0", 0.0); + nrOfFailedTestCases += check_double_round_trip("1.0", 1.0); + nrOfFailedTestCases += check_double_round_trip("-2.5", -2.5); + nrOfFailedTestCases += check_double_round_trip("pi", 3.141592653589793); + nrOfFailedTestCases += check_double_round_trip("e", 2.718281828459045); + nrOfFailedTestCases += check_double_round_trip("1e100", 1e100); + nrOfFailedTestCases += check_double_round_trip("1e-300", 1e-300); + + // Subnormal + { + double sub = std::numeric_limits::denorm_min(); + nrOfFailedTestCases += check_double_round_trip("denorm_min", sub); + } + // Signed zero (should remain bitwise -0.0 through the round-trip). + { + double nz = -0.0; + elreal x(nz); + if (std::signbit(double(x)) != true) { + std::cerr << "FAIL: signed zero -0.0 lost its sign through elreal round-trip\n"; + ++nrOfFailedTestCases; + } + } + // Infinities + { + double pinf = std::numeric_limits::infinity(); + double ninf = -std::numeric_limits::infinity(); + nrOfFailedTestCases += check_double_round_trip("+inf", pinf); + nrOfFailedTestCases += check_double_round_trip("-inf", ninf); + } + // NaN (compare via std::isnan, not bit-for-bit) + { + double n = std::numeric_limits::quiet_NaN(); + elreal x(n); + if (!std::isnan(double(x))) { + std::cerr << "FAIL: NaN did not survive elreal round-trip\n"; + ++nrOfFailedTestCases; + } + } + + // Integer round-trip (values fit in double exactly) + nrOfFailedTestCases += check_int_round_trip("int 0", 0); + nrOfFailedTestCases += check_int_round_trip("int -1", -1); + nrOfFailedTestCases += check_int_round_trip("int 42", 42); + nrOfFailedTestCases += check_int_round_trip("int INT_MAX", INT_MAX); + nrOfFailedTestCases += check_int_round_trip("int INT_MIN", INT_MIN); + nrOfFailedTestCases += check_int_round_trip("long long 1<<40", static_cast(1) << 40); + nrOfFailedTestCases += check_int_round_trip("unsigned int 1<<31", 1u << 31); + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << '\n'; + return EXIT_FAILURE; +} +catch (const std::exception& err) { + std::cerr << "Caught exception: " << err.what() << '\n'; + return EXIT_FAILURE; +} diff --git a/elastic/elreal/conversion/rational.cpp b/elastic/elreal/conversion/rational.cpp new file mode 100644 index 000000000..2f40769e7 --- /dev/null +++ b/elastic/elreal/conversion/rational.cpp @@ -0,0 +1,149 @@ +// rational.cpp: tests for elreal(p, q) rational construction (Phase B) +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +// +// Phase B acceptance criterion (issue #875): +// `elreal("1/3")` is observably distinct from `elreal(1.0/3.0)` -- the +// rational constructor delivers more bits when asked for them. +// +// This file exercises the equivalent direct-rational form `elreal(1, 3)`. + +#include + +#define ELREAL_THROW_ARITHMETIC_EXCEPTION 0 +#include +#include + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "elreal Phase B rational construction"; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, false); + + // --- exact rationals: representable in a single double --------------- + { + elreal half(1LL, 2LL); + if (half.at(0) != 0.5 || half.at(1) != 0.0) { + std::cerr << "FAIL: 1/2 should be exact in a single component, got at(0)=" + << half.at(0) << " at(1)=" << half.at(1) << "\n"; + ++nrOfFailedTestCases; + } + } + { + elreal quarter(1LL, 4LL); + if (quarter.at(0) != 0.25 || quarter.at(1) != 0.0) { + std::cerr << "FAIL: 1/4 should be exact in a single component\n"; + ++nrOfFailedTestCases; + } + } + // Negative numerator + { + elreal nq(-3LL, 4LL); + if (nq.at(0) != -0.75 || nq.at(1) != 0.0) { + std::cerr << "FAIL: -3/4 should be exact in a single component\n"; + ++nrOfFailedTestCases; + } + } + + // --- inexact rationals: 1/3 (the canonical example) ------------------ + { + elreal third(1LL, 3LL); + double c0 = third.at(0); + double c1 = third.at(1); + + // at(0) must match the IEEE-754 rounded value of 1.0 / 3.0. + if (c0 != 1.0 / 3.0) { + std::cerr << "FAIL: elreal(1,3).at(0) != IEEE 1.0/3.0: got " << c0 + << " expected " << (1.0 / 3.0) << "\n"; + ++nrOfFailedTestCases; + } + // at(1) must be non-zero (the acceptance criterion of #875): the + // rational constructor delivers a correction beyond the leading double. + if (c1 == 0.0) { + std::cerr << "FAIL: elreal(1,3).at(1) == 0 (rational constructor not " + << "delivering refinement)\n"; + ++nrOfFailedTestCases; + } + // at(1) must be small relative to at(0) (it is a correction term, + // not a competing approximation). One ulp(c0) is a generous upper + // bound -- correctly-computed corrections live below 0.5*ulp(c0). + if (std::abs(c1) >= std::abs(c0)) { + std::cerr << "FAIL: elreal(1,3).at(1) is not a correction (|c1| >= |c0|)\n"; + ++nrOfFailedTestCases; + } + // at(0) + at(1) must be a tighter approximation of 1/3 than at(0) + // alone. Compare against a long-double reference, which carries more + // than 53 bits of precision on most platforms (and degrades to double + // on some -- in which case the check is trivially passed by equality, + // not a regression). + long double ld_third = 1.0L / 3.0L; + long double sum_two = static_cast(c0) + static_cast(c1); + long double err_one = std::abs(ld_third - static_cast(c0)); + long double err_two = std::abs(ld_third - sum_two); + if (err_two > err_one) { + std::cerr << "FAIL: elreal(1,3) refinement made the approximation worse" + << "\n err(c0 alone) = " << static_cast(err_one) + << "\n err(c0 + c1 sum) = " << static_cast(err_two) << "\n"; + ++nrOfFailedTestCases; + } + + std::cout << "elreal(1, 3):\n" + << " at(0) = " << c0 << "\n" + << " at(1) = " << c1 << "\n" + << " at(0) + at(1) = " << (c0 + c1) << "\n" + << " double(1.0/3.0) = " << (1.0 / 3.0) << "\n"; + } + + // --- elreal(1, 3) is observably distinct from elreal(1.0/3.0) --------- + { + elreal from_rational(1LL, 3LL); + elreal from_double(1.0 / 3.0); + + // Both must agree on at(0). + if (from_rational.at(0) != from_double.at(0)) { + std::cerr << "FAIL: at(0) disagrees between rational and double forms\n"; + ++nrOfFailedTestCases; + } + // They must disagree on at(1): the rational form has more bits. + if (from_rational.at(1) == from_double.at(1)) { + std::cerr << "FAIL: elreal(1,3) and elreal(1.0/3.0) agree on at(1) -- " + << "the rational form is not delivering refinement\n"; + ++nrOfFailedTestCases; + } + } + + // --- division by zero produces NaN ------------------------------------ + { + elreal nan_form(1LL, 0LL); + if (!nan_form.isnan()) { + std::cerr << "FAIL: elreal(1, 0) is not NaN\n"; + ++nrOfFailedTestCases; + } + } + + // --- numerator zero produces canonical zero --------------------------- + { + elreal z(0LL, 7LL); + if (!z.iszero()) { + std::cerr << "FAIL: elreal(0, 7) is not zero\n"; + ++nrOfFailedTestCases; + } + } + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << '\n'; + return EXIT_FAILURE; +} +catch (const std::exception& err) { + std::cerr << "Caught exception: " << err.what() << '\n'; + return EXIT_FAILURE; +} diff --git a/elastic/elreal/conversion/string.cpp b/elastic/elreal/conversion/string.cpp new file mode 100644 index 000000000..23f7008a7 --- /dev/null +++ b/elastic/elreal/conversion/string.cpp @@ -0,0 +1,135 @@ +// string.cpp: tests for elreal(const std::string&) construction (Phase B) +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +#include + +#define ELREAL_THROW_ARITHMETIC_EXCEPTION 0 +#include +#include + +static int check_string_value(const char* input, double expected_lead) { + sw::universal::elreal x(std::string{input}); + double back = x.at(0); + if (back != expected_lead) { + std::cerr << "FAIL: elreal(\"" << input << "\").at(0) = " << back + << " (expected " << expected_lead << ")\n"; + return 1; + } + return 0; +} + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "elreal Phase B string parsing"; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, false); + + // --- decimal integers ------------------------------------------------ + nrOfFailedTestCases += check_string_value("0", 0.0); + nrOfFailedTestCases += check_string_value("42", 42.0); + nrOfFailedTestCases += check_string_value("-7", -7.0); + nrOfFailedTestCases += check_string_value("+12", 12.0); + + // --- decimal fractions ----------------------------------------------- + nrOfFailedTestCases += check_string_value("0.5", 0.5); + nrOfFailedTestCases += check_string_value("3.14", 3.14); + nrOfFailedTestCases += check_string_value("-2.5", -2.5); + nrOfFailedTestCases += check_string_value("0.125", 0.125); + + // --- scientific notation --------------------------------------------- + nrOfFailedTestCases += check_string_value("1e0", 1.0); + nrOfFailedTestCases += check_string_value("1.5e2", 150.0); + nrOfFailedTestCases += check_string_value("3.14e-2", 0.0314); + nrOfFailedTestCases += check_string_value("6.022e23", 6.022e23); + nrOfFailedTestCases += check_string_value("-1.0e-10", -1.0e-10); + + // --- rational form --------------------------------------------------- + nrOfFailedTestCases += check_string_value("1/2", 0.5); + nrOfFailedTestCases += check_string_value("3/4", 0.75); + nrOfFailedTestCases += check_string_value("-22/7", -22.0 / 7.0); + + // "1/3" delivers a real correction (the acceptance criterion of #875). + { + elreal third{ std::string("1/3") }; + if (third.at(0) != 1.0 / 3.0) { + std::cerr << "FAIL: elreal(\"1/3\").at(0) != IEEE 1.0/3.0\n"; + ++nrOfFailedTestCases; + } + if (third.at(1) == 0.0) { + std::cerr << "FAIL: elreal(\"1/3\").at(1) == 0 (no refinement)\n"; + ++nrOfFailedTestCases; + } + } + + // --- special tokens (case-insensitive) ------------------------------- + { + elreal nan_lower{ std::string("nan") }; + elreal nan_upper{ std::string("NAN") }; + elreal nan_mixed{ std::string("NaN") }; + if (!nan_lower.isnan() || !nan_upper.isnan() || !nan_mixed.isnan()) { + std::cerr << "FAIL: nan / NAN / NaN did not all parse as NaN\n"; + ++nrOfFailedTestCases; + } + } + { + elreal pinf{ std::string("inf") }; + elreal pinfinity{ std::string("infinity") }; + elreal ninf{ std::string("-inf") }; + elreal ninfinity{ std::string("-INFINITY") }; + if (!pinf.isinf() || !pinfinity.isinf() || !ninf.isinf() || !ninfinity.isinf()) { + std::cerr << "FAIL: inf / infinity / -inf / -INFINITY did not all parse as inf\n"; + ++nrOfFailedTestCases; + } + if (double(ninf) > 0.0 || double(ninfinity) > 0.0) { + std::cerr << "FAIL: -inf / -INFINITY did not produce negative inf\n"; + ++nrOfFailedTestCases; + } + } + + // --- parse failures yield canonical zero ----------------------------- + { + // The std::string constructor swallows parse failures (matching dfloat / + // ereal). Use the bool-returning parse() to observe the failure directly. + elreal sink; + if (parse(std::string{"foo"}, sink)) { + std::cerr << "FAIL: parse(\"foo\") returned true\n"; + ++nrOfFailedTestCases; + } + if (parse(std::string{""}, sink)) { + std::cerr << "FAIL: parse(\"\") returned true\n"; + ++nrOfFailedTestCases; + } + if (parse(std::string{"1.2.3"}, sink)) { + std::cerr << "FAIL: parse(\"1.2.3\") returned true\n"; + ++nrOfFailedTestCases; + } + if (parse(std::string{"1/0"}, sink)) { + std::cerr << "FAIL: parse(\"1/0\") returned true (denominator zero must be rejected)\n"; + ++nrOfFailedTestCases; + } + + elreal silent{ std::string("garbage") }; + if (!silent.iszero()) { + std::cerr << "FAIL: silent-failure string ctor did not yield zero\n"; + ++nrOfFailedTestCases; + } + } + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << '\n'; + return EXIT_FAILURE; +} +catch (const std::exception& err) { + std::cerr << "Caught exception: " << err.what() << '\n'; + return EXIT_FAILURE; +} diff --git a/include/sw/universal/number/elreal/elreal_impl.hpp b/include/sw/universal/number/elreal/elreal_impl.hpp index 46a466fc0..dcfd608a7 100644 --- a/include/sw/universal/number/elreal/elreal_impl.hpp +++ b/include/sw/universal/number/elreal/elreal_impl.hpp @@ -98,35 +98,58 @@ // What ships in Phase A vs later phases // ----------------------------------------------------------------------------- // -// Phase A (this file, skeleton only): -// - Construct from `double` (the trivial case: a single-component stream) -// - Construct from `int`, `long`, `long long` (all exact at this stage if -// within double's representable range) -// - `at(0)` and `refine_to(precision_bits)` as no-ops past component 0 -// - `operator double()` returns `_components[0]` -// - Triviality is *not* claimed: the type contains `std::vector` and (in -// Phase C) `std::function`, neither of which is trivially constructible. -// Universal's library-wide `ReportTrivialityOfType` is reported, not -// asserted, for elastic types -- consistent with `ereal`. +// Phase A (foundation): +// - Construct from `double`, `float`, integer types +// - `at(0)` and `refine_to(precision_bits)` -- entry points in place +// - `operator double()` returns the sum of materialised components +// +// Phase B (this update, #875): +// - The closure-based generator slot is now real (mutable std::function). +// `at(k)` calls the generator on cache miss; `refine_to(p)` iterates +// `at(0..ceil(p/53))`. +// - Construction from `p/q` rationals (`elreal(p, q)`). Component 0 is +// `double(p)/double(q)`. Component 1 is the exact residual via EFTs +// (two_prod + two_sum). Components 2+ return 0.0 with a documented +// limitation -- full McCleeary long division for arbitrary depth is +// deferred to Phase E/F. The acceptance criterion of #875 is that +// `elreal(1,3).at(1) != 0`, i.e. observably distinct from +// `elreal(1.0/3.0)`. +// - Construction from decimal/scientific/rational strings. The parser +// normalises the input to a rational `(p, q)` form, then delegates to +// the rational constructor. +// - Cross-construction from `dd` / `qd` / `ereal` is left as future +// work (the include-graph cost is non-trivial and these types' Phase B +// acceptance criterion is not "implicit cross-conversion exists"). +// +// Triviality is *not* claimed: the type contains `std::vector` and now also +// `std::function`, neither of which is trivially constructible. Universal's +// library-wide `ReportTrivialityOfType` is reported, not asserted, for +// elastic types -- consistent with `ereal`. // // Deferred to later phases: -// - Construction from `p/q` rationals and decimal strings (Phase B, #875) -// - Cross-construction from `dd`, `qd`, `ereal` (Phase B, #875) -// - The closure-based generator and the four ring operations (Phase C, #876) +// - The four ring operations (Phase C, #876) // - Comparison, sign, and the refinement budget policy (Phase D, #877) // - Math functions (Phase E, #878) +// - Cross-construction from dd/qd/ereal (a future PR; #875 marks it optional) +// - Deep-depth rational refinement (k >= 2) via McCleeary long division +// (Phase E/F) // // ============================================================================= #include #include #include +#include #include +#include +#include #include +#include #include #include #include +#include namespace sw { namespace universal { @@ -211,6 +234,76 @@ class elreal { } } + // Rational construction: elreal(p, q) represents the exact value p/q. + // Component 0 is `double(p)/double(q)` (the IEEE-754 rounded division). + // Component 1 is the exact residual `p/q - component_0` computed via + // two_prod + two_sum -- this is exact when `p` fits in a double (i.e. + // `|p| < 2^53`, which is true for any `long long` whose absolute value + // is < 2^53; for larger magnitudes the residual computation incurs one + // extra rounding error, documented in Phase E follow-up). + // Components 2 and beyond return 0.0 in Phase B; full McCleeary + // long-division refinement to arbitrary depth lands in Phase E/F. + elreal(long long p, long long q) + : _components{}, _computed_depth{0}, _generator{} + { + if (q == 0) { + _components.push_back(std::numeric_limits::quiet_NaN()); + _computed_depth = 1; + return; + } + if (p == 0) { + // canonical zero; leave _components empty + return; + } + + double c0 = static_cast(p) / static_cast(q); + _components.push_back(c0); + _computed_depth = 1; + + // Generator: produces the k-th correction component on demand. + // k == 1 -> exact residual via EFTs. + // k >= 2 -> 0.0 (Phase B limitation; documented above). + long long p_cap = p; + long long q_cap = q; + double c0_cap = c0; + _generator = [p_cap, q_cap, c0_cap](std::size_t k) -> double { + if (k != 1) return 0.0; + // Compute the residual p - c0 * q exactly using EFTs: + // two_prod(c0, q) -> (prod_hi, prod_err) with c0*q = prod_hi + prod_err + // two_sum(p, -prod_hi) -> (s1, s1_err) + // two_sum(s1, -prod_err) -> (s2, s2_err) + // residual = s2 + s1_err + s2_err (sum of error terms collapsed to a + // double; further bits are below + // double precision in this scope) + double prod_err; + double prod_hi = two_prod(c0_cap, static_cast(q_cap), prod_err); + double s1_err; + double s1 = two_sum(static_cast(p_cap), -prod_hi, s1_err); + double s2_err; + double s2 = two_sum(s1, -prod_err, s2_err); + double residual = s2 + s1_err + s2_err; + return residual / static_cast(q_cap); + }; + } + + // String construction: parses decimal ("3.14"), scientific ("6.022e23"), + // rational ("1/3"), and the special tokens "nan" / "inf" / "infinity" + // (case-insensitive). The parse normalises to a rational (p, q) pair + // and delegates to the rational constructor. Returns the canonical zero + // on parse failure (silent-failure pattern matching the dfloat and + // ereal string constructors); see parse() below for the bool-returning + // variant. + explicit elreal(const std::string& str) + : _components{}, _computed_depth{0}, _generator{} + { + if (!parse_into(*this, str)) { + // parse failed -- leave at canonical zero + _components.clear(); + _computed_depth = 0; + _generator = {}; + } + } + // rule-of-five: defaulted, defensible for a vector-holding type elreal(const elreal&) = default; elreal(elreal&&) noexcept = default; @@ -231,16 +324,28 @@ class elreal { // least ceil(p / 53) components materialised. No-op // in Phase A. - double at(std::size_t k) const noexcept { + double at(std::size_t k) const { + // Materialise components up to and including index k by calling the + // generator on cache misses. When no generator is installed (e.g. for + // values constructed from a single double), the stream is treated as + // terminating at _computed_depth and at(k>=depth) returns 0.0. + while (_computed_depth <= k && _generator) { + double next = _generator(_computed_depth); + _components.push_back(next); + ++_computed_depth; + } if (k < _computed_depth) return _components[k]; - // Phase A: no generator; the implicit extension is 0.0. return 0.0; } - void refine_to(std::size_t precision_bits) const noexcept { - // Phase A: no-op. Phase C will iteratively call at(k) to materialise - // the prefix. - (void)precision_bits; + void refine_to(std::size_t precision_bits) const { + // Each component carries ~53 bits of precision (one IEEE-754 double + // significand), so we want ceil(precision_bits / 53) components + // materialised. Force at least one component for any positive request. + if (precision_bits == 0) return; + std::size_t target = (precision_bits + 52) / 53; + // Touching at(target - 1) walks the generator up to that depth. + (void)at(target - 1); } std::size_t computed_depth() const noexcept { return _computed_depth; } @@ -250,10 +355,13 @@ class elreal { // --------------------------------------------------------------------- // Sum of the currently-computed components -- the best estimate of the - // true value at the current refinement depth. + // true value at the current refinement depth. Starts the sum at + // _components[0] (rather than literal 0.0) so that single-component + // -0.0 round-trips with its IEEE sign bit preserved. explicit operator double() const noexcept { - double s = 0.0; - for (std::size_t i = 0; i < _computed_depth; ++i) s += _components[i]; + if (_computed_depth == 0) return 0.0; + double s = _components[0]; + for (std::size_t i = 1; i < _computed_depth; ++i) s += _components[i]; return s; } @@ -287,14 +395,193 @@ class elreal { // _components and grow _computed_depth as the generator fires. mutable std::size_t _computed_depth; - // Generator slot for Phase C. Empty in Phase A; the at(k) entry point - // returns the implicit-zero extension when k >= _computed_depth. - // - // (Declared here as a comment to document the planned storage layout; - // the actual member lands in Phase C alongside the closure-instantiation - // patterns.) - // - // mutable std::function _generator; + // Generator slot. Empty by default; the rational constructor (and, in + // Phase C, the arithmetic operators) install a closure here. When the + // generator is empty, at(k) for k >= _computed_depth returns 0.0 + // (the implicit-zero extension that matches the leading-component-only + // representation of a constructed-from-double value). + mutable std::function _generator; + + // String parser. Lives as a friend free function so it can populate + // the private state. Public-facing wrappers are the std::string + // constructor and the namespace-level parse(str, elreal&) declared + // in elreal_fwd.hpp. + friend bool parse_into(elreal& out, const std::string& str); }; +// ============================================================================= +// String parser +// ============================================================================= +// +// Accepts: +// "123", "-42", "+42" -- decimal integer +// "3.14", "-1.25" -- decimal fraction (normalised to p/q) +// "6.022e23", "1.5E-10" -- scientific (normalised to p/q with q a +// power of 10) +// "1/3", "-22/7" -- rational +// "nan", "inf", "-infinity" -- special tokens (case-insensitive) +// +// Returns true on success, false on parse error. On error, `out` is set to +// canonical zero. +// +// Limitation: the integer accumulators are `long long`, so mantissa digit +// counts beyond ~18 or scientific exponents that produce a denominator +// requiring > 18 digits overflow silently and return false. The phase-B +// acceptance does not include arbitrary-precision string parsing; the +// canonical use cases (representing 1/3, 22/7, pi as a finite decimal, etc.) +// all fit comfortably. + +inline bool parse_into(elreal& out, const std::string& str) { + out._components.clear(); + out._computed_depth = 0; + out._generator = {}; + + if (str.empty()) return false; + std::size_t pos = 0; + + // Skip whitespace + while (pos < str.length() && std::isspace(static_cast(str[pos]))) ++pos; + if (pos >= str.length()) return false; + + // Sign + bool negative = false; + if (str[pos] == '+' || str[pos] == '-') { + negative = (str[pos] == '-'); + ++pos; + } + + // nan / inf / infinity tokens + { + std::string lower; + for (std::size_t q = pos; q < str.length(); ++q) { + lower.push_back(static_cast(std::tolower(static_cast(str[q])))); + } + if (lower == "nan") { + out._components.push_back(std::numeric_limits::quiet_NaN()); + out._computed_depth = 1; + return true; + } + if (lower == "inf" || lower == "infinity") { + out._components.push_back(negative + ? -std::numeric_limits::infinity() + : std::numeric_limits::infinity()); + out._computed_depth = 1; + return true; + } + } + + // Rational form: look for a '/' separator (no decimal / scientific + // mixing supported -- rational form is its own track). + std::size_t slash = str.find('/', pos); + if (slash != std::string::npos) { + long long num = 0, den = 0; + bool found_num = false; + for (std::size_t i = pos; i < slash; ++i) { + char c = str[i]; + if (!std::isdigit(static_cast(c))) return false; + num = num * 10 + (c - '0'); + found_num = true; + } + if (!found_num) return false; + bool found_den = false; + for (std::size_t i = slash + 1; i < str.length(); ++i) { + char c = str[i]; + if (!std::isdigit(static_cast(c))) return false; + den = den * 10 + (c - '0'); + found_den = true; + } + if (!found_den || den == 0) return false; + out = elreal(negative ? -num : num, den); + return true; + } + + // Decimal / scientific form + long long mantissa = 0; + bool found_digit = false; + bool seen_dot = false; + int frac_digits = 0; + while (pos < str.length()) { + char c = str[pos]; + if (std::isdigit(static_cast(c))) { + mantissa = mantissa * 10 + (c - '0'); + if (seen_dot) ++frac_digits; + found_digit = true; + } + else if (c == '.' && !seen_dot) { + seen_dot = true; + } + else if (c == 'e' || c == 'E') { + ++pos; + break; + } + else { + return false; + } + ++pos; + } + if (!found_digit) return false; + + int exponent = 0; + bool exp_negative = false; + if (pos < str.length()) { + if (str[pos] == '+' || str[pos] == '-') { + exp_negative = (str[pos] == '-'); + ++pos; + } + bool found_exp_digit = false; + while (pos < str.length()) { + char c = str[pos]; + if (!std::isdigit(static_cast(c))) return false; + exponent = exponent * 10 + (c - '0'); + found_exp_digit = true; + ++pos; + } + if (!found_exp_digit) return false; + if (exp_negative) exponent = -exponent; + } + + // Normalize to (p, q) where p = signed mantissa, q = 10^(frac_digits - exponent). + // If either integer would overflow long long (typical for scientific + // notation like "6.022e23"), fall back to a double-precision construction + // that captures the leading 53 bits accurately. The exact-rational + // refinement at depth > 0 is then unavailable for that value -- a + // documented Phase B limitation. The integer-fit case (any decimal whose + // numerator and denominator individually fit in 18-19 digits) gets + // full rational refinement. + int q_pow = frac_digits - exponent; + long long p = negative ? -mantissa : mantissa; + long long q = 1; + bool overflowed = false; + if (q_pow > 0) { + for (int i = 0; i < q_pow && !overflowed; ++i) { + if (q > std::numeric_limits::max() / 10) overflowed = true; + else q *= 10; + } + } + else if (q_pow < 0) { + for (int i = 0; i < -q_pow && !overflowed; ++i) { + long long pmag = p < 0 ? -p : p; + if (pmag > std::numeric_limits::max() / 10) overflowed = true; + else p *= 10; + } + } + + if (overflowed) { + // Reconstruct via double * 10^q_pow_signed. Loses exact-rational + // refinement but keeps the leading component faithful. + double mag = static_cast(mantissa); + double scaled = mag * std::pow(10.0, static_cast(-q_pow)); + out = elreal(negative ? -scaled : scaled); + return true; + } + + out = elreal(p, q); + return true; +} + +// Namespace-level parse() (matches dfloat / ereal convention) +inline bool parse(const std::string& str, elreal& out) { + return parse_into(out, str); +} + }} // namespace sw::universal From 3ead8ea143aed9001d10ab086aea1ba33ef14dcb Mon Sep 17 00:00:00 2001 From: Theodore Omtzigt Date: Wed, 20 May 2026 21:11:23 -0400 Subject: [PATCH 2/4] fix(elreal): overflow guards, LLONG_MIN UB, signed-zero from string Address CodeRabbit findings on PR #884. Three correctness fixes in elreal_impl.hpp: 1. Rational parser num / den accumulation now rejects on signed-overflow (num * 10 + digit > LLONG_MAX) before the multiply happens. Without the guard, 19+ digit inputs silently wrapped. 2. Decimal / scientific parser mantissa accumulation gets the same guard. 3. The q_pow < 0 multiplication loop replaces the `pmag = (p < 0 ? -p : p)` magnitude with unsigned arithmetic so LLONG_MIN does not trigger signed-negation UB. With the mantissa overflow guard above, p cannot actually reach LLONG_MIN in this path, but the unsigned form is defensive against a future caller of the rational ctor with a hand-rolled value. One consistency fix: parse("-0"), parse("-0.0"), parse("-0/7") now all preserve the negative sign through to the resulting elreal. Previously the parser collapsed these to canonical (sign-less) zero, inconsistent with elreal(-0.0) which preserves the IEEE-754 sign bit. Sign now survives across all "construct from -0" paths. Tests under elastic/elreal/conversion/string.cpp gain three new cases (elreal("-0"), elreal("-0.0"), elreal("-0/7") all asserted via std::signbit) so the contract is locked in. Trivial style cleanup in elastic/elreal/conversion/native_types.cpp: std::signbit(...) != true -> !std::signbit(...). Declining one CodeRabbit nitpick (whitespace-wrapped tokens like " +inf "): the parser claims leading-whitespace-only support (matching ereal's parse() in include/sw/universal/number/ereal/ ereal_impl.hpp:327). Trailing-whitespace handling is not part of the contract on either type; adding it without ereal also adopting it would create an asymmetric API surface. Left as future work if a consumer needs it on both types. Verified clean under gcc 13 and clang. Co-Authored-By: Claude Opus 4.7 (1M context) --- elastic/elreal/conversion/native_types.cpp | 2 +- elastic/elreal/conversion/string.cpp | 21 ++++++++ .../universal/number/elreal/elreal_impl.hpp | 48 +++++++++++++++++-- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/elastic/elreal/conversion/native_types.cpp b/elastic/elreal/conversion/native_types.cpp index 0928d7754..1199fc851 100644 --- a/elastic/elreal/conversion/native_types.cpp +++ b/elastic/elreal/conversion/native_types.cpp @@ -62,7 +62,7 @@ try { { double nz = -0.0; elreal x(nz); - if (std::signbit(double(x)) != true) { + if (!std::signbit(double(x))) { std::cerr << "FAIL: signed zero -0.0 lost its sign through elreal round-trip\n"; ++nrOfFailedTestCases; } diff --git a/elastic/elreal/conversion/string.cpp b/elastic/elreal/conversion/string.cpp index 23f7008a7..6b83cee13 100644 --- a/elastic/elreal/conversion/string.cpp +++ b/elastic/elreal/conversion/string.cpp @@ -37,6 +37,27 @@ try { nrOfFailedTestCases += check_string_value("-7", -7.0); nrOfFailedTestCases += check_string_value("+12", 12.0); + // "-0" should produce a negative zero (sign bit preserved). Numeric + // equality alone (0.0 == -0.0) can't catch a sign-bit regression, so + // check via std::signbit explicitly. + { + elreal nz{ std::string("-0") }; + if (!std::signbit(double(nz))) { + std::cerr << "FAIL: elreal(\"-0\") did not preserve negative zero\n"; + ++nrOfFailedTestCases; + } + elreal nz_decimal{ std::string("-0.0") }; + if (!std::signbit(double(nz_decimal))) { + std::cerr << "FAIL: elreal(\"-0.0\") did not preserve negative zero\n"; + ++nrOfFailedTestCases; + } + elreal nz_rational{ std::string("-0/7") }; + if (!std::signbit(double(nz_rational))) { + std::cerr << "FAIL: elreal(\"-0/7\") did not preserve negative zero\n"; + ++nrOfFailedTestCases; + } + } + // --- decimal fractions ----------------------------------------------- nrOfFailedTestCases += check_string_value("0.5", 0.5); nrOfFailedTestCases += check_string_value("3.14", 3.14); diff --git a/include/sw/universal/number/elreal/elreal_impl.hpp b/include/sw/universal/number/elreal/elreal_impl.hpp index dcfd608a7..c373a0c74 100644 --- a/include/sw/universal/number/elreal/elreal_impl.hpp +++ b/include/sw/universal/number/elreal/elreal_impl.hpp @@ -479,7 +479,10 @@ inline bool parse_into(elreal& out, const std::string& str) { for (std::size_t i = pos; i < slash; ++i) { char c = str[i]; if (!std::isdigit(static_cast(c))) return false; - num = num * 10 + (c - '0'); + int digit = c - '0'; + // Reject before signed overflow: `num * 10 + digit > LLONG_MAX` ? + if (num > (std::numeric_limits::max() - digit) / 10) return false; + num = num * 10 + digit; found_num = true; } if (!found_num) return false; @@ -487,10 +490,23 @@ inline bool parse_into(elreal& out, const std::string& str) { for (std::size_t i = slash + 1; i < str.length(); ++i) { char c = str[i]; if (!std::isdigit(static_cast(c))) return false; - den = den * 10 + (c - '0'); + int digit = c - '0'; + if (den > (std::numeric_limits::max() - digit) / 10) return false; + den = den * 10 + digit; found_den = true; } if (!found_den || den == 0) return false; + // Signed zero: "-0/q" preserves the negative sign through the parse + // even though zero has no sign mathematically; matches the elreal(-0.0) + // round-trip behavior so the IEEE-754 sign bit is consistent across + // ctor paths. + if (num == 0 && negative) { + out._components.clear(); + out._components.push_back(-0.0); + out._computed_depth = 1; + out._generator = {}; + return true; + } out = elreal(negative ? -num : num, den); return true; } @@ -503,7 +519,9 @@ inline bool parse_into(elreal& out, const std::string& str) { while (pos < str.length()) { char c = str[pos]; if (std::isdigit(static_cast(c))) { - mantissa = mantissa * 10 + (c - '0'); + int digit = c - '0'; + if (mantissa > (std::numeric_limits::max() - digit) / 10) return false; + mantissa = mantissa * 10 + digit; if (seen_dot) ++frac_digits; found_digit = true; } @@ -560,8 +578,17 @@ inline bool parse_into(elreal& out, const std::string& str) { } else if (q_pow < 0) { for (int i = 0; i < -q_pow && !overflowed; ++i) { - long long pmag = p < 0 ? -p : p; - if (pmag > std::numeric_limits::max() / 10) overflowed = true; + // Compute |p| via unsigned arithmetic so LLONG_MIN does not + // trigger signed-negation UB. With the mantissa overflow guard + // above, p cannot actually reach LLONG_MIN at this point, but + // keeping the unsigned form is defensive coding -- a future + // caller of this constructor with a hand-rolled value could. + unsigned long long pmag = p < 0 + ? static_cast(-(p + 1)) + 1u + : static_cast(p); + constexpr unsigned long long limit = + static_cast(std::numeric_limits::max()) / 10u; + if (pmag > limit) overflowed = true; else p *= 10; } } @@ -575,6 +602,17 @@ inline bool parse_into(elreal& out, const std::string& str) { return true; } + // Signed zero preservation: "-0", "-0.0", "-0e5", etc. should produce a + // negative zero (matching the elreal(-0.0) round-trip). The general + // rational ctor would collapse this to canonical (sign-less) zero. + if (p == 0 && negative) { + out._components.clear(); + out._components.push_back(-0.0); + out._computed_depth = 1; + out._generator = {}; + return true; + } + out = elreal(p, q); return true; } From 3016552b62ae6e7db40d75108e42a9d33ae6b9a0 Mon Sep 17 00:00:00 2001 From: Theodore Omtzigt Date: Wed, 20 May 2026 21:34:43 -0400 Subject: [PATCH 3/4] fix(elreal): route mantissa overflow through std::stod fallback Address CodeRabbit second-round finding on PR #884. The previous commit added overflow guards on the mantissa accumulator that returned false immediately, which had an unintended consequence: valid scientific literals with 20+ significant digits (e.g., "12345678901234567890e5") silently parsed as canonical zero through the std::string ctor. The code already had an `overflowed` flag and a fallback path for the q_pow scaling overflow case, but the mantissa overflow was hitting return false before reaching the fallback. Fix: on mantissa overflow, set a mantissa_overflow flag and keep scanning to validate the rest of the literal (rejecting on invalid characters, exhausting the exponent, etc.). After the parse loop completes, if mantissa_overflow is set, use std::stod() on the original input string to get the correctly-rounded leading double and construct the elreal from that. Exact-rational refinement past depth 0 is unavailable in this overflow path -- a documented Phase B limitation matching the existing q_pow overflow fallback. Tests in elastic/elreal/conversion/string.cpp gain two cases: - "12345678901234567890" (20-digit integer mantissa) - "1.234567890123456789012345e5" (25-digit scientific mantissa) Both must produce non-zero results matching std::stod on the same string. Verified clean under gcc 13 and clang. Co-Authored-By: Claude Opus 4.7 (1M context) --- elastic/elreal/conversion/string.cpp | 38 +++++++++++++++++++ .../universal/number/elreal/elreal_impl.hpp | 35 +++++++++++++++-- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/elastic/elreal/conversion/string.cpp b/elastic/elreal/conversion/string.cpp index 6b83cee13..1702130b5 100644 --- a/elastic/elreal/conversion/string.cpp +++ b/elastic/elreal/conversion/string.cpp @@ -114,6 +114,44 @@ try { } } + // --- mantissa overflow uses the std::stod fallback ------------------- + // Literals with 20+ significant digits exceed long long range. They + // should not parse as canonical zero; they should produce a faithful + // leading double via the overflow fallback. + { + // 20-digit mantissa (just past LLONG_MAX = 9.2e18, so any 20-digit + // integer starting with a digit other than 0 overflows). + elreal big{ std::string("12345678901234567890") }; + if (big.iszero()) { + std::cerr << "FAIL: 20-digit mantissa silently parsed to zero " + << "(overflow fallback not engaged)\n"; + ++nrOfFailedTestCases; + } + double expected = 12345678901234567890.0; + if (big.at(0) != expected) { + std::cerr << "FAIL: elreal(\"12345678901234567890\").at(0) = " + << big.at(0) << " (expected " << expected << ")\n"; + ++nrOfFailedTestCases; + } + } + { + // 25-digit scientific mantissa. + elreal sci{ std::string("1.234567890123456789012345e5") }; + if (sci.iszero()) { + std::cerr << "FAIL: 25-digit scientific mantissa silently " + << "parsed to zero\n"; + ++nrOfFailedTestCases; + } + // Should be approximately 1.234567890123456789012345e5; verify the + // leading double is within a few ULPs of the std::stod reference. + double ref = std::stod("1.234567890123456789012345e5"); + if (sci.at(0) != ref) { + std::cerr << "FAIL: overflowed mantissa fallback did not match " + << "std::stod (got " << sci.at(0) << ", expected " << ref << ")\n"; + ++nrOfFailedTestCases; + } + } + // --- parse failures yield canonical zero ----------------------------- { // The std::string constructor swallows parse failures (matching dfloat / diff --git a/include/sw/universal/number/elreal/elreal_impl.hpp b/include/sw/universal/number/elreal/elreal_impl.hpp index c373a0c74..1de85645f 100644 --- a/include/sw/universal/number/elreal/elreal_impl.hpp +++ b/include/sw/universal/number/elreal/elreal_impl.hpp @@ -515,13 +515,24 @@ inline bool parse_into(elreal& out, const std::string& str) { long long mantissa = 0; bool found_digit = false; bool seen_dot = false; + bool mantissa_overflow = false; int frac_digits = 0; while (pos < str.length()) { char c = str[pos]; if (std::isdigit(static_cast(c))) { int digit = c - '0'; - if (mantissa > (std::numeric_limits::max() - digit) / 10) return false; - mantissa = mantissa * 10 + digit; + if (!mantissa_overflow) { + if (mantissa > (std::numeric_limits::max() - digit) / 10) { + // Stop updating mantissa once it would overflow, but keep + // scanning to validate the rest of the literal. The + // overflow fallback below will produce a faithful leading + // double via std::stod() on the full input string. + mantissa_overflow = true; + } + else { + mantissa = mantissa * 10 + digit; + } + } if (seen_dot) ++frac_digits; found_digit = true; } @@ -593,8 +604,26 @@ inline bool parse_into(elreal& out, const std::string& str) { } } + if (mantissa_overflow) { + // The mantissa portion of the literal exceeded long long range. Use + // std::stod() on the full original string to get the correctly-rounded + // leading double; exact-rational refinement at depth > 0 is + // unavailable for this value. std::stod is locale-aware; for + // canonical numeric literals (no thousands separator, '.' as decimal + // point) it round-trips correctly in any reasonable locale. + try { + double v = std::stod(str); + out = elreal(v); + return true; + } + catch (...) { + return false; + } + } + if (overflowed) { - // Reconstruct via double * 10^q_pow_signed. Loses exact-rational + // q_pow scaling overflowed long long even though the mantissa fit. + // Reconstruct via double * 10^(-q_pow). Loses exact-rational // refinement but keeps the leading component faithful. double mag = static_cast(mantissa); double scaled = mag * std::pow(10.0, static_cast(-q_pow)); From 901ec08b36cad50b2623b0ef6f89f25d9d446b5e Mon Sep 17 00:00:00 2001 From: Theodore Omtzigt Date: Wed, 20 May 2026 22:03:32 -0400 Subject: [PATCH 4/4] fix(elreal): guard exponent overflow + use std::strtod for saturating fallback Address CodeRabbit third-round finding on PR #884. The exponent accumulation in parse_into was vulnerable to signed integer overflow UB for adversarial inputs like "1e9999999999" -- the second-round mantissa fix only covered mantissa overflow, leaving the exponent loop exposed. CodeRabbit caught this in their third pass. Two changes: 1. Mirror the mantissa overflow guard on the exponent loop. Track exponent_overflow with the same `exp > (INT_MAX - digit) / 10` test before each multiply-add. Stop updating exponent on overflow but keep scanning to validate format. Route `mantissa_overflow || exponent_overflow` to the existing fallback. 2. Switch the fallback from std::stod to std::strtod. std::stod throws std::out_of_range when the value is unrepresentable as double (e.g. for "1e9999999999"); my catch swallowed the exception and returned false, yielding canonical zero through the std::string ctor. std::strtod sets errno = ERANGE and returns +/-HUGE_VAL (overflow) or 0.0 (underflow), which matches IEEE-754 saturation semantics -- the right behavior for inputs the rational track cannot represent anyway. Tests in elastic/elreal/conversion/string.cpp gain two cases: - "1e9999999999" must saturate to +inf (not silently zero) - "1e-9999999999" must underflow to zero Caught the std::stod-throws issue during local testing of the first attempt -- the initial fix used std::stod and failed under both gcc and clang. Switching to std::strtod made both compilers green. Verified all four elreal_* binaries PASS on gcc 13 and clang. Co-Authored-By: Claude Opus 4.7 (1M context) --- elastic/elreal/conversion/string.cpp | 24 +++++++++ .../universal/number/elreal/elreal_impl.hpp | 50 +++++++++++++------ 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/elastic/elreal/conversion/string.cpp b/elastic/elreal/conversion/string.cpp index 1702130b5..f75d8accb 100644 --- a/elastic/elreal/conversion/string.cpp +++ b/elastic/elreal/conversion/string.cpp @@ -152,6 +152,30 @@ try { } } + // --- exponent overflow uses the std::stod fallback ------------------ + // Inputs with an exponent that does not fit in `int` (max ~2.1e9) must + // not trigger signed-integer overflow UB. They should route through the + // same std::stod fallback as mantissa overflow: std::stod handles + // arbitrarily large exponents by saturating to +/-inf or 0. + { + // Exponent "9999999999" has 10 digits (10^10), well past INT_MAX. + // std::stod returns +inf for this input. + elreal big_exp{ std::string("1e9999999999") }; + if (!big_exp.isinf()) { + std::cerr << "FAIL: elreal(\"1e9999999999\") did not saturate to inf " + << "(exponent-overflow fallback not engaged)\n"; + ++nrOfFailedTestCases; + } + } + { + elreal small_exp{ std::string("1e-9999999999") }; + // std::stod returns 0.0 for this input (underflow to zero). + if (!small_exp.iszero()) { + std::cerr << "FAIL: elreal(\"1e-9999999999\") did not underflow to zero\n"; + ++nrOfFailedTestCases; + } + } + // --- parse failures yield canonical zero ----------------------------- { // The std::string constructor swallows parse failures (matching dfloat / diff --git a/include/sw/universal/number/elreal/elreal_impl.hpp b/include/sw/universal/number/elreal/elreal_impl.hpp index 1de85645f..f557584f2 100644 --- a/include/sw/universal/number/elreal/elreal_impl.hpp +++ b/include/sw/universal/number/elreal/elreal_impl.hpp @@ -138,6 +138,8 @@ #include #include +#include // std::strtod +#include // errno, ERANGE #include #include #include @@ -552,6 +554,7 @@ inline bool parse_into(elreal& out, const std::string& str) { int exponent = 0; bool exp_negative = false; + bool exponent_overflow = false; if (pos < str.length()) { if (str[pos] == '+' || str[pos] == '-') { exp_negative = (str[pos] == '-'); @@ -561,7 +564,19 @@ inline bool parse_into(elreal& out, const std::string& str) { while (pos < str.length()) { char c = str[pos]; if (!std::isdigit(static_cast(c))) return false; - exponent = exponent * 10 + (c - '0'); + int digit = c - '0'; + if (!exponent_overflow) { + if (exponent > (std::numeric_limits::max() - digit) / 10) { + // Stop updating exponent to avoid signed-int overflow UB; + // keep scanning to validate the rest of the literal. The + // mantissa_overflow fallback below also handles this case + // via std::stod() on the full original string. + exponent_overflow = true; + } + else { + exponent = exponent * 10 + digit; + } + } found_exp_digit = true; ++pos; } @@ -604,21 +619,24 @@ inline bool parse_into(elreal& out, const std::string& str) { } } - if (mantissa_overflow) { - // The mantissa portion of the literal exceeded long long range. Use - // std::stod() on the full original string to get the correctly-rounded - // leading double; exact-rational refinement at depth > 0 is - // unavailable for this value. std::stod is locale-aware; for - // canonical numeric literals (no thousands separator, '.' as decimal - // point) it round-trips correctly in any reasonable locale. - try { - double v = std::stod(str); - out = elreal(v); - return true; - } - catch (...) { - return false; - } + if (mantissa_overflow || exponent_overflow) { + // The mantissa or exponent portion of the literal exceeded its + // integer range. Use std::strtod (rather than std::stod) on the + // full original string to get the correctly-rounded leading double. + // We prefer strtod because it sets errno = ERANGE and returns + // +/-HUGE_VAL (overflow) or 0.0 (underflow) instead of throwing, + // which matches IEEE-754 saturation semantics -- the right behavior + // for inputs the rational track cannot represent anyway. + // Exact-rational refinement at depth > 0 is unavailable for this + // path; only the leading double is materialised. + const char* c_str = str.c_str(); + char* endp = nullptr; + errno = 0; + double v = std::strtod(c_str, &endp); + if (endp == c_str) return false; // no conversion happened + // errno == ERANGE is OK -- strtod has already set v to +/-HUGE_VAL or 0. + out = elreal(v); + return true; } if (overflowed) {