Skip to content

Commit 9aeed89

Browse files
authored
V3.79: quad-double arithmetic type (#437)
* Setting SEMVER to v3.79 * enabling CI on the new v3.79 release branch * making fixpnt trivially constructible, disabling complex<fixpnt> regression * WIP: identified double rounding as cause for complex<fixpnt> failures * labeling the complex<CustomType> use case: we'll need to create our own * fixpnt is now trivially constructible, any implicit initialization assumptions in the regression tests need to be fixed * adding include <cstdint> to satisfy gcc builds * dd bug fix and enhancement: supporting compiler environments prior to C++20 * recorded bug in bfloat16 that converts a float qNaN to a bfloat sNaN * remove bfloat16 separate addition regression test, and replace with increased intensity of randoms * enhancing the dfloat class API as first step in implementation * WIP: adding native trig functions to double-double * bug fix: trigonometry functions * WIP: adding trigonometric function verification suites * bug fix: both shim and native sqrt function needed fixing * bug fix of inverse trigonometry functions for double-double * removing meta_programming experiment as it is causing compilation issues * removing meta_programming from build * adding clang17/18 builder containers, which now include the gdb debugger * standardizing on double-double documentation * code hygiene double-double * adding three_sum2 version of three_sum * code hygiene * initial check-in of quad-double skeleton * rewriting three_sums to reflect Li/Bailey LBNL paper * changing behavior of to_binary for double-double and quad-double * unifying the to_binary() algorithm for double-double and quad-double * implementing addition and subtraction for quad-double * reclassifying some helper class methods to be protected * adding multiplication to the quad-double * adding division to quad-double arithmetic * generating and testing the quad-precision constants * enabling native log() functions for double-double type * adding sub() and div() methods to double-double * adding classification 
regression test for quad-double * adding error/gamma regression test for quad-double * adding exponent regression test for quad-double: exp algorithm is way off * compilation fix for gcc * completing the double add/sub/mul/div promotion to double-double API example * adding fmod/remainder regression test for quad-double * adding hyperbolic functions regression test for quad-double * adding hypot function regression test for quad-double * adding logarithmic function regression tests for quad-double: approximations are way off * adding min/max function regression tests for quad-double * adding nextafter/toward regression tests to quad-double * adding pow function regression test to quad-double * adding truncating functions regression tests to quad-double * adding trigonometry regression tests to quad-double * adding sqrt regression test to quad-double
1 parent 15ce7cd commit 9aeed89

File tree

124 files changed

+8353
-1369
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

124 files changed

+8353
-1369
lines changed

.devcontainer/devcontainer.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{
2-
"image": "stillwater/builders:clang16builder"
2+
"image": "stillwater/builders:clang18builder"
33
}

.github/workflows/cmake.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: CMake
22

33
on:
44
push:
5-
branches: [ v3.78, dev, main ]
5+
branches: [ v3.79, dev, main ]
66
pull_request:
77
branches: [ main ]
88

CMakeLists.txt

+9-9
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ if(NOT DEFINED UNIVERSAL_VERSION_MAJOR)
2525
set(UNIVERSAL_VERSION_MAJOR 3)
2626
endif()
2727
if(NOT DEFINED UNIVERSAL_VERSION_MINOR)
28-
set(UNIVERSAL_VERSION_MINOR 78)
28+
set(UNIVERSAL_VERSION_MINOR 79)
2929
endif()
3030
if(NOT DEFINED UNIVERSAL_VERSION_PATCH)
3131
set(UNIVERSAL_VERSION_PATCH 1)
@@ -130,8 +130,8 @@ option(BUILD_NUMBER_FIXPNTS "Set to ON to build static fixed-point
130130
option(BUILD_NUMBER_BFLOATS "Set to ON to build static bfloat tests" OFF)
131131
option(BUILD_NUMBER_CFLOATS "Set to ON to build static cfloat tests" OFF)
132132
option(BUILD_NUMBER_DFLOATS "Set to ON to build static dfloat tests" OFF)
133-
option(BUILD_NUMBER_DDS "Set to ON to build static double-double tests" OFF)
134-
option(BUILD_NUMBER_QDS "Set to ON to build static quad-double tests" OFF)
133+
option(BUILD_NUMBER_DOUBLE_DOUBLE "Set to ON to build static double-double tests" OFF)
134+
option(BUILD_NUMBER_QUAD_DOUBLE "Set to ON to build static quad-double tests" OFF)
135135
option(BUILD_NUMBER_AREALS "Set to ON to build static areal tests" OFF)
136136
option(BUILD_NUMBER_UNUM1S "Set to ON to build static unum type 1 tests" OFF)
137137
option(BUILD_NUMBER_UNUM2S "Set to ON to build static unum type 2 tests" OFF)
@@ -662,8 +662,8 @@ if(BUILD_NUMBER_STATICS)
662662
set(BUILD_NUMBER_BFLOATS ON)
663663
set(BUILD_NUMBER_CFLOATS ON)
664664
set(BUILD_NUMBER_DFLOATS ON)
665-
set(BUILD_NUMBER_DDS ON)
666-
set(BUILD_NUMBER_QDS ON)
665+
set(BUILD_NUMBER_DOUBLE_DOUBLE ON)
666+
set(BUILD_NUMBER_QUAD_DOUBLE ON)
667667
set(BUILD_NUMBER_AREALS ON)
668668
set(BUILD_NUMBER_UNUM1S ON)
669669
set(BUILD_NUMBER_UNUM2S ON)
@@ -827,14 +827,14 @@ add_subdirectory("static/dfloat")
827827
endif(BUILD_NUMBER_DFLOATS)
828828

829829
# double-double floats
830-
if(BUILD_NUMBER_DDS)
830+
if(BUILD_NUMBER_DOUBLE_DOUBLE)
831831
add_subdirectory("static/dd")
832-
endif(BUILD_NUMBER_DDS)
832+
endif(BUILD_NUMBER_DOUBLE_DOUBLE)
833833

834834
# quad-double floats
835-
if(BUILD_NUMBER_QDS)
835+
if(BUILD_NUMBER_QUAD_DOUBLE)
836836
add_subdirectory("static/qd")
837-
endif(BUILD_NUMBER_QDS)
837+
endif(BUILD_NUMBER_QUAD_DOUBLE)
838838

839839
# conversion tests suites
840840
if(BUILD_NUMBER_CONVERSIONS)

docker/Dockerfile.clang11builder

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \
1414
curl \
1515
vim \
1616
gdb \
17+
gdbserver \
1718
&& apt-get clean \
1819
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
1920

docker/Dockerfile.clang12builder

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \
1414
curl \
1515
vim \
1616
gdb \
17+
gdbserver \
1718
&& apt-get clean \
1819
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
1920

docker/Dockerfile.clang13builder

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \
1414
curl \
1515
vim \
1616
gdb \
17+
gdbserver \
1718
&& apt-get clean \
1819
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
1920

docker/Dockerfile.clang14builder

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \
1414
curl \
1515
vim \
1616
gdb \
17+
gdbserver \
1718
&& apt-get clean \
1819
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
1920

docker/Dockerfile.clang15builder

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \
1414
curl \
1515
vim \
1616
gdb \
17+
gdbserver \
1718
&& apt-get clean \
1819
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
1920

docker/Dockerfile.clang16builder

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \
1414
curl \
1515
vim \
1616
gdb \
17+
gdbserver \
1718
&& apt-get clean \
1819
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
1920

docker/Dockerfile.clang17builder

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \
1414
curl \
1515
vim \
1616
gdb \
17+
gdbserver \
1718
&& apt-get clean \
1819
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
1920

docker/Dockerfile.clang18builder

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \
1414
curl \
1515
vim \
1616
gdb \
17+
gdbserver \
1718
&& apt-get clean \
1819
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
1920

docker/build_build_containers.sh

+4
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,7 @@ docker build --target clang15builder -t stillwater/builders:clang15builder -f Do
2828
docker push stillwater/builders:clang15builder
2929
docker build --target clang16builder -t stillwater/builders:clang16builder -f Dockerfile.clang16builder .
3030
docker push stillwater/builders:clang16builder
31+
docker build --target clang17builder -t stillwater/builders:clang17builder -f Dockerfile.clang17builder .
32+
docker push stillwater/builders:clang17builder
33+
docker build --target clang18builder -t stillwater/builders:clang18builder -f Dockerfile.clang18builder .
34+
docker push stillwater/builders:clang18builder

docker/build_release_container.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# example would be to strace an executable to find its dependencies
66

77
MAJOR=v3
8-
MINOR=78
8+
MINOR=79
99
VERSION="$MAJOR.$MINOR"
1010

1111
if [[ $# == 0 ]]; then

docker/build_test_container.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# example would be to strace an executable to find its dependencies
1212

1313
MAJOR=v3
14-
MINOR=78
14+
MINOR=79
1515
VERSION="$MAJOR.$MINOR"
1616

1717
if [[ $# == 0 ]]; then

include/universal/blas/blas.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
//
88
// Super-simple BLAS implementation to aid application,
99
// numerical, and reproducibility examples.
10-
1110
#ifndef _UNIVERSAL_BLAS_LIBRARY
1211
#define _UNIVERSAL_BLAS_LIBRARY
1312

13+
#include <cstdint>
14+
1415
// aggregation types for serialization
1516
constexpr uint32_t UNIVERSAL_AGGREGATE_SCALAR = 0x1001;
1617
constexpr uint32_t UNIVERSAL_AGGREGATE_VECTOR = 0x2002;

include/universal/native/error_free_ops.hpp

+101-16
Original file line numberDiff line numberDiff line change
@@ -110,22 +110,85 @@ namespace sw { namespace universal {
110110
return s;
111111
}
112112

113-
// ThreeSum
113+
// ThreeSum enumerations
114114

115115
/// <summary>
116-
/// three_sum computes the relationship a + b + c = s + r
116+
/// three_sum computes the relationship x + y + z = r0 + r1 + r2
117117
/// </summary>
118-
/// <param name="a">input</param>
119-
/// <param name="b">input</param>
120-
/// <param name="c">input value, output residual</param>
121-
inline void three_sum(volatile double& a, volatile double& b, volatile double& c) {
122-
volatile double t1, t2, t3;
118+
/// <param name="x">input, yields output r0 (==sum)</param>
119+
/// <param name="y">input, yields output r1</param>
120+
/// <param name="z">input, yields output r2</param>
121+
inline void three_sum(volatile double& x, volatile double& y, volatile double& z) {
122+
volatile double u, v, w;
123+
124+
u = two_sum(x, y, v);
125+
x = two_sum(z, u, w); // x = r0 (==sum)
126+
y = two_sum(v, w, z); // y = r1, and z = r2
127+
}
128+
129+
/// <summary>
130+
/// three_sum2 computes the relationship x + y + z = r0 + r1
131+
/// </summary>
132+
/// <param name="x">input, yields output r0 (==sum)</param>
133+
/// <param name="y">input, yields output r1</param>
134+
/// <param name="z">input</param>
135+
inline void three_sum2(volatile double& x, volatile double& y, double z) {
136+
volatile double u, v, w;
137+
138+
u = two_sum(x, y, v);
139+
x = two_sum(z, u, w); // x = r0 (==sum)
140+
y = v + w; // y = r1
141+
}
123142

124-
t1 = two_sum(a, b, t2);
125-
a = two_sum(c, t1, t3);
126-
b = two_sum(t2, t3, c);
143+
/// <summary>
144+
/// three_sum3 computes the relationship x + y + z = r0
145+
/// just the sum of (x, y, z) without any residuals
146+
/// </summary>
147+
/// <param name="x">input</param>
148+
/// <param name="y">input</param>
149+
/// <param name="z">input</param>
150+
/// <returns>the (rounded) sum of (x + y + z)</returns>
151+
inline double three_sum3(double x, double y, double z) {
152+
double u = x + y;
153+
return u + z; // traditional information loss if z << (x + y) and/or y << x
127154
}
128155

156+
/* */
157+
158+
/// <summary>
159+
/// quick_three_accumulation calculates the relationship a + b + c = s + r
160+
/// s = quick_three_accumulation(a, b, c) adds c to the dd-pair (a, b).
161+
/// If the result does not fit in two doubles, then the sum is
162+
/// output into s and (a, b) contains the remainder. Otherwise
163+
/// s is zero and (a, b) contains the sum.
164+
/// </summary>
165+
/// <param name="a"></param>
166+
/// <param name="b"></param>
167+
/// <param name="c"></param>
168+
/// <returns></returns>
169+
inline double quick_three_accumulation(volatile double& a, volatile double& b, double c) {
170+
volatile double s;
171+
bool za, zb;
172+
173+
s = two_sum(b, c, b);
174+
s = two_sum(a, s, a);
175+
176+
za = (a != 0.0);
177+
zb = (b != 0.0);
178+
179+
if (za && zb)
180+
return s;
181+
182+
if (!zb) {
183+
b = a;
184+
a = s;
185+
}
186+
else {
187+
a = s;
188+
}
189+
190+
return 0.0;
191+
}
129192

130193
// Split
131194

@@ -165,8 +228,7 @@ namespace sw { namespace universal {
165228
/// <param name="b">input</param>
166229
/// <param name="r">reference to the residual</param>
167230
/// <returns>the product of a * b</returns>
168-
inline double two_prod(double a, double b, volatile double& r)
169-
{
231+
inline double two_prod(double a, double b, volatile double& r) {
170232
volatile double p = a * b;
171233
if (std::isfinite(p)) {
172234
#if defined( QD_FMS )
@@ -192,8 +254,7 @@ namespace sw { namespace universal {
192254
/// <returns>the square product of a</returns>
193255
inline double two_sqr(double a, volatile double& r) {
194256
volatile double p = a * a;
195-
if (std::isfinite(p))
196-
{
257+
if (std::isfinite(p)) {
197258
#if defined( QD_FMS )
198259
err = QD_FMS(a, a, p);
199260
#else
@@ -208,6 +269,30 @@ namespace sw { namespace universal {
208269
}
209270

210271

272+
// Computes the nearest integer to d
273+
inline double nint(double d) {
274+
if (d == std::floor(d)) return d;
275+
return std::floor(d + 0.5);
276+
}
277+
278+
// Computes the truncated integer
279+
inline double aint(double d) {
280+
return (d >= 0.0) ? std::floor(d) : std::ceil(d);
281+
}
282+
283+
/* These are provided to give a consistent
284+
interface for double with double-double and quad-double. */
285+
inline void sincosh(double t, double& sinh_t, double& cosh_t) {
286+
sinh_t = std::sinh(t);
287+
cosh_t = std::cosh(t);
288+
}
289+
290+
// square of argument t
291+
inline double sqr(double t) {
292+
return t * t;
293+
}
294+
295+
211296
/// <summary>
212297
/// renorm adjusts the quad-double to a canonical form
213298
/// A quad-double number is an unevaluated sum of four IEEE double numbers.
@@ -225,7 +310,7 @@ namespace sw { namespace universal {
225310
/// <param name="a2"></param>
226311
/// <param name="a3"></param>
227312
inline void renorm(volatile double& a0, volatile double& a1, volatile double& a2, volatile double& a3) {
228-
volatile double s0, s1, s2 = 0.0, s3 = 0.0;
313+
volatile double s0, s1, s2{ 0.0 }, s3{ 0.0 };
229314

230315
if (std::isinf(a0)) return;
231316

@@ -274,7 +359,7 @@ namespace sw { namespace universal {
274359
/// <param name="a3">reference to a3</param>
275360
/// <param name="a4">reference to a4</param>
276361
inline void renorm(volatile double& a0, volatile double& a1, volatile double& a2, volatile double& a3, volatile double& a4) {
277-
volatile double s0, s1, s2 = 0.0, s3 = 0.0;
362+
volatile double s0, s1, s2{ 0.0 }, s3{ 0.0 };
278363

279364
if (std::isinf(a0)) return;
280365

include/universal/native/extract_fields.hpp

-16
Original file line numberDiff line numberDiff line change
@@ -257,21 +257,5 @@ template<typename Real>
257257
}
258258
return bIsInf;
259259
}
260-
261-
inline void setFields(float& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept {
262-
float_decoder decoder;
263-
decoder.parts.sign = s;
264-
decoder.parts.exponent = rawExponentBits & 0xFF;
265-
decoder.parts.fraction = rawFractionBits & 0x7FFFFF;
266-
value = decoder.f;
267-
}
268-
269-
inline void setFields(double& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept {
270-
double_decoder decoder;
271-
decoder.parts.sign = s;
272-
decoder.parts.exponent = rawExponentBits & 0x7FF;
273-
decoder.parts.fraction = rawFractionBits & 0xF'FFFF'FFFF'FFFF;
274-
value = decoder.d;
275-
}
276260

277261
}} // namespace sw::universal

include/universal/native/ieee754.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
// constexpr compatible bit casts, otherwise
3434
// fallback to nonconstexpr bit casts.
3535
#include <universal/native/extract_fields.hpp>
36+
#include <universal/native/set_fields.hpp>
3637

3738
// functions that do not need to be constexpr
3839
#include <universal/native/nonconst_bitcast.hpp>

include/universal/native/ieee754_parameter.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
// SPDX-License-Identifier: MIT
66
//
77
// This file is part of the universal numbers project, which is released under an MIT Open Source license.
8+
#include <cstdint>
89

910
namespace sw { namespace universal {
1011

include/universal/native/manipulators.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ namespace sw { namespace universal {
297297
template<typename Real,
298298
typename = typename std::enable_if< std::is_floating_point<Real>::value, Real >::type
299299
>
300-
inline std::string color_print(Real number) {
300+
inline std::string color_print(Real number, bool nibbleMarker = false) {
301301
std::stringstream s;
302302

303303
bool sign{ false };
@@ -322,7 +322,7 @@ namespace sw { namespace universal {
322322
uint64_t mask = (1 << (ieee754_parameter<Real>::ebits - 1));
323323
for (int i = (ieee754_parameter<Real>::ebits - 1); i >= 0; --i) {
324324
s << cyan << ((rawExponent & mask) ? '1' : '0');
325-
// if (i > 0 && i % 4 == 0) s << cyan << '\'';
325+
if (nibbleMarker && i > 0 && i % 4 == 0) s << cyan << '\'';
326326
mask >>= 1;
327327
}
328328
}
@@ -333,7 +333,7 @@ namespace sw { namespace universal {
333333
uint64_t mask = (uint64_t(1) << (ieee754_parameter<Real>::fbits - 1));
334334
for (int i = (ieee754_parameter<Real>::fbits - 1); i >= 0; --i) {
335335
s << magenta << ((rawFraction & mask) ? '1' : '0');
336-
// if (i > 0 && i % 4 == 0) s << magenta << '\'';
336+
if (nibbleMarker && i > 0 && i % 4 == 0) s << magenta << '\'';
337337
mask >>= 1;
338338
}
339339

0 commit comments

Comments
 (0)