Skip to content

Commit 021c77a

Browse files
authored
Merge pull request #4258 from pleroy/RoundingTest
Implement the rounding test from Muller et al.
2 parents 5f5d940 + 3797454 commit 021c77a

1 file changed

Lines changed: 18 additions & 21 deletions

File tree

numerics/sin_cos.cpp

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ constexpr Argument three_term_θ_reduced_threshold =
8787
constexpr Argument mantissa_reduce_shifter =
8888
1LL << (std::numeric_limits<double>::digits - 1);
8989

90+
constexpr double sin_near_zero_e = 0x1.0000'32D7'5E64'Cp0;
91+
constexpr double sin_e = 0x1.0000'A03C'34D3'9p0;
92+
constexpr double cos_e = 0x1.0000'A07E'1980'8p0;
93+
9094
template<FMAPolicy fma_policy>
9195
using Polynomial1 = HornerEvaluator<Value, Argument, 1, fma_policy>;
9296

@@ -149,31 +153,23 @@ double FusedNegatedMultiplyAdd(double const a, double const b, double const c) {
149153
}
150154
}
151155

152-
// Evaluates the sum `x + Δx`. If that sum has a dangerous rounding
153-
// configuration (that is, the bits after the last mantissa bit of the sum are
154-
// either 1000... or 0111..., then returns `NaN`. Otherwise returns the sum.
155-
// `x` is always positive. `Δx` may be positive or negative.
156-
inline double DetectDangerousRounding(double const x, double const Δx) {
156+
// Evaluates the sum `x + Δx` and performs the rounding test using the technique
157+
// described in [Mul+10], section 11.6.3. If rounding is safe, returns the sum;
158+
// otherwise, returns `NaN`. `x` is always positive. `Δx` may be positive or
159+
// negative.
160+
template<FMAPolicy fma_policy, double e>
161+
double DetectDangerousRounding(double const x, double const Δx) {
157162
DoublePrecision<double> const sum = QuickTwoSum(x, Δx);
158163
double const& value = sum.value;
159164
double const& error = sum.error;
160-
__m128i const value_exponent_128i =
161-
_mm_castpd_si128(_mm_and_pd(masks::exponent_bits, _mm_set_sd(value)));
162-
__m128i const error_128i =
163-
_mm_castpd_si128(_mm_andnot_pd(masks::sign_bit, _mm_set_sd(error)));
164-
double const normalized_error = _mm_cvtsd_f64(
165-
_mm_castsi128_pd(_mm_sub_epi64(error_128i, value_exponent_128i)));
166-
// TODO(phl): Error analysis to refine the thresholds. Also, can we avoid
167-
// negative numbers?
168-
OSACA_IF(normalized_error < -0x1.ffffp971 &&
169-
normalized_error > -0x1.00008p972) {
165+
OSACA_IF(value == FusedMultiplyAdd<fma_policy>(error, e, value)) {
166+
return value;
167+
} else {
170168
#if _DEBUG
171169
LOG(ERROR) << std::setprecision(25) << x << " " << std::hexfloat << value
172-
<< " " << error << " " << normalized_error;
170+
<< " " << error << " " << e;
173171
#endif
174172
return std::numeric_limits<double>::quiet_NaN();
175-
} else {
176-
return value;
177173
}
178174
}
179175

@@ -286,7 +282,7 @@ Value SinImplementation(DoublePrecision<Argument> const θ_reduced) {
286282
double const x³_term = FusedMultiplyAdd<fma_policy>(
287283
x³, SinPolynomialNearZero<fma_policy>(x²), e);
288284
// Relative error of the result better than 72.8 bits.
289-
return DetectDangerousRounding(x, x³_term);
285+
return DetectDangerousRounding<fma_policy, sin_near_zero_e>(x, x³_term);
290286
} else {
291287
__m128d const sign = _mm_and_pd(masks::sign_bit, _mm_set_sd(x));
292288
double const e_abs = _mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(e), sign));
@@ -313,7 +309,7 @@ Value SinImplementation(DoublePrecision<Argument> const θ_reduced) {
313309
sin_x₀ * h² * CosPolynomial<fma_policy>(h²)) +
314310
FusedMultiplyAdd<fma_policy>(cos_x₀, e_abs, sin_x₀_plus_h_cos_x₀.error);
315311
return _mm_cvtsd_f64(
316-
_mm_xor_pd(_mm_set_sd(DetectDangerousRounding(
312+
_mm_xor_pd(_mm_set_sd(DetectDangerousRounding<fma_policy, sin_e>(
317313
sin_x₀_plus_h_cos_x₀.value, polynomial_term)),
318314
sign));
319315
}
@@ -350,7 +346,8 @@ Value CosImplementation(DoublePrecision<Argument> const θ_reduced) {
350346
cos_x₀ * h² * CosPolynomial<fma_policy>(h²)) +
351347
FusedNegatedMultiplyAdd<fma_policy>(
352348
sin_x₀, e_abs, cos_x₀_minus_h_sin_x₀.error);
353-
return DetectDangerousRounding(cos_x₀_minus_h_sin_x₀.value, polynomial_term);
349+
return DetectDangerousRounding<fma_policy, cos_e>(cos_x₀_minus_h_sin_x₀.value,
350+
polynomial_term);
354351
}
355352

356353
template<FMAPolicy fma_policy>

0 commit comments

Comments
 (0)