@@ -87,6 +87,10 @@ constexpr Argument three_term_θ_reduced_threshold =
8787constexpr Argument mantissa_reduce_shifter =
8888 1LL << (std::numeric_limits<double >::digits - 1 );
8989
90+ constexpr double sin_near_zero_e = 0x1.0000' 32D7' 5E64' Cp0;
91+ constexpr double sin_e = 0x1.0000' A03C' 34D3' 9p0;
92+ constexpr double cos_e = 0x1.0000' A07E' 1980' 8p0;
93+
9094template <FMAPolicy fma_policy>
9195using Polynomial1 = HornerEvaluator<Value, Argument, 1 , fma_policy>;
9296
@@ -149,31 +153,23 @@ double FusedNegatedMultiplyAdd(double const a, double const b, double const c) {
149153 }
150154}
151155
152- // Evaluates the sum `x + Δx`. If that sum has a dangerous rounding
153- // configuration (that is, the bits after the last mantissa bit of the sum are
154- // either 1000... or 0111..., then returns `NaN`. Otherwise returns the sum.
155- // `x` is always positive. `Δx` may be positive or negative.
156- inline double DetectDangerousRounding (double const x, double const Δx) {
156+ // Evaluates the sum `x + Δx` and performs the rounding test using the technique
157+ // described in [Mul+10], section 11.6.3. If rounding is safe, returns the sum;
158+ // otherwise, returns `NaN`. `x` is always positive. `Δx` may be positive or
159+ // negative.
160+ template <FMAPolicy fma_policy, double e>
161+ double DetectDangerousRounding (double const x, double const Δx) {
157162 DoublePrecision<double > const sum = QuickTwoSum (x, Δx);
158163 double const & value = sum.value ;
159164 double const & error = sum.error ;
160- __m128i const value_exponent_128i =
161- _mm_castpd_si128 (_mm_and_pd (masks::exponent_bits, _mm_set_sd (value)));
162- __m128i const error_128i =
163- _mm_castpd_si128 (_mm_andnot_pd (masks::sign_bit, _mm_set_sd (error)));
164- double const normalized_error = _mm_cvtsd_f64 (
165- _mm_castsi128_pd (_mm_sub_epi64 (error_128i, value_exponent_128i)));
166- // TODO(phl): Error analysis to refine the thresholds. Also, can we avoid
167- // negative numbers?
168- OSACA_IF (normalized_error < -0x1 .ffffp971 &&
169- normalized_error > -0x1 .00008p972) {
165+ OSACA_IF (value == FusedMultiplyAdd<fma_policy>(error, e, value)) {
166+ return value;
167+ } else {
170168#if _DEBUG
171169 LOG (ERROR) << std::setprecision (25 ) << x << " " << std::hexfloat << value
172- << " " << error << " " << normalized_error ;
170+ << " " << error << " " << e ;
173171#endif
174172 return std::numeric_limits<double >::quiet_NaN ();
175- } else {
176- return value;
177173 }
178174}
179175
@@ -286,7 +282,7 @@ Value SinImplementation(DoublePrecision<Argument> const θ_reduced) {
286282 double const x³_term = FusedMultiplyAdd<fma_policy>(
287283 x³, SinPolynomialNearZero<fma_policy>(x²), e);
288284 // Relative error of the result better than 72.8 bits.
289- return DetectDangerousRounding (x, x³_term);
285+ return DetectDangerousRounding<fma_policy, sin_near_zero_e> (x, x³_term);
290286 } else {
291287 __m128d const sign = _mm_and_pd (masks::sign_bit, _mm_set_sd (x));
292288 double const e_abs = _mm_cvtsd_f64 (_mm_xor_pd (_mm_set_sd (e), sign));
@@ -313,7 +309,7 @@ Value SinImplementation(DoublePrecision<Argument> const θ_reduced) {
313309 sin_x₀ * h² * CosPolynomial<fma_policy>(h²)) +
314310 FusedMultiplyAdd<fma_policy>(cos_x₀, e_abs, sin_x₀_plus_h_cos_x₀.error );
315311 return _mm_cvtsd_f64 (
316- _mm_xor_pd (_mm_set_sd (DetectDangerousRounding (
312+ _mm_xor_pd (_mm_set_sd (DetectDangerousRounding<fma_policy, sin_e> (
317313 sin_x₀_plus_h_cos_x₀.value , polynomial_term)),
318314 sign));
319315 }
@@ -350,7 +346,8 @@ Value CosImplementation(DoublePrecision<Argument> const θ_reduced) {
350346 cos_x₀ * h² * CosPolynomial<fma_policy>(h²)) +
351347 FusedNegatedMultiplyAdd<fma_policy>(
352348 sin_x₀, e_abs, cos_x₀_minus_h_sin_x₀.error );
353- return DetectDangerousRounding (cos_x₀_minus_h_sin_x₀.value , polynomial_term);
349+ return DetectDangerousRounding<fma_policy, cos_e>(cos_x₀_minus_h_sin_x₀.value ,
350+ polynomial_term);
354351}
355352
356353template <FMAPolicy fma_policy>
0 commit comments