@@ -97,16 +97,15 @@ Expr eval_poly_horner(const std::vector<double> &coefs, const Expr &x) {
9797}
9898
9999inline std::pair<Expr, Expr> two_sum (const Expr &a, const Expr &b) {
100- // TODO(mcourteaux): replace with proper strict_float intrinsic ops.
101- Expr x = strict_float (a + b);
102- Expr z = strict_float (x - a);
103- Expr y = strict_float (strict_float (a - strict_float (x - z)) + strict_float (b - z));
100+ Expr x = strict_add (a, b);
101+ Expr z = strict_sub (x, a);
102+ Expr y = strict_add (strict_sub (a, strict_sub (x, z)), strict_sub (b, z));
104103 return {x, y};
105104}
106105
107106inline std::pair<Expr, Expr> two_prod (const Expr &a, const Expr &b) {
108- // TODO(mcourteaux): replace with proper strict_float intrinsic ops.
109- Expr x = strict_float (a * b);
107+ Expr x = strict_mul (a, b);
108+ // TODO(mcourteaux): replace with proper strict_float fma intrinsic op.
110109 Expr y = (a * b - x); // No strict float, so let's hope it gets compiled as FMA.
111110 return {x, y};
112111}
@@ -176,8 +175,7 @@ Expr fast_sin(const Expr &x_full, ApproximationPrecision precision) {
176175 Expr pi_over_two_minus_x = make_const (type, PI_OVER_TWO) - x;
177176 if (type == Float (32 ) && precision.optimized_for == ApproximationPrecision::MULPE) {
178177 auto [hi, lo] = split_float (PI_OVER_TWO);
179- // TODO(mcourteaux): replace with proper strict_float intrinsic ops.
180- pi_over_two_minus_x = strict_float (make_const (type, hi) - x) + make_const (type, lo);
178+ pi_over_two_minus_x = strict_sub (make_const (type, hi), x) + make_const (type, lo);
181179 }
182180 x = select (mirror, pi_over_two_minus_x, x);
183181
@@ -210,7 +208,7 @@ Expr fast_cos(const Expr &x_full, ApproximationPrecision precision) {
210208 if (type == Float (32 ) && precision.optimized_for == ApproximationPrecision::MULPE) {
211209 auto [hi, lo] = split_float (PI_OVER_TWO);
212210 // Compute pi/2 - x from the split (hi, lo) constant using the strict_float intrinsic ops.
213- pi_over_two_minus_x = strict_float ( strict_float (make_const (type, hi) - x) + make_const (type, lo));
211+ pi_over_two_minus_x = strict_add ( strict_sub (make_const (type, hi), x), make_const (type, lo));
214212 }
215213 x = select (mirror, pi_over_two_minus_x, x);
216214
@@ -238,8 +236,7 @@ Expr fast_tan(const Expr &x_full, ApproximationPrecision precision) {
238236 Expr x = x_full - k_real * make_const (type, PI);
239237 if (type == Float (32 ) && precision.optimized_for == ApproximationPrecision::MULPE) {
240238 auto [pi_hi, pi_lo] = split_float (PI);
241- // TODO(mcourteaux): replace with proper strict_float intrinsic ops.
242- x = strict_float (strict_float (x_full - k_real * make_const (type, pi_hi)) - (k_real * make_const (type, pi_lo)));
239+ x = strict_sub ((x_full - k_real * make_const (type, pi_hi)), (k_real * make_const (type, pi_lo)));
243240 }
244241
245242 // When polynomial: x is assumed to be reduced to [-pi/2, pi/2]!
@@ -250,11 +247,11 @@ Expr fast_tan(const Expr &x_full, ApproximationPrecision precision) {
250247 Expr use_cotan = abs_x > make_const (type, PI / 4.0 );
251248 Expr pi_over_two_minus_abs_x;
252249 if (type == Float (64 )) {
250+ // TODO(mcourteaux): We could do split floats here too.
253251 pi_over_two_minus_abs_x = make_const (type, PI_OVER_TWO) - abs_x;
254252 } else if (type == Float (32 )) { // We want to do this trick always, because we invert later.
255253 auto [hi, lo] = split_float (PI_OVER_TWO);
256- // TODO(mcourteaux): replace with proper strict_float intrinsic ops.
257- pi_over_two_minus_abs_x = strict_float (make_const (type, hi) - abs_x) + make_const (type, lo);
254+ pi_over_two_minus_abs_x = strict_sub (make_const (type, hi), abs_x) + make_const (type, lo);
258255 }
259256 Expr arg = select (use_cotan, pi_over_two_minus_abs_x, abs_x);
260257
0 commit comments