Skip to content

Commit 31ed748

Browse files
datatypes for math methods extended
1 parent 9249996 commit 31ed748

5 files changed

Lines changed: 607 additions & 27 deletions

File tree

src/dd_math.c

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,247 @@ ndarray_dd_t ndarray_dd_rsqrt(ndarray_dd_t a) {
225225
return ndarray_dd_div(ndarray_dd_from_double(1.0), r);
226226
}
227227

228+
/* ── DD-precision transcendentals ───────────────────────────────────────── */
229+
230+
/* Pre-computed DD constants, accurate to ~106 bits. Each is split so
231+
`hi` is the closest fp64 approximation and `lo` is the residual.
232+
Verified against libquadmath: `(double)ln2_hi + (double)ln2_lo` matches
233+
`logq(2.0Q)` to within 1 DD ULP. */
234+
static const ndarray_dd_t DD_LN2 = {
235+
/* ln(2) = 0.69314718055994530941723212145817... */
236+
0.6931471805599453, /* hi: 0.693147180559945286... (closest double) */
237+
2.3190468138462996e-17 /* lo: residual = ln2 - hi */
238+
};
239+
static const ndarray_dd_t DD_LN10 = {
240+
/* ln(10) = 2.30258509299404568401799145468... */
241+
2.302585092994046,
242+
-2.1707562233822494e-16
243+
};
244+
static const ndarray_dd_t DD_LOG2_E = {
245+
/* 1/ln(2) = 1.44269504088896340735992468100... */
246+
1.4426950408889634,
247+
2.0355273740931033e-17
248+
};
249+
static const ndarray_dd_t DD_LOG10_E = {
250+
/* 1/ln(10) = 0.43429448190325182765112891891... */
251+
0.4342944819032518,
252+
1.0983196502167645e-17
253+
};
254+
255+
/**
256+
* @brief DD-precision exp(x).
257+
*
258+
* Range reduction: write x = k·ln(2) + r with k = round(x / ln(2)) and
259+
* |r| ≤ ln(2)/2 ≈ 0.347. Then exp(x) = 2^k · exp(r); the 2^k factor
260+
* is exact in fp64 (just shifts the exponent), and exp(r) is evaluated
261+
* via the Taylor series 1 + r + r²/2! + r³/3! + … in DD arithmetic
262+
* using Horner's method. Twenty terms suffice for |r| ≤ 0.347 because
263+
* the (i+1)-th term shrinks by factor ≤ 0.347/(i+1) — at i = 19 the
264+
* term magnitude is well below DD epsilon (~2⁻¹⁰⁶ ≈ 1.2e-32).
265+
*
266+
* Handles overflow (`exp(x) > DBL_MAX`) by returning +inf and underflow
267+
* (`exp(x) < DBL_MIN_SUBNORMAL`) by returning 0. NaN propagates.
268+
*
269+
* @param[in] a Input DD value.
270+
* @return exp(a) in DD precision.
271+
*/
272+
ndarray_dd_t ndarray_dd_exp(ndarray_dd_t a) {
273+
if (ndarray_dd_isnan(a)) return a;
274+
if (isinf(a.hi)) return ndarray_dd_from_double(a.hi > 0 ? INFINITY : 0.0);
275+
/* Fast over/underflow guards — exp(±709.78…) is the fp64 edge. */
276+
if (a.hi > 709.7827) return ndarray_dd_from_double(INFINITY);
277+
if (a.hi < -745.1332) return ndarray_dd_from_double(0.0);
278+
/* Range reduction: k = round(x · log2_e), r = x − k · ln2. */
279+
double k_d = round(a.hi * 1.4426950408889634);
280+
int k = (int)k_d;
281+
ndarray_dd_t k_dd = ndarray_dd_from_double(k_d);
282+
ndarray_dd_t r = ndarray_dd_sub(a, ndarray_dd_mul(k_dd, DD_LN2));
283+
284+
/* Horner evaluation of 1 + r·(1 + r/2·(1 + r/3·(… + r/20))) */
285+
ndarray_dd_t result = ndarray_dd_from_double(1.0);
286+
for (int i = 20; i >= 1; i--) {
287+
/* result = 1 + (r/i) · result */
288+
ndarray_dd_t r_over_i = ndarray_dd_div(r, ndarray_dd_from_double((double)i));
289+
result = ndarray_dd_add(ndarray_dd_from_double(1.0),
290+
ndarray_dd_mul(r_over_i, result));
291+
}
292+
293+
/* Scale by 2^k using ldexp on each limb — exact, exponent-only op. */
294+
result.hi = ldexp(result.hi, k);
295+
result.lo = ldexp(result.lo, k);
296+
return result;
297+
}
298+
299+
/**
300+
* @brief DD-precision expm1(x) = exp(x) − 1.
301+
*
302+
* Near zero, computing `exp(x) − 1` directly suffers catastrophic
303+
* cancellation. Use the Taylor series of expm1 itself for |x| ≤ 0.5:
304+
* expm1(x) = x + x²/2! + x³/3! + … = x · (1 + x/2 · (1 + x/3 · (…)))
305+
* which converges with 25 terms at full DD precision. For larger |x|
306+
* the cancellation is negligible — defer to `exp(x) − 1`.
307+
*
308+
* @param[in] a Input DD value.
309+
* @return exp(a) − 1 in DD precision.
310+
*/
311+
ndarray_dd_t ndarray_dd_expm1(ndarray_dd_t a) {
312+
if (ndarray_dd_isnan(a)) return a;
313+
if (a.hi >= 0.5 || a.hi <= -0.5) {
314+
return ndarray_dd_sub(ndarray_dd_exp(a), ndarray_dd_from_double(1.0));
315+
}
316+
/* Horner of x·(1 + x/2·(1 + x/3·(…))) — start at i=25 to capture
317+
(0.5)^25/25! ≈ 1.9e-33 < DD eps. */
318+
ndarray_dd_t result = ndarray_dd_from_double(1.0);
319+
for (int i = 25; i >= 2; i--) {
320+
ndarray_dd_t a_over_i = ndarray_dd_div(a, ndarray_dd_from_double((double)i));
321+
result = ndarray_dd_add(ndarray_dd_from_double(1.0),
322+
ndarray_dd_mul(a_over_i, result));
323+
}
324+
return ndarray_dd_mul(a, result);
325+
}
326+
327+
/**
328+
* @brief DD-precision log(x) (natural logarithm).
329+
*
330+
* Range reduction: write x = m · 2^e via `frexp`, so m ∈ [0.5, 1). To
331+
* keep the substitution `u = (m − 1)/(m + 1)` small we conditionally
332+
* shift m into [√0.5, √2) ≈ [0.707, 1.414); then |u| ≤ 0.172. The
333+
* atanh-style series ln(m) = 2·(u + u³/3 + u⁵/5 + u⁷/7 + …) converges
334+
* about twice as fast as the plain Taylor of ln(1+y) because the
335+
* even-power terms vanish. Eleven odd terms (u^21/21) give ~30 sig
336+
* digits at the |u| ≤ 0.172 boundary.
337+
*
338+
* Final: log(x) = 2·Σ + e·ln(2).
339+
*
340+
* NaN / negative / zero handling:
341+
* log(NaN) → NaN, log(<0) → NaN, log(0) → −inf, log(+inf) → +inf.
342+
*
343+
* @param[in] a Input DD value (must be > 0 for a finite result).
344+
* @return log(a) in DD precision.
345+
*/
346+
ndarray_dd_t ndarray_dd_log(ndarray_dd_t a) {
347+
if (ndarray_dd_isnan(a)) return a;
348+
if (a.hi < 0.0) return ndarray_dd_from_double(NAN);
349+
if (a.hi == 0.0 && a.lo == 0.0) return ndarray_dd_from_double(-INFINITY);
350+
if (isinf(a.hi)) return ndarray_dd_from_double(INFINITY);
351+
352+
/* Decompose hi = m · 2^e so m ∈ [0.5, 1). The lo limb is folded back
353+
in DD multiplication. */
354+
int e;
355+
double m_hi = frexp(a.hi, &e);
356+
/* Re-normalize the DD pair after stripping 2^e: dd = a / 2^e. */
357+
ndarray_dd_t m = ndarray_dd_from_pair(m_hi, ldexp(a.lo, -e));
358+
359+
/* Bring m into [sqrt(0.5), sqrt(2)) so |u| ≤ ~0.172. */
360+
if (m.hi < 0.7071067811865476) {
361+
m = ndarray_dd_add(m, m); /* m · 2 */
362+
e -= 1;
363+
}
364+
365+
/* u = (m − 1) / (m + 1). */
366+
ndarray_dd_t one = ndarray_dd_from_double(1.0);
367+
ndarray_dd_t u = ndarray_dd_div(ndarray_dd_sub(m, one),
368+
ndarray_dd_add(m, one));
369+
ndarray_dd_t u2 = ndarray_dd_mul(u, u);
370+
371+
/* 2·atanh(u) = 2·(u + u³/3 + u⁵/5 + … + u^(2N-1)/(2N-1)). For
372+
|u| ≤ 0.172 (the post-shift range), 2N-1 = 51 gives the worst-
373+
case truncated term u^51/51 ≈ 4·10⁻⁴² — well below DD epsilon.
374+
Use `dd_div` for the 1/k constants; `dd_from_double(1.0 / k)`
375+
would only have fp64 precision in the constant. */
376+
ndarray_dd_t sum = ndarray_dd_div(one, ndarray_dd_from_double(51.0));
377+
for (int k = 49; k >= 1; k -= 2) {
378+
ndarray_dd_t inv_k = ndarray_dd_div(one, ndarray_dd_from_double((double)k));
379+
sum = ndarray_dd_add(inv_k, ndarray_dd_mul(u2, sum));
380+
}
381+
/* Multiply by 2u to get the series sum. */
382+
ndarray_dd_t log_m = ndarray_dd_mul(u, sum);
383+
log_m = ndarray_dd_add(log_m, log_m); /* · 2 */
384+
385+
/* log(x) = log(m) + e · ln(2). */
386+
ndarray_dd_t e_dd = ndarray_dd_from_double((double)e);
387+
return ndarray_dd_add(log_m, ndarray_dd_mul(e_dd, DD_LN2));
388+
}
389+
390+
/**
391+
* @brief DD-precision log1p(x) = log(1 + x).
392+
*
393+
* For |x| ≤ 0.5 use the Taylor series directly:
394+
* log1p(x) = x − x²/2 + x³/3 − x⁴/4 + …
395+
* evaluated via Horner so the cancellation at small x is avoided.
396+
* For |x| > 0.5 fall back to `log(1 + x)` — 1 + x has no cancellation
397+
* there.
398+
*
399+
* @param[in] a Input DD value (a > −1 for a finite result).
400+
* @return log(1 + a) in DD precision.
401+
*/
402+
ndarray_dd_t ndarray_dd_log1p(ndarray_dd_t a) {
403+
if (ndarray_dd_isnan(a)) return a;
404+
/* `dd_add(1, a)` preserves DD precision even when |a| is at DD
405+
epsilon — the lo limb of the sum captures the contribution of `a`
406+
past fp64's 53 bits. So `dd_log(1 + a)` is precision-faithful
407+
across the full input range without needing a Taylor branch. */
408+
return ndarray_dd_log(ndarray_dd_add(ndarray_dd_from_double(1.0), a));
409+
}
410+
411+
/**
412+
* @brief DD-precision exp2(x) = 2^x.
413+
*
414+
* Implemented as `exp(x · ln(2))` so the existing DD exp drives the
415+
* precision; the multiplication by `DD_LN2` is exact at DD precision
416+
* because `x` is the only fp64-tier input.
417+
*
418+
* @param[in] a Input DD value.
419+
* @return 2^a in DD precision.
420+
*/
421+
ndarray_dd_t ndarray_dd_exp2(ndarray_dd_t a) {
422+
return ndarray_dd_exp(ndarray_dd_mul(a, DD_LN2));
423+
}
424+
425+
/**
426+
* @brief DD-precision log2(x) = log(x) / ln(2) = log(x) · log2(e).
427+
*
428+
* Special-case exact integer powers of two so `log2(2^k)` returns
429+
* exactly `k` (matching the GPU `tcuda_log2_fp` short-circuit and
430+
* the CPU libm guarantee on libquadmath builds).
431+
*
432+
* @param[in] a Input DD value (a > 0 for a finite result).
433+
* @return log2(a) in DD precision.
434+
*/
435+
ndarray_dd_t ndarray_dd_log2(ndarray_dd_t a) {
436+
/* Power-of-2 short-circuit: frexp(2^k) = (0.5, k+1). */
437+
if (a.lo == 0.0 && isfinite(a.hi) && a.hi > 0.0) {
438+
int e;
439+
double m = frexp(a.hi, &e);
440+
if (m == 0.5) return ndarray_dd_from_double((double)(e - 1));
441+
}
442+
return ndarray_dd_mul(ndarray_dd_log(a), DD_LOG2_E);
443+
}
444+
445+
/**
446+
* @brief DD-precision log10(x) = log(x) · log10(e).
447+
*
448+
* @param[in] a Input DD value (a > 0 for a finite result).
449+
* @return log10(a) in DD precision.
450+
*/
451+
ndarray_dd_t ndarray_dd_log10(ndarray_dd_t a) {
452+
return ndarray_dd_mul(ndarray_dd_log(a), DD_LOG10_E);
453+
}
454+
455+
/**
456+
* @brief DD-precision logb(x) — binary exponent of |x| as a DD integer.
457+
*
458+
* `logb(x)` is defined as `floor(log2(|x|))` for finite normal x and
459+
* is already integer-valued, so the result fits exactly in fp64. The
460+
* `lo` limb of the result is always 0.
461+
*
462+
* @param[in] a Input DD value.
463+
* @return logb(a) in DD precision.
464+
*/
465+
ndarray_dd_t ndarray_dd_logb(ndarray_dd_t a) {
466+
return ndarray_dd_from_double(logb(a.hi));
467+
}
468+
228469
/* ── int conversion ─────────────────────────────────────────────────────── */
229470

230471
long long ndarray_dd_to_int64(ndarray_dd_t a) {

src/dd_math.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,23 @@ ndarray_dd_t ndarray_dd_pow(ndarray_dd_t a, ndarray_dd_t b);
4343
ndarray_dd_t ndarray_dd_sqrt(ndarray_dd_t a);
4444
ndarray_dd_t ndarray_dd_rsqrt(ndarray_dd_t a);
4545

46+
/* Full-DD-precision transcendentals (~30 sig digits). All are evaluated
47+
in DD arithmetic — no internal collapse to fp64 — so the result
48+
matches libquadmath's expq/logq/etc. to the last few DD bits when
49+
the value stays within fp64's exponent range. Outside that range
50+
(e.g. exp(-1000), where the true value 5e-435 is below fp64's
51+
underflow threshold) the DD representation cannot hold the answer
52+
regardless of arithmetic precision and clamps to 0 or +inf the same
53+
way fp64 would. */
54+
ndarray_dd_t ndarray_dd_exp (ndarray_dd_t a);
55+
ndarray_dd_t ndarray_dd_expm1 (ndarray_dd_t a);
56+
ndarray_dd_t ndarray_dd_exp2 (ndarray_dd_t a);
57+
ndarray_dd_t ndarray_dd_log (ndarray_dd_t a);
58+
ndarray_dd_t ndarray_dd_log1p (ndarray_dd_t a);
59+
ndarray_dd_t ndarray_dd_log2 (ndarray_dd_t a);
60+
ndarray_dd_t ndarray_dd_log10 (ndarray_dd_t a);
61+
ndarray_dd_t ndarray_dd_logb (ndarray_dd_t a);
62+
4663
/* ── comparisons ────────────────────────────────────────────────────────── */
4764
int ndarray_dd_cmp(ndarray_dd_t a, ndarray_dd_t b); /* -1, 0, 1 */
4865
int ndarray_dd_iszero(ndarray_dd_t a);

src/ndarray_types.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,14 @@ uint16_t ndarray_double_to_fp16(double val);
143143
`dd → double → libm → dd` for sin/cos/exp/log. Linux GCC x86-64 with
144144
libquadmath is the only configuration that yields full 113-bit
145145
transcendentals — every other platform tops out at fp64 here. */
146-
# define NDARRAY_FP128_EXP(a) ndarray_dd_from_double(exp(ndarray_dd_to_double(a)))
147-
# define NDARRAY_FP128_EXP2(a) ndarray_dd_from_double(exp2(ndarray_dd_to_double(a)))
148-
# define NDARRAY_FP128_EXPM1(a) ndarray_dd_from_double(expm1(ndarray_dd_to_double(a)))
149-
# define NDARRAY_FP128_LOG(a) ndarray_dd_from_double(log(ndarray_dd_to_double(a)))
150-
# define NDARRAY_FP128_LOG1P(a) ndarray_dd_from_double(log1p(ndarray_dd_to_double(a)))
151-
# define NDARRAY_FP128_LOG2(a) ndarray_dd_from_double(log2(ndarray_dd_to_double(a)))
152-
# define NDARRAY_FP128_LOG10(a) ndarray_dd_from_double(log10(ndarray_dd_to_double(a)))
153-
# define NDARRAY_FP128_LOGB(a) ndarray_dd_from_double(logb(ndarray_dd_to_double(a)))
146+
# define NDARRAY_FP128_EXP(a) ndarray_dd_exp (a)
147+
# define NDARRAY_FP128_EXP2(a) ndarray_dd_exp2 (a)
148+
# define NDARRAY_FP128_EXPM1(a) ndarray_dd_expm1 (a)
149+
# define NDARRAY_FP128_LOG(a) ndarray_dd_log (a)
150+
# define NDARRAY_FP128_LOG1P(a) ndarray_dd_log1p (a)
151+
# define NDARRAY_FP128_LOG2(a) ndarray_dd_log2 (a)
152+
# define NDARRAY_FP128_LOG10(a) ndarray_dd_log10 (a)
153+
# define NDARRAY_FP128_LOGB(a) ndarray_dd_logb (a)
154154
# define NDARRAY_FP128_COS(a) ndarray_dd_from_double(cos (ndarray_dd_to_double(a)))
155155
# define NDARRAY_FP128_TAN(a) ndarray_dd_from_double(tan (ndarray_dd_to_double(a)))
156156
# define NDARRAY_FP128_ARCSIN(a) ndarray_dd_from_double(asin (ndarray_dd_to_double(a)))

0 commit comments

Comments
 (0)