Skip to content

Commit 36e38e6

Browse files
a new double_extras module, with a fast Lambert W function
1 parent f127486 commit 36e38e6

File tree

7 files changed

+564
-1
lines changed

7 files changed

+564
-1
lines changed

Makefile.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ BUILD_DIRS = ulong_extras long_extras perm fmpz fmpz_vec fmpz_poly fmpq_poly \
22
fmpz_mat mpfr_vec mpfr_mat nmod_vec nmod_poly \
33
arith mpn_extras nmod_mat fmpq fmpq_mat padic fmpz_poly_q \
44
fmpz_poly_mat nmod_poly_mat fmpz_mod_poly fmpz_mod_poly_factor \
5-
fmpz_factor fmpz_poly_factor fft qsieve
5+
fmpz_factor fmpz_poly_factor fft qsieve double_extras
66

77
LIBS=-L$(CURDIR) -L$(FLINT_MPIR_LIB_DIR) -L$(FLINT_MPFR_LIB_DIR) -L$(FLINT_NTL_LIB_DIR) -L$(FLINT_BLAS_LIB_DIR) -lflint $(EXTRA_LIBS) -lmpfr -lmpir -lm -lpthread
88
LIBS2=-L$(FLINT_MPIR_LIB_DIR) -L$(FLINT_MPFR_LIB_DIR) -L$(FLINT_NTL_LIB_DIR) -L$(FLINT_BLAS_LIB_DIR) $(EXTRA_LIBS) -lmpfr -lmpir -lm -lpthread

double_extras.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*=============================================================================
2+
3+
This file is part of FLINT.
4+
5+
FLINT is free software; you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation; either version 2 of the License, or
8+
(at your option) any later version.
9+
10+
FLINT is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License
16+
along with FLINT; if not, write to the Free Software
17+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18+
19+
=============================================================================*/
20+
/******************************************************************************
21+
22+
Copyright (C) 2012 Fredrik Johansson
23+
24+
******************************************************************************/
25+
26+
#ifndef DOUBLE_EXTRAS_H
27+
#define DOUBLE_EXTRAS_H
28+
29+
#include <math.h>
30+
#include <float.h>
31+
#include "flint.h"
32+
33+
#ifdef __cplusplus
34+
extern "C" {
35+
#endif
36+
37+
#define D_BITS 53
38+
#define D_EPS 2.2204460492503130808e-16
39+
#define D_INF HUGE_VAL
40+
#define D_NAN (HUGE_VAL - HUGE_VAL)
41+
42+
double d_randtest(flint_rand_t state);
43+
44+
static __inline__ double
45+
d_polyval(const double * poly, int len, double x)
46+
{
47+
double t;
48+
int i;
49+
50+
for (t = poly[len-1], i = len-2; i >= 0; i--)
51+
t = poly[i] + x * t;
52+
53+
return t;
54+
}
55+
56+
double d_lambertw(double x);
57+
58+
#ifdef __cplusplus
59+
}
60+
#endif
61+
62+
#endif

double_extras/Makefile

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
SOURCES = $(wildcard *.c)
2+
3+
OBJS = $(patsubst %.c, $(BUILD_DIR)/$(MOD_DIR)_%.o, $(SOURCES))
4+
5+
LIB_OBJS = $(patsubst %.c, $(BUILD_DIR)/%.lo, $(SOURCES))
6+
7+
TEST_SOURCES = $(wildcard test/*.c)
8+
9+
PROF_SOURCES = $(wildcard profile/*.c)
10+
11+
TUNE_SOURCES = $(wildcard tune/*.c)
12+
13+
TESTS = $(patsubst %.c, $(BUILD_DIR)/%, $(TEST_SOURCES))
14+
15+
TESTS_RUN = $(foreach file, $(TESTS), $(file)_RUN)
16+
17+
PROFS = $(patsubst %.c, %, $(PROF_SOURCES))
18+
19+
TUNE = $(patsubst %.c, %, $(TUNE_SOURCES))
20+
21+
.SECONDARY:
22+
23+
all: $(OBJS)
24+
25+
library: $(LIB_OBJS)
26+
27+
profile: $(PROF_SOURCES)
28+
$(foreach prog, $(PROFS), $(CC) -O2 -std=c99 $(INCS) $(prog).c ../profiler.o -o $(BUILD_DIR)/$(prog) $(LIBS) || exit $$?;)
29+
30+
tune: $(TUNE_SOURCES)
31+
$(foreach prog, $(TUNE), $(CC) -O2 -std=c99 $(INCS) $(prog).c -o $(BUILD_DIR)/$(prog) $(LIBS) || exit $$?;)
32+
33+
$(BUILD_DIR)/$(MOD_DIR)_%.o: %.c
34+
$(CC) $(CFLAGS) -c $(INCS) $< -o $@
35+
36+
$(BUILD_DIR)/%.lo: %.c
37+
$(CC) -fPIC $(CFLAGS) $(INCS) -c $< -o $@
38+
39+
clean:
40+
rm -rf $(BUILD_DIR)
41+
42+
check: $(TESTS) $(TESTS_RUN)
43+
44+
%_RUN: %
45+
@$<
46+
47+
$(BUILD_DIR)/test/%: test/%.c
48+
$(CC) $(CFLAGS) $(INCS) $< -o $@ $(LIBS)
49+
50+
.PHONY: profile tune clean check all %_RUN
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*=============================================================================
2+
3+
This file is part of FLINT.
4+
5+
FLINT is free software; you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation; either version 2 of the License, or
8+
(at your option) any later version.
9+
10+
FLINT is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License
16+
along with FLINT; if not, write to the Free Software
17+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18+
19+
=============================================================================*/
20+
/******************************************************************************
21+
22+
Copyright (C) 2012 Fredrik Johansson
23+
24+
******************************************************************************/
25+
26+
*******************************************************************************
27+
28+
Random functions
29+
30+
*******************************************************************************
31+
32+
double d_randtest(flint_rand_t state)
33+
34+
Returns a random number in the interval $[0.5, 1)$.
35+
36+
37+
*******************************************************************************
38+
39+
Arithmetic
40+
41+
*******************************************************************************
42+
43+
double d_polyval(const double * poly, int len, double x)
44+
45+
Uses Horner's rule to evaluate the the polynomial defined by the given
46+
\code{len} coefficients. Requires that \code{len} is nonzero.
47+
48+
49+
*******************************************************************************
50+
51+
Special functions
52+
53+
*******************************************************************************
54+
55+
double d_lambertw(double x)
56+
57+
Computes the principal branch of the Lambert W function, solving
58+
the equation $x = W(x) \exp(W(x))$. If $x < -1/e$, the solution is
59+
complex, and NaN is returned.
60+
61+
Depending on the magnitude of $x$, we start from a piecewise rational
62+
approximation or a zeroth-order truncation of the asymptotic expansion
63+
at infinity, and perform 0, 1 or 2 iterations with Halley's
64+
method to obtain full accuracy.
65+
66+
A test of $10^7$ random inputs showed a maximum relative error smaller
67+
than 0.95 times \code{DBL_EPSILON} ($2^{-52}$) for positive $x$.
68+
Accuracy for negative $x$ is slightly worse, and can grow to
69+
about 10 times \code{DBL_EPSILON} close to $-1/e$.
70+
However, accuracy may be worse depending on compiler flags and
71+
the accuracy of the system libm functions.

double_extras/lambertw.c

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*=============================================================================
2+
3+
This file is part of FLINT.
4+
5+
FLINT is free software; you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation; either version 2 of the License, or
8+
(at your option) any later version.
9+
10+
FLINT is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License
16+
along with FLINT; if not, write to the Free Software
17+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18+
19+
=============================================================================*/
20+
/******************************************************************************
21+
22+
Copyright (C) 2012 Fredrik Johansson
23+
24+
******************************************************************************/
25+
26+
#include "double_extras.h"
27+
28+
#define POLY(p, x) d_polyval((p), sizeof(p) / sizeof(double), (x))
29+
30+
static const double pol1[4] = {
31+
0.2278634396856248853716, 0.6685854654191353381433,
32+
0.4670475452404395343887, 0.061184972065242761167 };
33+
34+
static const double pol2[5] = {
35+
0.2278636537503804204913, 0.8964421845409468074626,
36+
1.0217927151592500702475, 0.34513102625055769873401,
37+
0.020801230123523916719604 };
38+
39+
static const double pol3[6] = {
40+
0.00005767860320327097931, 0.029896654795890461899563,
41+
0.0378739044968912982585405, 0.00971957088414193124615358,
42+
0.000488576886695502361566636, 1.150549466178344373015667e-6 };
43+
44+
static const double pol4[5] = {
45+
0.030306172539339585635388, 0.066596680780796068408204,
46+
0.035483738872057375987452, 0.00506436278851840340711316,
47+
0.0001465263028844943142786722 };
48+
49+
static const double pol5[6] = {
50+
0.00048233868073637531461, 0.004268700087824343609188,
51+
0.00127714949974214706149789, 0.0000799706171559085390983949,
52+
1.186347211803672341928371e-6, 2.943454067276155504308283e-9 };
53+
54+
static const double pol6[6] = {
55+
0.00553288881087242781512, 0.0043904877060733941697614,
56+
0.00069354549834088964895342, 0.0000288257440032545960408328,
57+
3.01054066921000066105342e-7, 4.94316029290773314755549e-10 };
58+
59+
static const double pol7[4] = {
60+
-0.93011683587619427070, -2.9702322028603227386,
61+
-2.0759083419960793148, -0.042485660005713612806 };
62+
63+
static const double pol8[4] = {
64+
0.93011683587619458392, 4.3654074566738568022,
65+
6.1437079650412473506, 2.4613195056093927345 };
66+
67+
static const double pol9[11] = {
68+
-1.0000000000000000000, 2.3316439815971242034,
69+
-1.8121878856393634902, 1.9366311144923597554,
70+
-2.3535512018816145168, 3.0668589010506319129,
71+
-4.1753356002581771389, 5.8580237298747741488,
72+
-8.4010322175239773710, 12.250753501314460424,
73+
-18.100697012472442755 };
74+
75+
static const double pol10[6] = {
76+
-5.1972986075163593071, -37.478686466672907613,
77+
-96.155193004929291698, -102.23856988136744607,
78+
-37.181958033133170210, -0.48504976999675644134 };
79+
80+
static const double pol11[6] = {
81+
5.1972986074950082685, 45.274634378414741754, 150.20768172029114131,
82+
233.88699813222871981, 167.13313463159765859, 42.171248374042409414 };
83+
84+
85+
/* avoid overflows in the formula when x is close to 2^EMAX */
86+
#define RESCALE 1.1102230246251565404e-16
87+
88+
static double
89+
halley(double x, double w)
90+
{
91+
double t, u, v;
92+
93+
/* exp() does not overflow, since w is an underestimate
94+
when the asymptotic series is used */
95+
t = exp(w) * RESCALE;
96+
u = 2*w + 2;
97+
v = w*t - x * RESCALE;
98+
t = w - u*v / (u*t*(w+1) - (w+2)*v);
99+
100+
return t;
101+
}
102+
103+
/* this should be exactly 6627126856707895 * 2^(-54), which
104+
is the most negative double in the domain */
105+
#define ONE_OVER_E 0.36787944117144228
106+
107+
/* difference from -1/e */
108+
#define CORRECTION 4.3082397558469466e-17
109+
110+
double
111+
d_lambertw(double x)
112+
{
113+
double t, u, w;
114+
115+
if (x == 0.0 || x != x || x == D_INF)
116+
return x;
117+
118+
if (x < 0.0)
119+
{
120+
/* complex result */
121+
if (x < -ONE_OVER_E)
122+
return D_NAN;
123+
/* close to zero */
124+
else if (x > -1e-9)
125+
return x - x * x;
126+
/* close to the singularity at -1/e */
127+
else if (x + ONE_OVER_E < 0.0003)
128+
return POLY(pol9, sqrt((x + ONE_OVER_E) + CORRECTION));
129+
130+
/* otherwise get initial value for Halley iteration */
131+
if (x + ONE_OVER_E < 0.04)
132+
w = POLY(pol9, sqrt((x + ONE_OVER_E) + CORRECTION));
133+
else
134+
w = x * (1.0 + x * POLY(pol10, x) / POLY(pol11, x));
135+
}
136+
else
137+
{
138+
/* close to zero */
139+
if (x <= 0.03125)
140+
{
141+
if (x < 1e-9)
142+
return x - x * x;
143+
else
144+
return x * (1.0 + x * POLY(pol7, x) / POLY(pol8, x));
145+
}
146+
147+
/* get initial value for Halley iteration */
148+
if (x <= 1.0)
149+
w = x * POLY(pol1, x) / POLY(pol2, x);
150+
else if (x <= 6.0)
151+
w = POLY(pol3, x) / POLY(pol4, x);
152+
else if (x <= 40.0)
153+
w = POLY(pol5, x) / POLY(pol6, x);
154+
else
155+
{
156+
/* asymptotic series */
157+
t = log(x);
158+
u = log(t);
159+
w = (2*t*t*t - 2*(1+(t-1)*t)*u + u*u)/(2*t*t);
160+
/* one extra refinement */
161+
if (x < 1e15)
162+
w = halley(x, w);
163+
}
164+
}
165+
166+
return halley(x, w);
167+
}

0 commit comments

Comments
 (0)