a new double_extras module, with a fast Lambert W function

fredrik-johansson · fredrik-johansson · commit 36e38e61e70d · 2012-10-04T19:13:53.000+02:00
diff --git a/Makefile.in b/Makefile.in
@@ -2,7 +2,7 @@ BUILD_DIRS = ulong_extras long_extras perm fmpz fmpz_vec fmpz_poly fmpq_poly \
    fmpz_mat mpfr_vec mpfr_mat nmod_vec nmod_poly \
    arith mpn_extras nmod_mat fmpq fmpq_mat padic fmpz_poly_q \
    fmpz_poly_mat nmod_poly_mat fmpz_mod_poly fmpz_mod_poly_factor \
-   fmpz_factor fmpz_poly_factor fft qsieve
+   fmpz_factor fmpz_poly_factor fft qsieve double_extras
 
 LIBS=-L$(CURDIR) -L$(FLINT_MPIR_LIB_DIR) -L$(FLINT_MPFR_LIB_DIR) -L$(FLINT_NTL_LIB_DIR) -L$(FLINT_BLAS_LIB_DIR) -lflint $(EXTRA_LIBS) -lmpfr -lmpir -lm -lpthread
 LIBS2=-L$(FLINT_MPIR_LIB_DIR) -L$(FLINT_MPFR_LIB_DIR) -L$(FLINT_NTL_LIB_DIR) -L$(FLINT_BLAS_LIB_DIR) $(EXTRA_LIBS) -lmpfr -lmpir -lm -lpthread
diff --git a/double_extras.h b/double_extras.h
@@ -0,0 +1,62 @@
+/*=============================================================================
+
+    This file is part of FLINT.
+
+    FLINT is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    FLINT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with FLINT; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#ifndef DOUBLE_EXTRAS_H
+#define DOUBLE_EXTRAS_H
+
+#include <math.h>
+#include <float.h>
+#include "flint.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/* NOTE(review): presumably the precision of a double in bits (53-bit
+   significand) -- confirm against intended use */
+#define D_BITS 53
+/* numerically equal to 2^-52 */
+#define D_EPS 2.2204460492503130808e-16
+/* HUGE_VAL from <math.h>; positive infinity where the platform's double
+   type supports infinities */
+#define D_INF HUGE_VAL
+/* HUGE_VAL - HUGE_VAL; intended to evaluate to NaN (infinity minus
+   infinity) */
+#define D_NAN (HUGE_VAL - HUGE_VAL)
+
+/* Returns a random double; per the module documentation, the value lies
+   in the interval [0.5, 1). */
+double d_randtest(flint_rand_t state);
+
+/* Evaluates the polynomial poly[0] + poly[1]*x + ... + poly[len-1]*x^(len-1)
+   at x using Horner's rule. The coefficient array must contain at least
+   one entry (len >= 1), since poly[len-1] is read unconditionally. */
+static __inline__ double
+d_polyval(const double * poly, int len, double x)
+{
+    int k = len - 1;
+    double acc = poly[k];
+
+    /* fold in the remaining coefficients from high degree down to the
+       constant term */
+    while (--k >= 0)
+        acc = poly[k] + x * acc;
+
+    return acc;
+}
+
+double d_lambertw(double x);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/double_extras/Makefile b/double_extras/Makefile
@@ -0,0 +1,50 @@
+# BUILD_DIR and MOD_DIR are not defined in this file; presumably they are
+# supplied by the top-level Makefile that invokes this one (TODO confirm).
+
+# All C sources in this module directory.
+SOURCES = $(wildcard *.c)
+
+# Static objects: $(BUILD_DIR)/$(MOD_DIR)_<name>.o for each <name>.c.
+OBJS = $(patsubst %.c, $(BUILD_DIR)/$(MOD_DIR)_%.o, $(SOURCES))
+
+# Objects compiled with -fPIC: $(BUILD_DIR)/<name>.lo for each <name>.c.
+LIB_OBJS = $(patsubst %.c, $(BUILD_DIR)/%.lo, $(SOURCES))
+
+# Sources of the test, profiling and tuning programs.
+TEST_SOURCES = $(wildcard test/*.c)
+
+PROF_SOURCES = $(wildcard profile/*.c)
+
+TUNE_SOURCES = $(wildcard tune/*.c)
+
+# Test executables: $(BUILD_DIR)/test/<name> for each test/<name>.c.
+TESTS = $(patsubst %.c, $(BUILD_DIR)/%, $(TEST_SOURCES))
+
+# One <executable>_RUN target per test executable; the %_RUN pattern rule
+# executes the corresponding test.
+TESTS_RUN = $(foreach file, $(TESTS), $(file)_RUN)
+
+# Profile and tune program paths with the .c suffix removed.
+PROFS = $(patsubst %.c, %, $(PROF_SOURCES))
+
+TUNE = $(patsubst %.c, %, $(TUNE_SOURCES))
+
+# With no prerequisites, .SECONDARY treats every target as secondary, so
+# make does not automatically delete files built as intermediate steps.
+.SECONDARY:
+
+# Build the static objects of this module.
+all: $(OBJS)
+
+# Build the -fPIC objects of this module.
+library: $(LIB_OBJS)
+
+# Compile each profiling program, linking in ../profiler.o; the loop stops
+# at the first compiler failure.
+profile: $(PROF_SOURCES)
+	$(foreach prog, $(PROFS), $(CC) -O2 -std=c99 $(INCS) $(prog).c ../profiler.o -o $(BUILD_DIR)/$(prog) $(LIBS) || exit $$?;)
+
+# Compile each tuning program; the loop stops at the first compiler failure.
+tune: $(TUNE_SOURCES)
+	$(foreach prog, $(TUNE), $(CC) -O2 -std=c99 $(INCS) $(prog).c -o $(BUILD_DIR)/$(prog) $(LIBS) || exit $$?;)
+
+$(BUILD_DIR)/$(MOD_DIR)_%.o: %.c
+	$(CC) $(CFLAGS) -c $(INCS) $< -o $@
+
+$(BUILD_DIR)/%.lo: %.c
+	$(CC) -fPIC $(CFLAGS) $(INCS) -c $< -o $@
+
+clean:
+	rm -rf $(BUILD_DIR)
+
+# Build every test executable, then run each one through its _RUN target.
+check: $(TESTS) $(TESTS_RUN)
+
+# <path>_RUN depends on the executable <path> and runs it (command echo
+# suppressed).
+%_RUN: %
+	@$<
+
+$(BUILD_DIR)/test/%: test/%.c
+	$(CC) $(CFLAGS) $(INCS) $< -o $@ $(LIBS)
+
+# `library` names a command, not a file, so it is declared phony like `all`.
+# The individual test _RUN targets must not be listed here: make skips
+# implicit-rule search for phony targets, which would bypass the %_RUN
+# pattern rule above. The `%_RUN` entry is a literal name, not a pattern.
+.PHONY: profile tune clean check all library %_RUN
diff --git a/double_extras/doc/double_extras.txt b/double_extras/doc/double_extras.txt
@@ -0,0 +1,71 @@
+/*=============================================================================
+
+    This file is part of FLINT.
+
+    FLINT is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    FLINT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with FLINT; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+*******************************************************************************
+
+    Random functions 
+
+*******************************************************************************
+
+double d_randtest(flint_rand_t state)
+
+    Returns a random number in the interval $[0.5, 1)$.
+
+
+*******************************************************************************
+
+    Arithmetic
+
+*******************************************************************************
+
+double d_polyval(const double * poly, int len, double x)
+
+    Uses Horner's rule to evaluate the polynomial defined by the given
+    \code{len} coefficients. Requires that \code{len} is nonzero.
+
+
+*******************************************************************************
+
+    Special functions
+
+*******************************************************************************
+
+double d_lambertw(double x)
+
+    Computes the principal branch of the Lambert W function, solving
+    the equation $x = W(x) \exp(W(x))$. If $x < -1/e$, the solution is
+    complex, and NaN is returned.
+
+    Depending on the magnitude of $x$, we start from a piecewise rational
+    approximation or a zeroth-order truncation of the asymptotic expansion
+    at infinity, and perform 0, 1 or 2 iterations with Halley's
+    method to obtain full accuracy.
+
+    A test of $10^7$ random inputs showed a maximum relative error smaller
+    than 0.95 times \code{DBL_EPSILON} ($2^{-52}$) for positive $x$.
+    Accuracy for negative $x$ is slightly worse, and can grow to
+    about 10 times \code{DBL_EPSILON} close to $-1/e$.
+    However, accuracy may be worse depending on compiler flags and
+    the accuracy of the system libm functions.
diff --git a/double_extras/lambertw.c b/double_extras/lambertw.c
@@ -0,0 +1,167 @@
+/*=============================================================================
+
+    This file is part of FLINT.
+
+    FLINT is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    FLINT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with FLINT; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+   Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include "double_extras.h"
+
+/* Evaluates the coefficient array p at x with d_polyval. The length is
+   computed with sizeof, so p must be an actual array of double, not a
+   pointer. Coefficients are stored lowest degree first. */
+#define POLY(p, x) d_polyval((p), sizeof(p) / sizeof(double), (x))
+
+/* pol1 / pol2: numerator and denominator used by d_lambertw for the
+   initial value x * pol1(x) / pol2(x) when 0.03125 < x <= 1 */
+static const double pol1[4] = {
+    0.2278634396856248853716, 0.6685854654191353381433,
+    0.4670475452404395343887, 0.061184972065242761167 };
+
+static const double pol2[5] = {
+    0.2278636537503804204913, 0.8964421845409468074626,
+    1.0217927151592500702475, 0.34513102625055769873401,
+    0.020801230123523916719604 };
+
+/* pol3 / pol4: initial value pol3(x) / pol4(x) when 1 < x <= 6 */
+static const double pol3[6] = {
+    0.00005767860320327097931, 0.029896654795890461899563,
+    0.0378739044968912982585405, 0.00971957088414193124615358,
+    0.000488576886695502361566636, 1.150549466178344373015667e-6 };
+
+static const double pol4[5] = {
+    0.030306172539339585635388, 0.066596680780796068408204,
+    0.035483738872057375987452, 0.00506436278851840340711316,
+    0.0001465263028844943142786722 };
+
+/* pol5 / pol6: initial value pol5(x) / pol6(x) when 6 < x <= 40 */
+static const double pol5[6] = {
+    0.00048233868073637531461, 0.004268700087824343609188,
+    0.00127714949974214706149789, 0.0000799706171559085390983949,
+    1.186347211803672341928371e-6, 2.943454067276155504308283e-9 };
+
+static const double pol6[6] = {
+    0.00553288881087242781512, 0.0043904877060733941697614,
+    0.00069354549834088964895342, 0.0000288257440032545960408328,
+    3.01054066921000066105342e-7, 4.94316029290773314755549e-10 };
+
+/* pol7 / pol8: used for 1e-9 <= x <= 0.03125, where d_lambertw returns
+   x * (1 + x * pol7(x) / pol8(x)) without a Halley step */
+static const double pol7[4] = {
+    -0.93011683587619427070, -2.9702322028603227386,
+    -2.0759083419960793148, -0.042485660005713612806 };
+
+static const double pol8[4] = {
+    0.93011683587619458392, 4.3654074566738568022,
+    6.1437079650412473506, 2.4613195056093927345 };
+
+/* pol9: polynomial in sqrt((x + ONE_OVER_E) + CORRECTION), used for
+   negative x with x + ONE_OVER_E < 0.04 (near the singularity at -1/e) */
+static const double pol9[11] = {
+    -1.0000000000000000000, 2.3316439815971242034,
+    -1.8121878856393634902, 1.9366311144923597554,
+    -2.3535512018816145168, 3.0668589010506319129,
+    -4.1753356002581771389, 5.8580237298747741488,
+    -8.4010322175239773710, 12.250753501314460424,
+    -18.100697012472442755 };
+
+/* pol10 / pol11: used for the remaining negative x (x <= -1e-9 and
+   x + ONE_OVER_E >= 0.04), giving the initial value
+   x * (1 + x * pol10(x) / pol11(x)) */
+static const double pol10[6] = {
+    -5.1972986075163593071, -37.478686466672907613,
+    -96.155193004929291698, -102.23856988136744607,
+    -37.181958033133170210, -0.48504976999675644134 };
+
+static const double pol11[6] = {
+    5.1972986074950082685, 45.274634378414741754, 150.20768172029114131,
+    233.88699813222871981, 167.13313463159765859, 42.171248374042409414 };
+
+
+
+/* avoid overflows in the formula when x is close to 2^EMAX */
+/* (the value is numerically 2^-53) */
+#define RESCALE 1.1102230246251565404e-16
+
+/* Performs one step of Halley's method on f(w) = w*exp(w) - x, starting
+   from the approximation w, and returns the refined approximation.
+   Both exp(w) and x are multiplied by RESCALE; the factor appears in the
+   numerator and in every term of the denominator of the update, so it
+   cancels mathematically. */
+static double
+halley(double x, double w)
+{
+    double t, u, v;
+
+    /* exp() does not overflow, since w is an underestimate
+        when the asymptotic series is used */
+    t = exp(w) * RESCALE;
+    u = 2*w + 2;
+    /* v is the scaled residual RESCALE * (w*exp(w) - x) */
+    v = w*t - x * RESCALE;
+    /* Halley update with numerator and denominator multiplied by
+       u = 2*(w+1) to avoid a nested division */
+    t = w - u*v / (u*t*(w+1) - (w+2)*v);
+
+    return t;
+}
+
+/* ONE_OVER_E should be exactly 6627126856707895 * 2^(-54); its negation,
+   -ONE_OVER_E, is the most negative double in the domain */
+#define ONE_OVER_E 0.36787944117144228
+
+/* difference from -1/e: added to x + ONE_OVER_E below so that the
+   argument of sqrt() approximates x + 1/e */
+#define CORRECTION 4.3082397558469466e-17
+
+/* Principal branch of the Lambert W function for a double argument.
+
+   Returns x unchanged when x is zero, NaN or D_INF, and D_NAN when
+   x < -ONE_OVER_E (the result would be complex). Otherwise an initial
+   approximation is selected according to the size of x; it is either
+   returned directly or refined with one or two calls to halley(). */
+double
+d_lambertw(double x)
+{
+    double t, u, w;
+
+    /* x != x holds only for NaN */
+    if (x == 0.0 || x != x || x == D_INF)
+        return x;
+
+    if (x < 0.0)
+    {
+        /* complex result */
+        if (x < -ONE_OVER_E)
+            return D_NAN;
+        /* close to zero */
+        else if (x > -1e-9)
+            return x - x * x;
+        /* close to the singularity at -1/e */
+        /* (the pol9 value is returned as is, without a Halley step) */
+        else if (x + ONE_OVER_E < 0.0003)
+            return POLY(pol9, sqrt((x + ONE_OVER_E) + CORRECTION));
+
+        /* otherwise get initial value for Halley iteration */
+        if (x + ONE_OVER_E < 0.04)
+            w = POLY(pol9, sqrt((x + ONE_OVER_E) + CORRECTION));
+        else
+            w = x * (1.0 + x * POLY(pol10, x) / POLY(pol11, x));
+    }
+    else
+    {
+        /* close to zero */
+        /* (both branches return directly, without a Halley step) */
+        if (x <= 0.03125)
+        {
+            if (x < 1e-9)
+                return x - x * x;
+            else
+                return x * (1.0 + x * POLY(pol7, x) / POLY(pol8, x));
+        }
+
+        /* get initial value for Halley iteration  */
+        if (x <= 1.0)
+            w = x * POLY(pol1, x) / POLY(pol2, x);
+        else if (x <= 6.0)
+            w = POLY(pol3, x) / POLY(pol4, x);
+        else if (x <= 40.0)
+            w = POLY(pol5, x) / POLY(pol6, x);
+        else
+        {
+            /* asymptotic series */
+            /* in terms of t = log(x) and u = log(log(x)) */
+            t = log(x);
+            u = log(t);
+            w = (2*t*t*t - 2*(1+(t-1)*t)*u + u*u)/(2*t*t);
+            /* one extra refinement */
+            /* (so 40 < x < 1e15 gets two Halley steps in total, and
+               x >= 1e15 gets only the final one) */
+            if (x < 1e15)
+                w = halley(x, w);
+        }
+    }
+
+    /* final Halley step applied to every initial value that reaches here */
+    return halley(x, w);
+}
diff --git a/double_extras/randtest.c b/double_extras/randtest.c
diff --git a/double_extras/test/t-lambertw.c b/double_extras/test/t-lambertw.c