@@ -12,9 +12,6 @@ for f in (:erf, :erfc)
1212 @eval begin
1313 $ f (x:: Number ) = $ internalf (float (x))
1414
15- $ internalf (x:: Float64 ) = ccall (($ libopenlibmf, libopenlibm), Float64, (Float64,), x)
16- $ internalf (x:: Float32 ) = ccall (($ libopenlibmf0, libopenlibm), Float32, (Float32,), x)
17- $ internalf (x:: Float16 ) = Float16 ($ internalf (Float32 (x)))
1815
1916 $ internalf (z:: Complex{Float64} ) = Complex {Float64} (ccall (($ openspecfunf, libopenspecfun), Complex{Float64}, (Complex{Float64}, Float64), z, zero (Float64)))
2017 $ internalf (z:: Complex{Float32} ) = Complex {Float32} (ccall (($ openspecfunf, libopenspecfun), Complex{Float64}, (Complex{Float64}, Float64), Complex {Float64} (z), Float64 (eps (Float32))))
@@ -28,6 +25,10 @@ for f in (:erf, :erfc)
2825 end
2926end
3027
28+ _erfc (x:: Float64 ) = ccall ((:erfc , libopenlibm), Float64, (Float64,), x)
29+ _erfc (x:: Float32 ) = ccall ((:erfcf , libopenlibm), Float32, (Float32,), x)
30+ _erfc (x:: Float16 ) = Float16 (_erfc (Float32 (x)))
31+
3132for f in (:erfcx , :erfi , :dawson , :faddeeva )
3233 internalf = Symbol (:_ , f)
3334 openspecfunfsym = Symbol (:Faddeeva_ , f === :dawson ? :Dawson : f === :faddeeva ? :w : f)
@@ -96,11 +97,165 @@ See also:
9697[`erfinv(x)`](@ref erfinv), [`erfcinv(x)`](@ref erfcinv).
9798
9899# Implementation by
99- - `Float32`/`Float64`: C standard math library
100- [libm]( https://en.wikipedia.org/wiki/C_mathematical_functions#libm).
100+ - `Float32`: polynomial approximations of erf
101+ - `Float64`: Julia implementation of https://github.com/ARM-software/optimized-routines/blob/master/math/erf.c
101102- `BigFloat`: C library for multiple-precision floating-point [MPFR](https://www.mpfr.org/)
102103"""
103104function erf end
105+
106+ # Fast erf implementation using a mix of
107+ # rational and polynomial approximations.
108+ # Highest measured error is 0.80 ULPs at 0.827964.
109+ function _erf (x:: Float64 )
110+
111+ # # top 16 bits
112+ ix:: UInt16 = reinterpret (UInt64,x)>> 48
113+ # # top 16, without sign bit
114+ ia:: UInt16 = ix & 0x7fff
115+
116+ if (ia < 0x3feb )
117+ # a = |x| < 0.84375.
118+
119+ x2 = x * x
120+
121+ if (ia < 0x3fe0 )
122+ # # a < 0.5 - Use polynomial approximation.
123+
124+ # Minimax approximation of erf of the form x*P(x^2) approximately on the interval [0;0.5]
125+ PA= (0.12837916709551256 , - 0.3761263890318287 , 0.11283791670896592 , - 0.026866170630075903 , 0.005223977428649761 , - 0.0008548312229989974 , 0.00012054654502151287 , - 1.4906315067498891e-5 , 1.6126444349070824e-6 , - 1.3074259056679966e-7 )
126+
127+ r= fma (x,evalpoly (x2,PA),x) # # This fma is crucial for accuracy.
128+ return r
129+ else
130+ # # 0.5 <= a < 0.84375 - Use rational approximation.
131+
132+ # Rational approximation on [0x1p-28, 0.84375]
133+ NA= (0x1 .06 eba8214db68p- 3 , - 0x1 .4 cd7d691cb913p- 2 , - 0x1 . d2a51dbd7194fp- 6 ,- 0x1 .7 a291236668e4p- 8 , - 0x1 .8 ead6120016acp- 16 )
134+ DA= (1 ,0x1 .97779 cddadc09p- 2 , 0x1 .0 a54c5536cebap- 4 , 0x1 .4 d022c4d36b0fp- 8 ,0x1 .15 dc9221c1a1p- 13 , - 0x1 .09 c4342a2612p- 18 )
135+
136+ P= evalpoly (x2,NA)
137+ Q= evalpoly (x2,DA)
138+
139+ return fma (P / Q, x, x)
140+ end
141+ elseif (ia < 0x3ff4 )
142+ # # 0.84375 <= |x| < 1.25.
143+
144+ # Rational approximation on [0.84375, 1.25]
145+ NB= ( - 0x1 .359 b8bef77538p- 9 , 0x1 . a8d00ad92b34dp- 2 , - 0x1 .7 d240fbb8c3f1p- 2 , 0x1 .45 fca805120e4p- 2 , - 0x1 . c63983d3e28ecp- 4 , 0x1 .22 a36599795ebp- 5 , - 0x1 .1 bf380a96073fp- 9 )
146+ DB= ( 1 , 0x1 . b3e6618eee323p- 4 , 0x1 .14 af092eb6f33p- 1 , 0x1 .2635 cd99fe9a7p- 4 , 0x1 .02660e763351 fp- 3 , 0x1 . bedc26b51dd1cp- 7 , 0x1 .88 b545735151dp- 7 )
147+
148+ C = 0x1 . b0ac16p- 1
149+
150+ a = abs (x) - 1.0
151+
152+ P= evalpoly (a,NB)
153+ Q= evalpoly (a,DB)
154+
155+ r= C + P / Q
156+ return copysign (r,x)
157+
158+ elseif (ia < 0x4000 )
159+ # # 1.25 <= |x| < 2.0.
160+ a = abs (x)
161+ a = a - 1.25
162+
163+ # erfc polynomial approximation
164+ # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25
165+ PC= ( 0x1 .3 bcd133aa0ffcp- 4 , - 0x1 . e4652fadcb702p- 3 , 0x1 .2 ebf3dcca0446p- 2 , - 0x1 .571 d01c62d66p- 3 , 0x1 .93 a9a8f5b3413p- 8 , 0x1 .8281 cbcc2cd52p- 5 , - 0x1 .5 cffd86b4de16p- 6 , - 0x1 . db4ccf595053ep- 9 , 0x1 .757 cbf8684edap- 8 , - 0x1 . ce7dfd2a9e56ap- 11 , - 0x1 .99 ee3bc5a3263p- 11 , 0x1 .3 c57cf9213f5fp- 12 , 0x1 .60692996 bf254p- 14 , - 0x1 .6e44 cb7c1fa2ap- 14 , 0x1 .9 d4484ac482b2p- 16 , - 0x1 .578 c9e375d37p- 19 )
166+
167+ # Obtains erfc of |x|
168+ r= 1.0 - evalpoly (a,PC)
169+ return copysign (r,x)
170+
171+ elseif (ia < 0x400a )
172+ # # 2 <= |x| < 3.25.
173+ a = abs (x)
174+ a = fma (0.5 , a, - 1.0 )
175+
176+ # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2
177+ PD= ( 0x1 .328f5 ec350e5p- 8 , - 0x1 .529 b9e8cf8e99p- 5 , 0x1 .529 b9e8cd9e71p- 3 , - 0x1 .8 b0ae3a023bf2p- 2 , 0x1 .1 a2c592599d82p- 1 , - 0x1 . ace732477e494p- 2 , - 0x1 . e1a06a27920ffp- 6 , 0x1 . bae92a6d27af6p- 2 , - 0x1 . a15470fcf5ce7p- 2 , 0x1 . bafe45d18e213p- 6 , 0x1 .0 d950680d199ap- 2 , - 0x1 .8 c9481e8f22e3p- 3 , - 0x1 .158450 ed5c899p- 4 , 0x1 . c01f2973b44p- 3 , - 0x1 .73 ed2827546a7p- 3 , 0x1 .47733687 d1ff7p- 4 , - 0x1 .2 dec70d00b8e1p- 6 , 0x1 . a947ab83cd4fp- 10 )
178+
179+ # Obtains erfc of |x|
180+ r= 1.0 - evalpoly (a,PD)
181+ return copysign (r,x)
182+
183+ elseif (ia < 0x4012 )
184+ # # 3.25 <= |x| < 4.5.
185+ a = abs (x)
186+ a = a - 3.25
187+
188+ # Generated using Sollya::remez(erfc(x+a),15,[0;b-a],1,1e-32) with a=3.25 b=4.5
189+ PE= (4.302779463707753e-6 , - 2.9189025396754986e-5 , 9.486433337913454e-5 , - 0.00019580973528519273 , 0.00028656966098267845 , - 0.0003137999213284758 , 0.00026354316698539023 , - 0.00017004596013480086 , 8.179009099315484e-5 , - 2.6177001557562883e-5 , 2.6667235974837568e-6 , 2.5840073872012914e-6 , - 1.7970865794807476e-6 , 6.040008157462737e-7 , - 1.1384065360475506e-7 , 9.657905491660664e-9 )
190+
191+ r= 1.0 - evalpoly (a,PE)
192+ return copysign (r,x)
193+
194+ elseif (ia < 0x4018 )
195+ # # 4.5 <= |x| < 6.0.
196+ a = abs (x)
197+ a = a - 4.5
198+
199+ # Generated using Sollya::remez(erfc(x+a),14,[0;b-a],1,1e-32) with a=4.5 b=6.0
200+ PF= (1.966160232770269e-10 , - 1.8112993751963606e-9 , 8.150538393058677e-9 , - 2.3841928179141717e-8 , 5.086831026559613e-8 , - 8.405611838790292e-8 , 1.1114551347025387e-7 , - 1.1927287548359634e-7 , 1.0374501165951575e-7 , - 7.205498311583407e-8 , 3.885921643476988e-8 , - 1.558946100318052e-8 , 4.352049759187871e-9 , - 7.505506222192582e-10 , 5.996753601814976e-11 )
201+
202+ r= 1.0 - evalpoly (a,PF)
203+ return copysign (r,x)
204+ elseif isnan (x)
205+ return NaN
206+ else
207+ return copysign (1.0 ,x)
208+ end
209+ end
210+
211+ # Fast erf implementation using
212+ # polynomial approximations of erf and erfc.
213+ # Highest measured error is 1.12 ULPs at x = 1.232469
214+ function _erf (x:: Float32 )
215+ xabs= abs (x)
216+
217+ if (xabs< 0.5 )
218+ # range [0;0.5]
219+ # # erf approximation using erf(x)=x+x*P(x^2) with degree 6
220+ # Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32);
221+
222+ p= (0.12837917f0 , - 0.37612638f0 , 0.11283784f0 , - 0.026865287f0 , 0.005218856f0 , - 0.0008394848f0 )
223+ return copysign (fma (evalpoly (x^ 2 ,p),x,x),x)
224+
225+ elseif (xabs< 1.25 )
226+ # range [0.5;1.25]
227+ # # erfc approximation with degree 11
228+ # Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32);
229+
230+ p= (0.47950011f0 , - 0.8787826f0 , 0.4393913f0 , 0.14646415f0 , - 0.18308467f0 , - 0.007286422f0 , 0.04987047f0 , - 0.0048868246f0 , - 0.011067663f0 , 0.003422347f0 , 0.00073027064f0 , - 0.0003758171f0 )
231+ return copysign (1f0 - evalpoly (xabs- 0.5f0 ,p),x)
232+
233+ elseif (xabs< 2.5 )
234+ # range [1.25;2.5]
235+ # erfc approximation with degree 13
236+ # Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32);
237+
238+ p= (0.077099875f0 , - 0.23652112f0 , 0.2956514f0 , - 0.16753574f0 , 0.006158593f0 , 0.04718712f0 , - 0.021331023f0 , - 0.0035262543f0 , 0.005461831f0 , - 0.00047858673f0 , - 0.0012763853f0 , 0.00073386944f0 , - 0.00017831658f0 , 1.7451624f-5 )
239+ return copysign (1f0 - evalpoly (xabs- 1.25f0 ,p),x)
240+
241+ elseif (xabs< 4.0 )
242+ # range [2.5;4.0]
243+ # erfc approximation with degree 13
244+ # Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32);
245+
246+ p= (0.00040695202f0 , - 0.002178284f0 , 0.0054457085f0 , - 0.008350053f0 , 0.008622011f0 , - 0.006115167f0 , 0.0027899458f0 , - 0.000519395f0 , - 0.00030461047f0 , 0.00031068458f0 , - 0.00013866898f0 , 3.6909692f-5 , - 5.682889f-6 , 3.929763f-7 )
247+ return copysign (1f0 - evalpoly (xabs- 2.5f0 ,p),x)
248+
249+ elseif isnan (x)
250+ return NaN32
251+ else
252+ # range [4.0,inf)
253+ return copysign (1f0 , x)
254+ end
255+ end
256+
257+ _erf (x:: Float16 )= Float16 (_erf (Float32 (x)))
258+
104259"""
105260 erf(x, y)
106261
0 commit comments