Skip to content

Commit a358e65

Browse files
authored
Merge pull request #491 from AhmedYKadah/erf(x)-implementation
Added erf(x) Float64 and Float32 Julia implementations
2 parents 46a2874 + f7470ba commit a358e65

File tree

3 files changed

+199
-7
lines changed

3 files changed

+199
-7
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "SpecialFunctions"
22
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
3-
version = "2.6.1"
3+
version = "2.7.0"
44

55
[deps]
66
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"

src/erf.jl

Lines changed: 160 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@ for f in (:erf, :erfc)
1212
@eval begin
1313
$f(x::Number) = $internalf(float(x))
1414

15-
$internalf(x::Float64) = ccall(($libopenlibmf, libopenlibm), Float64, (Float64,), x)
16-
$internalf(x::Float32) = ccall(($libopenlibmf0, libopenlibm), Float32, (Float32,), x)
17-
$internalf(x::Float16) = Float16($internalf(Float32(x)))
1815

1916
$internalf(z::Complex{Float64}) = Complex{Float64}(ccall(($openspecfunf, libopenspecfun), Complex{Float64}, (Complex{Float64}, Float64), z, zero(Float64)))
2017
$internalf(z::Complex{Float32}) = Complex{Float32}(ccall(($openspecfunf, libopenspecfun), Complex{Float64}, (Complex{Float64}, Float64), Complex{Float64}(z), Float64(eps(Float32))))
@@ -28,6 +25,10 @@ for f in (:erf, :erfc)
2825
end
2926
end
3027

28+
_erfc(x::Float64) = ccall((:erfc, libopenlibm), Float64, (Float64,), x)
29+
_erfc(x::Float32) = ccall((:erfcf, libopenlibm), Float32, (Float32,), x)
30+
_erfc(x::Float16) = Float16(_erfc(Float32(x)))
31+
3132
for f in (:erfcx, :erfi, :dawson, :faddeeva)
3233
internalf = Symbol(:_, f)
3334
openspecfunfsym = Symbol(:Faddeeva_, f === :dawson ? :Dawson : f === :faddeeva ? :w : f)
@@ -96,11 +97,165 @@ See also:
9697
[`erfinv(x)`](@ref erfinv), [`erfcinv(x)`](@ref erfcinv).
9798
9899
# Implementation by
99-
- `Float32`/`Float64`: C standard math library
100-
[libm](https://en.wikipedia.org/wiki/C_mathematical_functions#libm).
100+
- `Float32`: polynomial approximations of erf
101+
- `Float64`: Julia implementation of https://github.com/ARM-software/optimized-routines/blob/master/math/erf.c
101102
- `BigFloat`: C library for multiple-precision floating-point [MPFR](https://www.mpfr.org/)
102103
"""
103104
function erf end
105+
106+
# Fast erf implementation using a mix of
107+
# rational and polynomial approximations.
108+
# Highest measured error is 0.80 ULPs at 0.827964.
109+
function _erf(x::Float64)
110+
111+
# # top 16 bits
112+
ix::UInt16=reinterpret(UInt64,x)>>48
113+
# # top 16, without sign bit
114+
ia::UInt16=ix & 0x7fff
115+
116+
if (ia < 0x3feb)
117+
# a = |x| < 0.84375.
118+
119+
x2 = x * x
120+
121+
if (ia < 0x3fe0)
122+
## a < 0.5 - Use polynomial approximation.
123+
124+
# Minimax approximation of erf of the form x*P(x^2) approximately on the interval [0;0.5]
125+
PA=(0.12837916709551256, -0.3761263890318287, 0.11283791670896592, -0.026866170630075903, 0.005223977428649761, -0.0008548312229989974, 0.00012054654502151287, -1.4906315067498891e-5, 1.6126444349070824e-6, -1.3074259056679966e-7)
126+
127+
r= fma(x,evalpoly(x2,PA),x) ## This fma is crucial for accuracy.
128+
return r
129+
else
130+
## 0.5 <= a < 0.84375 - Use rational approximation.
131+
132+
# Rational approximation on [0x1p-28, 0.84375]
133+
NA=(0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16)
134+
DA=(1,0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18)
135+
136+
P=evalpoly(x2,NA)
137+
Q=evalpoly(x2,DA)
138+
139+
return fma(P / Q, x, x)
140+
end
141+
elseif (ia < 0x3ff4)
142+
## 0.84375 <= |x| < 1.25.
143+
144+
# Rational approximation on [0.84375, 1.25]
145+
NB=( -0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2, 0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5, -0x1.1bf380a96073fp-9 )
146+
DB=( 1, 0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4, 0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7 )
147+
148+
C = 0x1.b0ac16p-1
149+
150+
a = abs(x) - 1.0
151+
152+
P=evalpoly(a,NB)
153+
Q=evalpoly(a,DB)
154+
155+
r= C + P / Q
156+
return copysign(r,x)
157+
158+
elseif (ia < 0x4000)
159+
## 1.25 <= |x| < 2.0.
160+
a = abs(x)
161+
a = a - 1.25
162+
163+
# erfc polynomial approximation
164+
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25
165+
PC=( 0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2, -0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5, -0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8, -0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12, 0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16, -0x1.578c9e375d37p-19)
166+
167+
# Obtains erfc of |x|
168+
r=1.0-evalpoly(a,PC)
169+
return copysign(r,x)
170+
171+
elseif (ia < 0x400a)
172+
## 2 <= |x| < 3.25.
173+
a = abs(x)
174+
a = fma(0.5, a, -1.0)
175+
176+
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2
177+
PD=( 0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3, -0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2, -0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2, 0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3, -0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3, 0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10 )
178+
179+
# Obtains erfc of |x|
180+
r=1.0-evalpoly(a,PD)
181+
return copysign(r,x)
182+
183+
elseif (ia < 0x4012)
184+
## 3.25 <= |x| < 4.5.
185+
a = abs(x)
186+
a = a - 3.25
187+
188+
# Generated using Sollya::remez(erfc(x+a),15,[0;b-a],1,1e-32) with a=3.25 b=4.5
189+
PE=(4.302779463707753e-6, -2.9189025396754986e-5, 9.486433337913454e-5, -0.00019580973528519273, 0.00028656966098267845, -0.0003137999213284758, 0.00026354316698539023, -0.00017004596013480086, 8.179009099315484e-5, -2.6177001557562883e-5, 2.6667235974837568e-6, 2.5840073872012914e-6, -1.7970865794807476e-6, 6.040008157462737e-7, -1.1384065360475506e-7, 9.657905491660664e-9)
190+
191+
r=1.0-evalpoly(a,PE)
192+
return copysign(r,x)
193+
194+
elseif (ia < 0x4018)
195+
## 4.5 <= |x| < 6.0.
196+
a = abs(x)
197+
a = a - 4.5
198+
199+
# Generated using Sollya::remez(erfc(x+a),14,[0;b-a],1,1e-32) with a=4.5 b=6.0
200+
PF=(1.966160232770269e-10, -1.8112993751963606e-9, 8.150538393058677e-9, -2.3841928179141717e-8, 5.086831026559613e-8, -8.405611838790292e-8, 1.1114551347025387e-7, -1.1927287548359634e-7, 1.0374501165951575e-7, -7.205498311583407e-8, 3.885921643476988e-8, -1.558946100318052e-8, 4.352049759187871e-9, -7.505506222192582e-10, 5.996753601814976e-11)
201+
202+
r=1.0-evalpoly(a,PF)
203+
return copysign(r,x)
204+
elseif isnan(x)
205+
return NaN
206+
else
207+
return copysign(1.0,x)
208+
end
209+
end
210+
211+
# Fast erf implementation using
212+
# polynomial approximations of erf and erfc.
213+
# Highest measured error is 1.12 ULPs at x = 1.232469
214+
function _erf(x::Float32)
215+
xabs=abs(x)
216+
217+
if (xabs< 0.5)
218+
# range [0;0.5]
219+
# # erf approximation using erf(x)=x+x*P(x^2) with degree 6
220+
# Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32);
221+
222+
p=(0.12837917f0, -0.37612638f0, 0.11283784f0, -0.026865287f0, 0.005218856f0, -0.0008394848f0)
223+
return copysign(fma(evalpoly(x^2,p),x,x),x)
224+
225+
elseif(xabs<1.25)
226+
# range [0.5;1.25]
227+
# # erfc approximation with degree 11
228+
# Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32);
229+
230+
p=(0.47950011f0, -0.8787826f0, 0.4393913f0, 0.14646415f0, -0.18308467f0, -0.007286422f0, 0.04987047f0, -0.0048868246f0, -0.011067663f0, 0.003422347f0, 0.00073027064f0, -0.0003758171f0)
231+
return copysign(1f0-evalpoly(xabs-0.5f0,p),x)
232+
233+
elseif(xabs<2.5)
234+
# range [1.25;2.5]
235+
# erfc approximation with degree 13
236+
# Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32);
237+
238+
p=(0.077099875f0, -0.23652112f0, 0.2956514f0, -0.16753574f0, 0.006158593f0, 0.04718712f0, -0.021331023f0, -0.0035262543f0, 0.005461831f0, -0.00047858673f0, -0.0012763853f0, 0.00073386944f0, -0.00017831658f0, 1.7451624f-5)
239+
return copysign(1f0-evalpoly(xabs-1.25f0,p),x)
240+
241+
elseif (xabs<4.0)
242+
# range [2.5;4.0]
243+
# erfc approximation with degree 13
244+
# Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32);
245+
246+
p=(0.00040695202f0, -0.002178284f0, 0.0054457085f0, -0.008350053f0, 0.008622011f0, -0.006115167f0, 0.0027899458f0, -0.000519395f0, -0.00030461047f0, 0.00031068458f0, -0.00013866898f0, 3.6909692f-5, -5.682889f-6, 3.929763f-7)
247+
return copysign(1f0-evalpoly(xabs-2.5f0,p),x)
248+
249+
elseif isnan(x)
250+
return NaN32
251+
else
252+
# range [4.0,inf)
253+
return copysign(1f0, x)
254+
end
255+
end
256+
257+
_erf(x::Float16)=Float16(_erf(Float32(x)))
258+
104259
"""
105260
erf(x, y)
106261

test/erf.jl

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,47 @@
22
@testset "real argument" begin
33
for T in (Float16, Float32, Float64)
44
@test @inferred(erf(T(1))) isa T
5+
@test erf(T(0.25)) T(0.27632639016823696) rtol=2*eps(T)
6+
@test erf(T(0.75)) T(0.7111556336535151) rtol=2*eps(T)
57
@test erf(T(1)) T(0.84270079294971486934) rtol=2*eps(T)
8+
@test erf(T(1.5)) T(0.9661051464753108) rtol=2*eps(T)
9+
@test erf(T(2.5)) T(0.9995930479825551) rtol=2*eps(T)
10+
@test erf(T(3.5)) T(0.9999992569016276) rtol=2*eps(T)
11+
@test erf(T(4.5)) T(0.9999999998033839) rtol=2*eps(T)
12+
@test erf(T(6)) T(1.0) rtol=2*eps(T)
13+
14+
@test erf(T(-0.25)) T(-0.27632639016823696) rtol=2*eps(T)
15+
@test erf(T(-0.75)) T(-0.7111556336535151) rtol=2*eps(T)
16+
@test erf(T(-1)) T(-0.84270079294971486934) rtol=2*eps(T)
17+
@test erf(T(-1.5)) T(-0.9661051464753108) rtol=2*eps(T)
18+
@test erf(T(-2.5)) T(-0.9995930479825551) rtol=2*eps(T)
19+
@test erf(T(-3.5)) T(-0.9999992569016276) rtol=2*eps(T)
20+
@test erf(T(-4.5)) T(-0.9999999998033839) rtol=2*eps(T)
21+
@test erf(T(-6)) T(-1.0) rtol=2*eps(T)
22+
23+
@test isnan(erf(T(NaN)))
624

725
@test @inferred(erfc(T(1))) isa T
8-
@test erfc(T(1)) T(0.15729920705028513066) rtol=2*eps(T)
26+
@test erfc(T(0.25)) T(0.7236736098317631) rtol=2*eps(T)
27+
@test erfc(T(0.75)) T(0.28884436634648486) rtol=2*eps(T)
28+
@test erfc(T(1.0)) T(0.15729920705028513) rtol=2*eps(T)
29+
@test erfc(T(2.0)) T(0.004677734981047266) rtol=2*eps(T)
30+
@test erfc(T(3.0)) T(2.209049699858544e-5) rtol=2*eps(T)
31+
@test erfc(T(4.0)) T(1.541725790028002e-8) rtol=2*eps(T)
32+
@test erfc(T(5.0)) T(1.537459794428035e-12) rtol=2*eps(T)
33+
@test erfc(T(6.0)) T(2.1519736712498913e-17) rtol=2*eps(T)
34+
@test erfc(T(Inf)) T(0.0) rtol=2*eps(T)
35+
36+
@test erfc(T(-0.25)) T(1.276326390168237) rtol=2*eps(T)
37+
@test erfc(T(-0.75)) T(1.7111556336535152) rtol=2*eps(T)
38+
@test erfc(T(-1.0)) T(1.8427007929497148) rtol=2*eps(T)
39+
@test erfc(T(-1.5)) T(1.9661051464753108) rtol=2*eps(T)
40+
@test erfc(T(-2.5)) T(1.999593047982555) rtol=2*eps(T)
41+
@test erfc(T(-3.5)) T(1.9999992569016276) rtol=2*eps(T)
42+
@test erfc(T(-4.5)) T(1.999999999803384) rtol=2*eps(T)
43+
@test erfc(T(-6.0)) T(2) rtol=2*eps(T)
44+
45+
@test isnan(erfc(T(NaN)))
946

1047
@test @inferred(erfcx(T(1))) isa T
1148
@test erfcx(T(1)) T(0.42758357615580700442) rtol=2*eps(T)

0 commit comments

Comments
 (0)