Skip to content

Commit ba2b2e3

Browse files
FriesischScottlrnv
andauthored
Add the rosenblatt transform and its inverse (#254)
Co-authored-by: Oskar Laverny <oskar.laverny@univ-amu.fr>
1 parent 29216bf commit ba2b2e3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1441
-850
lines changed

Project.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ version = "0.1.30"
44
authors = ["Oskar Laverny"]
55

66
[deps]
7+
BigCombinatorics = "7b33fef7-ef1d-5d54-bb67-cf96d4c8a166"
78
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
89
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
910
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -12,6 +13,7 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
1213
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1314
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
1415
MvNormalCDF = "37188c8d-bc69-4638-b057-733e744175ec"
16+
PolyLog = "85e3b03c-9856-11eb-0374-4dc1f8670e7f"
1517
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
1618
QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
1719
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -23,6 +25,7 @@ WilliamsonTransforms = "48feb556-9bdd-43a2-8e10-96100ec25e22"
2325

2426
[compat]
2527
Aqua = "v0.8"
28+
BigCombinatorics = "0.3.6"
2629
Combinatorics = "1"
2730
Distributions = "0.25"
2831
ForwardDiff = "0.10, 1"
@@ -32,6 +35,7 @@ InteractiveUtils = "1"
3235
LinearAlgebra = "1"
3336
LogExpFunctions = "0.3"
3437
MvNormalCDF = "0.2, 0.3"
38+
PolyLog = "2.6.0"
3539
PrecompileTools = "1"
3640
QuadGK = "2"
3741
Random = "1"
@@ -51,8 +55,9 @@ HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
5155
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
5256
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
5357
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
58+
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
5459
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
5560
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
5661

5762
[targets]
58-
test = ["Test", "TestItemRunner", "InteractiveUtils", "LinearAlgebra", "HypothesisTests", "Aqua", "StableRNGs"]
63+
test = ["Test", "TestItemRunner", "InteractiveUtils", "LinearAlgebra", "HypothesisTests", "Aqua", "StableRNGs", "StatsBase"]

docs/make.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ makedocs(;
3131
"Empirical Copulas" => "empirical/generalities.md",
3232
"Vines Copulas" => "Vines.md",
3333
"Dependence measures" => "dependence_measures.md",
34+
"Rosenblatt transformatons" => "rosenblatt_transform.md",
3435
],
3536

3637
"Bestiary" => [

docs/src/assets/references.bib

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,4 +874,15 @@ @article{blier2022stochastic
874874
pages={107506},
875875
year={2022},
876876
publisher={Elsevier}
877+
}
878+
879+
@article{rosenblatt1952,
880+
title={Remarks on a multivariate transformation},
881+
author={Rosenblatt, Murray},
882+
journal={The annals of mathematical statistics},
883+
volume={23},
884+
number={3},
885+
pages={470--472},
886+
year={1952},
887+
publisher={JSTOR}
877888
}

docs/src/rosenblatt_transform.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
```@meta
2+
CurrentModule = Copulas
3+
```
4+
5+
# Rosenblatt transformations
6+
7+
## Definition and usefullness
8+
9+
10+
!!! definition "Definition (Rosenblatt transformation):"
11+
The Rosenblatt transformation considers a random vector ``X`` to be distributed as a certain multivariate cumulative distribution function ``F_{X}(x)``, and maps it back to a uniform distribution on the unit hypercube.
12+
13+
More formally, consider the map ``R_X(x)`` defined as follows:
14+
15+
```math
16+
R_X(x_1,...,x_d) = (r_1 = F_{X_1}(x_1), r_2 = F_{X_2 | X_1}(x_2 | x_1), ..., r_{d} = F_{X_d | X_1,...,X_{d-1}}(x_d|x_1,...x_{d-1}))
17+
```
18+
19+
20+
In certain circonstances, in paritcular for Archimedean copulas, this map simplifies to tractable expressions. it has a few nice properties:
21+
22+
* ``R_X(X) \sim \texttt{Uniform(Unit Hypercube)}``
23+
* ``R_X`` is a bijection.
24+
25+
These two properties are leveraged in some cases to construct the inverse rosenblatt transformations, that maps random noise to proper samples from the copula. In some cases, this is the best sampling algorithm available.
26+
27+
## Implementation
28+
29+
As soon as the random vector ``X`` is represented by an object `X` that subtypes `SklarDist` or `Copula`, you have access to the `rosenblatt(X, x)` and `inverse_rosenblatt(X,x)` operators, which both have a straghtforward interpretation from their names.
30+
31+
```@docs
32+
rosenblatt
33+
inverse_rosenblatt
34+
```
35+
36+
!!! note "Not all copulas available !"
37+
Some copulas such has archimedeans have known expressions for their rosenblatt and/or inverse rosenblatt transforms, and therefore benefit from this interface and our implementation. On the other hand, some copulas have no known closed form expressions for conditional cdfs, and therefore their rosenblatt transformation is hard to implement.
38+
39+
If you feel like you miss methods for certain particular copulas while the theory exists and it should be possible, do not hesitate to open an issue !
40+
41+
42+
```@bibliography
43+
Pages = [@__FILE__]
44+
Canonical = false
45+
```

src/ArchimedeanCopula.jl

Lines changed: 104 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,33 @@
11
"""
22
ArchimedeanCopula{d, TG}
33
4-
Fields:
5-
- G::TG : the generator <: Generator.
4+
Fields:
5+
- G::TG : the generator <: Generator.
66
7-
Constructor:
7+
Constructor:
88
99
ArchimedeanCopula(d::Int,G::Generator)
1010
11-
For some Archimedean [`Generator`](@ref) `G::Generator` and some dimenson `d`, this class models the archimedean copula which has this generator. The constructor checks for validity by ensuring that `max_monotony(G) ≥ d`. The ``d``-variate archimedean copula with generator ``\\phi`` writes:
11+
For some Archimedean [`Generator`](@ref) `G::Generator` and some dimenson `d`, this class models the archimedean copula which has this generator. The constructor checks for validity by ensuring that `max_monotony(G) ≥ d`. The ``d``-variate archimedean copula with generator ``\\phi`` writes:
1212
1313
```math
1414
C(\\mathbf u) = \\phi^{-1}\\left(\\sum_{i=1}^d \\phi(u_i)\\right)
1515
```
1616
17-
The default sampling method is the Radial-simplex decomposition using the Williamson transformation of ``\\phi``.
17+
The default sampling method is the Radial-simplex decomposition using the Williamson transformation of ``\\phi``.
1818
19-
There exists several known parametric generators that are implement in the package. For every `NamedGenerator <: Generator` implemented in the package, we provide a type alias ``NamedCopula{d,...} = ArchimedeanCopula{d,NamedGenerator{...}}` to be able to manipulate the classic archimedean copulas without too much hassle for known and usefull special cases.
19+
There exists several known parametric generators that are implement in the package. For every `NamedGenerator <: Generator` implemented in the package, we provide a type alias ``NamedCopula{d,...} = ArchimedeanCopula{d,NamedGenerator{...}}` to be able to manipulate the classic archimedean copulas without too much hassle for known and usefull special cases.
2020
21-
A generic archimdean copula can be constructed as follows:
21+
A generic archimdean copula can be constructed as follows:
2222
2323
```julia
2424
struct MyGenerator <: Generator end
2525
ϕ(G::MyGenerator,t) = exp(-t) # your archimedean generator, can be any d-monotonous function.
26-
max_monotony(G::MyGenerator) = Inf # could depend on generators parameters.
26+
max_monotony(G::MyGenerator) = Inf # could depend on generators parameters.
2727
C = ArchimedeanCopula(d,MyGenerator())
2828
```
2929
30-
The obtained model can be used as follows:
30+
The obtained model can be used as follows:
3131
```julia
3232
spl = rand(C,1000) # sampling
3333
cdf(C,spl) # cdf
@@ -48,45 +48,34 @@ struct ArchimedeanCopula{d,TG} <: Copula{d}
4848
return new{d,typeof(G)}(G)
4949
end
5050
end
51-
ϕ(C::ArchimedeanCopula{d,TG},t) where {d,TG} = ϕ(C.G,t)
52-
ϕ⁻¹(C::ArchimedeanCopula{d,TG},t) where {d,TG} = ϕ⁻¹(C.G,t)
53-
ϕ⁽¹⁾(C::ArchimedeanCopula{d,TG},t) where {d,TG} = ϕ⁽¹⁾(C.G,t)
54-
ϕ⁽ᵏ⁾(C::ArchimedeanCopula{d,TG},t) where {d,TG} = ϕ⁽ᵏ⁾(C.G, Val(d), t)
55-
williamson_dist(C::ArchimedeanCopula{d,TG}) where {d,TG} = williamson_dist(C.G,Val(d))
51+
ϕ(C::ArchimedeanCopula{d,TG},t) where {d,TG} = ϕ(C.G, t)
52+
ϕ⁻¹(C::ArchimedeanCopula{d,TG},t) where {d,TG} = ϕ⁻¹(C.G, t)
53+
ϕ⁻¹⁽¹⁾(C::ArchimedeanCopula{d,TG},t) where {d,TG} = ϕ⁻¹⁽¹⁾(C.G, t)
54+
ϕ⁽¹⁾(C::ArchimedeanCopula{d,TG},t) where {d,TG} = ϕ⁽¹⁾(C.G, t)
55+
ϕ⁽ᵏ⁾(C::ArchimedeanCopula{d,TG},k,t) where {d,TG} = ϕ⁽ᵏ⁾(C.G, k, t)
56+
williamson_dist(C::ArchimedeanCopula{d,TG}) where {d,TG} = williamson_dist(C.G, Val{d}())
5657

57-
58-
function _cdf(C::CT,u) where {CT<:ArchimedeanCopula}
59-
sum_ϕ⁻¹u = 0.0
60-
for us in u
61-
sum_ϕ⁻¹u += ϕ⁻¹(C,us)
62-
end
63-
return ϕ(C,sum_ϕ⁻¹u)
64-
end
58+
_cdf(C::ArchimedeanCopula, u) = ϕ(C, sum(ϕ⁻¹.(C, u)))
6559
function Distributions._logpdf(C::ArchimedeanCopula{d,TG}, u) where {d,TG}
66-
if !all(0 .<= u .<= 1)
60+
if !all(0 .< u .< 1)
6761
return eltype(u)(-Inf)
6862
end
69-
T = promote_type(Float64, eltype(u)) # the Float64 here should be eltype(C) when copulas will be type agnostic...
70-
logdenom = sum_ϕ⁻¹u = zero(T)
71-
for us in u
72-
ϕ⁻¹u = ϕ⁻¹(C,us)
73-
sum_ϕ⁻¹u += ϕ⁻¹u
74-
logdenom += log(-ϕ⁽¹⁾(C,ϕ⁻¹u)) # log of negative here because ϕ⁽¹⁾ is necessarily negative
75-
end
76-
numer = abs(ϕ⁽ᵏ⁾(C, sum_ϕ⁻¹u))
77-
if numer > 0
78-
r = log(numer) - logdenom
79-
!isnan(r) && return r
63+
return log(ϕ⁽ᵏ⁾(C, Val{d}(), sum(ϕ⁻¹.(C, u))) * prod(ϕ⁻¹⁽¹⁾.(C, u)))
64+
end
65+
66+
function Distributions._logpdf(C::ArchimedeanCopula{d,TG}, u) where {d,TG<:ClaytonGenerator}
67+
if !all(0 .< u .< 1) || (C.G.θ < 0 && sum(u .^ -(C.G.θ)) < (d - 1))
68+
return eltype(u)(-Inf)
8069
end
81-
return -T(Inf)
70+
return log(ϕ⁽ᵏ⁾(C, Val{d}(), sum(ϕ⁻¹.(C, u))) * prod(ϕ⁻¹⁽¹⁾.(C, u)))
8271
end
8372

84-
# function τ(C::ArchimedeanCopula)
73+
# function τ(C::ArchimedeanCopula)
8574
# return 4*Distributions.expectation(r -> ϕ(C,r), williamson_dist(C)) - 1
8675
# end
8776

8877
function Distributions._rand!(rng::Distributions.AbstractRNG, C::CT, x::AbstractVector{T}) where {T<:Real, CT<:ArchimedeanCopula}
89-
# By default, we use the Williamson sampling.
78+
# By default, we use the Williamson sampling.
9079
Random.randexp!(rng,x)
9180
r = rand(rng,williamson_dist(C))
9281
sx = sum(x)
@@ -96,7 +85,7 @@ function Distributions._rand!(rng::Distributions.AbstractRNG, C::CT, x::Abstract
9685
return x
9786
end
9887
function Distributions._rand!(rng::Distributions.AbstractRNG, C::CT, A::DenseMatrix{T}) where {T<:Real, CT<:ArchimedeanCopula}
99-
# More efficient version that precomputes the Williamson transform on each call to sample in batches:
88+
# More efficient version that precomputes the Williamson transform on each call to sample in batches:
10089
Random.randexp!(rng,A)
10190
n = size(A,2)
10291
r = rand(rng,williamson_dist(C),n)
@@ -112,7 +101,7 @@ function Distributions.fit(::Type{CT},u) where {CT <: ArchimedeanCopula}
112101
# @info "Archimedean fits are by default through inverse kendall tau."
113102
d = size(u,1)
114103
τ = StatsBase.corkendall(u')
115-
# Then the off-diagonal elements of the matrix should be averaged:
104+
# Then the off-diagonal elements of the matrix should be averaged:
116105
avgτ = (sum(τ) .- d) / (d^2-d)
117106
GT = generatorof(CT)
118107
θ = τ⁻¹(GT,avgτ)
@@ -131,12 +120,12 @@ end
131120
################################################################################################
132121

133122

134-
## Automatic syntactic sugar for all ZeroVariateGenerators and UnivariateGenerators.
123+
## Automatic syntactic sugar for all ZeroVariateGenerators and UnivariateGenerators.
135124
## see https://discourse.julialang.org/t/how-to-dispatch-on-a-type-alias/106476/38?u=lrnv
136125
function generatorof(::Type{S}) where {S <: ArchimedeanCopula}
137126
S2 = hasproperty(S,:body) ? S.body : S
138127
S3 = hasproperty(S2, :body) ? S2.body : S2
139-
try
128+
try
140129
return S3.parameters[2].name.wrapper
141130
catch e
142131
@error "There is no generator type associated with the archimedean type $S"
@@ -159,7 +148,7 @@ for T in InteractiveUtils.subtypes(UnivariateGenerator)
159148
end
160149
end
161150

162-
# The zero-variate ones just need a few more methods:
151+
# The zero-variate ones just need a few more methods:
163152
Distributions._logpdf(::ArchimedeanCopula{d,IndependentGenerator}, u) where {d} = all(0 .<= u .<= 1) ? zero(eltype(u)) : eltype(u)(-Inf)
164153
Distributions._logpdf(::ArchimedeanCopula{d,MGenerator}, u) where {d} = all(u == u[1]) ? zero(eltype(u)) : eltype(u)(-Inf)
165154
Distributions._logpdf(::ArchimedeanCopula{d,WGenerator}, u) where {d} = sum(u) == 1 ? zero(eltype(u)) : eltype(u)(-Inf)
@@ -198,3 +187,76 @@ function Distributions._rand!(rng::Distributions.AbstractRNG, ::ArchimedeanCopul
198187
return A
199188
end
200189

190+
function Distributions._rand!(rng::Distributions.AbstractRNG, C::ClaytonCopula, A::DenseMatrix{<:Real})
191+
A[:] = inverse_rosenblatt(C, rand(rng, size(A)...))
192+
return A
193+
end
194+
195+
function rosenblatt(C::ArchimedeanCopula{d,TG}, u::AbstractMatrix{<:Real}) where {d,TG}
196+
@assert d == size(u, 1)
197+
198+
U = zeros(eltype(u), size(u))
199+
U[1, :] = u[1, :]
200+
rⱼ = ϕ⁻¹.(C.G, u[1,:])
201+
rⱼ₋₁ = similar(rⱼ)
202+
for j in 2:d
203+
rⱼ₋₁ .= rⱼ
204+
rⱼ .+= ϕ⁻¹.(C.G, u[j,:]) # so we do not compute too much of them, nor allocate too much.
205+
U[j, :] .= ϕ⁽ᵏ⁾.(C.G, Val(j - 1), rⱼ) ./ ϕ⁽ᵏ⁾.(C.G, Val(j - 1), rⱼ₋₁)
206+
end
207+
return U
208+
end
209+
210+
function rosenblatt(
211+
C::ArchimedeanCopula{d,TG}, u::AbstractMatrix{<:Real}
212+
) where {d,TG<:ClaytonGenerator}
213+
@assert d == size(u, 1)
214+
215+
U = zeros(eltype(u), size(u))
216+
U[1, :] = u[1, :]
217+
218+
for j in 2:d
219+
U[j, :] .=
220+
(
221+
(1 .- j .+ sum(u[1:j, :] .^ (-C.G.θ); dims=1)[:]) ./
222+
(2 .- j .+ sum(u[1:(j - 1), :] .^ (-C.G.θ); dims=1)[:])
223+
) .^ (-1 / C.G.θ - (j - 1))
224+
end
225+
return U
226+
end
227+
228+
function inverse_rosenblatt(C::ArchimedeanCopula{d,TG}, u::AbstractMatrix{<:Real}) where {d,TG}
229+
@assert d == size(u, 1)
230+
U = zeros(eltype(u), size(u))
231+
U[1, :] = u[1, :]
232+
for i in axes(u, 2)
233+
Cᵢⱼ = zero(eltype(u))
234+
for j in 2:d
235+
Cᵢⱼ += ϕ⁻¹(C.G, U[j - 1, i])
236+
Dᵢⱼ = ϕ⁽ᵏ⁾(C.G, Val(j - 1), Cᵢⱼ) * u[j,i]
237+
f(x) = ϕ⁽ᵏ⁾(C.G, Val(j - 1), Cᵢⱼ + ϕ⁻¹(C.G, x)) - Dᵢⱼ
238+
U[j, i] = Roots.find_zero(f, (eps(1.0), 1.0), Roots.A42())
239+
end
240+
end
241+
return U
242+
end
243+
244+
function inverse_rosenblatt(
245+
C::ArchimedeanCopula{d,TG}, u::AbstractMatrix{<:Real}
246+
) where {d,TG<:ClaytonGenerator}
247+
@assert d == size(u, 1)
248+
249+
U = zeros(eltype(u), size(u))
250+
U[1, :] = u[1, :]
251+
252+
for j in 2:d
253+
U[j, :] =
254+
(
255+
1 .+
256+
(1 .- (j - 1) .+ sum(U[1:(j - 1), :] .^ -C.G.θ; dims=1))[:] .*
257+
(u[j, :] .^ (-1 / (j - 1 + 1 / C.G.θ)) .- 1)
258+
) .^ (-1 / C.G.θ)
259+
end
260+
261+
return U
262+
end

src/BivariateArchimedeanMethods.jl

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
1-
# The Gumbel copula needs a specific bivariate method to handle large parameters.
2-
function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:GumbelGenerator}
3-
θ = C.G.θ
4-
x₁, x₂ = -log(u[1]), -log(u[2])
5-
lx₁, lx₂ = log(x₁), log(x₂)
6-
return 1 - LogExpFunctions.cexpexp(LogExpFunctions.logaddexp* lx₁, θ * lx₂) / θ)
7-
end
8-
function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:GumbelGenerator}
9-
T = promote_type(Float64, eltype(u))
10-
!all(0 .< u .<= 1) && return T(-Inf) # if not in range return -Inf
11-
12-
θ = C.G.θ
13-
x₁, x₂ = -log(u[1]), -log(u[2])
14-
lx₁, lx₂ = log(x₁), log(x₂)
15-
A = LogExpFunctions.logaddexp* lx₁, θ * lx₂)
16-
B = exp(A/θ)
17-
return - B + x₁ + x₂ +-1) * (lx₁ + lx₂) + A/θ - 2A + log(B + θ - 1)
18-
end
1+
# The Gumbel copula needs a specific bivariate method to handle large parameters.
2+
function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:GumbelGenerator}
3+
θ = C.G.θ
4+
x₁, x₂ = -log(u[1]), -log(u[2])
5+
lx₁, lx₂ = log(x₁), log(x₂)
6+
return 1 - LogExpFunctions.cexpexp(LogExpFunctions.logaddexp* lx₁, θ * lx₂) / θ)
7+
end
8+
function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:GumbelGenerator}
9+
T = promote_type(Float64, eltype(u))
10+
!all(0 .< u .<= 1) && return T(-Inf) # if not in range return -Inf
11+
12+
θ = C.G.θ
13+
x₁, x₂ = -log(u[1]), -log(u[2])
14+
lx₁, lx₂ = log(x₁), log(x₂)
15+
A = LogExpFunctions.logaddexp* lx₁, θ * lx₂)
16+
B = exp(A/θ)
17+
return - B + x₁ + x₂ +-1) * (lx₁ + lx₂) + A/θ - 2A + log(B + θ - 1)
18+
end

0 commit comments

Comments
 (0)