Skip to content

Commit b6e1ee7

Browse files
eliascarv and juliohm authored
Add pratio and maxdim (#89)
* Add pratio and maxdim * Update outdim function * Update docstrings * Update tests * Change default pratio value to 1.0 * Update src/transforms/eigenanalysis.jl * Update src/transforms/eigenanalysis.jl Co-authored-by: Júlio Hoffimann <[email protected]>
1 parent 9af2b3b commit b6e1ee7

File tree

2 files changed

+108
-37
lines changed

2 files changed

+108
-37
lines changed

src/transforms/eigenanalysis.jl

+54-33
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
# ------------------------------------------------------------------
44

55
"""
6-
EigenAnalysis(proj, ndim=nothing)
6+
EigenAnalysis(proj; maxdim=nothing, pratio=1.0)
77
88
The eigenanalysis of the covariance with a given projection `proj`.
9-
The number of dimensions of the output is defined by the `ndim` argument.
9+
Optionally specify the maximum number of dimensions in the output `maxdim`
10+
and the percentage of variance to retain `pratio`. Default to all dimensions of
11+
the input.
1012
1113
## Projections
1214
@@ -32,26 +34,30 @@ for more details about these three variants of eigenanalysis.
3234
EigenAnalysis(:V)
3335
EigenAnalysis(:VD)
3436
EigenAnalysis(:VDV)
35-
EigenAnalysis(:V, 2)
37+
EigenAnalysis(:V, maxdim=3)
38+
EigenAnalysis(:VD, pratio=0.99)
39+
EigenAnalysis(:VDV, maxdim=3, pratio=0.99)
3640
```
3741
"""
3842
struct EigenAnalysis <: Transform
3943
proj::Symbol
40-
ndim::Union{Int,Nothing}
44+
maxdim::Union{Int,Nothing}
45+
pratio::Float64
4146

42-
function EigenAnalysis(proj, ndim=nothing)
47+
function EigenAnalysis(proj, maxdim, pratio)
4348
@assert proj ∈ (:V, :VD, :VDV) "Invalid projection."
44-
new(proj, ndim)
49+
@assert 0 ≤ pratio ≤ 1 "Invalid pratio."
50+
new(proj, maxdim, pratio)
4551
end
4652
end
4753

54+
EigenAnalysis(proj; maxdim=nothing, pratio=1.0) =
55+
EigenAnalysis(proj, maxdim, pratio)
56+
4857
assertions(::Type{EigenAnalysis}) = [assert_continuous]
4958

5059
isrevertible(::Type{EigenAnalysis}) = true
5160

52-
_ndim(ndim::Int, X) = ndim
53-
_ndim(ndim::Nothing, X) = size(X, 2)
54-
5561
function apply(transform::EigenAnalysis, table)
5662
# basic checks
5763
for assertion in assertions(transform)
@@ -65,21 +71,18 @@ function apply(transform::EigenAnalysis, table)
6571
# table as matrix
6672
X = Tables.matrix(table)
6773

68-
# output dimension
69-
d = _ndim(transform.ndim, X)
70-
7174
# center the data
7275
μ = mean(X, dims=1)
7376
Y = X .- μ
7477

7578
# eigenanalysis of covariance
76-
S, S⁻¹ = eigenmatrices(transform, Y, d)
79+
S, S⁻¹ = eigenmatrices(transform, Y)
7780

7881
# project the data
7982
Z = Y * S
8083

8184
# column names
82-
names = Symbol.(:PC, 1:d)
85+
names = Symbol.(:PC, 1:size(Z, 2))
8386

8487
# table with transformed columns
8588
𝒯 = (; zip(names, eachcol(Z))...)
@@ -115,9 +118,6 @@ function reapply(transform::EigenAnalysis, table, cache)
115118
# table as matrix
116119
X = Tables.matrix(table)
117120

118-
# output dimension
119-
d = _ndim(transform.ndim, X)
120-
121121
# retrieve cache
122122
μ, S, S⁻¹, onames = cache
123123

@@ -128,14 +128,26 @@ function reapply(transform::EigenAnalysis, table, cache)
128128
Z = Y * S
129129

130130
# column names
131-
names = Symbol.(:PC, 1:d)
131+
names = Symbol.(:PC, 1:size(Z, 2))
132132

133133
# table with transformed columns
134134
𝒯 = (; zip(names, eachcol(Z))...)
135135
𝒯 |> Tables.materializer(table)
136136
end
137137

138-
function eigenmatrices(transform, Y, d)
138+
_maxdim(maxdim::Int, Y) = maxdim
139+
_maxdim(::Nothing, Y) = size(Y, 2)
140+
141+
function outdim(transform, Y, λ)
142+
pratio = transform.pratio
143+
csums = cumsum(λ)
144+
ratios = csums ./ last(csums)
145+
mdim = _maxdim(transform.maxdim, Y)
146+
pdim = findfirst(≥(pratio), ratios)
147+
min(mdim, pdim)
148+
end
149+
150+
function eigenmatrices(transform, Y)
139151
proj = transform.proj
140152

141153
Σ = cov(Y)
@@ -154,56 +166,65 @@ function eigenmatrices(transform, Y, d)
154166
S⁻¹ = V * Λ * transpose(V)
155167
end
156168

169+
d = outdim(transform, Y, λ)
170+
157171
S[:, 1:d], S⁻¹[1:d, :]
158172
end
159173

160174
"""
161-
PCA(ndim=nothing)
175+
PCA(; maxdim=nothing, pratio=1.0)
162176
163177
The PCA transform is a shortcut for
164-
`ZScore() → EigenAnalysis(:V, ndim)`.
178+
`ZScore() → EigenAnalysis(:V; maxdim, pratio)`.
165179
166180
See also: [`ZScore`](@ref), [`EigenAnalysis`](@ref).
167181
168182
# Examples
169183
170184
```julia
171-
PCA()
172-
PCA(2)
185+
PCA(maxdim=2)
186+
PCA(pratio=0.86)
187+
PCA(maxdim=2, pratio=0.86)
173188
```
174189
"""
175-
PCA(ndim=nothing) = ZScore() → EigenAnalysis(:V, ndim)
190+
PCA(; maxdim=nothing, pratio=1.0) =
191+
ZScore() → EigenAnalysis(:V, maxdim, pratio)
176192

177193
"""
178-
DRS(ndim=nothing)
194+
DRS(; maxdim=nothing, pratio=1.0)
179195
180196
The DRS transform is a shortcut for
181-
`ZScore() → EigenAnalysis(:VD, ndim)`.
197+
`ZScore() → EigenAnalysis(:VD; maxdim, pratio)`.
182198
183199
See also: [`ZScore`](@ref), [`EigenAnalysis`](@ref).
184200
185201
# Examples
186202
187203
```julia
188-
DRS()
189-
DRS(3)
204+
DRS(maxdim=3)
205+
DRS(pratio=0.87)
206+
DRS(maxdim=3, pratio=0.87)
190207
```
191208
"""
192-
DRS(ndim=nothing) = ZScore() → EigenAnalysis(:VD, ndim)
209+
DRS(; maxdim=nothing, pratio=1.0) =
210+
ZScore() → EigenAnalysis(:VD, maxdim, pratio)
193211

194212
"""
195-
SDS(ndim=nothing)
213+
SDS(; maxdim=nothing, pratio=1.0)
196214
197215
The SDS transform is a shortcut for
198-
`ZScore() → EigenAnalysis(:VDV, ndim)`.
216+
`ZScore() → EigenAnalysis(:VDV; maxdim, pratio)`.
199217
200218
See also: [`ZScore`](@ref), [`EigenAnalysis`](@ref).
201219
202220
# Examples
203221
204222
```julia
205223
SDS()
206-
SDS(4)
224+
SDS(maxdim=4)
225+
SDS(pratio=0.88)
226+
SDS(maxdim=4, pratio=0.88)
207227
```
208228
"""
209-
SDS(ndim=nothing) = ZScore() → EigenAnalysis(:VDV, ndim)
229+
SDS(; maxdim=nothing, pratio=1.0) =
230+
ZScore() → EigenAnalysis(:VDV, maxdim, pratio)

test/transforms.jl

+54-4
Original file line numberDiff line numberDiff line change
@@ -1506,22 +1506,22 @@
15061506
rtₒ = revert(T, n, c)
15071507
@test Tables.matrix(rt) ≈ Tables.matrix(rtₒ)
15081508

1509-
# ndim
1509+
# maxdim
15101510
x = randn(1000)
15111511
y = x + randn(1000)
15121512
z = 2x - y + randn(1000)
15131513
t = Table(; x, y, z)
15141514

15151515
# PCA
1516-
T = PCA(2)
1516+
T = PCA(maxdim=2)
15171517
n, c = apply(T, t)
15181518
Σ = cov(Tables.matrix(n))
15191519
@test Tables.columnnames(n) == (:PC1, :PC2)
15201520
@test isapprox(Σ[1,2], 0; atol=1e-6)
15211521
@test isapprox(Σ[2,1], 0; atol=1e-6)
15221522

15231523
# DRS
1524-
T = DRS(2)
1524+
T = DRS(maxdim=2)
15251525
n, c = apply(T, t)
15261526
Σ = cov(Tables.matrix(n))
15271527
@test Tables.columnnames(n) == (:PC1, :PC2)
@@ -1531,14 +1531,64 @@
15311531
@test isapprox(Σ[2,2], 1; atol=1e-6)
15321532

15331533
# SDS
1534-
T = SDS(2)
1534+
T = SDS(maxdim=2)
15351535
n, c = apply(T, t)
15361536
Σ = cov(Tables.matrix(n))
15371537
@test Tables.columnnames(n) == (:PC1, :PC2)
15381538
@test isapprox(Σ[1,2], 0; atol=1e-6)
15391539
@test isapprox(Σ[2,1], 0; atol=1e-6)
15401540
@test isapprox(Σ[1,1], 1; atol=1e-6)
15411541
@test isapprox(Σ[2,2], 1; atol=1e-6)
1542+
1543+
# pratio
1544+
a = randn(rng, 1000)
1545+
b = randn(rng, 1000)
1546+
c = a + randn(rng, 1000)
1547+
d = b - randn(rng, 1000)
1548+
e = 3d + c - randn(rng, 1000)
1549+
t = Table(; a, b, c, d, e)
1550+
1551+
# PCA
1552+
T = PCA(pratio=0.90)
1553+
n, c = apply(T, t)
1554+
Σ = cov(Tables.matrix(n))
1555+
@test Tables.columnnames(n) == (:PC1, :PC2, :PC3)
1556+
@test isapprox(Σ[1,2], 0; atol=1e-6)
1557+
@test isapprox(Σ[1,3], 0; atol=1e-6)
1558+
@test isapprox(Σ[2,1], 0; atol=1e-6)
1559+
@test isapprox(Σ[2,3], 0; atol=1e-6)
1560+
@test isapprox(Σ[3,1], 0; atol=1e-6)
1561+
@test isapprox(Σ[3,2], 0; atol=1e-6)
1562+
1563+
# DRS
1564+
T = DRS(pratio=0.90)
1565+
n, c = apply(T, t)
1566+
Σ = cov(Tables.matrix(n))
1567+
@test Tables.columnnames(n) == (:PC1, :PC2, :PC3)
1568+
@test isapprox(Σ[1,2], 0; atol=1e-6)
1569+
@test isapprox(Σ[1,3], 0; atol=1e-6)
1570+
@test isapprox(Σ[2,1], 0; atol=1e-6)
1571+
@test isapprox(Σ[2,3], 0; atol=1e-6)
1572+
@test isapprox(Σ[3,1], 0; atol=1e-6)
1573+
@test isapprox(Σ[3,2], 0; atol=1e-6)
1574+
@test isapprox(Σ[1,1], 1; atol=1e-6)
1575+
@test isapprox(Σ[2,2], 1; atol=1e-6)
1576+
@test isapprox(Σ[3,3], 1; atol=1e-6)
1577+
1578+
# SDS
1579+
T = SDS(pratio=0.90)
1580+
n, c = apply(T, t)
1581+
Σ = cov(Tables.matrix(n))
1582+
@test Tables.columnnames(n) == (:PC1, :PC2, :PC3)
1583+
@test isapprox(Σ[1,2], 0; atol=1e-6)
1584+
@test isapprox(Σ[1,3], 0; atol=1e-6)
1585+
@test isapprox(Σ[2,1], 0; atol=1e-6)
1586+
@test isapprox(Σ[2,3], 0; atol=1e-6)
1587+
@test isapprox(Σ[3,1], 0; atol=1e-6)
1588+
@test isapprox(Σ[3,2], 0; atol=1e-6)
1589+
@test isapprox(Σ[1,1], 1; atol=1e-6)
1590+
@test isapprox(Σ[2,2], 1; atol=1e-6)
1591+
@test isapprox(Σ[3,3], 1; atol=1e-6)
15421592
end
15431593

15441594
@testset "Sequential" begin

0 commit comments

Comments
 (0)