Skip to content

Commit 2c53837

Browse files
authored
Add support for multiple column selection in DropExtrema (#255)
* Add support for multiple column selection in 'DropExtrema' * Update tests
1 parent 14943f7 commit 2c53837

File tree

3 files changed

+72
-17
lines changed

3 files changed

+72
-17
lines changed

src/transforms/dropextrema.jl

+36-14
Original file line numberDiff line numberDiff line change
@@ -3,46 +3,68 @@
33
# ------------------------------------------------------------------
44

55
"""
6-
DropExtrema(col; low=0.25, high=0.75)
6+
DropExtrema(; low=0.25, high=0.75)
77
8-
Drops the rows where the values in the column `col` are outside the interval
9-
`[quantile(col, low), quantile(col, high)]`.
8+
Drops rows where the value in any column is outside that column's
9+
interval `[quantile(col, low), quantile(col, high)]`.
10+
11+
DropExtrema(col₁, col₂, ..., colₙ; low=0.25, high=0.75)
12+
DropExtrema([col₁, col₂, ..., colₙ]; low=0.25, high=0.75)
13+
DropExtrema((col₁, col₂, ..., colₙ); low=0.25, high=0.75)
14+
15+
Drops rows where any of the values in columns `col₁`, `col₂`, ..., `colₙ`
16+
are outside the interval.
17+
18+
DropExtrema(regex; low=0.25, high=0.75)
19+
20+
Drops rows where any of the values in columns that match `regex`
21+
are outside the interval.
1022
1123
# Examples
1224
1325
```julia
14-
DropExtrema(1)
26+
DropExtrema(low=0.3, high=0.7)
27+
DropExtrema(1, low=0.3, high=0.7)
1528
DropExtrema(:a, low=0.2, high=0.8)
1629
DropExtrema("a", low=0.3, high=0.7)
30+
DropExtrema(1, 3, 5, low=0, high=1)
31+
DropExtrema([:a, :c, :e], low=0.3, high=0.7)
32+
DropExtrema(("a", "c", "e"), low=0.25, high=0.75)
33+
DropExtrema(r"[ace]", low=0.3, high=0.7)
1734
```
1835
"""
19-
struct DropExtrema{S<:SingleColumnSelector,T} <: StatelessFeatureTransform
36+
struct DropExtrema{S<:ColumnSelector,T} <: StatelessFeatureTransform
2037
selector::S
2138
low::T
2239
high::T
2340

24-
function DropExtrema(selector::S, low::T, high::T) where {S<:SingleColumnSelector,T}
41+
function DropExtrema(selector::S, low::T, high::T) where {S<:ColumnSelector,T}
2542
_assert(0 ≤ low ≤ high ≤ 1, "invalid quantiles")
2643
new{S,T}(selector, low, high)
2744
end
2845
end
2946

30-
DropExtrema(selector::SingleColumnSelector, low, high) = DropExtrema(selector, promote(low, high)...)
31-
DropExtrema(col::Column; low=0.25, high=0.75) = DropExtrema(selector(col), low, high)
47+
DropExtrema(selector::ColumnSelector, low, high) = DropExtrema(selector, promote(low, high)...)
48+
49+
DropExtrema(; low=0.25, high=0.75) = DropExtrema(AllSelector(), low, high)
50+
DropExtrema(cols; low=0.25, high=0.75) = DropExtrema(selector(cols), low, high)
51+
DropExtrema(cols::C...; low=0.25, high=0.75) where {C<:Column} = DropExtrema(selector(cols), low, high)
3252

3353
isrevertible(::Type{<:DropExtrema}) = true
3454

3555
function preprocess(transform::DropExtrema, feat)
3656
cols = Tables.columns(feat)
3757
names = Tables.columnnames(cols)
38-
sname = selectsingle(transform.selector, names)
58+
snames = transform.selector(names)
3959

40-
x = Tables.getcolumn(cols, sname)
41-
low = convert(eltype(x), transform.low)
42-
high = convert(eltype(x), transform.high)
43-
xl, xh = quantile(x, (low, high))
60+
limits = map(snames) do name
61+
x = Tables.getcolumn(cols, name)
62+
low = convert(eltype(x), transform.low)
63+
high = convert(eltype(x), transform.high)
64+
name => quantile(x, (low, high))
65+
end
4466

45-
ftrans = Filter(row -> xl ≤ row[sname] ≤ xh)
67+
ftrans = Filter(row -> all(xl ≤ row[nm] ≤ xh for (nm, (xl, xh)) in limits))
4668
fprep = preprocess(ftrans, feat)
4769
ftrans, fprep
4870
end

test/shows.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -180,13 +180,13 @@
180180

181181
# compact mode
182182
iostr = sprint(show, T)
183-
@test iostr == "DropExtrema(:a, 0.25, 0.75)"
183+
@test iostr == "DropExtrema([:a], 0.25, 0.75)"
184184

185185
# full mode
186186
iostr = sprint(show, MIME("text/plain"), T)
187187
@test iostr == """
188188
DropExtrema transform
189-
├─ selector = :a
189+
├─ selector = [:a]
190190
├─ low = 0.25
191191
└─ high = 0.75"""
192192
end

test/transforms/dropextrema.jl

+34-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@
2020
tₒ = revert(T, n, c)
2121
@test t == tₒ
2222

23+
T = DropExtrema(1, 2)
24+
n, c = apply(T, t)
25+
@test n.a == [5.1, 5.4]
26+
@test n.b == [4.4, 1.9]
27+
@test n.c == [2.8, 8.9]
28+
@test n.d == [8.1, 4.1]
29+
@test n.e == [5.9, 2.4]
30+
@test n.f == [1.3, 5.1]
31+
tₒ = revert(T, n, c)
32+
@test t == tₒ
33+
2334
T = DropExtrema(:c, low=0.3, high=0.7)
2435
n, c = apply(T, t)
2536
@test n.a == [6.9, 7.8, 4.8, 8.0]
@@ -31,6 +42,17 @@
3142
tₒ = revert(T, n, c)
3243
@test t == tₒ
3344

45+
T = DropExtrema([:c, :d], low=0.3, high=0.7)
46+
n, c = apply(T, t)
47+
@test n.a == [7.8, 4.8]
48+
@test n.b == [6.3, 0.5]
49+
@test n.c == [5.7, 6.7]
50+
@test n.d == [6.2, 6.2]
51+
@test n.e == [4.1, 1.3]
52+
@test n.f == [9.0, 8.9]
53+
tₒ = revert(T, n, c)
54+
@test t == tₒ
55+
3456
T = DropExtrema("e", low=0.2, high=0.8)
3557
n, c = apply(T, t)
3658
@test n.a == [7.8, 0.0, 5.1, 1.1, 8.0, 5.4]
@@ -42,6 +64,17 @@
4264
tₒ = revert(T, n, c)
4365
@test t == tₒ
4466

45-
# throws
67+
T = DropExtrema(("e", "f"), low=0.2, high=0.8)
68+
n, c = apply(T, t)
69+
@test n.a == [0.0, 1.1, 8.0, 5.4]
70+
@test n.b == [1.4, 3.0, 6.1, 1.9]
71+
@test n.c == [2.8, 8.4, 5.0, 8.9]
72+
@test n.d == [1.9, 4.0, 6.9, 4.1]
73+
@test n.e == [1.6, 4.9, 3.5, 2.4]
74+
@test n.f == [6.2, 6.2, 3.8, 5.1]
75+
tₒ = revert(T, n, c)
76+
@test t == tₒ
77+
78+
# error: invalid quantiles
4679
@test_throws AssertionError DropExtrema(:a, low=0, high=1.4)
4780
end

0 commit comments

Comments
 (0)