|
3 | 3 | # ------------------------------------------------------------------
|
4 | 4 |
|
5 | 5 | """
|
6 |
| - DropExtrema(col; low=0.25, high=0.75) |
| 6 | + DropExtrema(; low=0.25, high=0.75) |
7 | 7 |
|
8 |
| -Drops the rows where the values in the column `col` are outside the interval |
9 |
| -`[quantile(col, low), quantile(col, high)]`. |
| 8 | +Drops rows where the value in any column `col` |
| 9 | +is outside the interval `[quantile(col, low), quantile(col, high)]`. |
| 10 | +
|
| 11 | + DropExtrema(col₁, col₂, ..., colₙ; low=0.25, high=0.75) |
| 12 | + DropExtrema([col₁, col₂, ..., colₙ]; low=0.25, high=0.75) |
| 13 | + DropExtrema((col₁, col₂, ..., colₙ); low=0.25, high=0.75) |
| 14 | +
|
| 15 | +Drops rows where any of the values in columns `col₁`, `col₂`, ..., `colₙ` |
| 16 | +are outside the interval. |
| 17 | +
|
| 18 | + DropExtrema(regex; low=0.25, high=0.75) |
| 19 | +
|
| 20 | +Drops rows where any of the values in columns whose names match `regex` |
| 21 | +are outside the interval. |
10 | 22 |
|
11 | 23 | # Examples
|
12 | 24 |
|
13 | 25 | ```julia
|
14 |
| -DropExtrema(1) |
| 26 | +DropExtrema(low=0.3, high=0.7) |
| 27 | +DropExtrema(1, low=0.3, high=0.7) |
15 | 28 | DropExtrema(:a, low=0.2, high=0.8)
|
16 | 29 | DropExtrema("a", low=0.3, high=0.7)
|
| 30 | +DropExtrema(1, 3, 5, low=0, high=1) |
| 31 | +DropExtrema([:a, :c, :e], low=0.3, high=0.7) |
| 32 | +DropExtrema(("a", "c", "e"), low=0.25, high=0.75) |
| 33 | +DropExtrema(r"[ace]", low=0.3, high=0.7) |
17 | 34 | ```
|
18 | 35 | """
|
19 |
| -struct DropExtrema{S<:SingleColumnSelector,T} <: StatelessFeatureTransform |
| 36 | +struct DropExtrema{S<:ColumnSelector,T} <: StatelessFeatureTransform |
20 | 37 | selector::S
|
21 | 38 | low::T
|
22 | 39 | high::T
|
23 | 40 |
|
24 |
| - function DropExtrema(selector::S, low::T, high::T) where {S<:SingleColumnSelector,T} |
| 41 | + function DropExtrema(selector::S, low::T, high::T) where {S<:ColumnSelector,T} |
25 | 42 | _assert(0 ≤ low ≤ high ≤ 1, "invalid quantiles")
|
26 | 43 | new{S,T}(selector, low, high)
|
27 | 44 | end
|
28 | 45 | end
|
29 | 46 |
|
30 |
| -DropExtrema(selector::SingleColumnSelector, low, high) = DropExtrema(selector, promote(low, high)...) |
31 |
| -DropExtrema(col::Column; low=0.25, high=0.75) = DropExtrema(selector(col), low, high) |
| 47 | +DropExtrema(selector::ColumnSelector, low, high) = DropExtrema(selector, promote(low, high)...) |
| 48 | + |
| 49 | +DropExtrema(; low=0.25, high=0.75) = DropExtrema(AllSelector(), low, high) |
| 50 | +DropExtrema(cols; low=0.25, high=0.75) = DropExtrema(selector(cols), low, high) |
| 51 | +DropExtrema(cols::C...; low=0.25, high=0.75) where {C<:Column} = DropExtrema(selector(cols), low, high) |
32 | 52 |
|
33 | 53 | isrevertible(::Type{<:DropExtrema}) = true
|
34 | 54 |
|
35 | 55 | function preprocess(transform::DropExtrema, feat)
|
36 | 56 | cols = Tables.columns(feat)
|
37 | 57 | names = Tables.columnnames(cols)
|
38 |
| - sname = selectsingle(transform.selector, names) |
| 58 | + snames = transform.selector(names) |
39 | 59 |
|
40 |
| - x = Tables.getcolumn(cols, sname) |
41 |
| - low = convert(eltype(x), transform.low) |
42 |
| - high = convert(eltype(x), transform.high) |
43 |
| - xl, xh = quantile(x, (low, high)) |
| 60 | + limits = map(snames) do name |
| 61 | + x = Tables.getcolumn(cols, name) |
| 62 | + low = convert(eltype(x), transform.low) |
| 63 | + high = convert(eltype(x), transform.high) |
| 64 | + name => quantile(x, (low, high)) |
| 65 | + end |
44 | 66 |
|
45 |
| - ftrans = Filter(row -> xl ≤ row[sname] ≤ xh) |
| 67 | + ftrans = Filter(row -> all(xl ≤ row[nm] ≤ xh for (nm, (xl, xh)) in limits)) |
46 | 68 | fprep = preprocess(ftrans, feat)
|
47 | 69 | ftrans, fprep
|
48 | 70 | end
|
|
0 commit comments