|
| 1 | +# ------------------------------------------------------------------ |
| 2 | +# Licensed under the MIT License. See LICENSE in the project root. |
| 3 | +# ------------------------------------------------------------------ |
| 4 | + |
"""
    Map(cols₁ => fun₁, cols₂ => fun₂ => target₂, ..., colsₙ => funₙ => targetₙ)

For each pair, calls `map(funᵢ, columns...)` on the columns selected by
`colsᵢ` and stores the result in a column named `targetᵢ`.

The target name is optional. When it is omitted, the name is built by
joining the selected column names and the function name with underscores.
A target that matches the name of an existing column replaces that column;
any other target is appended as a new column.

The column selection `colsᵢ` can be a single column identifier
(index or name), a collection of identifiers or a regular expression (regex).

# Examples

```julia
Map(1 => sin)
Map(:a => sin, "b" => cos => :b_cos)
Map([2, 3] => ((b, c) -> 2b + c))
Map([:a, :c] => ((a, c) -> 2a * 3c) => :col1)
Map(["c", "a"] => ((c, a) -> 3c / a) => :col1, "c" => tan)
Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1")
```
"""
struct Map <: StatelessFeatureTransform
  # column selection of each mapping
  colspecs::Vector{ColSpec}
  # function applied to the selected columns of each mapping
  funs::Vector{Function}
  # target column name of each mapping (`nothing` => name is generated)
  targets::Vector{Union{Nothing,Symbol}}
end
| 33 | + |
# `Map` can be reverted: `revertfeat` rebuilds the original columns from
# the `(onames, rnames, rcolumns)` cache returned by `applyfeat`.
function isrevertible(::Type{Map})
  true
end
| 35 | + |
# utility types
# names accepted as the target of a mapping
const TargetName = Union{Symbol,AbstractString}
# `cols => fun` (no explicit target)
const PairWithoutTarget = Pair{C,F} where {C,F<:Function}
# `cols => fun => target`
const PairWithTarget = Pair{C,P} where {C,P<:Pair{<:Function,<:TargetName}}
# any pair accepted by the `Map` constructor
const MapPair = Union{PairWithoutTarget,PairWithTarget}

# utility functions
# Split a mapping pair into `(selection, function, target)`;
# the target is `nothing` when the pair does not carry one.
function _extract(p::PairWithTarget)
  inner = p.second
  p.first, inner.first, inner.second
end

function _extract(p::PairWithoutTarget)
  p.first, p.second, nothing
end
| 45 | + |
# Build the column specification for a user-provided selection.
# A single `Col` value is wrapped in a one-element vector before
# being handed to `colspec`.
function _colspec(spec)
  colspec(spec)
end

function _colspec(col::Col)
  colspec([col])
end
| 48 | + |
# Normalize a target name: strings become symbols,
# anything else (including `nothing`) is returned untouched.
function _target(name::AbstractString)
  Symbol(name)
end

function _target(name)
  name
end
| 51 | + |
# Build a `Map` from `cols => fun` and `cols => fun => target` pairs.
# Each pair contributes one entry to the three parallel vectors
# stored in the struct (selection, function, target name).
function Map(pairs::MapPair...)
  colspecs = ColSpec[]
  funs = Function[]
  targets = Union{Nothing,Symbol}[]

  for p in pairs
    spec, fun, name = _extract(p)
    push!(colspecs, _colspec(spec))
    push!(funs, fun)
    push!(targets, _target(name))
  end

  Map(colspecs, funs, targets)
end
| 62 | + |
# Generate a column name by joining the selected column names
# and the name of the function with underscores.
function _makename(snames, fun)
  parts = vcat(snames, nameof(fun))
  Symbol(join(parts, "_"))
end
| 64 | + |
# Evaluate every mapping stored in `transform` against `table`.
# Returns one `name => column` pair per mapping, where `column` is
# the result of `map(fun, selected columns...)`.
function preprocess(transform::Map, table)
  cols = Tables.columns(table)
  names = Tables.columnnames(cols)

  map(transform.colspecs, transform.funs, transform.targets) do spec, fun, target
    selected = choose(spec, names)

    # fall back to a generated name when no explicit target was given
    outname = if isnothing(target)
      _makename(selected, fun)
    else
      target
    end

    inputs = [Tables.getcolumn(cols, nm) for nm in selected]
    outname => map(fun, inputs...)
  end
end
| 81 | + |
# Build the transformed table from `feat` and the `name => column`
# pairs in `prep`. A pair whose name matches an existing column
# overwrites that column in place; any other pair is appended.
# Returns the new table together with a cache holding the original
# column names plus the names and contents of the overwritten columns.
function applyfeat(::Map, feat, prep)
  cols = Tables.columns(feat)
  onames = Tables.columnnames(cols)

  # working copies that accumulate the output names and columns
  names = collect(onames)
  columns = Any[Tables.getcolumn(cols, nm) for nm in onames]

  # names and contents of the columns that get overwritten
  rnames = empty(names)
  rcolumns = empty(columns)

  for (name, column) in prep
    i = findfirst(==(name), onames)
    if isnothing(i)
      # brand new column
      push!(names, name)
      push!(columns, column)
    else
      # remember what is being overwritten, then overwrite it
      push!(rnames, name)
      push!(rcolumns, columns[i])
      columns[i] = column
    end
  end

  result = (; zip(names, columns)...)
  sink = Tables.materializer(feat)
  sink(result), (onames, rnames, rcolumns)
end
| 110 | + |
# Rebuild the table with the original column names stored in `fcache`.
# Columns listed in `rnames` are taken from the cached `rcolumns`
# (first match wins); all other columns are read from `newfeat`.
function revertfeat(::Map, newfeat, fcache)
  cols = Tables.columns(newfeat)
  onames, rnames, rcolumns = fcache

  ocolumns = map(onames) do name
    i = findfirst(==(name), rnames)
    isnothing(i) ? Tables.getcolumn(cols, name) : rcolumns[i]
  end

  original = (; zip(onames, ocolumns)...)
  sink = Tables.materializer(newfeat)
  sink(original)
end
0 commit comments