Skip to content

Commit 10d470d

Browse files
authored
Add Map transform (#187)
* [WIP] Map transform * Add docstring * Fix typo * Update docstring * Add to docs * Update docstring * Apply suggestions * Add more tests * Apply suggestions
1 parent 9aeb688 commit 10d470d

File tree

7 files changed

+223
-0
lines changed

7 files changed

+223
-0
lines changed

docs/src/transforms.md

+6
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ Filter
5050
DropMissing
5151
```
5252

53+
## Map
54+
55+
```@docs
56+
Map
57+
```
58+
5359
## Replace
5460

5561
```@docs

src/TableTransforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ export
5353
Sample,
5454
Filter,
5555
DropMissing,
56+
Map,
5657
Replace,
5758
Coalesce,
5859
Coerce,

src/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ include("transforms/stdnames.jl")
280280
include("transforms/sort.jl")
281281
include("transforms/sample.jl")
282282
include("transforms/filter.jl")
283+
include("transforms/map.jl")
283284
include("transforms/replace.jl")
284285
include("transforms/coalesce.jl")
285286
include("transforms/coerce.jl")

src/transforms/map.jl

+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# ------------------------------------------------------------------
2+
# Licensed under the MIT License. See LICENSE in the project root.
3+
# ------------------------------------------------------------------
4+
5+
"""
    Map(cols₁ => fun₁, cols₂ => fun₂ => target₂, ..., colsₙ => funₙ => targetₙ)

Create new columns by calling `map` with the function `funᵢ` over the
columns selected by `colsᵢ`, storing each result under the name `targetᵢ`.

* `colsᵢ` can be a single column identifier (index or name), a collection
  of identifiers or a regular expression (regex).
* `targetᵢ` is optional. When it is omitted, the new name is generated by
  joining the selected column names and the function name with `_`.
* When the target name already exists in the table, the existing column
  is replaced by the new one.

# Examples

```julia
Map(1 => sin)
Map(:a => sin, "b" => cos => :b_cos)
Map([2, 3] => ((b, c) -> 2b + c))
Map([:a, :c] => ((a, c) -> 2a * 3c) => :col1)
Map(["c", "a"] => ((c, a) -> 3c / a) => :col1, "c" => tan)
Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1")
```
"""
struct Map <: StatelessFeatureTransform
  # one column selection per mapped function
  colspecs::Vector{ColSpec}
  # function applied element-wise (via `map`) to the selected columns
  funs::Vector{Function}
  # name of the resulting column; `nothing` means "generate a name"
  targets::Vector{Union{Nothing,Symbol}}
end
33+
34+
# Map only appends or replaces columns and caches the replaced ones,
# so the original table can be rebuilt by `revertfeat`.
isrevertible(::Type{Map}) = true
35+
36+
# ---------------
# utility types
# ---------------

# a target column name as accepted from the user
const TargetName = Union{AbstractString,Symbol}

# `cols => fun => target`
const PairWithTarget = Pair{<:Any,<:Pair{<:Function,<:TargetName}}

# `cols => fun`
const PairWithoutTarget = Pair{<:Any,<:Function}

# any pair accepted by the `Map` constructor
const MapPair = Union{PairWithoutTarget,PairWithTarget}

# -------------------
# utility functions
# -------------------

# split a user pair into `(selection, function, target)`;
# the target is `nothing` when the user did not provide one
function _extract(p::PairWithoutTarget)
  (p.first, p.second, nothing)
end

function _extract(p::PairWithTarget)
  inner = p.second
  (p.first, inner.first, inner.second)
end
45+
46+
# build a column selection from the user input;
# a single column identifier is wrapped in a vector first
function _colspec(col::Col)
  colspec([col])
end

function _colspec(spec)
  colspec(spec)
end
48+
49+
# normalize a target name: strings become symbols,
# anything else (symbol or `nothing`) is passed through untouched
function _target(name::AbstractString)
  Symbol(name)
end

function _target(name)
  name
end
51+
52+
# User-facing constructor: accepts any number of `cols => fun` or
# `cols => fun => target` pairs and stores their parts in three
# parallel vectors (selection, function, target name).
function Map(pairs::MapPair...)
  colspecs = ColSpec[]
  funs = Function[]
  targets = Union{Nothing,Symbol}[]

  for pair in pairs
    spec, fun, name = _extract(pair)
    push!(colspecs, _colspec(spec))
    push!(funs, fun)
    push!(targets, _target(name))
  end

  Map(colspecs, funs, targets)
end
62+
63+
# default target name: selected column names followed by
# the function name, all joined with underscores
function _makename(snames, fun)
  parts = vcat(snames, nameof(fun))
  Symbol(join(parts, "_"))
end
64+
65+
# Compute every new column up front. Returns one `name => column`
# pair per (selection, function, target) triple stored in the transform.
function preprocess(transform::Map, table)
  cols = Tables.columns(table)
  names = Tables.columnnames(cols)

  map(transform.colspecs, transform.funs, transform.targets) do spec, fun, target
    # resolve the selection against the actual column names
    selected = choose(spec, names)

    # apply the function element-wise over the selected columns
    inputs = [Tables.getcolumn(cols, nm) for nm in selected]
    result = map(fun, inputs...)

    # fall back to a generated name when no target was given
    outname = if isnothing(target)
      _makename(selected, fun)
    else
      target
    end

    outname => result
  end
end
81+
82+
# Build the transformed table from `feat` and the `name => column` pairs
# in `prep`. A pair whose name matches an existing column replaces that
# column in place; any other pair is appended as a new column.
#
# Returns the new table and a cache `(onames, rnames, rcolumns)` holding
# the original column names plus the names and contents of the columns
# that were replaced, which is what `revertfeat` needs.
function applyfeat(::Map, feat, prep)
  cols = Tables.columns(feat)
  onames = Tables.columnnames(cols)

  # new names and columns
  names = collect(onames)
  columns = Any[Tables.getcolumn(cols, nm) for nm in onames]

  # replaced names and columns
  rnames = empty(names)
  rcolumns = empty(columns)

  for (name, column) in prep
    # a single lookup tells us both whether the column
    # already exists and where it is located
    i = findfirst(==(name), onames)
    if isnothing(i)
      push!(names, name)
      push!(columns, column)
    else
      # remember what is being overwritten so it can be restored
      push!(rnames, name)
      push!(rcolumns, columns[i])
      columns[i] = column
    end
  end

  𝒯 = (; zip(names, columns)...)
  newfeat = 𝒯 |> Tables.materializer(feat)
  newfeat, (onames, rnames, rcolumns)
end
110+
111+
# Rebuild the original table from the transformed table `newfeat` and
# the cache `(onames, rnames, rcolumns)`. Only the original column names
# are kept; a column that was replaced is taken from the cache, any
# other column is read back from `newfeat`.
function revertfeat(::Map, newfeat, fcache)
  cols = Tables.columns(newfeat)
  onames, rnames, rcolumns = fcache

  ocolumns = map(onames) do name
    # first match holds the contents saved before any replacement
    i = findfirst(==(name), rnames)
    isnothing(i) ? Tables.getcolumn(cols, name) : rcolumns[i]
  end

  𝒯 = (; zip(onames, ocolumns)...)
  𝒯 |> Tables.materializer(newfeat)
end

test/shows.jl

+17
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,23 @@
133133
└─ colspec = [:a, :b, :c]"""
134134
end
135135

136+
@testset "Map" begin
  # one named function and one anonymous function with explicit target
  fun = (a, b) -> 2a + b
  T = Map(:a => sin, [:a, :b] => fun => :c)

  # compact mode
  compact = sprint(show, T)
  expected = "Map(TableTransforms.ColSpec[[:a], [:a, :b]], Function[sin, $(typeof(fun))()], Union{Nothing, Symbol}[nothing, :c])"
  @test compact == expected

  # full mode
  full = sprint(show, MIME("text/plain"), T)
  expected = """
  Map transform
  ├─ colspecs = TableTransforms.ColSpec[[:a], [:a, :b]]
  ├─ funs = Function[sin, $(typeof(fun))()]
  └─ targets = Union{Nothing, Symbol}[nothing, :c]"""
  @test full == expected
end
152+
136153
@testset "Replace" begin
137154
pairs = IdDict(1 => -1, 5 => -5)
138155
T = Replace(pairs)

test/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ transformfiles = [
55
"sort.jl",
66
"sample.jl",
77
"filter.jl",
8+
"map.jl",
89
"replace.jl",
910
"coalesce.jl",
1011
"coerce.jl",

test/transforms/map.jl

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
@testset "Map" begin
  t = Table(
    a=[4, 7, 8, 5, 8, 1],
    b=[1, 9, 1, 7, 9, 4],
    c=[2, 8, 6, 3, 2, 2],
    d=[7, 5, 9, 5, 3, 4]
  )

  # column selected by index, generated target name
  T = Map(1 => sin)
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d, :a_sin)
  @test n.a_sin == sin.(t.a)
  @test t == revert(T, n, cache)

  # column selected by symbol, generated target name
  T = Map(:b => cos)
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d, :b_cos)
  @test n.b_cos == cos.(t.b)
  @test t == revert(T, n, cache)

  # column selected by string, generated target name
  T = Map("c" => tan)
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d, :c_tan)
  @test n.c_tan == tan.(t.c)
  @test t == revert(T, n, cache)

  # symbol target that replaces an existing column
  T = Map(:a => sin => :a)
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d)
  @test n.a == sin.(t.a)
  @test t == revert(T, n, cache)

  # string target that replaces an existing column
  T = Map(:a => sin => "a")
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d)
  @test n.a == sin.(t.a)
  @test t == revert(T, n, cache)

  # multiple columns selected by index
  T = Map([2, 3] => ((b, c) -> 2b + c) => :op1)
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
  @test n.op1 == @. 2 * t.b + t.c
  @test t == revert(T, n, cache)

  # multiple columns selected by symbol
  T = Map([:a, :c] => ((a, c) -> 2a * 3c) => :op1)
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
  @test n.op1 == @. 2 * t.a * 3 * t.c
  @test t == revert(T, n, cache)

  # multiple columns selected by string, plus a second pair
  T = Map(["c", "a"] => ((c, a) -> 3c / a) => :op1, "c" => tan)
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d, :op1, :c_tan)
  @test n.op1 == @. 3 * t.c / t.a
  @test n.c_tan == tan.(t.c)
  @test t == revert(T, n, cache)

  # columns selected by regex
  T = Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "op1")
  n, cache = apply(T, t)
  @test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
  @test n.op1 == @. t.a^2 - 2 * t.b + t.c
  @test t == revert(T, n, cache)
end

0 commit comments

Comments
 (0)