Skip to content

Commit 41d588b

Browse files
authored
Remove reversibility from transforms: Coalesce, DropExtrema, DropMissing, Filter, Satisfies, Select and Reject (#257)
* Remove reversibility from transforms: 'Coalesce', 'DropExtrema', 'DropMissing', 'Filter', 'Satisfies', 'Select' and 'Reject'
* Fix tests
1 parent 50ccdc6 commit 41d588b

15 files changed

+35
-319
lines changed

src/transforms/coalesce.jl

+2-4
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,8 @@ Coalesce(; value) = Coalesce(AllSelector(), value)
4242
Coalesce(cols; value) = Coalesce(selector(cols), value)
4343
Coalesce(cols::C...; value) where {C<:Column} = Coalesce(selector(cols), value)
4444

45-
isrevertible(::Type{<:Coalesce}) = true
45+
isrevertible(::Type{<:Coalesce}) = false
4646

47-
colcache(::Coalesce, x) = findall(ismissing, x)
47+
colcache(::Coalesce, x) = nothing
4848

4949
colapply(transform::Coalesce, x, c) = coalesce.(x, transform.value)
50-
51-
colrevert(::Coalesce, y, c) = [i ∈ c ? missing : y[i] for i in 1:length(y)]

src/transforms/dropextrema.jl

+3-8
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ DropExtrema(; low=0.25, high=0.75) = DropExtrema(AllSelector(), low, high)
5050
DropExtrema(cols; low=0.25, high=0.75) = DropExtrema(selector(cols), low, high)
5151
DropExtrema(cols::C...; low=0.25, high=0.75) where {C<:Column} = DropExtrema(selector(cols), low, high)
5252

53-
isrevertible(::Type{<:DropExtrema}) = true
53+
isrevertible(::Type{<:DropExtrema}) = false
5454

5555
function preprocess(transform::DropExtrema, feat)
5656
cols = Tables.columns(feat)
@@ -71,11 +71,6 @@ end
7171

7272
function applyfeat(::DropExtrema, feat, prep)
7373
ftrans, fprep = prep
74-
newfeat, ffcache = applyfeat(ftrans, feat, fprep)
75-
newfeat, (ftrans, ffcache)
76-
end
77-
78-
function revertfeat(::DropExtrema, newfeat, fcache)
79-
ftrans, ffcache = fcache
80-
revertfeat(ftrans, newfeat, ffcache)
74+
newfeat, _ = applyfeat(ftrans, feat, fprep)
75+
newfeat, nothing
8176
end

src/transforms/dropmissing.jl

+3-23
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ DropMissing() = DropMissing(AllSelector())
4141
DropMissing(cols) = DropMissing(selector(cols))
4242
DropMissing(cols::C...) where {C<:Column} = DropMissing(selector(cols))
4343

44-
isrevertible(::Type{<:DropMissing}) = true
44+
isrevertible(::Type{<:DropMissing}) = false
4545

4646
function preprocess(transform::DropMissing, feat)
4747
cols = Tables.columns(feat)
@@ -60,7 +60,7 @@ _nonmissing(::Type{Union{Missing,T}}, x) where {T} = collect(T, x)
6060
function applyfeat(::DropMissing, feat, prep)
6161
# apply filter transform
6262
ftrans, fprep, snames = prep
63-
newfeat, ffcache = applyfeat(ftrans, feat, fprep)
63+
newfeat, _ = applyfeat(ftrans, feat, fprep)
6464

6565
# drop Missing type
6666
cols = Tables.columns(newfeat)
@@ -72,25 +72,5 @@ function applyfeat(::DropMissing, feat, prep)
7272
𝒯 = (; zip(names, columns)...)
7373
newfeat = 𝒯 |> Tables.materializer(feat)
7474

75-
# original column types
76-
types = Tables.schema(feat).types
77-
78-
newfeat, (ftrans, ffcache, snames, types)
79-
end
80-
81-
function revertfeat(::DropMissing, newfeat, fcache)
82-
ftrans, ffcache, snames, types = fcache
83-
84-
# reintroduce Missing type
85-
cols = Tables.columns(newfeat)
86-
names = Tables.columnnames(cols)
87-
columns = map(types, names) do T, name
88-
x = Tables.getcolumn(cols, name)
89-
name ∈ snames ? collect(T, x) : x
90-
end
91-
𝒯 = (; zip(names, columns)...)
92-
ofeat = 𝒯 |> Tables.materializer(newfeat)
93-
94-
# revert filter transform
95-
revertfeat(ftrans, ofeat, ffcache)
75+
newfeat, nothing
9676
end

src/transforms/filter.jl

+6-36
Original file line numberDiff line numberDiff line change
@@ -27,58 +27,28 @@ struct Filter{F} <: StatelessFeatureTransform
2727
pred::F
2828
end
2929

30-
isrevertible(::Type{<:Filter}) = true
30+
isrevertible(::Type{<:Filter}) = false
3131

3232
function preprocess(transform::Filter, feat)
3333
# lazy row iterator
3434
rows = tablerows(feat)
3535

3636
# selected indices
37-
sinds, nrows = Int[], 0
37+
sinds = Int[]
3838
for (i, row) in enumerate(rows)
3939
transform.pred(row) && push!(sinds, i)
40-
nrows += 1
4140
end
4241

43-
# rejected indices
44-
rinds = setdiff(1:nrows, sinds)
45-
46-
sinds, rinds
42+
sinds
4743
end
4844

4945
function applyfeat(::Filter, feat, prep)
5046
# preprocessed indices
51-
sinds, rinds = prep
47+
sinds = prep
5248

53-
# selected/rejected rows
49+
# selected rows
5450
srows = Tables.subset(feat, sinds, viewhint=true)
55-
rrows = Tables.subset(feat, rinds, viewhint=true)
5651

5752
newfeat = srows |> Tables.materializer(feat)
58-
59-
newfeat, (rinds, rrows)
60-
end
61-
62-
function revertfeat(::Filter, newfeat, fcache)
63-
cols = Tables.columns(newfeat)
64-
names = Tables.columnnames(cols)
65-
66-
rinds, rrows = fcache
67-
68-
# columns with selected rows
69-
columns = map(names) do name
70-
collect(Tables.getcolumn(cols, name))
71-
end
72-
73-
# insert rejected rows into columns
74-
rrcols = Tables.columns(rrows)
75-
for (name, x) in zip(names, columns)
76-
r = Tables.getcolumn(rrcols, name)
77-
for (i, v) in zip(rinds, r)
78-
insert!(x, i, v)
79-
end
80-
end
81-
82-
𝒯 = (; zip(names, columns)...)
83-
𝒯 |> Tables.materializer(newfeat)
53+
newfeat, nothing
8454
end

src/transforms/satisfies.jl

+3-8
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ struct Satisfies{F} <: StatelessFeatureTransform
1919
pred::F
2020
end
2121

22-
isrevertible(::Type{<:Satisfies}) = true
22+
isrevertible(::Type{<:Satisfies}) = false
2323

2424
function applyfeat(transform::Satisfies, feat, prep)
2525
pred = transform.pred
@@ -30,13 +30,8 @@ function applyfeat(transform::Satisfies, feat, prep)
3030
pred(x)
3131
end
3232
strans = Select(snames)
33-
newfeat, sfcache = applyfeat(strans, feat, prep)
34-
newfeat, (strans, sfcache)
35-
end
36-
37-
function revertfeat(::Satisfies, newfeat, fcache)
38-
strans, sfcache = fcache
39-
revertfeat(strans, newfeat, sfcache)
33+
newfeat, _ = applyfeat(strans, feat, prep)
34+
newfeat, nothing
4035
end
4136

4237
"""

src/transforms/select.jl

+5-48
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ Select(pairs::Pair{C,S}...) where {C<:Column,S<:AbstractString} =
4545

4646
Select() = throw(ArgumentError("cannot create Select transform without arguments"))
4747

48-
isrevertible(::Type{<:Select}) = true
48+
isrevertible(::Type{<:Select}) = false
4949

5050
# utils
5151
_newnames(::Nothing, select) = select
@@ -54,50 +54,12 @@ _newnames(names::Vector{Symbol}, select) = names
5454
function applyfeat(transform::Select, feat, prep)
5555
cols = Tables.columns(feat)
5656
names = collect(Tables.columnnames(cols))
57-
58-
# retrieve relevant column names
5957
select = transform.selector(names)
60-
reject = setdiff(names, select)
6158
newnames = _newnames(transform.newnames, select)
62-
63-
# keep track of indices to revert later
64-
sinds = indexin(select, names)
65-
rinds = indexin(reject, names)
66-
67-
# sort indices to facilitate reinsertion
68-
sperm = sortperm(sinds)
69-
70-
# rejected columns
71-
rcolumns = [Tables.getcolumn(cols, name) for name in reject]
72-
73-
fcache = (select, sperm, reject, rcolumns, rinds)
7459
newfeat = TableSelection(feat, newnames, select)
75-
newfeat, fcache
76-
end
77-
78-
function revertfeat(::Select, newfeat, fcache)
79-
cols = Tables.columns(newfeat)
80-
names = Tables.columnnames(cols)
81-
# https://github.com/JuliaML/TableTransforms.jl/issues/76
82-
columns = Any[Tables.getcolumn(cols, name) for name in names]
83-
84-
select, sperm, reject, rcolumns, rinds = fcache
85-
86-
# restore rejected columns
87-
onames = select[sperm]
88-
ocolumns = columns[sperm]
89-
for (i, rind) in enumerate(rinds)
90-
insert!(onames, rind, reject[i])
91-
insert!(ocolumns, rind, rcolumns[i])
92-
end
93-
94-
𝒯 = (; zip(onames, ocolumns)...)
95-
𝒯 |> Tables.materializer(newfeat)
60+
newfeat, nothing
9661
end
9762

98-
# reverting a single TableSelection is trivial
99-
revertfeat(::Select, newfeat::TableSelection, fcache) = newfeat.table
100-
10163
"""
10264
Reject(col₁, col₂, ..., colₙ)
10365
Reject([col₁, col₂, ..., colₙ])
@@ -129,19 +91,14 @@ Reject(cols::C...) where {C<:Column} = Reject(selector(cols))
12991
Reject() = throw(ArgumentError("cannot create Reject transform without arguments"))
13092
Reject(::AllSelector) = throw(ArgumentError("cannot reject all columns"))
13193

132-
isrevertible(::Type{<:Reject}) = true
94+
isrevertible(::Type{<:Reject}) = false
13395

13496
function applyfeat(transform::Reject, feat, prep)
13597
cols = Tables.columns(feat)
13698
names = Tables.columnnames(cols)
13799
reject = transform.selector(names)
138100
select = setdiff(names, reject)
139101
strans = Select(select)
140-
newfeat, sfcache = applyfeat(strans, feat, prep)
141-
newfeat, (strans, sfcache)
142-
end
143-
144-
function revertfeat(::Reject, newfeat, fcache)
145-
strans, sfcache = fcache
146-
revertfeat(strans, newfeat, sfcache)
102+
newfeat, _ = applyfeat(strans, feat, prep)
103+
newfeat, nothing
147104
end

test/metadata.jl

-4
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
tn, tc = apply(T, t)
1414
@test mn.meta == m
1515
@test mn.table == tn
16-
mtₒ = revert(T, mn, mc)
17-
@test mtₒ == mt
1816

1917
T = Rename(:a => :x, :c => :y)
2018
mn, mc = apply(T, mt)
@@ -57,8 +55,6 @@
5755
tn, tc = apply(T, t)
5856
@test mn.meta == VarMeta(m.data .+ 2)
5957
@test mn.table == tn
60-
mtₒ = revert(T, mn, mc)
61-
@test mtₒ == mt
6258

6359
T = Rename(:b => :x, :d => :y)
6460
mn, mc = apply(T, mt)

test/transforms/coalesce.jl

+3-20
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
@testset "Coalesce" begin
2+
@test !isrevertible(Coalesce(value=0))
3+
24
a = [3, 2, missing, 4, 5, 3]
35
b = [missing, 4, 4, 5, 8, 5]
46
c = [1, 1, 6, 2, 4, missing]
@@ -16,41 +18,22 @@
1618
@test n.e == [0, 5, 2, 6, 5, 2]
1719
@test n.f == [4, 0, 3, 4, 5, 2]
1820

19-
# revert test
20-
@test isrevertible(T)
21-
tₒ = revert(T, n, c)
22-
cols = Tables.columns(t)
23-
colsₒ = Tables.columns(tₒ)
24-
colnames = Tables.columnnames(t)
25-
for n in colnames
26-
col = Tables.getcolumn(cols, n)
27-
colₒ = Tables.getcolumn(colsₒ, n)
28-
@test isequal(col, colₒ)
29-
end
30-
31-
# table schema after apply and revert
21+
# table schema after apply
3222
T = Coalesce(value=0)
3323
n, c = apply(T, t)
34-
tₒ = revert(T, n, c)
35-
ttypes = Tables.schema(t).types
3624
ntypes = Tables.schema(n).types
3725
@test ntypes[1] == Int
3826
@test ntypes[2] == Int
3927
@test ntypes[3] == Int
4028
@test ntypes[4] == Int
4129
@test ntypes[5] == Int
4230
@test ntypes[6] == Int
43-
@test ttypes == Tables.schema(tₒ).types
4431

4532
# row table
4633
rt = Tables.rowtable(t)
4734
T = Coalesce(value=0)
4835
n, c = apply(T, rt)
4936
@test Tables.isrowtable(n)
50-
rtₒ = revert(T, n, c)
51-
for (row, rowₒ) in zip(rt, rtₒ)
52-
@test isequal(row, rowₒ)
53-
end
5437

5538
# colspec
5639
T = Coalesce(1, 3, 5, value=0)

test/transforms/dropextrema.jl

+1-13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
@testset "DropExtrema" begin
2-
@test isrevertible(DropExtrema(:a))
2+
@test !isrevertible(DropExtrema(:a))
33

44
a = [6.9, 9.0, 7.8, 0.0, 5.1, 4.8, 1.1, 8.0, 5.4, 7.9]
55
b = [7.7, 4.2, 6.3, 1.4, 4.4, 0.5, 3.0, 6.1, 1.9, 1.5]
@@ -17,8 +17,6 @@
1717
@test n.d == [1.0, 6.2, 8.1, 4.1]
1818
@test n.e == [1.5, 4.1, 5.9, 2.4]
1919
@test n.f == [1.9, 9.0, 1.3, 5.1]
20-
tₒ = revert(T, n, c)
21-
@test t == tₒ
2220

2321
T = DropExtrema(1, 2)
2422
n, c = apply(T, t)
@@ -28,8 +26,6 @@
2826
@test n.d == [8.1, 4.1]
2927
@test n.e == [5.9, 2.4]
3028
@test n.f == [1.3, 5.1]
31-
tₒ = revert(T, n, c)
32-
@test t == tₒ
3329

3430
T = DropExtrema(:c, low=0.3, high=0.7)
3531
n, c = apply(T, t)
@@ -39,8 +35,6 @@
3935
@test n.d == [1.0, 6.2, 6.2, 6.9]
4036
@test n.e == [1.5, 4.1, 1.3, 3.5]
4137
@test n.f == [1.9, 9.0, 8.9, 3.8]
42-
tₒ = revert(T, n, c)
43-
@test t == tₒ
4438

4539
T = DropExtrema([:c, :d], low=0.3, high=0.7)
4640
n, c = apply(T, t)
@@ -50,8 +44,6 @@
5044
@test n.d == [6.2, 6.2]
5145
@test n.e == [4.1, 1.3]
5246
@test n.f == [9.0, 8.9]
53-
tₒ = revert(T, n, c)
54-
@test t == tₒ
5547

5648
T = DropExtrema("e", low=0.2, high=0.8)
5749
n, c = apply(T, t)
@@ -61,8 +53,6 @@
6153
@test n.d == [6.2, 1.9, 8.1, 4.0, 6.9, 4.1]
6254
@test n.e == [4.1, 1.6, 5.9, 4.9, 3.5, 2.4]
6355
@test n.f == [9.0, 6.2, 1.3, 6.2, 3.8, 5.1]
64-
tₒ = revert(T, n, c)
65-
@test t == tₒ
6656

6757
T = DropExtrema(("e", "f"), low=0.2, high=0.8)
6858
n, c = apply(T, t)
@@ -72,8 +62,6 @@
7262
@test n.d == [1.9, 4.0, 6.9, 4.1]
7363
@test n.e == [1.6, 4.9, 3.5, 2.4]
7464
@test n.f == [6.2, 6.2, 3.8, 5.1]
75-
tₒ = revert(T, n, c)
76-
@test t == tₒ
7765

7866
# error: invalid quantiles
7967
@test_throws AssertionError DropExtrema(:a, low=0, high=1.4)

0 commit comments

Comments
 (0)