Skip to content

Commit 1e610c2

Browse files
committed
Some optimizations in the 'DropMissing' transform
1 parent 0f48b1d commit 1e610c2

File tree

1 file changed

+15
-20
lines changed

1 file changed

+15
-20
lines changed

src/transforms/dropmissing.jl

+15-20
Original file line numberDiff line numberDiff line change
@@ -43,24 +43,19 @@ DropMissing(cols::T...) where {T<:Col} = DropMissing(colspec(cols))
4343

4444
# DropMissing (with any type parameters) is declared revertible
function isrevertible(::Type{<:DropMissing})
  return true
end
4545

46-
_ftrans(::DropMissing{AllSpec}, snames) = Filter(row -> all(!ismissing, row))
47-
_ftrans(::DropMissing, snames) = Filter(row -> all(!ismissing, row[nm] for nm in snames))
48-
49-
# nonmissing
50-
_nonmissing(::Type{T}, x) where {T} = x
51-
_nonmissing(::Type{Union{Missing,T}}, x) where {T} = collect(T, x)
52-
_nonmissing(::Type{Missing}, x) = []
53-
_nonmissing(x) = _nonmissing(eltype(x), x)
54-
5546
function preprocess(transform::DropMissing, table)
56-
schema = Tables.schema(table)
57-
names = schema.names
47+
names = Tables.schema(table).names
5848
snames = choose(transform.colspec, names)
59-
ftrans = _ftrans(transform, snames)
49+
ftrans = Filter(row -> all(!ismissing(row[nm]) for nm in snames))
6050
fprep = preprocess(ftrans, table)
6151
ftrans, fprep, snames
6252
end
6353

54+
# entry point: forward to the method selected by the element type of `x`
function _nonmissing(x)
  T = eltype(x)
  return _nonmissing(T, x)
end
55+
# fallback: for element types not handled by a more specific
# method, the column `x` itself is returned (no copy is made)
function _nonmissing(::Type{T}, x) where {T}
  return x
end
56+
# element type is exactly Missing: the input is ignored and
# an empty vector with element type Any is returned
function _nonmissing(::Type{Missing}, x)
  return Any[]
end
57+
# element type Union{Missing,T}: collect the values of `x`
# into a new array whose element type is the narrower `T`
function _nonmissing(::Type{Union{Missing,T}}, x) where {T}
  narrowed = collect(T, x)
  return narrowed
end
58+
6459
function applyfeat(::DropMissing, feat, prep)
6560
# apply filter transform
6661
ftrans, fprep, snames = prep
@@ -69,28 +64,28 @@ function applyfeat(::DropMissing, feat, prep)
6964
# drop Missing type
7065
cols = Tables.columns(newfeat)
7166
names = Tables.columnnames(cols)
72-
columns = map(names) do nm
73-
x = Tables.getcolumn(cols, nm)
74-
nm ∈ snames ? _nonmissing(x) : x
67+
columns = map(names) do name
68+
x = Tables.getcolumn(cols, name)
69+
name ∈ snames ? _nonmissing(x) : x
7570
end
7671
𝒯 = (; zip(names, columns)...)
7772
newfeat = 𝒯 |> Tables.materializer(feat)
7873

7974
# original column types
8075
types = Tables.schema(feat).types
8176

82-
newfeat, (ftrans, ffcache, types)
77+
newfeat, (ftrans, ffcache, snames, types)
8378
end
8479

8580
function revertfeat(::DropMissing, newfeat, fcache)
86-
ftrans, ffcache, types = fcache
81+
ftrans, ffcache, snames, types = fcache
8782

8883
# reintroduce Missing type
8984
cols = Tables.columns(newfeat)
9085
names = Tables.columnnames(cols)
91-
columns = map(zip(types, names)) do (T, nm)
92-
x = Tables.getcolumn(cols, nm)
93-
collect(T, x)
86+
columns = map(types, names) do T, name
87+
x = Tables.getcolumn(cols, name)
88+
name ∈ snames ? collect(T, x) : x
9489
end
9590
𝒯 = (; zip(names, columns)...)
9691
ofeat = 𝒯 |> Tables.materializer(newfeat)

0 commit comments

Comments
 (0)