33# ------------------------------------------------------------------
44
55"""
    Filter(pred)
77
Filters the table returning only the rows where
the predicate `pred` is `true`.
910
1011# Examples
1112
@@ -23,7 +24,7 @@ Filter(row -> row["a"] == true && row["b"] < 30)
2324* The schema of the table is preserved by the transform.
2425"""
struct Filter{F} <: StatelessFeatureTransform
  # predicate called on each row; rows for which it returns `true` are kept
  pred::F
end
2829
# `Filter` declares itself revertible.
isrevertible(::Type{<:Filter}) = true
@@ -35,7 +36,7 @@ function preprocess(transform::Filter, table)
3536 # selected indices
3637 sinds, nrows = Int[], 0
3738 for (i, row) in enumerate (rows)
38- transform. func (row) && push! (sinds, i)
39+ transform. pred (row) && push! (sinds, i)
3940 nrows += 1
4041 end
4142
@@ -72,100 +73,3 @@ function revertfeat(::Filter, newfeat, fcache)
7273
7374 rows |> Tables. materializer (newfeat)
7475end
75-
"""
    DropMissing()
    DropMissing(:)

Drop all rows with missing values in table.

    DropMissing(col₁, col₂, ..., colₙ)
    DropMissing([col₁, col₂, ..., colₙ])
    DropMissing((col₁, col₂, ..., colₙ))

Drop all rows with missing values in selected columns `col₁`, `col₂`, ..., `colₙ`.

    DropMissing(regex)

Drop all rows with missing values in columns that match with `regex`.

# Examples

```julia
DropMissing()
DropMissing("b", "c", "e")
DropMissing([2, 3, 5])
DropMissing((:b, :c, :e))
DropMissing(r"[bce]")
```

## Notes

* The transform can alter the element type of columns from `Union{Missing,T}` to `T`.
* If the transformed column has only `missing` values, it will be converted to an empty column of type `Any`.
"""
struct DropMissing{S<:ColSpec} <: StatelessFeatureTransform
  # column selection in which missing values are searched for
  colspec::S
end
110-
# With no arguments, all columns are selected.
DropMissing() = DropMissing(AllSpec())

# Any other selector is converted with `colspec`.
DropMissing(spec) = DropMissing(colspec(spec))

# Columns given as separate arguments, e.g. `DropMissing("b", "c", "e")`.
DropMissing(cols::T...) where {T<:Col} = DropMissing(colspec(cols))
114-
# `DropMissing` declares itself revertible.
isrevertible(::Type{<:DropMissing}) = true
116-
# Build the `Filter` transform that keeps only the rows without missing values.
# With `AllSpec` every value of the row is checked; otherwise only the values
# stored under the selected names `snames` are checked.
_ftrans(::DropMissing{AllSpec}, snames) = Filter(row -> all(!ismissing, row))
_ftrans(::DropMissing, snames) = Filter(row -> all(!ismissing, row[nm] for nm in snames))
119-
# nonmissing
# Remove `Missing` from the element type of column `x`, dispatching on `eltype(x)`.

# fallback: the column is returned as is
_nonmissing(::Type{T}, x) where {T} = x

# `Union{Missing,T}` element type: copy into a collection with element type `T`
# (throws if `x` still holds a `missing` value)
_nonmissing(::Type{Union{Missing,T}}, x) where {T} = collect(T, x)

# `Missing` element type: the column is replaced by an empty `Any` vector
_nonmissing(::Type{Missing}, x) = []

# entry point
_nonmissing(x) = _nonmissing(eltype(x), x)
125-
# Resolve the column selection against the table's column names and run the
# preprocessing step of the equivalent `Filter` transform.
# Returns the tuple `(ftrans, fprep, snames)` consumed by `applyfeat`.
function preprocess(transform::DropMissing, table)
  schema = Tables.schema(table)
  names = schema.names
  # names of the columns picked by the column spec
  snames = choose(transform.colspec, names)
  # filter transform that rejects rows with missing values
  ftrans = _ftrans(transform, snames)
  fprep = preprocess(ftrans, table)
  ftrans, fprep, snames
end
134-
# Apply the filter built in `preprocess` and then remove `Missing` from the
# element type of the selected columns.
# Returns the new table and the cache `(ftrans, ffcache, types)`, where `types`
# are the column types of the input `feat`.
function applyfeat(::DropMissing, feat, prep)
  # apply filter transform
  ftrans, fprep, snames = prep
  newfeat, ffcache = applyfeat(ftrans, feat, fprep)

  # drop Missing type
  cols = Tables.columns(newfeat)
  names = Tables.columnnames(cols)
  columns = map(names) do nm
    x = Tables.getcolumn(cols, nm)
    # only the selected columns are converted
    nm ∈ snames ? _nonmissing(x) : x
  end
  𝒯 = (; zip(names, columns)...)
  newfeat = 𝒯 |> Tables.materializer(feat)

  # original column types
  types = Tables.schema(feat).types

  newfeat, (ftrans, ffcache, types)
end
155-
# Undo `applyfeat`: convert every column back to its cached original type and
# then revert the underlying filter transform.
function revertfeat(::DropMissing, newfeat, fcache)
  ftrans, ffcache, types = fcache

  # reintroduce Missing type
  cols = Tables.columns(newfeat)
  names = Tables.columnnames(cols)
  columns = map(zip(types, names)) do (T, nm)
    x = Tables.getcolumn(cols, nm)
    collect(T, x)
  end
  𝒯 = (; zip(names, columns)...)
  ofeat = 𝒯 |> Tables.materializer(newfeat)

  # revert filter transform
  revertfeat(ftrans, ofeat, ffcache)
end
0 commit comments