Skip to content

Commit 5a8ec6d

Browse files
authored
Add 'DropNaN' transform (#274)
1 parent 5e31881 commit 5a8ec6d

File tree

7 files changed

+189
-0
lines changed

7 files changed

+189
-0
lines changed

docs/src/transforms.md

+6
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ Filter
8080
DropMissing
8181
```
8282

83+
## DropNaN
84+
85+
```@docs
86+
DropNaN
87+
```
88+
8389
## DropExtrema
8490

8591
```@docs

src/TableTransforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ export
6262
Sample,
6363
Filter,
6464
DropMissing,
65+
DropNaN,
6566
DropExtrema,
6667
DropUnits,
6768
AbsoluteUnits,

src/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ include("transforms/sort.jl")
275275
include("transforms/sample.jl")
276276
include("transforms/filter.jl")
277277
include("transforms/dropmissing.jl")
278+
include("transforms/dropnan.jl")
278279
include("transforms/dropextrema.jl")
279280
include("transforms/dropunits.jl")
280281
include("transforms/dropconstant.jl")

src/transforms/dropnan.jl

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# ------------------------------------------------------------------
2+
# Licensed under the MIT License. See LICENSE in the project root.
3+
# ------------------------------------------------------------------
4+
5+
"""
6+
DropNaN()
7+
DropNaN(:)
8+
9+
Drop all rows with NaN values in the table.
10+
11+
DropNaN(col₁, col₂, ..., colₙ)
12+
DropNaN([col₁, col₂, ..., colₙ])
13+
DropNaN((col₁, col₂, ..., colₙ))
14+
15+
Drop all rows with NaN values in selected columns `col₁`, `col₂`, ..., `colₙ`.
16+
17+
DropNaN(regex)
18+
19+
Drop all rows with NaN values in columns that match `regex`.
20+
21+
# Examples
22+
23+
```julia
24+
DropNaN(2, 3, 4)
25+
DropNaN([:b, :c, :d])
26+
DropNaN(("b", "c", "d"))
27+
DropNaN(r"[bcd]")
28+
```
29+
"""
30+
struct DropNaN{S<:ColumnSelector} <: StatelessFeatureTransform
  # column selector deciding which columns are inspected for NaN values
  selector::S
end

# No arguments: wrap `AllSelector()` (presumably selects every column;
# confirm against the selector definitions).
function DropNaN()
  return DropNaN(AllSelector())
end

# A single column specification is converted with `selector` before
# being stored in the transform.
function DropNaN(cols)
  return DropNaN(selector(cols))
end

# Several columns of the same `Column` subtype passed as separate arguments:
# the resulting tuple of arguments is converted with `selector`.
function DropNaN(cols::C...) where {C<:Column}
  return DropNaN(selector(cols))
end
37+
38+
# `DropNaN` is declared as a non-revertible transform.
function isrevertible(::Type{<:DropNaN})
  return false
end
39+
40+
# Fallback: a value of any type without a more specific `_isnan` method
# is never considered NaN.
_isnan(::Any) = false
41+
# Numbers defer to `isnan`.
function _isnan(x::Number)
  return isnan(x)
end
42+
43+
# Build the row filter that `DropNaN` applies to the table `feat`.
#
# The selector stored in `transform` is applied to the column names of
# `feat`, and a `Filter` is created whose predicate holds for a row when
# `_isnan` is false for every selected column of that row.
#
# Returns the `Filter` together with the result of preprocessing it on `feat`.
function preprocess(transform::DropNaN, feat)
  colnames = Tables.columnnames(Tables.columns(feat))
  selected = transform.selector(colnames)

  # keep a row only when none of its selected entries is NaN
  keeprow = row -> all(nm -> !_isnan(row[nm]), selected)
  rowfilter = Filter(keeprow)

  return rowfilter, preprocess(rowfilter, feat)
end
51+
52+
# Apply `DropNaN` to `feat` by delegating to the filter stored in `prep`.
#
# `prep` is unpacked into a filter transform and its preprocessing result
# (presumably the pair returned by `preprocess` for `DropNaN`; confirm
# against the caller), and both are forwarded to `applyfeat`.
function applyfeat(::DropNaN, feat, prep)
  rowfilter, filterprep = prep
  return applyfeat(rowfilter, feat, filterprep)
end

test/shows.jl

+14
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,20 @@
175175
└─ selector = [:a, :b, :c]"""
176176
end
177177

178+
@testset "DropNaN" begin
  T = DropNaN(:a, :b, :c)

  # compact mode: single-line representation
  compact = sprint(show, T)
  @test compact == "DropNaN([:a, :b, :c])"

  # full mode: multi-line "text/plain" representation
  full = sprint(show, MIME"text/plain"(), T)
  @test full == """
  DropNaN transform
  └─ selector = [:a, :b, :c]"""
end
191+
178192
@testset "DropExtrema" begin
179193
T = DropExtrema("a", low=0.25, high=0.75)
180194

test/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ transformfiles = [
99
"sample.jl",
1010
"filter.jl",
1111
"dropmissing.jl",
12+
"dropnan.jl",
1213
"dropextrema.jl",
1314
"dropunits.jl",
1415
"dropconstant.jl",

test/transforms/dropnan.jl

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
@testset "DropNaN" begin
  @test !isrevertible(DropNaN())

  # Each column holds exactly one NaN, each in a different row
  # (row 6 for a, row 4 for b, row 3 for c, row 1 for d); e is non-numeric.
  a = [1.8, 0.5, 1.2, 3.7, 5.0, NaN]
  b = [6.0f0, 5.4f0, 5.4f0, NaN32, 5.5f0, 2.6f0]
  c = [4.9, 5.1, NaN, 5.1, 8.6, 4.4] * u"m"
  d = [NaN32, 1.0f0, 8.8f0, 0.1f0, 1.5f0, 9.5f0] * u"m"
  e = ["yes", "no", "no", "yes", "yes", "no"]
  t = Table(; a, b, c, d, e)

  # no selector: every column is checked
  n, _ = apply(DropNaN(), t)
  @test n.a == [0.5, 5.0]
  @test n.b == [5.4f0, 5.5f0]
  @test n.c == [5.1, 8.6] * u"m"
  @test n.d == [1.0f0, 1.5f0] * u"m"
  @test n.e == ["no", "yes"]

  # args...: columns a and c given as integers, symbols and strings
  for T in (DropNaN(1, 3), DropNaN(:a, :c), DropNaN("a", "c"))
    n, _ = apply(T, t)
    @test isequal(n.a, [1.8, 0.5, 3.7, 5.0])
    @test isequal(n.b, [6.0f0, 5.4f0, NaN32, 5.5f0])
    @test isequal(n.c, [4.9, 5.1, 5.1, 8.6] * u"m")
    @test isequal(n.d, [NaN32, 1.0f0, 0.1f0, 1.5f0] * u"m")
    @test isequal(n.e, ["yes", "no", "yes", "yes"])
  end

  # vector: columns b and d given as integers, symbols and strings
  for T in (DropNaN([2, 4]), DropNaN([:b, :d]), DropNaN(["b", "d"]))
    n, _ = apply(T, t)
    @test isequal(n.a, [0.5, 1.2, 5.0, NaN])
    @test isequal(n.b, [5.4f0, 5.4f0, 5.5f0, 2.6f0])
    @test isequal(n.c, [5.1, NaN, 8.6, 4.4] * u"m")
    @test isequal(n.d, [1.0f0, 8.8f0, 1.5f0, 9.5f0] * u"m")
    @test isequal(n.e, ["no", "no", "yes", "no"])
  end

  # tuple: columns a, b and c given as integers, symbols and strings
  for T in (DropNaN((1, 2, 3)), DropNaN((:a, :b, :c)), DropNaN(("a", "b", "c")))
    n, _ = apply(T, t)
    @test isequal(n.a, [1.8, 0.5, 5.0])
    @test isequal(n.b, [6.0f0, 5.4f0, 5.5f0])
    @test isequal(n.c, [4.9, 5.1, 8.6] * u"m")
    @test isequal(n.d, [NaN32, 1.0f0, 1.5f0] * u"m")
    @test isequal(n.e, ["yes", "no", "yes"])
  end

  # regex: columns b, c and d
  n, _ = apply(DropNaN(r"[bcd]"), t)
  @test isequal(n.a, [0.5, 5.0, NaN])
  @test isequal(n.b, [5.4f0, 5.5f0, 2.6f0])
  @test isequal(n.c, [5.1, 8.6, 4.4] * u"m")
  @test isequal(n.d, [1.0f0, 1.5f0, 9.5f0] * u"m")
  @test isequal(n.e, ["no", "yes", "no"])
end

0 commit comments

Comments
 (0)