Skip to content

Commit 004f158

Browse files
authored
Merge pull request #23 from eliascarv/new-select
New Select and Reject
2 parents 7614fbd + c90e4aa commit 004f158

File tree

2 files changed

+123
-17
lines changed

2 files changed

+123
-17
lines changed

src/transforms/select.jl

+41-17
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,44 @@
22
# Licensed under the MIT License. See LICENSE in the project root.
33
# ------------------------------------------------------------------
44

5+
const ColSpec = Union{Vector{Symbol}, Regex}
6+
57
"""
68
Select(col₁, col₂, ..., colₙ)
79
Select([col₁, col₂, ..., colₙ])
8-
10+
Select((col₁, col₂, ..., colₙ))
11+
912
The transform that selects columns `col₁`, `col₂`, ..., `colₙ`.
13+
14+
Select(regex)
15+
16+
Selects the columns whose names match `regex`.
1017
"""
11-
struct Select{N} <: Stateless
12-
cols::NTuple{N,Symbol}
18+
struct Select{S<:ColSpec} <: Stateless
19+
cols::S
1320
end
1421

15-
Select(cols::NTuple{N,AbstractString}) where {N} =
16-
Select(Symbol.(cols))
22+
Select(cols::T...) where {T<:Union{AbstractString, Symbol}} =
23+
Select(cols)
1724

18-
Select(cols::AbstractVector) = Select(Tuple(cols))
25+
Select(cols::NTuple{N, T}) where {N, T<:Union{AbstractString, Symbol}} =
26+
Select(collect(cols))
1927

20-
Select(cols...) = Select(cols)
28+
Select(cols::Vector{T}) where {T<:AbstractString} =
29+
Select(Symbol.(cols))
30+
31+
# Two `Select` transforms are equal when their column specifications
# (`cols`) compare equal by value.
Base.:(==)(a::Select, b::Select) = a.cols == b.cols

# Keep `hash` consistent with `==`, so that equal transforms are treated as
# the same key in `Dict`, `Set`, `unique`, etc.
Base.hash(s::Select, h::UInt) = hash(s.cols, hash(:Select, h))
2132

2233
isrevertible(::Type{<:Select}) = true
2334

35+
_select(cols::Vector{Symbol}, allcols) = cols
36+
_select(cols::Regex, allcols) =
37+
filter(col -> occursin(cols, String(col)), allcols)
38+
2439
function apply(transform::Select, table)
2540
# retrieve relevant column names
2641
allcols = collect(Tables.columnnames(table))
27-
select = collect(transform.cols)
42+
select = _select(transform.cols, allcols)
2843
reject = setdiff(allcols, select)
2944

3045
# keep track of indices to revert later
@@ -76,26 +91,35 @@ end
7691
"""
7792
Reject(col₁, col₂, ..., colₙ)
7893
Reject([col₁, col₂, ..., colₙ])
94+
Reject((col₁, col₂, ..., colₙ))
7995
8096
The transform that discards columns `col₁`, `col₂`, ..., `colₙ`.
97+
98+
Reject(regex)
99+
100+
Discards the columns whose names match `regex`.
81101
"""
82-
struct Reject{N} <: Stateless
83-
cols::NTuple{N,Symbol}
102+
struct Reject{S<:ColSpec} <: Stateless
103+
cols::S
84104
end
85105

86-
Reject(cols::NTuple{N,AbstractString}) where {N} =
87-
Reject(Symbol.(cols))
106+
Reject(cols::T...) where {T<:Union{AbstractString, Symbol}} =
107+
Reject(cols)
88108

89-
Reject(cols::AbstractVector) = Reject(Tuple(cols))
109+
Reject(cols::NTuple{N, T}) where {N, T<:Union{AbstractString, Symbol}} =
110+
Reject(collect(cols))
90111

91-
Reject(cols...) = Reject(cols)
112+
Reject(cols::Vector{T}) where {T<:AbstractString} =
113+
Reject(Symbol.(cols))
114+
115+
# Two `Reject` transforms are equal when their column specifications
# (`cols`) compare equal by value.
Base.:(==)(a::Reject, b::Reject) = a.cols == b.cols

# Keep `hash` consistent with `==`, so that equal transforms are treated as
# the same key in `Dict`, `Set`, `unique`, etc.
Base.hash(r::Reject, h::UInt) = hash(r.cols, hash(:Reject, h))
92116

93117
isrevertible(::Type{<:Reject}) = true
94118

95119
function apply(transform::Reject, table)
96120
allcols = Tables.columnnames(table)
97-
reject = collect(transform.cols)
98-
select = Tuple(setdiff(allcols, reject))
121+
reject = _select(transform.cols, allcols)
122+
select = setdiff(allcols, reject)
99123
strans = Select(select)
100124
newtable, scache = apply(strans, table)
101125
newtable, (strans, scache)
@@ -104,4 +128,4 @@ end
104128
function revert(::Reject, newtable, cache)
105129
strans, scache = cache
106130
revert(strans, newtable, scache)
107-
end
131+
end

test/transforms.jl

+82
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,20 @@
5555
tₒ = revert(T, n, c)
5656
@test t == tₒ
5757

58+
# selection with tuple of strings
59+
T = Select(("d", "c", "b"))
60+
n, c = apply(T, t)
61+
@test Tables.columnnames(n) == (:d, :c, :b)
62+
tₒ = revert(T, n, c)
63+
@test t == tₒ
64+
65+
# selection with vector of strings
66+
T = Select(["d", "c", "b"])
67+
n, c = apply(T, t)
68+
@test Tables.columnnames(n) == (:d, :c, :b)
69+
tₒ = revert(T, n, c)
70+
@test t == tₒ
71+
5872
# selection with single column
5973
@test (Select(:a) == Select("a") ==
6074
Select((:a,)) == Select(("a",)) ==
@@ -65,6 +79,33 @@
6579
n1, c1 = apply(T, t)
6680
n2 = reapply(T, t, c1)
6781
@test n1 == n2
82+
83+
# selection with Regex
84+
T = Select(r"[dcb]")
85+
n, c = apply(T, t)
86+
@test Tables.columnnames(n) == (:b, :c, :d) # the order of columns is preserved
87+
tₒ = revert(T, n, c)
88+
@test t == tₒ
89+
90+
x1 = rand(4000)
91+
x2 = rand(4000)
92+
y1 = rand(4000)
93+
y2 = rand(4000)
94+
t = Table(; x1, x2, y1, y2)
95+
96+
# select columns whose names contain the character x
97+
T = Select(r"x")
98+
n, c = apply(T, t)
99+
@test Tables.columnnames(n) == (:x1, :x2)
100+
tₒ = revert(T, n, c)
101+
@test t == tₒ
102+
103+
# select columns whose names contain the character y
104+
T = Select(r"y")
105+
n, c = apply(T, t)
106+
@test Tables.columnnames(n) == (:y1, :y2)
107+
tₒ = revert(T, n, c)
108+
@test t == tₒ
68109
end
69110

70111
@testset "Reject" begin
@@ -121,6 +162,20 @@
121162
tₒ = revert(T, n, c)
122163
@test t == tₒ
123164

165+
# rejection with tuple of strings
166+
T = Reject(("d", "c", "b"))
167+
n, c = apply(T, t)
168+
@test Tables.columnnames(n) == (:a, :e, :f)
169+
tₒ = revert(T, n, c)
170+
@test t == tₒ
171+
172+
# rejection with vector of strings
173+
T = Reject(["d", "c", "b"])
174+
n, c = apply(T, t)
175+
@test Tables.columnnames(n) == (:a, :e, :f)
176+
tₒ = revert(T, n, c)
177+
@test t == tₒ
178+
124179
# rejection with single column
125180
@test (Reject(:a) == Reject("a") ==
126181
Reject((:a,)) == Reject(("a",)) ==
@@ -131,6 +186,33 @@
131186
n1, c1 = apply(T, t)
132187
n2 = reapply(T, t, c1)
133188
@test n1 == n2
189+
190+
# rejection with Regex
191+
T = Reject(r"[dcb]")
192+
n, c = apply(T, t)
193+
@test Tables.columnnames(n) == (:a, :e, :f) # the order of columns is preserved
194+
tₒ = revert(T, n, c)
195+
@test t == tₒ
196+
197+
x1 = rand(4000)
198+
x2 = rand(4000)
199+
y1 = rand(4000)
200+
y2 = rand(4000)
201+
t = Table(; x1, x2, y1, y2)
202+
203+
# reject columns whose names contain the character x
204+
T = Reject(r"x")
205+
n, c = apply(T, t)
206+
@test Tables.columnnames(n) == (:y1, :y2)
207+
tₒ = revert(T, n, c)
208+
@test t == tₒ
209+
210+
# reject columns whose names contain the character y
211+
T = Reject(r"y")
212+
n, c = apply(T, t)
213+
@test Tables.columnnames(n) == (:x1, :x2)
214+
tₒ = revert(T, n, c)
215+
@test t == tₒ
134216
end
135217

136218
@testset "Rename" begin

0 commit comments

Comments
 (0)