Skip to content

Commit 9af2b3b

Browse files
ClaroHenriqueClaro Henriquejuliohmeliascarv
authored
Add sort transform (#87)
* add sort transform * rename ascending parameter to rev * fix column table access * remove duplicated break line Co-authored-by: Júlio Hoffimann <[email protected]> * rename by parameter to col * Update src/transforms/sort.jl * Update src/transforms/sort.jl * add sort transform to doc page * Update src/transforms/sort.jl Co-authored-by: Elias Carvalho <[email protected]> * Add support for all sort function kwargs * Add colspec * Add tests * Update docstring * Remove notes Co-authored-by: Claro Henrique <[email protected]> Co-authored-by: Júlio Hoffimann <[email protected]> Co-authored-by: Elias Carvalho <[email protected]>
1 parent 0db52f0 commit 9af2b3b

File tree

5 files changed

+171
-1
lines changed

5 files changed

+171
-1
lines changed

docs/src/transforms/builtin.md

+6
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ Functional
121121
Levels
122122
```
123123

124+
## Sort
125+
126+
```@docs
127+
Sort
128+
```
129+
124130
## EigenAnalysis
125131

126132
```@docs

src/TableTransforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ export
4747
Quantile,
4848
Functional,
4949
Levels,
50+
Sort,
5051
EigenAnalysis,
5152
PCA, DRS, SDS,
5253
Sequential,

src/transforms.jl

+2-1
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ include("transforms/zscore.jl")
227227
include("transforms/quantile.jl")
228228
include("transforms/functional.jl")
229229
include("transforms/levels.jl")
230+
include("transforms/sort.jl")
230231
include("transforms/eigenanalysis.jl")
231232
include("transforms/sequential.jl")
232-
include("transforms/parallel.jl")
233+
include("transforms/parallel.jl")

src/transforms/sort.jl

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# ------------------------------------------------------------------
2+
# Licensed under the MIT License. See LICENSE in the project root.
3+
# ------------------------------------------------------------------
4+
"""
    Sort(col₁, col₂, ..., colₙ; kwargs...)
    Sort([col₁, col₂, ..., colₙ]; kwargs...)
    Sort((col₁, col₂, ..., colₙ); kwargs...)

Sort the rows of the table by the selected columns `col₁`, `col₂`, ..., `colₙ`.
The values of the selected columns are combined into one tuple per row, and
the `kwargs` are forwarded to the `sortperm` function applied to these tuples.
All columns of the table are reordered, not only the selected ones.

    Sort(regex; kwargs...)

Sort the rows of the table by the columns that match with `regex`.

# Examples

```julia
Sort(:a)
Sort(:a, :c, rev=true)
Sort([1, 3, 5], by=row -> abs.(row))
Sort(("a", "c", "e"))
Sort(r"[ace]")
```
"""
struct Sort{S<:ColSpec,T} <: Stateless
  # specification of the columns used as sorting keys
  colspec::S
  # keyword arguments forwarded to `sortperm`
  kwargs::T
end
31+
32+
# Build a Sort from a single column specification; the keyword
# arguments are stored as-is and later forwarded to `sortperm`.
function Sort(colspec::S; kwargs...) where {S<:ColSpec}
  return Sort(colspec, values(kwargs))
end

# Build a Sort from column selectors given as separate arguments;
# the selectors are stored as a tuple.
function Sort(cols::T...; kwargs...) where {T<:ColSelector}
  return Sort(cols, values(kwargs))
end

# argument errors
function Sort(::Tuple{}; kwargs...)
  throw(ArgumentError("Cannot create a Sort object with empty tuple."))
end

function Sort(; kwargs...)
  throw(ArgumentError("Cannot create a Sort object without arguments."))
end

# Sort transforms can be reverted
function isrevertible(::Type{<:Sort})
  return true
end
43+
44+
# Sort the rows of `table` using the columns selected by the transform.
# Returns the sorted table together with the row permutation, which
# serves as the cache needed to revert the transform.
function apply(transform::Sort, table)
  columns = Tables.columns(table)
  allnames = Tables.columnnames(columns)
  selected = choose(transform.colspec, allnames)

  # one tuple per row holding the values of the selected columns
  keycols = map(name -> Tables.getcolumn(columns, name), selected)
  rowkeys = collect(zip(keycols...))

  # permutation that puts the row keys in order
  perm = sortperm(rowkeys; transform.kwargs...)

  # reorder the rows and rebuild a table of the original type
  sortedrows = Tables.rowtable(table)[perm]
  newtable = Tables.materializer(table)(sortedrows)

  newtable, perm
end
60+
61+
# Undo a Sort transform. `cache` is the row permutation returned by
# `apply`; indexing the sorted rows with its inverse restores the
# original row order. Returns a table built with the materializer
# of `newtable`.
function revert(::Sort, newtable, cache)
  # invert the permutation directly: same result as `sortperm(cache)`
  # for a valid permutation, but in linear time; `invperm` throws an
  # `ArgumentError` if `cache` is not a permutation
  inds = invperm(cache)

  # undo rows sort
  rows = Tables.rowtable(newtable)
  rows = rows[inds]

  rows |> Tables.materializer(newtable)
end

test/transforms.jl

+92
Original file line numberDiff line numberDiff line change
@@ -1333,6 +1333,98 @@
13331333
@test isordered(tₒ.b) == false
13341334
end
13351335

1336+
@testset "Sort" begin
  # small table with a known row order
  a = [5, 3, 1, 2]
  b = [2, 4, 8, 5]
  c = [3, 2, 1, 4]
  d = [4, 3, 7, 5]
  t = Table(; a, b, c, d)

  # ascending order by a single column
  op = Sort(:a)
  sorted, cache = apply(op, t)
  @test Tables.schema(t) == Tables.schema(sorted)
  @test sorted.a == [1, 2, 3, 5]
  @test sorted.b == [8, 5, 4, 2]
  @test sorted.c == [1, 4, 2, 3]
  @test sorted.d == [7, 5, 3, 4]
  @test isrevertible(op) == true
  restored = revert(op, sorted, cache)
  @test t == restored

  # descending order test
  op = Sort(:b, rev=true)
  sorted, cache = apply(op, t)
  @test Tables.schema(t) == Tables.schema(sorted)
  @test sorted.a == [1, 2, 3, 5]
  @test sorted.b == [8, 5, 4, 2]
  @test sorted.c == [1, 4, 2, 3]
  @test sorted.d == [7, 5, 3, 4]
  restored = revert(op, sorted, cache)
  @test t == restored

  # random test
  a = rand(200)
  b = rand(200)
  c = rand(200)
  d = rand(200)
  t = Table(; a, b, c, d)

  op = Sort(:c)
  sorted, cache = apply(op, t)

  # same schema, same set of rows, key column in order
  @test Tables.schema(t) == Tables.schema(sorted)
  @test issetequal(Tables.rowtable(t), Tables.rowtable(sorted))
  @test issorted(Tables.getcolumn(sorted, :c))
  restored = revert(op, sorted, cache)
  @test t == restored

  # sort by multiple columns
  a = [-2, -1, -2, 2, 1, -1, 1, 2]
  b = [-8, -4, 6, 9, 8, 2, 2, -8]
  c = [-3, 6, 5, 4, -8, -7, -1, -10]
  t = Table(; a, b, c)

  # columns selected by position
  op = Sort(1, 2)
  sorted, cache = apply(op, t)
  @test sorted.a == [-2, -2, -1, -1, 1, 1, 2, 2]
  @test sorted.b == [-8, 6, -4, 2, 2, 8, -8, 9]
  @test sorted.c == [-3, 5, 6, -7, -1, -8, -10, 4]
  restored = revert(op, sorted, cache)
  @test t == restored

  # columns selected by a vector of symbols, descending
  op = Sort([:a, :c], rev=true)
  sorted, cache = apply(op, t)
  @test sorted.a == [2, 2, 1, 1, -1, -1, -2, -2]
  @test sorted.b == [9, -8, 2, 8, -4, 2, 6, -8]
  @test sorted.c == [4, -10, -1, -8, 6, -7, 5, -3]
  restored = revert(op, sorted, cache)
  @test t == restored

  # columns selected by a tuple of strings, custom `by` function
  op = Sort(("b", "c"), by=row -> abs.(row))
  sorted, cache = apply(op, t)
  @test sorted.a == [1, -1, -1, -2, -2, 1, 2, 2]
  @test sorted.b == [2, 2, -4, 6, -8, 8, -8, 9]
  @test sorted.c == [-1, -7, 6, 5, -3, -8, -10, 4]
  restored = revert(op, sorted, cache)
  @test t == restored

  # throws: Sort without arguments
  @test_throws ArgumentError Sort()
  @test_throws ArgumentError Sort(())

  # throws: empty selection
  @test_throws AssertionError apply(Sort(r"x"), t)
  @test_throws AssertionError apply(Sort(Symbol[]), t)
  @test_throws AssertionError apply(Sort(String[]), t)

  # throws: columns that do not exist in the original table
  @test_throws AssertionError apply(Sort([:d, :e]), t)
  @test_throws AssertionError apply(Sort(("d", "e")), t)

  # Invalid kwarg
  @test_throws MethodError apply(Sort(:a, :b, test=1), t)
end
1427+
13361428
@testset "EigenAnalysis" begin
13371429
# PCA test
13381430
x = rand(Normal(0, 10), 1500)

0 commit comments

Comments
 (0)