Skip to content

Commit af0128a

Browse files
authored
Merge pull request #37 from ceferisbarov/coerce
Add Coerce
2 parents d1fd5ec + f196d3a commit af0128a

File tree

7 files changed

+80
-0
lines changed

7 files changed

+80
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ Please check the docstrings for additional information.
151151
| `DropMissing` | Drop missings |
152152
| `Rename` | Column renaming |
153153
| `Coalesce` | Replace missings |
154+
| `Coerce` | Coerce scientific types |
154155
| `Identity` | Identity transform |
155156
| `Center` | Mean removal |
156157
| `Scale` | Interval scaling |

src/TableTransforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ export
3737
DropMissing,
3838
Rename,
3939
Coalesce,
40+
Coerce,
4041
Identity,
4142
Center,
4243
Scale,

src/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ include("transforms/select.jl")
215215
include("transforms/filter.jl")
216216
include("transforms/rename.jl")
217217
include("transforms/coalesce.jl")
218+
include("transforms/coerce.jl")
218219
include("transforms/identity.jl")
219220
include("transforms/center.jl")
220221
include("transforms/scale.jl")

src/transforms/coerce.jl

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# ------------------------------------------------------------------
2+
# Licensed under the MIT License. See LICENSE in the project root.
3+
# ------------------------------------------------------------------
4+
5+
"""
6+
Coerce(pairs...; tight=false, verbosity=1)
7+
8+
Return a copy of the table, ensuring that the scientific types of the columns match the new specification.
9+
10+
This transform wraps the `ScientificTypes.coerce` function. Please see its docstring for more details.
11+
12+
```julia
13+
Coerce(:col1 => Continuous, :col2 => Count)
14+
```
15+
"""
16+
struct Coerce{P} <: Transform
17+
# `column => type` pairs; `apply` splats them into the `coerce` call
pairs::P
18+
# forwarded by `apply` to `coerce` as the `tight` keyword
tight::Bool
19+
# forwarded by `apply` to `coerce` as the `verbosity` keyword
verbosity::Int
20+
end
21+
22+
# Outer constructor: gathers the `column => type` pairs passed as positional
# arguments into a tuple and stores it together with the keyword options.
function Coerce(pair::Pair{Symbol,<:Type}...; tight=false, verbosity=1)
  return Coerce(pair, tight, verbosity)
end
24+
25+
# `Coerce` declares itself revertible; the matching `revert` method rebuilds
# the columns from the element types cached by `apply`.
function isrevertible(::Type{<:Coerce})
  return true
end
26+
27+
# Apply the coercion to `table`.
#
# Returns a tuple `(coerced, cache)` where `coerced` is the result of calling
# `coerce` with the stored pairs and keyword options, and `cache` holds the
# column types of the input table as reported by its schema.
function apply(transform::Coerce, table)
  options = (tight=transform.tight, verbosity=transform.verbosity)
  coerced = coerce(table, transform.pairs...; options...)

  # column types of the *input* table, kept so that `revert` can convert back
  cache = Tables.schema(table).types

  return coerced, cache
end
36+
37+
# Undo a `Coerce` transform.
#
# `newtable` is the coerced table and `cache` is the collection of original
# column types produced by `apply`, in column order. Every column of
# `newtable` is collected back into a vector with its cached element type and
# the result is materialized with the materializer of `newtable`.
function revert(transform::Coerce, newtable, cache)
  # Query the names from the column accessor rather than from the raw table:
  # `Tables.columnnames` is specified for the object returned by
  # `Tables.columns`, and only coincidentally works on tables that are their
  # own column accessor.
  cols = Tables.columns(newtable)
  names = Tables.columnnames(cols)

  # pair each cached type with its column and convert the elements back
  oldcols = map(zip(cache, names)) do (T, n)
    collect(T, Tables.getcolumn(cols, n))
  end

  𝒯 = (; zip(names, oldcols)...)
  return 𝒯 |> Tables.materializer(newtable)
end
48+

test/Project.toml

+2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
[deps]
2+
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
23
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
34
GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
45
ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19"
56
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
67
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
78
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
89
ReferenceTests = "324d217c-45ce-50fc-942e-d289b448e8cf"
10+
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
911
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1012
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
1113
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

test/runtests.jl

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ using TableTransforms
22
using Distributions
33
using Tables
44
using TypedTables
5+
using CategoricalArrays
6+
using ScientificTypes: Count, Multiclass
57
using LinearAlgebra
68
using Statistics
79
using Test, Random, Plots

test/transforms.jl

+25
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,31 @@
673673
@test ttypes == Tables.schema(tₒ).types
674674
end
675675

676+
@testset "Coerce" begin
  x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
  x2 = [1.0, 2.0, 3.0, 4.0, 5.0]
  x3 = [5.0, 5.0, 5.0, 5.0, 5.0]
  t = Table(; x1, x2, x3)

  # Count: coerced columns become integers; revert restores types and values
  T = Coerce(:x1 => Count, :x2 => Count)
  n, c = apply(T, t)
  @test eltype(n.x1) == Int
  @test eltype(n.x2) == Int
  tₒ = revert(T, n, c)
  @test eltype(tₒ.x1) == eltype(t.x1)
  @test eltype(tₒ.x2) == eltype(t.x2)
  @test tₒ.x1 == t.x1
  @test tₒ.x2 == t.x2

  # Multiclass: coerced columns become categorical; revert restores types and values
  T = Coerce(:x1 => Multiclass, :x2 => Multiclass)
  n, c = apply(T, t)
  @test eltype(n.x1) <: CategoricalValue
  @test eltype(n.x2) <: CategoricalValue
  tₒ = revert(T, n, c)
  @test eltype(tₒ.x1) == eltype(t.x1)
  @test eltype(tₒ.x2) == eltype(t.x2)
  @test tₒ.x1 == t.x1
  @test tₒ.x2 == t.x2
end
700+
676701
@testset "Identity" begin
677702
x = rand(4000)
678703
y = rand(4000)

0 commit comments

Comments
 (0)