Skip to content

Commit 5521c1d

Browse files
authored
Add Unit transform (#283)
* Add 'Unit' transform * Add docstring * Add tests * Update code style * Update tests
1 parent 0dea773 commit 5521c1d

File tree

7 files changed

+277
-0
lines changed

7 files changed

+277
-0
lines changed

docs/src/transforms.md

+6
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,12 @@ AbsoluteUnits
116116
Unitify
117117
```
118118

119+
## Unit
120+
121+
```@docs
122+
Unit
123+
```
124+
119125
## Map
120126

121127
```@docs

src/TableTransforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ export
6767
DropUnits,
6868
AbsoluteUnits,
6969
Unitify,
70+
Unit,
7071
Map,
7172
Replace,
7273
Coalesce,

src/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ include("transforms/dropunits.jl")
281281
include("transforms/dropconstant.jl")
282282
include("transforms/absoluteunits.jl")
283283
include("transforms/unitify.jl")
284+
include("transforms/unit.jl")
284285
include("transforms/map.jl")
285286
include("transforms/replace.jl")
286287
include("transforms/coalesce.jl")

src/transforms/unit.jl

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# ------------------------------------------------------------------
2+
# Licensed under the MIT License. See LICENSE in the project root.
3+
# ------------------------------------------------------------------
4+
5+
"""
6+
Unit(unit)
7+
8+
Converts the units of all columns in the table to `unit`. Columns without units are left unchanged.
9+
10+
Unit(cols₁ => unit₁, cols₂ => unit₂, ..., colsₙ => unitₙ)
11+
12+
Converts the units of selected columns `cols₁`, `cols₂`, ..., `colsₙ`
13+
to `unit₁`, `unit₂`, ..., `unitₙ`.
14+
15+
The column selection can be a single column identifier (index or name),
16+
a collection of identifiers or a regular expression (regex).
17+
18+
# Examples
19+
20+
```julia
21+
Unit(u"m")
22+
Unit(1 => u"km", :b => u"K", "c" => u"s")
23+
Unit([2, 3] => u"cm")
24+
Unit([:a, :c] => u"cm")
25+
Unit(["a", "c"] => u"cm")
26+
Unit(r"[abc]" => u"km")
27+
```
28+
"""
29+
struct Unit <: StatelessFeatureTransform
  # one column selector per requested conversion
  selectors::Vector{ColumnSelector}
  # target unit paired with the selector stored at the same position
  units::Vector{Units}
end
33+
34+
# A call without arguments would otherwise dispatch to the `Pair...`
# method below, so it is rejected explicitly.
function Unit()
  throw(ArgumentError("cannot create Unit transform without arguments"))
end

# Single unit: target every column of the table.
function Unit(unit::Units)
  Unit([AllSelector()], [unit])
end

# `cols => unit` pairs: build one selector per pair and keep the
# units in the same order.
function Unit(pairs::Pair...)
  selectors = collect(map(p -> selector(first(p)), pairs))
  units = collect(map(last, pairs))
  Unit(selectors, units)
end
39+
40+
# The original units are cached by `applyfeat`, so the transform can be undone.
function isrevertible(::Type{<:Unit})
  true
end
41+
42+
# Convert column `x` to unit `u`. Returns a tuple `(column, original unit)`,
# where the second entry is `nothing` if the column holds no quantities.
# Dispatch happens on the element type with `Missing` stripped.
function _uconvert(u, x)
  T = nonmissingtype(eltype(x))
  _uconvert(T, u, x)
end

# fallback: elements are not quantities, so the column is returned untouched
function _uconvert(::Type, _, x)
  (x, nothing)
end

# quantity columns: convert element by element and remember the unit of `Q`
function _uconvert(::Type{Q}, u, x) where {Q<:AbstractQuantity}
  converted = map(v -> uconvert(u, v), x)
  (converted, unit(Q))
end
45+
46+
# Apply the unit conversion to the feature table `feat`.
# Returns the converted table together with a cache holding, for each
# column, its original unit (or `nothing` when the column was not converted).
function applyfeat(transform::Unit, feat, prep)
  cols = Tables.columns(feat)
  names = Tables.columnnames(cols)

  # resolve each selector against the column names and attach the
  # requested unit to every selected name
  namedunits = mapreduce(vcat, transform.selectors, transform.units) do sel, u
    sel(names) .=> u
  end
  name2unit = Dict(namedunits)

  # each entry is a tuple (column, original unit or nothing)
  results = map(names) do name
    x = Tables.getcolumn(cols, name)
    haskey(name2unit, name) ? _uconvert(name2unit[name], x) : (x, nothing)
  end

  columns = first.(results)
  ounits = last.(results)

  𝒯 = (; zip(names, columns)...)
  newfeat = Tables.materializer(feat)(𝒯)
  newfeat, ounits
end
75+
76+
# Undo the unit conversion using the cache produced by `applyfeat`.
# `fcache` is iterated in lockstep with the column names: an entry is
# either the unit to convert the column back to, or `nothing` to keep
# the column as is.
function revertfeat(::Unit, newfeat, fcache)
  cols = Tables.columns(newfeat)
  names = Tables.columnnames(cols)

  restored = map(names, fcache) do name, ounit
    x = Tables.getcolumn(cols, name)
    if isnothing(ounit)
      x
    else
      map(v -> uconvert(ounit, v), x)
    end
  end

  𝒯 = (; zip(names, restored)...)
  Tables.materializer(newfeat)(𝒯)
end

test/shows.jl

+15
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,21 @@
257257
@test iostr == "Unitify transform"
258258
end
259259

260+
@testset "Unit" begin
  T = Unit(:a => u"m", [:b, :c] => u"s")

  # compact mode: single-line representation
  compact = sprint(show, T)
  @test compact == "Unit(selectors: ColumnSelector[:a, [:b, :c]], units: Units[m, s])"

  # full mode: multi-line tree representation
  full = sprint(show, MIME("text/plain"), T)
  @test full == """
  Unit transform
  ├─ selectors: ColumnSelectors.ColumnSelector[:a, [:b, :c]]
  └─ units: Unitful.Units[m, s]"""
end
274+
260275
@testset "Map" begin
261276
fun = (a, b) -> 2a + b
262277
T = Map(:a => sin, [:a, :b] => fun => :c)

test/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ transformfiles = [
1515
"dropconstant.jl",
1616
"absoluteunits.jl",
1717
"unitify.jl",
18+
"unit.jl",
1819
"map.jl",
1920
"replace.jl",
2021
"coalesce.jl",

test/transforms/unit.jl

+165
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
@testset "Unit" begin
  @test isrevertible(Unit(u"m"))

  # table mixing length quantities with unitless and string columns
  a = [2.7, 2.9, 2.2, 1.4, 1.8, 3.3] * u"m"
  b = [300, 500, missing, 800, missing, 400] * u"cm"
  c = [8, 2, 5, 7, 9, 4] * u"km"
  d = [0.3, 0.1, 0.9, 0.2, 0.7, 0.4]
  e = ["no", "no", "yes", "yes", "no", "yes"]
  t = Table(; a, b, c, d, e)

  # a single unit targets every column; non-quantity columns keep their type
  T = Unit(u"m")
  n, cache = apply(T, t)
  @test unit(eltype(n.a)) == u"m"
  @test unit(eltype(n.b)) == u"m"
  @test unit(eltype(n.c)) == u"m"
  @test eltype(n.d) <: Float64
  @test eltype(n.e) <: String
  tₒ = revert(T, n, cache)
  @test unit(eltype(tₒ.a)) == u"m"
  @test unit(eltype(tₒ.b)) == u"cm"
  @test unit(eltype(tₒ.c)) == u"km"
  @test all(isapprox.(tₒ.a, t.a))
  @test all(isapprox.(skipmissing(tₒ.b), skipmissing(t.b)))
  @test all(isapprox.(tₒ.c, t.c))
  @test tₒ.d == t.d
  @test tₒ.e == t.e

  # table where every column carries units
  a = [2.7, 2.9, 2.2, 1.4, 1.8, 3.3] * u"m"
  b = [300, 500, missing, 800, missing, 400] * u"cm"
  c = [8, 2, 5, 7, 9, 4] * u"km"
  d = [29.1, missing, 29.2, missing, 28.4, 26.4] * u"°C"
  e = [0.9, 0.4, 0.5, 0.1, 0.3, 0.6] * u"kg"
  f = 0.5u"ppm" * e
  t = Table(; a, b, c, d, e, f)

  # single column selected by index
  T = Unit(4 => u"K")
  n, cache = apply(T, t)
  @test unit(eltype(n.a)) == u"m"
  @test unit(eltype(n.b)) == u"cm"
  @test unit(eltype(n.c)) == u"km"
  @test unit(eltype(n.d)) == u"K"
  @test unit(eltype(n.e)) == u"kg"
  @test unit(eltype(n.f)) == u"kg * ppm"
  tₒ = revert(T, n, cache)
  @test unit(eltype(tₒ.d)) == u"°C"
  @test tₒ.a == t.a
  @test isequal(tₒ.b, t.b)
  @test tₒ.c == t.c
  @test all(isapprox.(skipmissing(tₒ.d), skipmissing(t.d)))
  @test tₒ.e == t.e
  @test tₒ.f == t.f

  # single column selected by symbol
  T = Unit(:e => u"g")
  n, cache = apply(T, t)
  @test unit(eltype(n.a)) == u"m"
  @test unit(eltype(n.b)) == u"cm"
  @test unit(eltype(n.c)) == u"km"
  @test unit(eltype(n.d)) == u"°C"
  @test unit(eltype(n.e)) == u"g"
  @test unit(eltype(n.f)) == u"kg * ppm"
  tₒ = revert(T, n, cache)
  @test unit(eltype(tₒ.e)) == u"kg"
  @test tₒ.a == t.a
  @test isequal(tₒ.b, t.b)
  @test tₒ.c == t.c
  @test isequal(tₒ.d, t.d)
  @test all(isapprox.(tₒ.e, t.e))
  @test tₒ.f == t.f

  # single column selected by string
  T = Unit("f" => u"kg")
  n, cache = apply(T, t)
  @test unit(eltype(n.a)) == u"m"
  @test unit(eltype(n.b)) == u"cm"
  @test unit(eltype(n.c)) == u"km"
  @test unit(eltype(n.d)) == u"°C"
  @test unit(eltype(n.e)) == u"kg"
  @test unit(eltype(n.f)) == u"kg"
  tₒ = revert(T, n, cache)
  @test unit(eltype(tₒ.f)) == u"kg * ppm"
  @test tₒ.a == t.a
  @test isequal(tₒ.b, t.b)
  @test tₒ.c == t.c
  @test isequal(tₒ.d, t.d)
  @test tₒ.e == t.e
  @test all(isapprox.(tₒ.f, t.f))

  # columns a, b, c selected together by indices, symbols, strings and regex
  multicases = (
    ([1, 2, 3], u"m"),
    ([:a, :b, :c], u"cm"),
    (["a", "b", "c"], u"km"),
    (r"[abc]", u"m")
  )
  for (sel, u) in multicases
    T = Unit(sel => u)
    n, cache = apply(T, t)
    @test unit(eltype(n.a)) == u
    @test unit(eltype(n.b)) == u
    @test unit(eltype(n.c)) == u
    @test unit(eltype(n.d)) == u"°C"
    @test unit(eltype(n.e)) == u"kg"
    @test unit(eltype(n.f)) == u"kg * ppm"
    tₒ = revert(T, n, cache)
    @test unit(eltype(tₒ.a)) == u"m"
    @test unit(eltype(tₒ.b)) == u"cm"
    @test unit(eltype(tₒ.c)) == u"km"
    @test all(isapprox.(tₒ.a, t.a))
    @test all(isapprox.(skipmissing(tₒ.b), skipmissing(t.b)))
    @test all(isapprox.(tₒ.c, t.c))
    @test isequal(tₒ.d, t.d)
    @test tₒ.e == t.e
    @test tₒ.f == t.f
  end

  # error: cannot create Unit transform without arguments
  @test_throws ArgumentError Unit()
end

0 commit comments

Comments
 (0)