Skip to content

Commit 9b352b2

Browse files
authored
Add DropUnits transform (#205)
* Add 'DropUnits' transform * Update code style * Add more tests * Update tests
1 parent 1c652cf commit 9b352b2

File tree

9 files changed

+294
-0
lines changed

9 files changed

+294
-0
lines changed

Project.toml

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1717
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
1818
Transducers = "28d57a85-8fef-5791-bfe6-a80928e7c999"
1919
TransformsBase = "28dd2a49-a57a-4bfb-84ca-1a49db9b96b8"
20+
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
2021

2122
[compat]
2223
AbstractTrees = "0.4"
@@ -29,4 +30,5 @@ StatsBase = "0.33, 0.34"
2930
Tables = "1.6"
3031
Transducers = "0.4"
3132
TransformsBase = "1.2"
33+
Unitful = "1.17"
3234
julia = "1.9"

docs/src/transforms.md

+6
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ DropMissing
5656
DropExtrema
5757
```
5858

59+
## DropUnits
60+
61+
```@docs
62+
DropUnits
63+
```
64+
5965
## Map
6066

6167
```@docs

src/TableTransforms.jl

+4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
module TableTransforms
66

77
using Tables
8+
using Unitful
89
using ScientificTypes
910
using Distributions: Normal
1011
using Transducers: tcollect
@@ -18,6 +19,8 @@ using CategoricalArrays
1819
using Random
1920
using NelderMead: optimise
2021

22+
using Unitful: AbstractQuantity
23+
2124
import Distributions: ContinuousUnivariateDistribution
2225
import Distributions: quantile, cdf
2326

@@ -54,6 +57,7 @@ export
5457
Filter,
5558
DropMissing,
5659
DropExtrema,
60+
DropUnits,
5761
Map,
5862
Replace,
5963
Coalesce,

src/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ include("transforms/sample.jl")
272272
include("transforms/filter.jl")
273273
include("transforms/dropmissing.jl")
274274
include("transforms/dropextrema.jl")
275+
include("transforms/dropunits.jl")
275276
include("transforms/map.jl")
276277
include("transforms/replace.jl")
277278
include("transforms/coalesce.jl")

src/transforms/dropunits.jl

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# ------------------------------------------------------------------
2+
# Licensed under the MIT License. See LICENSE in the project root.
3+
# ------------------------------------------------------------------
4+
5+
"""
6+
DropUnits()
7+
DropUnits(:)
8+
9+
Drop units from all columns in the table.
10+
11+
DropUnits(col₁, col₂, ..., colₙ)
12+
DropUnits([col₁, col₂, ..., colₙ])
13+
DropUnits((col₁, col₂, ..., colₙ))
14+
15+
Drop units from selected columns `col₁`, `col₂`, ..., `colₙ`.
16+
17+
DropUnits(regex)
18+
19+
Drop units from columns that match with `regex`.
20+
21+
# Examples
22+
23+
```julia
24+
DropUnits()
25+
DropUnits([2, 3, 5])
26+
DropUnits([:b, :c, :e])
27+
DropUnits(("b", "c", "e"))
28+
DropUnits(r"[bce]")
29+
```
30+
"""
31+
struct DropUnits{S<:ColSpec} <: StatelessFeatureTransform
32+
colspec::S
33+
end
34+
35+
# No arguments: select every column.
function DropUnits()
  DropUnits(AllSpec())
end

# A single column specification; it is normalized with `colspec`.
function DropUnits(spec)
  DropUnits(colspec(spec))
end

# Columns given as separate arguments of one common kind `T<:Col`.
function DropUnits(cols::T...) where {T<:Col}
  DropUnits(colspec(cols))
end
38+
39+
# `DropUnits` can be undone: `revertfeat` re-attaches the units stored in the cache.
function isrevertible(::Type{<:DropUnits})
  true
end
40+
41+
# Entry point: dispatch on the column's element type with `Missing` removed,
# so a column of quantities with missing values still hits the quantity method.
function _dropunit(x)
  T = nonmissingtype(eltype(x))
  _dropunit(x, T)
end

# Quantity column: return the stripped values together with the unit of `Q`.
function _dropunit(x, ::Type{Q}) where {Q<:AbstractQuantity}
  stripped = map(ustrip, x)
  (stripped, unit(Q))
end

# Any other column: returned as-is and tagged with `NoUnits`.
function _dropunit(x, ::Type)
  (x, NoUnits)
end
44+
45+
# Strip units from the columns of `feat` selected by `transform.colspec`.
#
# Returns the new table (built with `Tables.materializer(feat)`) and the cache
# `(snames, units)`: `snames` are the selected column names and `units` holds
# one entry per column of `feat`, in column order. Columns that were not
# selected, or whose element type is not a quantity, are passed through
# unchanged and recorded with `NoUnits`. `prep` is not used.
function applyfeat(transform::DropUnits, feat, prep)
  cols = Tables.columns(feat)
  names = Tables.columnnames(cols)
  snames = choose(transform.colspec, names)

  # pair every column with the unit that was removed from it
  tuples = map(names) do name
    x = Tables.getcolumn(cols, name)
    name ∈ snames ? _dropunit(x) : (x, NoUnits)
  end

  columns = first.(tuples)
  units = last.(tuples)

  𝒯 = (; zip(names, columns)...)
  newfeat = 𝒯 |> Tables.materializer(feat)
  newfeat, (snames, units)
end
62+
63+
# A column recorded with `NoUnits` is handed back unchanged.
function _addunit(x, ::typeof(NoUnits))
  x
end

# Otherwise multiply every value by the stored unit, collecting into a new array.
function _addunit(x, unit)
  [v * unit for v in x]
end
65+
66+
# Undo `applyfeat` for `DropUnits`: re-attach the cached units to `newfeat`.
#
# `fcache` is the `(snames, units)` pair produced by `applyfeat`; `units` is
# walked in lockstep with the column names of `newfeat`. Only columns listed
# in `snames` are touched, and among those a `NoUnits` entry leaves the column
# as it is. The result is built with `Tables.materializer(newfeat)`.
function revertfeat(::DropUnits, newfeat, fcache)
  cols = Tables.columns(newfeat)
  names = Tables.columnnames(cols)

  snames, units = fcache
  columns = map(names, units) do name, unit
    x = Tables.getcolumn(cols, name)
    name ∈ snames ? _addunit(x, unit) : x
  end

  𝒯 = (; zip(names, columns)...)
  𝒯 |> Tables.materializer(newfeat)
end

test/Project.toml

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1414
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
1515
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1616
TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
17+
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
1718

1819
[compat]
1920
GR = "=0.72.7"

test/runtests.jl

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using TableTransforms
22
using Distributions
33
using Tables
4+
using Unitful
45
using TypedTables
56
using CategoricalArrays
67
using ScientificTypes: Continuous, Count, Finite, Multiclass

test/transforms.jl

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ transformfiles = [
77
"filter.jl",
88
"dropmissing.jl",
99
"dropextrema.jl",
10+
"dropunits.jl",
1011
"map.jl",
1112
"replace.jl",
1213
"coalesce.jl",

test/transforms/dropunits.jl

+200
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
@testset "DropUnits" begin
2+
@test isrevertible(DropUnits())
3+
4+
a = [7, 4, 4, 7, 4, 1, 1, 6, 4, 7] * u"m/s"
5+
b = [4, 5, 4, missing, 6, 6, missing, 4, 4, 1] * u"m^2"
6+
c = [3.9, 3.8, 3.5, 6.5, 7.7, 1.5, 0.6, 5.7, 4.7, 4.8] * u"km/hr"
7+
d = [6.3, 4.7, 7.6, missing, 1.2, missing, 5.9, 0.2, 1.9, 4.2] * u"km^2"
8+
e = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
9+
t = Table(; a, b, c, d, e)
10+
11+
T = DropUnits()
12+
n, c = apply(T, t)
13+
@test eltype(n.a) === Int
14+
@test unit(eltype(n.a)) === NoUnits
15+
@test nonmissingtype(eltype(n.b)) === Int
16+
@test unit(nonmissingtype(eltype(n.b))) === NoUnits
17+
@test eltype(n.c) === Float64
18+
@test unit(eltype(n.c)) === NoUnits
19+
@test nonmissingtype(eltype(n.d)) === Float64
20+
@test unit(nonmissingtype(eltype(n.d))) === NoUnits
21+
@test eltype(n.e) === String
22+
@test n.e == t.e
23+
tₒ = revert(T, n, c)
24+
@test t.a == tₒ.a
25+
@test isequal(t.b, tₒ.b)
26+
@test t.c == tₒ.c
27+
@test isequal(t.d, tₒ.d)
28+
@test t.e == tₒ.e
29+
30+
# args...
31+
# integers
32+
T = DropUnits(1, 2)
33+
n, c = apply(T, t)
34+
@test eltype(n.a) === Int
35+
@test unit(eltype(n.a)) === NoUnits
36+
@test nonmissingtype(eltype(n.b)) === Int
37+
@test unit(nonmissingtype(eltype(n.b))) === NoUnits
38+
@test unit(eltype(n.c)) === u"km/hr"
39+
@test unit(nonmissingtype(eltype(n.d))) === u"km^2"
40+
tₒ = revert(T, n, c)
41+
@test t.a == tₒ.a
42+
@test isequal(t.b, tₒ.b)
43+
@test t.c == tₒ.c
44+
@test isequal(t.d, tₒ.d)
45+
@test t.e == tₒ.e
46+
47+
# symbols
48+
T = DropUnits(:a, :b)
49+
n, c = apply(T, t)
50+
@test eltype(n.a) === Int
51+
@test unit(eltype(n.a)) === NoUnits
52+
@test nonmissingtype(eltype(n.b)) === Int
53+
@test unit(nonmissingtype(eltype(n.b))) === NoUnits
54+
@test unit(eltype(n.c)) === u"km/hr"
55+
@test unit(nonmissingtype(eltype(n.d))) === u"km^2"
56+
tₒ = revert(T, n, c)
57+
@test t.a == tₒ.a
58+
@test isequal(t.b, tₒ.b)
59+
@test t.c == tₒ.c
60+
@test isequal(t.d, tₒ.d)
61+
@test t.e == tₒ.e
62+
63+
# strings
64+
T = DropUnits("a", "b")
65+
n, c = apply(T, t)
66+
@test eltype(n.a) === Int
67+
@test unit(eltype(n.a)) === NoUnits
68+
@test nonmissingtype(eltype(n.b)) === Int
69+
@test unit(nonmissingtype(eltype(n.b))) === NoUnits
70+
@test unit(eltype(n.c)) === u"km/hr"
71+
@test unit(nonmissingtype(eltype(n.d))) === u"km^2"
72+
tₒ = revert(T, n, c)
73+
@test t.a == tₒ.a
74+
@test isequal(t.b, tₒ.b)
75+
@test t.c == tₒ.c
76+
@test isequal(t.d, tₒ.d)
77+
@test t.e == tₒ.e
78+
79+
# vector
80+
# integers
81+
T = DropUnits([3, 4])
82+
n, c = apply(T, t)
83+
@test unit(eltype(n.a)) === u"m/s"
84+
@test unit(nonmissingtype(eltype(n.b))) === u"m^2"
85+
@test eltype(n.c) === Float64
86+
@test unit(eltype(n.c)) === NoUnits
87+
@test nonmissingtype(eltype(n.d)) === Float64
88+
@test unit(nonmissingtype(eltype(n.d))) === NoUnits
89+
tₒ = revert(T, n, c)
90+
@test t.a == tₒ.a
91+
@test isequal(t.b, tₒ.b)
92+
@test t.c == tₒ.c
93+
@test isequal(t.d, tₒ.d)
94+
@test t.e == tₒ.e
95+
96+
# symbols
97+
T = DropUnits([:c, :d])
98+
n, c = apply(T, t)
99+
@test unit(eltype(n.a)) === u"m/s"
100+
@test unit(nonmissingtype(eltype(n.b))) === u"m^2"
101+
@test eltype(n.c) === Float64
102+
@test unit(eltype(n.c)) === NoUnits
103+
@test nonmissingtype(eltype(n.d)) === Float64
104+
@test unit(nonmissingtype(eltype(n.d))) === NoUnits
105+
tₒ = revert(T, n, c)
106+
@test t.a == tₒ.a
107+
@test isequal(t.b, tₒ.b)
108+
@test t.c == tₒ.c
109+
@test isequal(t.d, tₒ.d)
110+
@test t.e == tₒ.e
111+
112+
# strings
113+
T = DropUnits(["c", "d"])
114+
n, c = apply(T, t)
115+
@test unit(eltype(n.a)) === u"m/s"
116+
@test unit(nonmissingtype(eltype(n.b))) === u"m^2"
117+
@test eltype(n.c) === Float64
118+
@test unit(eltype(n.c)) === NoUnits
119+
@test nonmissingtype(eltype(n.d)) === Float64
120+
@test unit(nonmissingtype(eltype(n.d))) === NoUnits
121+
tₒ = revert(T, n, c)
122+
@test t.a == tₒ.a
123+
@test isequal(t.b, tₒ.b)
124+
@test t.c == tₒ.c
125+
@test isequal(t.d, tₒ.d)
126+
@test t.e == tₒ.e
127+
128+
# tuple
129+
# integers
130+
T = DropUnits((2, 4, 5))
131+
n, c = apply(T, t)
132+
@test unit(eltype(n.a)) === u"m/s"
133+
@test nonmissingtype(eltype(n.b)) === Int
134+
@test unit(nonmissingtype(eltype(n.b))) === NoUnits
135+
@test unit(eltype(n.c)) === u"km/hr"
136+
@test nonmissingtype(eltype(n.d)) === Float64
137+
@test unit(nonmissingtype(eltype(n.d))) === NoUnits
138+
@test eltype(n.e) === String
139+
@test n.e == t.e
140+
tₒ = revert(T, n, c)
141+
@test t.a == tₒ.a
142+
@test isequal(t.b, tₒ.b)
143+
@test t.c == tₒ.c
144+
@test isequal(t.d, tₒ.d)
145+
@test t.e == tₒ.e
146+
147+
# symbols
148+
T = DropUnits((:b, :d, :e))
149+
n, c = apply(T, t)
150+
@test unit(eltype(n.a)) === u"m/s"
151+
@test nonmissingtype(eltype(n.b)) === Int
152+
@test unit(nonmissingtype(eltype(n.b))) === NoUnits
153+
@test unit(eltype(n.c)) === u"km/hr"
154+
@test nonmissingtype(eltype(n.d)) === Float64
155+
@test unit(nonmissingtype(eltype(n.d))) === NoUnits
156+
@test eltype(n.e) === String
157+
@test n.e == t.e
158+
tₒ = revert(T, n, c)
159+
@test t.a == tₒ.a
160+
@test isequal(t.b, tₒ.b)
161+
@test t.c == tₒ.c
162+
@test isequal(t.d, tₒ.d)
163+
@test t.e == tₒ.e
164+
165+
# strings
166+
T = DropUnits(("b", "d", "e"))
167+
n, c = apply(T, t)
168+
@test unit(eltype(n.a)) === u"m/s"
169+
@test nonmissingtype(eltype(n.b)) === Int
170+
@test unit(nonmissingtype(eltype(n.b))) === NoUnits
171+
@test unit(eltype(n.c)) === u"km/hr"
172+
@test nonmissingtype(eltype(n.d)) === Float64
173+
@test unit(nonmissingtype(eltype(n.d))) === NoUnits
174+
@test eltype(n.e) === String
175+
@test n.e == t.e
176+
tₒ = revert(T, n, c)
177+
@test t.a == tₒ.a
178+
@test isequal(t.b, tₒ.b)
179+
@test t.c == tₒ.c
180+
@test isequal(t.d, tₒ.d)
181+
@test t.e == tₒ.e
182+
183+
# regex
184+
T = DropUnits(r"[ace]")
185+
n, c = apply(T, t)
186+
@test eltype(n.a) === Int
187+
@test unit(eltype(n.a)) === NoUnits
188+
@test unit(nonmissingtype(eltype(n.b))) === u"m^2"
189+
@test eltype(n.c) === Float64
190+
@test unit(eltype(n.c)) === NoUnits
191+
@test unit(nonmissingtype(eltype(n.d))) === u"km^2"
192+
@test n.e == t.e
193+
@test eltype(n.e) === String
194+
tₒ = revert(T, n, c)
195+
@test t.a == tₒ.a
196+
@test isequal(t.b, tₒ.b)
197+
@test t.c == tₒ.c
198+
@test isequal(t.d, tₒ.d)
199+
@test t.e == tₒ.e
200+
end

0 commit comments

Comments
 (0)