Skip to content

Commit 11f5c79

Browse files
authored
Refactor StdNames (#184)
* Refactor 'StdNames' transform * Fix tests * Add more tests
1 parent 8470f33 commit 11f5c79

File tree

3 files changed

+142
-43
lines changed

3 files changed

+142
-43
lines changed

src/transforms/stdnames.jl

+55-24
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,33 @@
22
# Licensed under the MIT License. See LICENSE in the project root.
33
# ------------------------------------------------------------------
44

5+
const SPECS = [:uppersnake, :uppercamel, :upperflat, :snake, :camel, :flat]
6+
57
"""
6-
StdNames(spec)
8+
StdNames(spec = :uppersnake)
79
810
Standardizes column names according to given `spec`.
9-
Default to `:upper` case specification.
11+
Default to `:uppersnake` case specification.
1012
1113
# Specs
1214
13-
* `:upper` - Uppercase, e.g. COLUMNNAME
14-
* `:camel` - Camelcase, e.g. ColumnName
15-
* `:snake` - Snakecase, e.g. column_name
15+
* `:uppersnake` - Upper Snake Case, e.g. COLUMN_NAME
16+
* `:uppercamel` - Upper Camel Case, e.g. ColumnName
17+
* `:upperflat` - Upper Flat Case, e.g. COLUMNNAME
18+
* `:snake` - Snake Case, e.g. column_name
19+
* `:camel` - Camel Case, e.g. columnName
20+
* `:flat` - Flat Case, e.g. columnname
1621
"""
1722
struct StdNames <: StatelessFeatureTransform
1823
spec::Symbol
19-
end
2024

21-
StdNames() = StdNames(:upper)
25+
function StdNames(spec = :uppersnake)
26+
if spec ∉ SPECS
27+
throw(ArgumentError("invalid specification, use one of these: $SPECS"))
28+
end
29+
new(spec)
30+
end
31+
end
2232

2333
isrevertible(::Type{StdNames}) = true
2434

@@ -28,18 +38,21 @@ function applyfeat(transform::StdNames, feat, prep)
2838

2939
# retrieve column names
3040
cols = Tables.columns(feat)
31-
oldnames = string.(Tables.columnnames(cols))
41+
oldnames = Tables.columnnames(cols)
3242

3343
# clean column names
34-
cleaned = _clean.(oldnames)
44+
names = map(nm -> _clean(string(nm)), oldnames)
3545

3646
# apply spec
37-
spec == :camel && (names = _camel.(cleaned))
38-
spec == :snake && (names = _snake.(cleaned))
39-
spec == :upper && (names = _upper.(cleaned))
47+
spec === :uppersnake && (names = _uppersnake.(names))
48+
spec === :uppercamel && (names = _uppercamel.(names))
49+
spec === :upperflat && (names = _upperflat.(names))
50+
spec === :snake && (names = _snake.(names))
51+
spec === :camel && (names = _camel.(names))
52+
spec === :flat && (names = _flat.(names))
4053

4154
# make names unique
42-
newnames = _unique(names)
55+
newnames = _makeunique(names)
4356

4457
# rename transform
4558
rtrans = Rename(colspec(oldnames), Symbol.(newnames))
@@ -53,25 +66,43 @@ function revertfeat(::StdNames, newfeat, fcache)
5366
revertfeat(rtrans, newfeat, rfcache)
5467
end
5568

56-
const delim = [' ', '\t', '-', '_']
69+
const DELIMS = [' ', '\t', '-', '_']
5770

58-
_clean(name) = filter(c -> isdigit(c) || isletter(c) || c ∈ delim, name)
71+
function _clean(name)
72+
nm = strip(name, DELIMS)
73+
filter(c -> isdigit(c) || isletter(c) || c ∈ DELIMS, nm)
74+
end
5975

60-
function _unique(names)
76+
function _makeunique(names)
6177
newnames = String[]
6278
for name in names
63-
n = name
64-
while n ∈ newnames
65-
n = string(n, "_")
79+
while name ∈ newnames
80+
name = name * "_"
6681
end
67-
push!(newnames, n)
82+
push!(newnames, name)
6883
end
69-
7084
newnames
7185
end
7286

73-
_camel(name) = join(uppercasefirst.(split(name, delim)))
87+
_uppersnake(name) = _isuppersnake(name) ? name : join(uppercase.(split(name, DELIMS)), '_')
88+
89+
_uppercamel(name) = _isuppercamel(name) ? name : join(uppercasefirst.(split(name, DELIMS)))
90+
91+
_upperflat(name) = _isupperflat(name) ? name : replace(uppercase(name), DELIMS => "")
92+
93+
_snake(name) = _issnake(name) ? name : join(lowercase.(split(name, DELIMS)), '_')
94+
95+
function _camel(name)
96+
_iscamel(name) && return name
97+
first, others... = split(name, DELIMS)
98+
join([lowercase(first); uppercasefirst.(others)])
99+
end
74100

75-
_snake(name) = join(lowercase.(split(strip(name, delim), delim)), '_')
101+
_flat(name) = _isflat(name) ? name : replace(lowercase(name), DELIMS => "")
76102

77-
_upper(name) = replace(uppercase(name), delim => "")
103+
_isuppersnake(name) = occursin(r"^[A-Z0-9]+(_[A-Z0-9]+)+$", name)
104+
_isuppercamel(name) = occursin(r"^[A-Z][a-z0-9]*([A-Z][a-z0-9]*)+$", name)
105+
_isupperflat(name) = occursin(r"^[A-Z0-9]+$", name)
106+
_issnake(name) = occursin(r"^[a-z0-9]+(_[a-z0-9]+)+$", name)
107+
_iscamel(name) = occursin(r"^[a-z][a-z0-9]*([A-Z][a-z0-9]*)+$", name)
108+
_isflat(name) = occursin(r"^[a-z0-9]+$", name)

test/shows.jl

+3-3
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,17 @@
5858
end
5959

6060
@testset "StdNames" begin
61-
T = StdNames(:upper)
61+
T = StdNames(:upperflat)
6262

6363
# compact mode
6464
iostr = sprint(show, T)
65-
@test iostr == "StdNames(:upper)"
65+
@test iostr == "StdNames(:upperflat)"
6666

6767
# full mode
6868
iostr = sprint(show, MIME("text/plain"), T)
6969
@test iostr == """
7070
StdNames transform
71-
└─ spec = :upper"""
71+
└─ spec = :upperflat"""
7272
end
7373

7474
@testset "Sort" begin

test/transforms/stdnames.jl

+84-16
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,62 @@
11
@testset "StdNames" begin
2+
names = Symbol.(["_apple tree_", " banana-fruit ", "-pear\tseed-"])
3+
columns = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
4+
t = Table(; zip(names, columns)...)
5+
T = StdNames(:uppersnake)
6+
n, c = apply(T, t)
7+
@test Tables.schema(n).names == (:APPLE_TREE, :BANANA_FRUIT, :PEAR_SEED)
8+
tₒ = revert(T, n, c)
9+
@test t == tₒ
10+
11+
T = StdNames(:uppercamel)
12+
n, c = apply(T, t)
13+
@test Tables.schema(n).names == (:AppleTree, :BananaFruit, :PearSeed)
14+
tₒ = revert(T, n, c)
15+
@test t == tₒ
16+
17+
T = StdNames(:upperflat)
18+
n, c = apply(T, t)
19+
@test Tables.schema(n).names == (:APPLETREE, :BANANAFRUIT, :PEARSEED)
20+
tₒ = revert(T, n, c)
21+
@test t == tₒ
22+
23+
T = StdNames(:snake)
24+
n, c = apply(T, t)
25+
@test Tables.schema(n).names == (:apple_tree, :banana_fruit, :pear_seed)
26+
tₒ = revert(T, n, c)
27+
@test t == tₒ
28+
29+
T = StdNames(:camel)
30+
n, c = apply(T, t)
31+
@test Tables.schema(n).names == (:appleTree, :bananaFruit, :pearSeed)
32+
tₒ = revert(T, n, c)
33+
@test t == tₒ
34+
35+
T = StdNames(:flat)
36+
n, c = apply(T, t)
37+
@test Tables.schema(n).names == (:appletree, :bananafruit, :pearseed)
38+
tₒ = revert(T, n, c)
39+
@test t == tₒ
40+
41+
# internal functions
242
names = ["apple banana", "apple\tbanana", "apple_banana", "apple-banana", "apple_Banana"]
343
for name in names
4-
@test TT._camel(name) == "AppleBanana"
44+
@test TT._uppersnake(name) == "APPLE_BANANA"
45+
@test TT._uppercamel(name) == "AppleBanana"
46+
@test TT._upperflat(name) == "APPLEBANANA"
547
@test TT._snake(name) == "apple_banana"
6-
@test TT._upper(name) == "APPLEBANANA"
48+
@test TT._camel(name) == "appleBanana"
49+
@test TT._flat(name) == "applebanana"
750
end
851

952
names = ["a", "A", "_a", "_A", "a ", "A "]
1053
for name in names
11-
@test TT._camel(name) == "A"
12-
@test TT._snake(name) == "a"
13-
@test TT._upper(name) == "A"
54+
@test TT._uppersnake(TT._clean(name)) == "A"
55+
@test TT._uppercamel(TT._clean(name)) == "A"
56+
@test TT._upperflat(TT._clean(name)) == "A"
57+
@test TT._snake(TT._clean(name)) == "a"
58+
@test TT._camel(TT._clean(name)) == "a"
59+
@test TT._flat(TT._clean(name)) == "a"
1460
end
1561

1662
# special characters
@@ -24,36 +70,55 @@
2470
@test TT._clean(name) == "apple-tree"
2571

2672
# invariance test
73+
names = ["APPLE_TREE", "BANANA_FRUIT", "PEAR_SEED"]
74+
for name in names
75+
@test TT._isuppersnake(name)
76+
@test TT._uppersnake(name) == name
77+
end
78+
2779
names = ["AppleTree", "BananaFruit", "PearSeed"]
2880
for name in names
29-
@test TT._camel(name) == name
81+
@test TT._isuppercamel(name)
82+
@test TT._uppercamel(name) == name
83+
end
84+
85+
names = ["APPLETREE", "BANANAFRUIT", "PEARSEED"]
86+
for name in names
87+
@test TT._isupperflat(name)
88+
@test TT._upperflat(name) == name
3089
end
3190

3291
names = ["apple_tree", "banana_fruit", "pear_seed"]
3392
for name in names
93+
@test TT._issnake(name)
3494
@test TT._snake(name) == name
3595
end
3696

37-
names = ["APPLETREE", "BANANAFRUIT", "PEARSEED"]
97+
names = ["appleTree", "bananaFruit", "pearSeed"]
3898
for name in names
39-
@test TT._upper(name) == name
99+
@test TT._iscamel(name)
100+
@test TT._camel(name) == name
101+
end
102+
103+
names = ["appletree", "bananafruit", "pearseed"]
104+
for name in names
105+
@test TT._isflat(name)
106+
@test TT._flat(name) == name
40107
end
41108

42109
# uniqueness test
43110
names = (Symbol("AppleTree"), Symbol("apple tree"), Symbol("apple_tree"))
44-
cols = ([1, 2, 3], [4, 5, 6], [7, 8, 9])
45-
t = Table(; zip(names, cols)...)
111+
columns = ([1, 2, 3], [4, 5, 6], [7, 8, 9])
112+
t = Table(; zip(names, columns)...)
46113
rt = Tables.rowtable(t)
47-
T = StdNames(:upper)
114+
T = StdNames(:upperflat)
48115
n, c = apply(T, rt)
49-
columns = Tables.columns(n)
50-
columnnames = Tables.columnnames(columns)
51-
@test columnnames == (:APPLETREE, :APPLETREE_, :APPLETREE__)
116+
@test Tables.schema(n).names == (:APPLETREE, :APPLETREE_, :APPLETREE__)
52117

53118
# row table test
54119
names = (:a, Symbol("apple tree"), Symbol("banana tree"))
55-
cols = ([1, 2, 3], [4, 5, 6], [7, 8, 9])
56-
t = Table(; zip(names, cols)...)
120+
columns = ([1, 2, 3], [4, 5, 6], [7, 8, 9])
121+
t = Table(; zip(names, columns)...)
57122
rt = Tables.rowtable(t)
58123
T = StdNames()
59124
n, c = apply(T, rt)
@@ -67,4 +132,7 @@
67132
n1, c1 = apply(T, rt)
68133
n2 = reapply(T, n1, c1)
69134
@test n1 == n2
135+
136+
# throws
137+
@test_throws ArgumentError StdNames(:test)
70138
end

0 commit comments

Comments
 (0)