Skip to content

Commit 72f07d1

Browse files
committed
Add strcase package
- `ToPascalCase`, `ToCamelCase`, and `ToSnakeCase` transform any input to the corresponding form. - Support for Unicode runes. - Support for all-uppercase initialisms, as mandated by Go convention. - Expose `IsInitialism`. - Emphasis on reducing allocations for memory efficiency.
1 parent bf9e870 commit 72f07d1

File tree

4 files changed

+431
-0
lines changed

4 files changed

+431
-0
lines changed

strcase/id.go

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package strcase
2+
3+
import (
4+
"math"
5+
"strings"
6+
"unicode"
7+
)
8+
9+
func ToPascalCase(input string) string {
10+
return splitJoin(input, 0, 0)
11+
}
12+
13+
func ToCamelCase(input string) string {
14+
return splitJoin(input, 1, 0)
15+
}
16+
17+
func ToSnakeCase(input string) string {
18+
return splitJoin(input, math.MaxInt64, '_')
19+
}
20+
21+
// allocateBuilder returns an empty strings.Builder whose buffer has been
// pre-grown for converting input.
//
// Without a separator (separator == 0) the builder is grown to len(input)
// bytes. With a separator, roughly 25% extra room is reserved for the
// separators that will be inserted between parts.
func allocateBuilder(input string, separator rune) *strings.Builder {
	capacity := len(input)
	if separator != 0 {
		// Heuristic: reserve about 25% more than the input for separators.
		// An imperfect guess is harmless, it only costs a few extra
		// allocations. Examples (input given without separators):
		//   foo_bar_baz: 9 original chars, 11 final. 9 + 9/4 = 11
		//   foo_id:      5 original chars,  6 final. 5 + 5/4 = 6
		//   a_b_c_d:     4 original chars,  7 final. 4 + 4/4 = 5, one extra allocation.
		capacity += capacity / 4
	}

	builder := new(strings.Builder)
	builder.Grow(capacity)
	return builder
}
37+
38+
func splitJoin(input string, firstUpper int, separator rune) string {
39+
b := allocateBuilder(input, separator)
40+
var buf []rune
41+
var currentPartIndex int
42+
var lastCategory runeCategory
43+
44+
// Flush the buffer as a part
45+
flush := func() {
46+
if len(buf) == 0 {
47+
// Nothing was added since last flush
48+
return
49+
}
50+
if separator != 0 && currentPartIndex > 0 {
51+
b.WriteRune(separator)
52+
}
53+
if currentPartIndex >= firstUpper {
54+
pascalPart(buf)
55+
}
56+
for _, r := range buf {
57+
b.WriteRune(r)
58+
}
59+
currentPartIndex++
60+
lastCategory = unknown
61+
buf = buf[0:0] // Clear buffer, but keep current allocation
62+
}
63+
64+
for _, r := range input {
65+
switch cat := category(r); cat {
66+
case upper:
67+
if lastCategory != upper {
68+
flush()
69+
}
70+
lastCategory = cat
71+
buf = append(buf, unicode.ToLower(r))
72+
case lower, number:
73+
if (lastCategory > number) != (cat > number) {
74+
flush()
75+
}
76+
lastCategory = cat
77+
buf = append(buf, r)
78+
default:
79+
// separator
80+
flush()
81+
}
82+
}
83+
flush()
84+
85+
return b.String()
86+
}
87+
88+
// Convert to uppercase if initialism.
89+
// Convert first rune to uppercase otherwise.
90+
func pascalPart(part []rune) {
91+
if isInitialism(part) {
92+
for ri, r := range part {
93+
part[ri] = unicode.ToUpper(r)
94+
}
95+
} else {
96+
part[0] = unicode.ToUpper(part[0])
97+
}
98+
}
99+
100+
// runeCategory is the classification of a rune used when splitting an
// identifier into parts.
type runeCategory int

// The declaration order is significant: splitJoin tells letters from
// non-letters with the comparison "> number", so both letter categories must
// be declared after number.
const (
	unknown runeCategory = iota // anything that is neither a letter nor a number; acts as a separator
	number
	lower
	upper
)

// category classifies r.
//
// Upper-case and titlecase letters are reported as upper, lower-case letters
// as lower and numeric runes as number. Letters that have no case (for
// example CJK, Hebrew or Arabic letters) are reported as lower so that they
// are kept as part of a word instead of being treated as separators and
// dropped. Every other rune is unknown.
func category(r rune) runeCategory {
	switch {
	case unicode.IsLower(r):
		return lower
	case unicode.IsUpper(r), unicode.IsTitle(r):
		return upper
	case unicode.IsNumber(r):
		return number
	case unicode.IsLetter(r):
		// Remaining letters have no case distinction.
		return lower
	default:
		return unknown
	}
}

strcase/id_test.go

+191
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
package strcase
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
"testing"
7+
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
// splitjoin_l1_p1 38.1 ns/op 16 B/op 1 allocs/op
12+
// IDToCamelCase_l1_p1 88.6 ns/op 48 B/op 3 allocs/op
13+
// IDToSnakeCase_l1_p1 87.7 ns/op 48 B/op 3 allocs/op
14+
//
15+
// splitjoin_l1_p10 253 ns/op 176 B/op 2 allocs/op
16+
// IDToCamelCase_l1_p10 421 ns/op 72 B/op 3 allocs/op
17+
// IDToSnakeCase_l1_p10 269 ns/op 72 B/op 3 allocs/op
18+
//
19+
// splitjoin_l1_p100 2137 ns/op 1904 B/op 2 allocs/op
20+
// IDToCamelCase_l1_p100 3503 ns/op 248 B/op 3 allocs/op
21+
// IDToSnakeCase_l1_p100 1879 ns/op 296 B/op 3 allocs/op
22+
//
23+
// splitjoin_l10_p1 38.0 ns/op 16 B/op 1 allocs/op
24+
// IDToCamelCase_l10_p1 247 ns/op 168 B/op 6 allocs/op
25+
// IDToSnakeCase_l10_p1 248 ns/op 168 B/op 6 allocs/op
26+
//
27+
// splitjoin_l10_p10 278 ns/op 272 B/op 2 allocs/op
28+
// IDToCamelCase_l10_p10 1140 ns/op 264 B/op 6 allocs/op
29+
// IDToSnakeCase_l10_p10 979 ns/op 296 B/op 6 allocs/op
30+
//
31+
// splitjoin_l10_p100 2267 ns/op 2816 B/op 2 allocs/op
32+
// IDToCamelCase_l10_p100 9538 ns/op 1304 B/op 6 allocs/op
33+
// IDToSnakeCase_l10_p100 8147 ns/op 1560 B/op 6 allocs/op
34+
//
35+
// splitjoin_l100_p1 41.1 ns/op 16 B/op 1 allocs/op
36+
// IDToCamelCase_l100_p1 1114 ns/op 1160 B/op 9 allocs/op
37+
// IDToSnakeCase_l100_p1 1104 ns/op 1176 B/op 9 allocs/op
38+
//
39+
// splitjoin_l100_p10 446 ns/op 1184 B/op 2 allocs/op
40+
// IDToCamelCase_l100_p10 7692 ns/op 2072 B/op 9 allocs/op
41+
// IDToSnakeCase_l100_p10 7589 ns/op 2328 B/op 9 allocs/op
42+
//
43+
// splitjoin_l100_p100 3877 ns/op 12032 B/op 2 allocs/op
44+
// IDToCamelCase_l100_p100 72671 ns/op 11288 B/op 9 allocs/op
45+
// IDToSnakeCase_l100_p100 71673 ns/op 14616 B/op 9 allocs/op
46+
func Benchmark_splitJoin(b *testing.B) {
47+
for _, length := range []int{1, 10, 100} {
48+
part := strings.Repeat("a", length)
49+
50+
for _, count := range []int{1, 10, 100} {
51+
input := part + strings.Repeat("_"+part, count-1)
52+
53+
// Baseline, split and join all parts
54+
b.Run(fmt.Sprintf("splitjoin_l%d_p%d", length, count), func(b *testing.B) {
55+
for i := 0; i < b.N; i++ {
56+
strings.Join(strings.Split(input, "_"), "")
57+
}
58+
})
59+
60+
b.Run(fmt.Sprintf("IDToCamelCase_l%d_p%d", length, count), func(b *testing.B) {
61+
for i := 0; i < b.N; i++ {
62+
ToCamelCase(input)
63+
}
64+
})
65+
66+
b.Run(fmt.Sprintf("IDToSnakeCase_l%d_p%d", length, count), func(b *testing.B) {
67+
for i := 0; i < b.N; i++ {
68+
ToSnakeCase(input)
69+
}
70+
})
71+
}
72+
}
73+
}
74+
75+
// lower 5.03 ns/op 0 B/op 0 allocs/op
76+
// upper 5.81 ns/op 0 B/op 0 allocs/op
77+
// number 6.59 ns/op 0 B/op 0 allocs/op
78+
// symbol 6.58 ns/op 0 B/op 0 allocs/op
79+
// 16_bits 153 ns/op 0 B/op 0 allocs/op
80+
// 32_bits 160 ns/op 0 B/op 0 allocs/op
81+
func Benchmark_category(b *testing.B) {
82+
tests := map[string][]rune{
83+
"lower": {'a', 'b'},
84+
"upper": {'A', 'B'},
85+
"number": {'0', '1'},
86+
"symbol": {'_', ' '},
87+
"16 bits": {'™', '∞', '•', 'Ω'},
88+
"32 bits": {'𠁂', '𠁄', '𠁔', '𠁑'},
89+
}
90+
for name, runes := range tests {
91+
b.Run(name, func(b *testing.B) {
92+
for i := 0; i < b.N; i++ {
93+
for _, r := range runes {
94+
category(r)
95+
}
96+
}
97+
})
98+
}
99+
}
100+
101+
func Test_splitJoin(t *testing.T) {
102+
tests := []struct {
103+
input string
104+
camel string
105+
pascal string
106+
snake string
107+
}{
108+
{
109+
// everything empty
110+
},
111+
{
112+
input: "a",
113+
pascal: "A",
114+
camel: "a",
115+
snake: "a",
116+
},
117+
{
118+
input: "A",
119+
pascal: "A",
120+
camel: "a",
121+
snake: "a",
122+
},
123+
{
124+
input: "a_a",
125+
pascal: "AA",
126+
camel: "aA",
127+
snake: "a_a",
128+
},
129+
{
130+
input: "__a___a_",
131+
pascal: "AA",
132+
camel: "aA",
133+
snake: "a_a",
134+
},
135+
{
136+
input: "aa_bbb",
137+
pascal: "AaBbb",
138+
camel: "aaBbb",
139+
snake: "aa_bbb",
140+
},
141+
{
142+
input: "aa_id",
143+
pascal: "AaID",
144+
camel: "aaID",
145+
snake: "aa_id",
146+
},
147+
{
148+
input: "fooBar",
149+
pascal: "FooBar",
150+
camel: "fooBar",
151+
snake: "foo_bar",
152+
},
153+
{
154+
input: "FooBAR",
155+
pascal: "FooBar",
156+
camel: "fooBar",
157+
snake: "foo_bar",
158+
},
159+
{
160+
input: "fooUrl",
161+
pascal: "FooURL",
162+
camel: "fooURL",
163+
snake: "foo_url",
164+
},
165+
{
166+
input: "fooURL",
167+
pascal: "FooURL",
168+
camel: "fooURL",
169+
snake: "foo_url",
170+
},
171+
{
172+
input: "url10",
173+
pascal: "URL10",
174+
camel: "url10",
175+
snake: "url_10",
176+
},
177+
{
178+
input: "url_id",
179+
pascal: "URLID",
180+
camel: "urlID",
181+
snake: "url_id",
182+
},
183+
}
184+
for _, tt := range tests {
185+
t.Run(tt.input, func(t *testing.T) {
186+
require.Equal(t, tt.pascal, ToPascalCase(tt.input))
187+
require.Equal(t, tt.camel, ToCamelCase(tt.input))
188+
require.Equal(t, tt.snake, ToSnakeCase(tt.input))
189+
})
190+
}
191+
}

strcase/initialism.go

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package strcase
2+
3+
import "sort"
4+
5+
// commonInitialisms holds the known initialisms, in lower case, as rune
// slices. init fills it in sorted order so that isInitialism can use a
// binary search.
var commonInitialisms [][]rune

func init() {
	// To follow Go's convention of writing acronyms in all caps, hard-code a
	// few of the common ones.
	// Taken from https://github.com/golang/lint/blob/83fdc39ff7b56453e3793356bcff3070b9b96445/lint.go#L770-L809
	names := []string{
		"acl", "api", "ascii",
		"cpu", "css",
		"dns",
		"eof",
		"guid",
		"html", "http", "https",
		"id", "ip",
		"json",
		"lhs",
		"qps",
		"ram", "rhs", "rpc",
		"sla", "smtp", "sql", "ssh",
		"tcp", "tls", "ttl",
		"udp", "ui", "uid", "uuid", "uri", "url", "utf8",
		"vm",
		"xml", "xmpp", "xsrf", "xss",
	}
	// The literal above is grouped for readability only; sorting establishes
	// the order the lookup relies on.
	sort.Strings(names)

	commonInitialisms = make([][]rune, len(names))
	for idx, name := range names {
		commonInitialisms[idx] = []rune(name)
	}
}
56+
57+
func IsInitialism(part string) bool {
58+
return isInitialism([]rune(part))
59+
}
60+
61+
func isInitialism(part []rune) bool {
62+
// Adapted from sort.Search to benefit from the fact that we only deal with rune slices
63+
i := 0
64+
j := len(commonInitialisms)
65+
out:
66+
for i < j {
67+
h := int(uint(i+j) >> 1) // avoid overflow when computing h
68+
// i ≤ h < j
69+
70+
for k, r := range commonInitialisms[h] {
71+
switch {
72+
case len(part) < k+1 || part[k] < r:
73+
j = h
74+
continue out
75+
case part[k] > r:
76+
i = h + 1
77+
continue out
78+
}
79+
}
80+
return true
81+
}
82+
return false
83+
}

0 commit comments

Comments
 (0)