Skip to content

Commit 3fed1cf

Browse files
authored
fix edge case with upper case (#48)
1 parent e9aa093 commit 3fed1cf

File tree

6 files changed

+320
-7
lines changed

6 files changed

+320
-7
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,5 @@ test-report.json
2323
junit*.xml
2424
*cov
2525

26-
*.test
26+
*.test
27+
kubeutils/testdata/fuzz/

changelog/v0.7.1/sanitize.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
changelog:
2+
- type: NON_USER_FACING
3+
description: Fix and make sanitize faster

kubeutils/strings.go

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package kubeutils
2+
3+
import (
4+
"bytes"
5+
"crypto/md5"
6+
"encoding/hex"
7+
"hash"
8+
"sync"
9+
)
10+
11+
// Sizes and markers shared by the name-shortening helpers in this file.
const (
	// magicNumber is an extremely important number for shortened names: it is
	// the spot where the original name's prefix ends and the hash begins. Many
	// of the string building and parsing operations below depend on it.
	magicNumber = 31

	// totalSize is the length of a shortened name: a magicNumber-byte prefix,
	// one separator, and magicNumber bytes of hash.
	totalSize = magicNumber*2 + 1

	// encodedMd5 is the length of an MD5 digest once hex-encoded.
	encodedMd5 = 2 * md5.Size

	// separator is placed between the prefix and the hash.
	separator = '-'
)
20+
// We can short-circuit the comparison if the first 31 characters are not equal.
21+
// Otherweise we need to compare the shortened version of the strings.
22+
func ShortenedEquals(shortened, standard string) bool {
23+
24+
// If the standard string is less than 63 characters, we can just compare the strings.
25+
if len(standard) <= totalSize {
26+
return shortened == standard
27+
}
28+
29+
// If the shortened string is less than or equal to 32 characters, we can just compare the strings.
30+
// Also if it's less than 32 the below checks may crash.
31+
if len(shortened) <= magicNumber+1 {
32+
return shortened == standard
33+
}
34+
35+
// Check the first 31 characters, if they're not equal we can exit early.
36+
if shortened[:magicNumber] != standard[:magicNumber] {
37+
return false
38+
}
39+
40+
// If 32nd character of the shortened string is not a '-' or the 32nd character of the standard string is not a '-'
41+
// we can exit early.
42+
// In theory this shouldn't be necessary, but this label can technically be modified by the user,
43+
// so it's safer to double check.
44+
if shortened[magicNumber] != separator {
45+
return false
46+
}
47+
48+
// Check the last 32 characters of the shortened string against the hash of the standard string.
49+
hashed := hashName(standard)
50+
return shortened[magicNumber+1:] == string(hashed[:magicNumber])
51+
}
52+
53+
// shortenName is extremely inefficient with its allocation of slices for hashing.
54+
// We can re-use the arrays to avoid this allocation. However, this code may be called
55+
// from multiple go-routines simultaneously so we must house these objects in sync.Pools
56+
57+
// Pool of MD5 hashers to avoid allocation.
58+
var md5HasherPool = sync.Pool{
59+
New: func() interface{} {
60+
return md5.New()
61+
},
62+
}
63+
64+
// Pool of string builders to avoid allocation.
65+
var byteBufferPool = sync.Pool{
66+
New: func() interface{} {
67+
b := &bytes.Buffer{}
68+
b.Grow(totalSize)
69+
return b
70+
},
71+
}
72+
73+
// hashName returns a hash of the input string in base 16 format
74+
// This function is optimized for speed and memory usage.
75+
// It should aboid nearly all allocations by re-using the same buffers whenever possible.
76+
func hashName(name string) [encodedMd5]byte {
77+
hasher := md5HasherPool.Get().(hash.Hash)
78+
hasher.Reset()
79+
hasher.Write([]byte(name))
80+
hashArray := [md5.Size]byte{}
81+
hash := hasher.Sum(hashArray[:0])
82+
// Cannot use hex.EncodedLen() here because it's a func, but it just returns 2 * len(src)
83+
hashBufferArray := [encodedMd5]byte{}
84+
hex.Encode(hashBufferArray[:], hash)
85+
md5HasherPool.Put(hasher)
86+
return hashBufferArray
87+
}
88+
89+
// shortenName returns a shortened version of the input string.
90+
// It is based on the `kubeutils.SanitizeNameV2` function, but it
91+
// just does the shortening part.
92+
func ShortenName(name string) string {
93+
if len(name) > totalSize {
94+
hash := hashName(name)
95+
builder := byteBufferPool.Get().(*bytes.Buffer)
96+
builder.Reset()
97+
builder.Grow(totalSize)
98+
builder.WriteString(name[:magicNumber])
99+
builder.WriteRune(separator)
100+
builder.Write(hash[:magicNumber])
101+
name = builder.String()
102+
byteBufferPool.Put(builder)
103+
}
104+
return name
105+
}

kubeutils/strings_test.go

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
package kubeutils
2+
3+
import (
4+
"crypto/md5"
5+
"fmt"
6+
"testing"
7+
)
8+
9+
// shortenName is a copy of the old version of the shortening logic.
// It is kept so the tests can check the new code for parity with it.
func shortenName(name string) string {
	if len(name) <= 63 {
		return name
	}
	sum := md5.Sum([]byte(name))
	// Prefix, dash and the full hex digest, cut back to 63 bytes.
	return fmt.Sprintf("%s-%x", name[:31], sum)[:63]
}
19+
20+
func BenchmarkShortenEqual(b *testing.B) {
21+
b.Run("shorten name old", func(b *testing.B) {
22+
for i := 0; i < b.N; i++ {
23+
shortenName("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")
24+
}
25+
})
26+
27+
b.Run("shortened equals--worst case", func(b *testing.B) {
28+
shortened := "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d"
29+
standard := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
30+
for i := 0; i < b.N; i++ {
31+
ShortenedEquals(shortened, standard)
32+
}
33+
})
34+
35+
b.Run("shortened equals--different prefix", func(b *testing.B) {
36+
shortened := "jfdklanfkljasfhjhlxacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d"
37+
standard := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
38+
for i := 0; i < b.N; i++ {
39+
ShortenedEquals(shortened, standard)
40+
}
41+
})
42+
43+
b.Run("shortened equals--less than 63 characters", func(b *testing.B) {
44+
shortened := "hello"
45+
standard := "hello"
46+
for i := 0; i < b.N; i++ {
47+
ShortenedEquals(shortened, standard)
48+
}
49+
})
50+
}
51+
52+
func FuzzShortNameParity(f *testing.F) {
53+
// Random string < 63
54+
f.Add("hello")
55+
// Random string > 63
56+
f.Add("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")
57+
f.Fuzz(func(t *testing.T, a string) {
58+
oldName := shortenName(a)
59+
newName := ShortenName(a)
60+
if oldName != newName {
61+
t.Fatalf("shortenName(%s) = %s, ShortenName(%s) = %s", a, oldName, a, newName)
62+
}
63+
64+
equal := ShortenedEquals(newName, a)
65+
if !equal {
66+
t.Fatalf("ShortenedEquals(%s, %s) = %t", newName, a, equal)
67+
}
68+
})
69+
}
70+
71+
func TestShortenName(t *testing.T) {
72+
t.Run("shorten name < 63", func(t *testing.T) {
73+
name := "hello"
74+
shortened := ShortenName(name)
75+
if shortened != name {
76+
t.Fatalf("ShortenName(%s) = %s", name, shortened)
77+
}
78+
})
79+
80+
t.Run("shorten name > 63", func(t *testing.T) {
81+
name := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
82+
shortened := ShortenName(name)
83+
if len(shortened) != 63 {
84+
t.Fatalf("ShortenName(%s) = %s", name, shortened)
85+
}
86+
87+
if shortened != "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d" {
88+
t.Fatalf("ShortenName(%s) = %s", name, shortened)
89+
}
90+
})
91+
}
92+
93+
func TestShortenedEquals(t *testing.T) {
94+
95+
testCases := []struct {
96+
name string
97+
shortened string
98+
equal bool
99+
}{
100+
{
101+
name: "hello",
102+
shortened: "hello",
103+
equal: true,
104+
},
105+
{
106+
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
107+
shortened: "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d",
108+
equal: true,
109+
},
110+
{
111+
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
112+
shortened: "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848",
113+
equal: false,
114+
},
115+
{
116+
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
117+
shortened: "jfdklanfkljasfhjhldacaslkhdfkjsf1e0028d0fbfe9afbd1a8bb9b53848ds",
118+
equal: false,
119+
},
120+
{
121+
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
122+
shortened: "jfdklanfkjasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848ds",
123+
equal: false,
124+
},
125+
}
126+
127+
for _, tc := range testCases {
128+
t.Run(tc.name, func(t *testing.T) {
129+
equal := ShortenedEquals(tc.shortened, tc.name)
130+
if equal != tc.equal {
131+
t.Fatalf("ShortenedEquals(%s, %s) = %t", tc.shortened, tc.name, equal)
132+
}
133+
})
134+
135+
}
136+
137+
}

kubeutils/util.go

+3-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"crypto/md5"
55
"fmt"
66
"strings"
7-
"unicode"
87
)
98

109
// use SanitizeNameV2
@@ -36,12 +35,10 @@ func SanitizeNameV2(name string) string {
3635
case '[', ']', '\n', '"', '\'':
3736
return -1
3837
}
39-
return unicode.ToLower(r)
38+
return r
4039
}, name)
4140
if len(name) > 63 {
42-
hash := md5.Sum([]byte(name))
43-
name = fmt.Sprintf("%s-%x", name[:31], hash)
44-
name = name[:63]
41+
name = ShortenName(name)
4542
}
46-
return name
43+
return strings.ToLower(name)
4744
}

kubeutils/util_test.go

+70
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,48 @@
11
package kubeutils
22

33
import (
4+
"crypto/md5"
5+
"fmt"
6+
"strings"
7+
"testing"
48
"time"
9+
"unicode/utf8"
510

611
. "github.com/onsi/ginkgo/v2"
712
. "github.com/onsi/gomega"
813
"github.com/onsi/gomega/gmeasure"
914
)
1015

16+
// sanitizeNameV2Old is the reference implementation that the fuzz test
// compares SanitizeNameV2 against. It replaces or removes a fixed set of
// characters, shortens results longer than 63 bytes to a 31-byte prefix plus
// "-" plus a truncated hex MD5, and lower-cases the outcome.
func sanitizeNameV2Old(name string) string {
	// Applied in order; an empty replacement deletes the character.
	substitutions := [...][2]string{
		{"*", "-"},
		{"/", "-"},
		{".", "-"},
		{"[", ""},
		{"]", ""},
		{":", "-"},
		{"_", "-"},
		{" ", "-"},
		{"\n", ""},
		{"\"", ""},
		{"'", ""},
	}
	for _, sub := range substitutions {
		name = strings.Replace(name, sub[0], sub[1], -1)
	}

	if len(name) > 63 {
		sum := md5.Sum([]byte(name))
		name = fmt.Sprintf("%s-%x", name[:31], sum)[:63]
	}

	name = strings.Replace(name, ".", "-", -1)
	return strings.ToLower(name)
}
38+
1139
var _ = Describe("sanitize name", func() {
1240

1341
DescribeTable("sanitize short names", func(in, out string) {
1442
Expect(SanitizeNameV2(in)).To(Equal(out))
1543
},
1644
Entry("basic a", "abc", "abc"),
45+
Entry("basic A", "Abc", "abc"),
1746
Entry("basic b", "abc123", "abc123"),
1847
Entry("subX *", "bb*", "bb-"),
1948
Entry("sub *", "bb*b", "bb-b"),
@@ -63,3 +92,44 @@ var _ = Describe("sanitize name", func() {
6392
}, gmeasure.SamplingConfig{N: 200, Duration: time.Minute})
6493
})
6594
})
95+
96+
func FuzzSanitizeNameParity(f *testing.F) {
97+
// Random string < 63
98+
f.Add("VirtualGateway-istio-ingressgateway-bookinfo-cluster-1-istio-ingressgateway-istio-gateway-ns-cluster-1-gloo-mesh-cluster-1-HTTPS.443-anything")
99+
f.Add("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
100+
f.Add("abc")
101+
f.Add("abc123")
102+
f.Add("bb*")
103+
f.Add("bb*b")
104+
f.Add("bb/")
105+
f.Add("bb/b")
106+
f.Add("bb.")
107+
f.Add("bb.b")
108+
f.Add("bb[")
109+
f.Add("bb[b")
110+
f.Add("bb]")
111+
f.Add("bb]b")
112+
f.Add("bb:")
113+
f.Add("bb:b")
114+
f.Add("bb ")
115+
f.Add("bb b")
116+
f.Add("bb\n")
117+
f.Add("bb\nb")
118+
f.Add("aa\"")
119+
f.Add("bb\"b")
120+
f.Add("aa'")
121+
f.Add("bb'b")
122+
f.Add("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")
123+
124+
f.Fuzz(func(t *testing.T, a string) {
125+
// we can only get a valid kube name that's alphanumeric
126+
if !utf8.Valid([]byte(a)) {
127+
t.Skip("Skipping non-valid utf8 input")
128+
}
129+
oldName := SanitizeNameV2(a)
130+
newName := sanitizeNameV2Old(a)
131+
if oldName != newName {
132+
t.Fatalf("SanitizeNameV2(%s) = %s, SanitizeNameV2Old(%s) = %s", a, oldName, a, newName)
133+
}
134+
})
135+
}

0 commit comments

Comments
 (0)