Skip to content

Commit e9fb144

Browse files
authored
MurmurHash v3 for bucketing (#4)
1 parent 3e27e41 commit e9fb144

File tree

3 files changed

+203
-0
lines changed

3 files changed

+203
-0
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package com.featurevisor.sdk
2+
3+
/**
 * Maps a string key to a stable integer bucket derived from its 32-bit MurmurHash3 value.
 */
object Bucket {
    /** Seed passed explicitly to [MurmurHash3] so this constant, not the hasher's default, decides it. */
    private const val HASH_SEED: UInt = 1u

    /** Number of distinct 32-bit hash values (2^32), kept as a Double because it is only used as a divisor. */
    private const val MAX_HASH_VALUE = 4294967296.0 // 2^32

    // 100% * 1000 to include three decimal places in the same integer value
    private const val MAX_BUCKETED_NUMBER = 100000

    /** Single hasher reused across calls; it only holds the seed passed to its constructor. */
    private val hasher = MurmurHash3(HASH_SEED)

    /**
     * Computes the bucket for [bucketKey].
     *
     * The key's bytes are hashed to an unsigned 32-bit value, which is scaled into
     * the half-open range `[0, MAX_BUCKETED_NUMBER)`.
     *
     * @param bucketKey key to bucket; the same key always produces the same number.
     * @return an integer between 0 and 99999 inclusive.
     */
    fun getBucketedNumber(bucketKey: String): Int {
        val hashValue = hasher.hash32x86(bucketKey.toByteArray())

        // hashValue is at most 2^32 - 1, so the ratio is always strictly below 1.0.
        val ratio = hashValue.toDouble() / MAX_HASH_VALUE

        return kotlin.math.floor(ratio * MAX_BUCKETED_NUMBER).toInt()
    }
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
package com.featurevisor.sdk
2+
3+
/**
4+
* Taken from: https://github.com/goncalossilva/kotlinx-murmurhash
5+
*
6+
* (Copied in so that it works for now; we can switch to consuming it as a package later.)
7+
*
8+
* ---
9+
*
10+
* MIT License
11+
*
12+
* Copyright (c) 2021-2022 Gonçalo Silva
13+
*
14+
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
15+
* associated documentation files (the "Software"), to deal in the Software without restriction,
16+
* including without limitation the rights to use, copy, modify, merge, publish, distribute,
17+
* sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
18+
* furnished to do so, subject to the following conditions:
19+
*
20+
* The above copyright notice and this permission notice shall be included in all copies or
21+
* substantial portions of the Software.
22+
*
23+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
24+
* NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
26+
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28+
*/
29+
/**
 * 32-bit MurmurHash3 (x86 variant) over byte arrays.
 *
 * Only the 32-bit hash is provided here, along with the helpers and constants it needs.
 *
 * @param seed initial hash state; defaults to 1.
 */
class MurmurHash3(private val seed: UInt = 1u) {
    /**
     * Hashes [key] with the seed given at construction.
     *
     * @param key bytes to hash; may be empty.
     * @return the unsigned 32-bit hash of [key].
     */
    fun hash32x86(key: ByteArray): UInt {
        val len = key.size
        // Index of the first byte that does not belong to a complete 4-byte block.
        val tailStart = (len / 4) * 4

        var h = seed

        // Body: consume the input one little-endian 32-bit block at a time.
        for (i in 0 until tailStart step 4) {
            h = h xor key.getLittleEndianUInt(i).mix()
            h = h.rotateLeft(R2_32)
            h = h * M_32 + N_32
        }

        // Tail: the 1-3 leftover bytes are packed little-endian into one word and
        // mixed in without the rotate/multiply-add step used for full blocks.
        val rem = len - tailStart
        if (rem > 0) {
            var k = 0u
            for (offset in rem - 1 downTo 0) {
                k = (k shl 8) or key.getUInt(tailStart + offset)
            }
            h = h xor k.mix()
        }

        // Finalization: fold in the length, then run the final bit mixer.
        h = h xor len.toUInt()

        return h.fmix()
    }

    /** Reads the four bytes starting at [index] as a little-endian unsigned 32-bit value. */
    private fun ByteArray.getLittleEndianUInt(index: Int): UInt {
        return this.getUInt(index) or
            (this.getUInt(index + 1) shl 8) or
            (this.getUInt(index + 2) shl 16) or
            (this.getUInt(index + 3) shl 24)
    }

    /** Scrambles one input word: multiply by C1, rotate left by R1, multiply by C2. */
    private fun UInt.mix(): UInt = (this * C1_32).rotateLeft(R1_32) * C2_32

    /** Final mixing step applied to the hash state: alternating xor-shifts and multiplications. */
    private fun UInt.fmix(): UInt {
        var h = this
        h = h xor (h shr 16)
        h *= 0x85ebca6bu
        h = h xor (h shr 13)
        h *= 0xc2b2ae35u
        h = h xor (h shr 16)
        return h
    }

    /** Returns the byte at [index] as an unsigned value in 0..255 (no sign extension). */
    private fun ByteArray.getUInt(index: Int) = get(index).toUByte().toUInt()

    private companion object {
        private const val C1_32: UInt = 0xcc9e2d51u
        private const val C2_32: UInt = 0x1b873593u

        private const val R1_32: Int = 15
        private const val R2_32: Int = 13

        private const val M_32: UInt = 5u
        private const val N_32: UInt = 0xe6546b64u
    }
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package com.featurevisor.sdk
2+
3+
import kotlin.test.Test
4+
import kotlin.test.assertEquals
5+
6+
/**
 * Regression test that pins the numbers produced by [Bucket.getBucketedNumber]
 * for a fixed set of keys.
 */
class BucketTest {
    @Test
    fun getBucketedNumberReturnsExpectedValues() {
        // Key -> bucket number the SDK is expected to produce for it.
        val expectedResults = mapOf(
            "foo" to 20602,
            "bar" to 89144,
            "123.foo" to 3151,
            "123.bar" to 9710,
            "123.456.foo" to 14432,
            "123.456.bar" to 1982
        )

        expectedResults.forEach { (key, value) ->
            val result = Bucket.getBucketedNumber(key)
            assertEquals(value, result, "Expected: $value for $key, got: $result")
        }
    }
}

0 commit comments

Comments
 (0)