-
Notifications
You must be signed in to change notification settings - Fork 533
Expand file tree
/
Copy pathDistinctHash.cs
More file actions
322 lines (278 loc) · 15.5 KB
/
DistinctHash.cs
File metadata and controls
322 lines (278 loc) · 15.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
// ------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// ------------------------------------------------------------
namespace Microsoft.Azure.Cosmos.Query.Core.Pipeline.Distinct
{
using System;
using System.Collections.Generic;
using Microsoft.Azure.Cosmos.Core.Utf8;
using Microsoft.Azure.Cosmos.CosmosElements;
using Microsoft.Azure.Cosmos.CosmosElements.Numbers;
internal static class DistinctHash
{
/// <summary>
/// Seed used when hashing a top-level element through <see cref="GetHash(CosmosElement)"/>.
/// The visitors compare the incoming seed against this value to decide whether a
/// precomputed RootCache entry can be used instead of hashing the type seed again.
/// </summary>
private static readonly UInt128 RootHashSeed = UInt128.Create(0xbfc2359eafc0e2b7, 0x8846e00284c4cf1f);
/// <summary>
/// Computes the 128-bit hash of an element, starting from <see cref="RootHashSeed"/>.
/// </summary>
/// <param name="cosmosElement">The element to hash.</param>
/// <returns>The hash produced by visiting the element with the root seed.</returns>
public static UInt128 GetHash(CosmosElement cosmosElement) => GetHash(cosmosElement, RootHashSeed);
/// <summary>
/// Computes the hash of an element using the supplied seed.
/// </summary>
/// <param name="cosmosElement">The element to hash.</param>
/// <param name="seed">The seed handed to the element visitor.</param>
/// <returns>The value returned by the element hasher for this element and seed.</returns>
private static UInt128 GetHash(CosmosElement cosmosElement, UInt128 seed)
{
    CosmosElementHasher hasher = CosmosElementHasher.Singleton;
    return cosmosElement.Accept(hasher, seed);
}
private sealed class CosmosElementHasher : ICosmosElementVisitor<UInt128, UInt128>
{
/// <summary>
/// The single shared instance of the element hasher; the constructor is private, so this is the only way to obtain one.
/// </summary>
public static readonly CosmosElementHasher Singleton = new CosmosElementHasher();
/// <summary>
/// Per-type constants that the Visit methods hash together with the incoming seed,
/// so that each element type (including empty arrays, objects, strings and binaries)
/// starts from its own hash value.
/// </summary>
private static class HashSeeds
{
public static readonly UInt128 Null = UInt128.Create(0x1380f68bb3b0cfe4, 0x156c918bf564ee48);
public static readonly UInt128 False = UInt128.Create(0xc1be517fe893b40c, 0xe9fc8a4c531cd0dd);
public static readonly UInt128 True = UInt128.Create(0xf86d4abf9a412e74, 0x788488365c8a985d);
public static readonly UInt128 String = UInt128.Create(0x61f53f0a44204cfb, 0x09481be8ef4b56dd);
public static readonly UInt128 Array = UInt128.Create(0xfa573b014c4dc18e, 0xa014512c858eb115);
public static readonly UInt128 Object = UInt128.Create(0x77b285ac511aef30, 0x3dcf187245822449);
// Base value to which an array item's index is added to form the seed for hashing that item.
public static readonly UInt128 ArrayIndex = UInt128.Create(0xfe057204216db999, 0x5b1cc3178bd9c593);
// Used as the seed when hashing the String seed to produce the seed for property names.
public static readonly UInt128 PropertyName = UInt128.Create(0xc915dde058492a8a, 0x7c8be2eba72e4634);
public static readonly UInt128 Binary = UInt128.Create(0x54841d59fe1ea46c, 0xd4edb0ba5c59766b);
public static readonly UInt128 Guid = UInt128.Create(0x53b5b8939b790f4b, 0x7cc5e09441fd6cb1);
}
/// <summary>
/// Precomputed results of hashing each type seed with <see cref="RootHashSeed"/>.
/// The Visit methods return or start from these values when they are called with the root seed,
/// which avoids recomputing the same hash for every top-level element.
/// </summary>
private static class RootCache
{
public static readonly UInt128 Null = MurmurHash3.Hash128(HashSeeds.Null, RootHashSeed);
public static readonly UInt128 False = MurmurHash3.Hash128(HashSeeds.False, RootHashSeed);
public static readonly UInt128 True = MurmurHash3.Hash128(HashSeeds.True, RootHashSeed);
public static readonly UInt128 String = MurmurHash3.Hash128(HashSeeds.String, RootHashSeed);
public static readonly UInt128 Array = MurmurHash3.Hash128(HashSeeds.Array, RootHashSeed);
public static readonly UInt128 Object = MurmurHash3.Hash128(HashSeeds.Object, RootHashSeed);
public static readonly UInt128 Binary = MurmurHash3.Hash128(HashSeeds.Binary, RootHashSeed);
public static readonly UInt128 Guid = MurmurHash3.Hash128(HashSeeds.Guid, RootHashSeed);
}
/// <summary>
/// Private so that no instance other than <see cref="Singleton"/> can be created.
/// </summary>
private CosmosElementHasher()
{
}
/// <summary>
/// Hashes an array by folding the hash of each item, in order, into a running hash.
/// </summary>
/// <param name="cosmosArray">The array to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the array; items that are <see cref="CosmosUndefined"/> do not contribute.</returns>
public UInt128 Visit(CosmosArray cosmosArray, UInt128 seed)
{
    // Begin from an array-specific hash so that an empty array still has its own hash value.
    UInt128 runningHash;
    if (seed == RootHashSeed)
    {
        runningHash = RootCache.Array;
    }
    else
    {
        runningHash = MurmurHash3.Hash128(HashSeeds.Array, seed);
    }

    for (int i = 0; i < cosmosArray.Count; i++)
    {
        CosmosElement element = cosmosArray[i];
        if (element is CosmosUndefined)
        {
            // Undefined items are ignored while hashing.
            continue;
        }

        // Item order matters for equality. Chaining the running hash as the seed already
        // distinguishes [true, false, true] from [true, true, false], but the index is mixed
        // into the item seed as well in case that property does not hold in the future.
        UInt128 elementSeed = HashSeeds.ArrayIndex + i;
        UInt128 elementHash = element.Accept(this, elementSeed);
        runningHash = MurmurHash3.Hash128(elementHash, runningHash);
    }

    return runningHash;
}
/// <summary>
/// Hashes a binary value: the binary type seed is hashed first, then the bytes are hashed on top of it.
/// </summary>
/// <param name="cosmosBinary">The binary element to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the binary element.</returns>
public UInt128 Visit(CosmosBinary cosmosBinary, UInt128 seed)
{
    // Hash the binary type seed first to differentiate between an empty binary and no binary.
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Binary;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Binary, seed);
    }

    return MurmurHash3.Hash128(cosmosBinary.Value.Span, typeHash);
}
/// <summary>
/// Hashes a boolean by hashing the True or False seed with the supplied seed.
/// </summary>
/// <param name="cosmosBoolean">The boolean element to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The cached root hash when called with the root seed; otherwise a freshly computed hash.</returns>
public UInt128 Visit(CosmosBoolean cosmosBoolean, UInt128 seed)
{
    bool flag = cosmosBoolean.Value;
    if (seed == RootHashSeed)
    {
        return flag ? RootCache.True : RootCache.False;
    }

    UInt128 valueSeed = flag ? HashSeeds.True : HashSeeds.False;
    return MurmurHash3.Hash128(valueSeed, seed);
}
/// <summary>
/// Hashes a GUID: the GUID type seed is hashed first, then the GUID's byte array is hashed on top of it.
/// </summary>
/// <param name="cosmosGuid">The GUID element to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the GUID element.</returns>
public UInt128 Visit(CosmosGuid cosmosGuid, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Guid;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Guid, seed);
    }

    byte[] guidBytes = cosmosGuid.Value.ToByteArray();
    return MurmurHash3.Hash128(guidBytes, typeHash);
}
/// <summary>
/// Hashes a null element by hashing the Null seed with the supplied seed.
/// </summary>
/// <param name="cosmosNull">The null element (its content is not read).</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The cached root hash when called with the root seed; otherwise a freshly computed hash.</returns>
public UInt128 Visit(CosmosNull cosmosNull, UInt128 seed)
{
    return seed == RootHashSeed
        ? RootCache.Null
        : MurmurHash3.Hash128(HashSeeds.Null, seed);
}
/// <summary>
/// Undefined contributes nothing to a hash, so the seed is handed back unchanged.
/// </summary>
/// <param name="cosmosUndefined">The undefined element (not read).</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The <paramref name="seed"/> itself.</returns>
public UInt128 Visit(CosmosUndefined cosmosUndefined, UInt128 seed) => seed;
/// <summary>
/// Hashes a number by delegating to the number hasher, which handles each concrete number type.
/// </summary>
/// <param name="cosmosNumber">The number element to hash.</param>
/// <param name="seed">The seed supplied by the caller; passed through unchanged.</param>
/// <returns>The hash computed by <see cref="CosmosNumberHasher"/>.</returns>
public UInt128 Visit(CosmosNumber cosmosNumber, UInt128 seed)
{
    CosmosNumberHasher numberHasher = CosmosNumberHasher.Singleton;
    return cosmosNumber.Accept(numberHasher, seed);
}
/// <summary>
/// Seed used to hash every property name: the String seed hashed with the PropertyName seed.
/// The value does not depend on the object being hashed, so it is computed once here
/// instead of once per property.
/// </summary>
private static readonly UInt128 PropertyNameSeed = MurmurHash3.Hash128(HashSeeds.String, HashSeeds.PropertyName);

/// <summary>
/// Hashes an object in a way that does not depend on the order of its properties.
/// </summary>
/// <param name="cosmosObject">The object to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>
/// The hash of the object. Properties whose value is <see cref="CosmosUndefined"/> do not contribute,
/// so an object containing only such properties hashes like an empty object.
/// </returns>
public UInt128 Visit(CosmosObject cosmosObject, UInt128 seed)
{
    // Start the object with a distinct hash, so that an empty object still has its own hash value.
    UInt128 hash = seed == RootHashSeed ? RootCache.Object : MurmurHash3.Hash128(HashSeeds.Object, seed);

    //// The property hashes are xor'ed into a separate intermediate hash, which is only
    //// mixed into the final hash through MurmurHash3 at the end. Plain xor of un-seeded
    //// value hashes would make the following two documents collide:
    ////{
    ////    "pet":{
    ////        "name":"alice",
    ////        "age":5
    ////    },
    ////    "pet2":{
    ////        "name":"alice",
    ////        "age":5
    ////    }
    ////}
    ////
    ////{
    ////    "pet":{
    ////        "name":"bob",
    ////        "age":5
    ////    },
    ////    "pet2":{
    ////        "name":"bob",
    ////        "age":5
    ////    }
    ////}
    //// because they only differ on the name, but it gets repeated, meaning that
    //// hash({"name":"bob", "age":5}) ^ hash({"name":"bob", "age":5}) is the same as
    //// hash({"name":"alice", "age":5}) ^ hash({"name":"alice", "age":5})
    UInt128 intermediateHash = 0;

    // Property order should not result in a different hash.
    // This is consistent with equality comparison.
    foreach (KeyValuePair<string, CosmosElement> kvp in cosmosObject)
    {
        if (kvp.Value is not CosmosUndefined)
        {
            // The property value is hashed with the hash of its name as the seed.
            UInt128 nameHash = MurmurHash3.Hash128(kvp.Key, PropertyNameSeed);
            UInt128 propertyHash = kvp.Value.Accept(this, nameHash);

            //// xor is symmetric, meaning that a ^ b = b ^ a,
            //// so the property hashes can be added to the intermediate hash
            //// in any order and give the same result, which upholds our definition of equality.
            //// Note that we don't have to worry about a ^ a = 0 = b ^ b for duplicate property values,
            //// since the hashes of property values are seeded with the hashes of property names,
            //// which are unique within an object.
            intermediateHash ^= propertyHash;
        }
    }

    // Only bring in the intermediate hash if at least one property contributed a non-zero value;
    // an empty object keeps the plain object hash computed above.
    if (intermediateHash > 0)
    {
        hash = MurmurHash3.Hash128(intermediateHash, hash);
    }

    return hash;
}
/// <summary>
/// Hashes a string: the string type seed is hashed first, then the string content is hashed on top of it.
/// </summary>
/// <param name="cosmosString">The string element to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the string element.</returns>
public UInt128 Visit(CosmosString cosmosString, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.String;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.String, seed);
    }

    UtfAnyString text = cosmosString.Value;
    if (text.IsUtf8)
    {
        // Hash the UTF-8 bytes directly when the value is already held as UTF-8.
        return MurmurHash3.Hash128(text.ToUtf8String().Span.Span, typeHash);
    }

    return MurmurHash3.Hash128(text.ToString(), typeHash);
}
}
private sealed class CosmosNumberHasher : ICosmosNumberVisitor<UInt128, UInt128>
{
/// <summary>
/// The single shared instance of the number hasher; the constructor is private, so this is the only way to obtain one.
/// </summary>
public static readonly CosmosNumberHasher Singleton = new CosmosNumberHasher();
/// <summary>
/// Per-number-type constants that the Visit methods hash together with the incoming seed
/// before hashing the numeric value itself.
/// </summary>
public static class HashSeeds
{
public static readonly UInt128 Number64 = UInt128.Create(0x2400e8b894ce9c2a, 0x790be1eabd7b9481);
public static readonly UInt128 Float32 = UInt128.Create(0x881c51c28fb61016, 0x1decd039cd24bd4b);
public static readonly UInt128 Float64 = UInt128.Create(0x62fb48cc659963a0, 0xe9e690779309c403);
public static readonly UInt128 Int8 = UInt128.Create(0x0007978411626daa, 0x89933677a85444b7);
public static readonly UInt128 Int16 = UInt128.Create(0xe7a19001d3211c09, 0x33e0ba9fb8bc7940);
public static readonly UInt128 Int32 = UInt128.Create(0x0320dc908e0d3e71, 0xf575de218f09ffa5);
public static readonly UInt128 Int64 = UInt128.Create(0xed93baf7fdc76638, 0x0d5733c37e079869);
public static readonly UInt128 UInt32 = UInt128.Create(0x78c441a2d2e9bb6e, 0xac88cb880ccda71d);
}
/// <summary>
/// Precomputed results of hashing each number type seed with <see cref="RootHashSeed"/>.
/// The Visit methods start from these values when they are called with the root seed.
/// </summary>
public static class RootCache
{
public static readonly UInt128 Number64 = MurmurHash3.Hash128(HashSeeds.Number64, RootHashSeed);
public static readonly UInt128 Float32 = MurmurHash3.Hash128(HashSeeds.Float32, RootHashSeed);
public static readonly UInt128 Float64 = MurmurHash3.Hash128(HashSeeds.Float64, RootHashSeed);
public static readonly UInt128 Int8 = MurmurHash3.Hash128(HashSeeds.Int8, RootHashSeed);
public static readonly UInt128 Int16 = MurmurHash3.Hash128(HashSeeds.Int16, RootHashSeed);
public static readonly UInt128 Int32 = MurmurHash3.Hash128(HashSeeds.Int32, RootHashSeed);
public static readonly UInt128 Int64 = MurmurHash3.Hash128(HashSeeds.Int64, RootHashSeed);
public static readonly UInt128 UInt32 = MurmurHash3.Hash128(HashSeeds.UInt32, RootHashSeed);
}
/// <summary>
/// Private so that no instance other than <see cref="Singleton"/> can be created.
/// </summary>
private CosmosNumberHasher()
{
}
/// <summary>
/// Hashes a 32-bit float: the Float32 type seed is hashed first, then the value's bit pattern
/// (after widening to double) is hashed on top of it.
/// </summary>
/// <param name="cosmosFloat32">The number to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the number; 0.0f and -0.0f produce the same hash.</returns>
public UInt128 Visit(CosmosFloat32 cosmosFloat32, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Float32;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Float32, seed);
    }

    float raw = cosmosFloat32.GetValue();

    // 0.0f and -0.0f compare equal but have different bit patterns, so both are mapped to positive zero.
    // https://stackoverflow.com/questions/3139538/is-minus-zero-0-equivalent-to-zero-0-in-c-sharp
    float normalized = raw == 0.0f ? 0 : raw;

    long bits = BitConverter.DoubleToInt64Bits(normalized);
    return MurmurHash3.Hash128((UInt128)bits, typeHash);
}
/// <summary>
/// Hashes a 64-bit float: the Float64 type seed is hashed first, then the value's bit pattern is hashed on top of it.
/// </summary>
/// <param name="cosmosFloat64">The number to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the number; 0.0 and -0.0 produce the same hash.</returns>
public UInt128 Visit(CosmosFloat64 cosmosFloat64, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Float64;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Float64, seed);
    }

    double raw = cosmosFloat64.GetValue();

    // 0.0 and -0.0 compare equal but have different bit patterns, so both are mapped to positive zero.
    // https://stackoverflow.com/questions/3139538/is-minus-zero-0-equivalent-to-zero-0-in-c-sharp
    double normalized = raw == 0.0 ? 0 : raw;

    long bits = BitConverter.DoubleToInt64Bits(normalized);
    return MurmurHash3.Hash128((UInt128)bits, typeHash);
}
/// <summary>
/// Hashes a 16-bit integer: the Int16 type seed is hashed first, then the value is hashed on top of it.
/// </summary>
/// <param name="cosmosInt16">The number to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the number.</returns>
public UInt128 Visit(CosmosInt16 cosmosInt16, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Int16;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Int16, seed);
    }

    short number = cosmosInt16.GetValue();
    return MurmurHash3.Hash128(number, typeHash);
}
/// <summary>
/// Hashes a 32-bit integer: the Int32 type seed is hashed first, then the value is hashed on top of it.
/// </summary>
/// <param name="cosmosInt32">The number to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the number.</returns>
public UInt128 Visit(CosmosInt32 cosmosInt32, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Int32;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Int32, seed);
    }

    int number = cosmosInt32.GetValue();
    return MurmurHash3.Hash128(number, typeHash);
}
/// <summary>
/// Hashes a 64-bit integer: the Int64 type seed is hashed first, then the value is hashed on top of it.
/// </summary>
/// <param name="cosmosInt64">The number to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the number.</returns>
public UInt128 Visit(CosmosInt64 cosmosInt64, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Int64;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Int64, seed);
    }

    long number = cosmosInt64.GetValue();
    return MurmurHash3.Hash128(number, typeHash);
}
/// <summary>
/// Hashes an 8-bit integer: the Int8 type seed is hashed first, then the value is hashed on top of it.
/// </summary>
/// <param name="cosmosInt8">The number to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the number.</returns>
public UInt128 Visit(CosmosInt8 cosmosInt8, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Int8;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Int8, seed);
    }

    sbyte number = cosmosInt8.GetValue();
    return MurmurHash3.Hash128(number, typeHash);
}
/// <summary>
/// Hashes a Number64: the Number64 type seed is hashed first, then the value's
/// <see cref="Number64.DoubleEx"/> form is hashed on top of it.
/// </summary>
/// <param name="cosmosNumber64">The number to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the number.</returns>
public UInt128 Visit(CosmosNumber64 cosmosNumber64, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.Number64;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.Number64, seed);
    }

    Number64 number = cosmosNumber64.GetValue();
    Number64.DoubleEx extended = Number64.ToDoubleEx(number);
    return MurmurHash3.Hash128(extended, typeHash);
}
/// <summary>
/// Hashes an unsigned 32-bit integer: the UInt32 type seed is hashed first, then the value is hashed on top of it.
/// </summary>
/// <param name="cosmosUInt32">The number to hash.</param>
/// <param name="seed">The seed supplied by the caller.</param>
/// <returns>The hash of the number.</returns>
public UInt128 Visit(CosmosUInt32 cosmosUInt32, UInt128 seed)
{
    UInt128 typeHash;
    if (seed == RootHashSeed)
    {
        typeHash = RootCache.UInt32;
    }
    else
    {
        typeHash = MurmurHash3.Hash128(HashSeeds.UInt32, seed);
    }

    uint number = cosmosUInt32.GetValue();
    return MurmurHash3.Hash128(number, typeHash);
}
}
}
}