|
1 | 1 | package org.roaringbitmap.art; |
2 | 2 |
|
3 | 3 | import org.roaringbitmap.ArraysShim; |
| 4 | +import org.roaringbitmap.Container; |
4 | 5 | import org.roaringbitmap.longlong.LongUtils; |
5 | 6 |
|
6 | 7 | import java.io.DataInput; |
7 | 8 | import java.io.DataOutput; |
8 | 9 | import java.io.IOException; |
9 | 10 | import java.nio.ByteBuffer; |
| 11 | +import java.util.function.Supplier; |
| 12 | +import java.util.function.ToLongFunction; |
| 13 | + |
| 14 | +import static org.roaringbitmap.art.BranchNode.ILLEGAL_IDX; |
10 | 15 |
|
11 | 16 | /** |
12 | 17 | * See: https://db.in.tum.de/~leis/papers/ART.pdf a cpu cache friendly main memory data structure. |
@@ -222,6 +227,124 @@ protected Toolkit removeSpecifyKey(Node node, byte[] key, int dep) { |
222 | 227 | return null; |
223 | 228 | } |
224 | 229 |
|
| 230 | + /** |
| 231 | + * Find or create a leaf node by the high part of the key. |
| 232 | + * If the leaf node is not found, it will be created with the value returned by |
| 233 | + * nextContainer.applyAsLong(ifNotFound). |
| 234 | + * |
| 235 | + * @param highPart the high part of the key |
| 236 | + * @param nextContainer a function to get the next container index |
| 237 | + * @param ifNotFound a supplier to provide a value if the key is not found |
| 238 | + * @return the container index of the found or created leaf node |
| 239 | + */ |
| 240 | + public <T> long findOrCreateByKey(long highPart, ToLongFunction<Supplier<T>> nextContainer, Supplier<T> ifNotFound) { |
| 241 | + LeafNode result; |
| 242 | + |
| 243 | + if (root == null) { |
| 244 | + result = new LeafNode(highPart, nextContainer.applyAsLong(ifNotFound)); |
| 245 | + root = result; |
| 246 | + return result.getContainerIdx(); |
| 247 | + } |
| 248 | + byte depth = 0; |
| 249 | + byte parentKeyInGrandParent = 0; |
| 250 | + BranchNode grandParent = null; |
| 251 | + Node parent = root; |
| 252 | + Node originalParent; |
| 253 | + // on each cycle |
| 254 | + // if gParent == null, parent will be the next root |
| 255 | + // if gParent != null, parent should replace grandParent at key parentKeyInGrandParent |
| 256 | + // depth is the depth of parent in the trie |
| 257 | + // result is the leaf node, or null if we are just finding |
| 258 | + |
| 259 | + //Parent can grow, so we need to keep track of the parent index in grandParent |
| 260 | + // but grandParent cant. |
| 261 | + while (true) { |
| 262 | + //keep a copy of the original parent, so we can see if it changes, and adjust the tree |
| 263 | + originalParent = parent; |
| 264 | + |
| 265 | + //usually a branch node, so lets test that first |
| 266 | + if (parent instanceof BranchNode) { |
| 267 | + BranchNode parentBranch = (BranchNode) parent; |
| 268 | + //is parent the real parent - does the prefix match? |
| 269 | + byte prefixLength = parentBranch.prefixLength(); |
| 270 | + if (prefixLength > 0) { |
| 271 | + byte matchLength = prefixMatchLength(LongUtils.fromArray(parentBranch.prefix), depth, prefixLength, highPart); |
| 272 | + if (matchLength == prefixLength) { |
| 273 | + // prefix matches, so we can just continue |
| 274 | + depth += prefixLength; |
| 275 | + continue; |
| 276 | + } else { |
| 277 | + //so we have a partial match, we need to split the branch |
| 278 | + // for example, if the prefix is [1,2,3] and the highPart is 0xab99010204, and depth 2 |
| 279 | + // we have a match length of 2, |
| 280 | + // so we create a new parent with prefix is [1,2] |
| 281 | + // with 2 children - the current parent prefix shrunk to [], at key 3 |
| 282 | + // and add a leaf node with key 4 |
| 283 | + byte branchKey = parentBranch.prefix[matchLength]; |
| 284 | + parentBranch = parentBranch.shrinkPrefixBy(matchLength + 1); |
| 285 | + result = new LeafNode(highPart, nextContainer.applyAsLong(ifNotFound)); |
| 286 | + parent = Node4.create(parentBranch, result, branchKey, LongUtils.getByte(highPart, depth + matchLength), highPart, depth, (byte) depth + matchLength); |
| 287 | + break; |
| 288 | + } |
| 289 | + } |
| 290 | + //OK so parent is ok, and depth is adjusted. Let try to move to the next level |
| 291 | + |
| 292 | + byte childKey = LongUtils.getByte(highPart, depth); |
| 293 | + // We optimise for the case where the childKey is already present at this level, and we are walking a tree |
| 294 | + // it should be the common case, so we try to find the child at this level |
| 295 | + //we could calculate the position for the access, but that means that we have to have 2 accesses on the common path, |
| 296 | + // so they shoul dbe faster |
| 297 | + Node childNode = parentBranch.getChildAtKey(childKey); |
| 298 | + if (childNode == null) { |
| 299 | + result = new LeafNode(highPart, nextContainer.applyAsLong(ifNotFound)); |
| 300 | + parent = parentBranch.insert(result, childKey); |
| 301 | + break; |
| 302 | + } else { |
| 303 | + grandParent = parentBranch; |
| 304 | + parentKeyInGrandParent = childKey; |
| 305 | + parent = childNode; |
| 306 | + depth += 1; |
| 307 | + |
| 308 | + } |
| 309 | + } else { |
| 310 | + LeafNode leafNode = (LeafNode) parent; |
| 311 | + long leafNodeKey = leafNode.getKey(); |
| 312 | + if (leafNodeKey == highPart) { |
| 313 | + result = leafNode; |
| 314 | + } else { |
| 315 | + //we have to create a new Node4 with the two leaves |
| 316 | + result = new LeafNode(highPart, nextContainer.applyAsLong(ifNotFound)); |
| 317 | + byte matchLength = prefixMatchLength(leafNodeKey, depth, (byte)6, highPart); |
| 318 | + |
| 319 | + // create a new parent node4 with the current leaf node and the new leaf node |
| 320 | + Node4 split = Node4.create(leafNode, result, |
| 321 | + LongUtils.getByte(leafNodeKey, depth + matchLength), LongUtils.getByte(highPart, depth + matchLength), |
| 322 | + highPart, depth, (byte) (depth + matchLength)); |
| 323 | + parent = split; |
| 324 | + } |
| 325 | + break; |
| 326 | + } |
| 327 | + } |
| 328 | + if (grandParent == null) { |
| 329 | + root = parent; |
| 330 | + } else if (parent != originalParent) { |
| 331 | + // if the parent has changed, we need to replace it in the grandParent |
| 332 | + int pos = grandParent.getChildPos(parentKeyInGrandParent); |
| 333 | + grandParent.replaceNode(pos, parent); |
| 334 | + } |
| 335 | + return result.getContainerIdx(); |
| 336 | + } |
| 337 | + |
| 338 | + private byte prefixMatchLength(long prefix, byte depth, byte prefixLength, long highPart) { |
| 339 | + for (byte i = 0; i < prefixLength; i++) { |
| 340 | + if (LongUtils.getByte(prefix,i) != LongUtils.getByte(highPart, depth + i)) { |
| 341 | + return i; |
| 342 | + } |
| 343 | + } |
| 344 | + return prefixLength; |
| 345 | + } |
| 346 | + |
| 347 | + |
225 | 348 | class Toolkit { |
226 | 349 |
|
227 | 350 | Node freshMatchedParentNode; // indicating a fresh parent node while the original |
|
0 commit comments