Skip to content

Commit cec007a

Browse files
authored
Merge pull request #252 from DataSketches/NewEmpty
New empty
2 parents bd02cc3 + 65da0ff commit cec007a

36 files changed

Lines changed: 927 additions & 666 deletions

src/main/java/com/yahoo/sketches/quantiles/DoublesSketch.java

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -128,23 +128,27 @@ public abstract class DoublesSketch {
128128
static final int MIN_K = 2;
129129
static final int MAX_K = 1 << 15;
130130

131-
/**
132-
* Parameter that controls space usage of sketch and accuracy of estimates.
133-
*/
134-
final int k_;
135-
136131
/**
137132
* Setting the seed makes the results of the sketch deterministic if the input values are
138133
* received in exactly the same order. This is only useful when performing test comparisons,
139134
* otherwise it is not recommended.
140135
*/
141-
public static Random rand = new Random();
136+
static Random rand = new Random();
137+
138+
/**
139+
* Parameter that controls space usage of sketch and accuracy of estimates.
140+
*/
141+
final int k_;
142142

143143
DoublesSketch(final int k) {
144144
Util.checkK(k);
145145
k_ = k;
146146
}
147147

148+
synchronized static void setRandom(final long seed) {
149+
DoublesSketch.rand = new Random(seed);
150+
}
151+
148152
/**
149153
* Returns a new builder
150154
* @return a new builder

src/main/java/com/yahoo/sketches/theta/AnotB.java

Lines changed: 34 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@
2121
* <p>Calling the update function a second time essentially clears the internal state and updates
2222
* with the new pair of sketches.
2323
*
24+
* <p>As an alternative, one can use the aNotB method that returns the result immediately.
25+
*
2426
* @author Lee Rhodes
2527
*/
2628
public abstract class AnotB extends SetOperation {
@@ -30,10 +32,16 @@ public Family getFamily() {
3032
return Family.A_NOT_B;
3133
}
3234

35+
/**
36+
* Gets the result of this operation as an ordered CompactSketch on the Java heap
37+
* @return the result of this operation as an ordered CompactSketch on the Java heap
38+
*/
39+
public abstract CompactSketch getResult();
40+
3341
/**
3442
* Gets the result of this set operation as a CompactSketch of the chosen form
3543
* @param dstOrdered
36-
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
44+
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>.
3745
*
3846
* @param dstMem
3947
* <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
@@ -42,12 +50,6 @@ public Family getFamily() {
4250
*/
4351
public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem);
4452

45-
/**
46-
* Gets the result of this operation as an ordered CompactSketch on the Java heap
47-
* @return the result of this operation as an ordered CompactSketch on the Java heap
48-
*/
49-
public abstract CompactSketch getResult();
50-
5153
/**
5254
* Perform A-and-not-B set operation on the two given sketches.
5355
* A null sketch is interpreted as an empty sketch.
@@ -57,4 +59,29 @@ public Family getFamily() {
5759
*/
5860
public abstract void update(Sketch a, Sketch b);
5961

62+
/**
63+
* Perform A-and-not-B set operation on the two given sketches and return the result as an
64+
* ordered CompactSketch on the heap.
65+
* @param a The incoming sketch for the first argument
66+
* @param b The incoming sketch for the second argument
67+
* @return an ordered CompactSketch on the heap
68+
*/
69+
public CompactSketch aNotB(final Sketch a, final Sketch b) {
70+
return aNotB(a, b, true, null);
71+
}
72+
73+
/**
74+
* Perform A-and-not-B set operation on the two given sketches and return the result as a
75+
* CompactSketch.
76+
* @param a The incoming sketch for the first argument
77+
* @param b The incoming sketch for the second argument
78+
* @param dstOrdered
79+
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>.
80+
* @param dstMem
81+
* <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
82+
* @return the result as a CompactSketch.
83+
*/
84+
public abstract CompactSketch aNotB(Sketch a, Sketch b, boolean dstOrdered,
85+
WritableMemory dstMem);
86+
6087
}

src/main/java/com/yahoo/sketches/theta/ConcurrentHeapThetaBuffer.java

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,8 @@
55

66
package com.yahoo.sketches.theta;
77

8-
import static com.yahoo.sketches.theta.UpdateReturnState.InsertedCountIncremented;
8+
import static com.yahoo.sketches.theta.UpdateReturnState.ConcurrentBufferInserted;
9+
import static com.yahoo.sketches.theta.UpdateReturnState.ConcurrentPropagated;
910
import static com.yahoo.sketches.theta.UpdateReturnState.RejectedOverTheta;
1011

1112
import java.util.concurrent.atomic.AtomicBoolean;
@@ -27,11 +28,6 @@
2728
*/
2829
final class ConcurrentHeapThetaBuffer extends HeapQuickSelectSketch {
2930

30-
private static int computeLogBufferSize(final int lgNomLongs, final long exactSize,
31-
final int maxNumLocalBuffers) {
32-
return Math.min(lgNomLongs, (int)Math.log(Math.sqrt(exactSize) / (2 * maxNumLocalBuffers)));
33-
}
34-
3531
// Shared sketch consisting of the global sample set and theta value.
3632
private final ConcurrentSharedThetaSketch shared;
3733

@@ -60,6 +56,11 @@ private static int computeLogBufferSize(final int lgNomLongs, final long exactSi
6056
localPropagationInProgress = new AtomicBoolean(false);
6157
}
6258

59+
private static int computeLogBufferSize(final int lgNomLongs, final long exactSize,
60+
final int maxNumLocalBuffers) {
61+
return Math.min(lgNomLongs, (int)Math.log(Math.sqrt(exactSize) / (2 * maxNumLocalBuffers)));
62+
}
63+
6364
//Sketch overrides
6465

6566
@Override
@@ -82,6 +83,11 @@ public double getUpperBound(final int numStdDev) {
8283
return shared.getUpperBound(numStdDev);
8384
}
8485

86+
@Override
87+
public boolean hasMemory() {
88+
return shared.hasMemory();
89+
}
90+
8591
@Override
8692
public boolean isDirect() {
8793
return shared.isDirect();
@@ -129,18 +135,21 @@ UpdateReturnState hashUpdate(final long hash) {
129135
}
130136
HashOperations.checkHashCorruption(hash);
131137
if ((getHashTableThreshold() == 0) || isExactMode ) {
132-
final long thetaLong = getThetaLong();
133138
//The over-theta and zero test
134-
if (HashOperations.continueCondition(thetaLong, hash)) {
139+
if (HashOperations.continueCondition(getThetaLong(), hash)) {
135140
return RejectedOverTheta; //signal that hash was rejected due to theta or zero.
136141
}
137142
if (propagateToSharedSketch(hash)) {
138-
return InsertedCountIncremented; //not totally correct
143+
return ConcurrentPropagated;
139144
}
140145
}
141146
final UpdateReturnState state = super.hashUpdate(hash);
142147
if (isOutOfSpace(getRetainedEntries() + 1)) {
143148
propagateToSharedSketch();
149+
return ConcurrentPropagated;
150+
}
151+
if (state == UpdateReturnState.InsertedCountIncremented) {
152+
return ConcurrentBufferInserted;
144153
}
145154
return state;
146155
}

src/main/java/com/yahoo/sketches/theta/ConcurrentSharedThetaSketch.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,8 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s
112112

113113
double getUpperBound(int numStdDev);
114114

115+
boolean hasMemory();
116+
115117
boolean isDirect();
116118

117119
boolean isEmpty();

src/main/java/com/yahoo/sketches/theta/DirectCompactOrderedSketch.java

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -52,11 +52,12 @@ static DirectCompactOrderedSketch wrapInstance(final Memory srcMem, final long s
5252
* @param dstMem the given destination Memory. This clears it before use.
5353
* @return a DirectCompactOrderedSketch.
5454
*/
55-
static DirectCompactOrderedSketch compact(final UpdateSketch sketch,
56-
final WritableMemory dstMem) {
57-
final long thetaLong = sketch.getThetaLong();
58-
final boolean empty = sketch.isEmpty();
55+
static DirectCompactOrderedSketch compact(final UpdateSketch sketch, final WritableMemory dstMem) {
5956
final int curCount = sketch.getRetainedEntries(true);
57+
long thetaLong = sketch.getThetaLong();
58+
boolean empty = sketch.isEmpty();
59+
thetaLong = thetaOnCompact(empty, curCount, thetaLong);
60+
empty = emptyOnCompact(curCount, thetaLong);
6061
final int preLongs = computeCompactPreLongs(thetaLong, empty, curCount);
6162
final short seedHash = sketch.getSeedHash();
6263
final long[] cache = sketch.getCache();

src/main/java/com/yahoo/sketches/theta/DirectCompactUnorderedSketch.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,9 +53,11 @@ static DirectCompactUnorderedSketch wrapInstance(final Memory srcMem, final long
5353
*/
5454
static DirectCompactUnorderedSketch compact(final UpdateSketch sketch,
5555
final WritableMemory dstMem) {
56-
final long thetaLong = sketch.getThetaLong();
57-
final boolean empty = sketch.isEmpty();
5856
final int curCount = sketch.getRetainedEntries(true);
57+
long thetaLong = sketch.getThetaLong();
58+
boolean empty = sketch.isEmpty();
59+
thetaLong = thetaOnCompact(empty, curCount, thetaLong);
60+
empty = emptyOnCompact(curCount, thetaLong);
5961
final int preLongs = computeCompactPreLongs(thetaLong, empty, curCount);
6062
final short seedHash = sketch.getSeedHash();
6163
final long[] cache = sketch.getCache();

src/main/java/com/yahoo/sketches/theta/DirectQuickSelectSketch.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
import static com.yahoo.sketches.theta.PreambleUtil.insertSeedHash;
3131
import static com.yahoo.sketches.theta.PreambleUtil.insertSerVer;
3232
import static com.yahoo.sketches.theta.PreambleUtil.insertThetaLong;
33+
import static com.yahoo.sketches.theta.PreambleUtil.insertUnionThetaLong;
3334
import static com.yahoo.sketches.theta.Rebuilder.actLgResizeFactor;
3435
import static com.yahoo.sketches.theta.Rebuilder.moveAndResize;
3536
import static com.yahoo.sketches.theta.Rebuilder.quickSelectAndRebuild;
@@ -132,6 +133,9 @@ private DirectQuickSelectSketch(
132133
insertP(dstMem, p); //bytes 12-15
133134
final long thetaLong = (long)(p * MAX_THETA_LONG_AS_DOUBLE);
134135
insertThetaLong(dstMem, thetaLong); //bytes 16-23
136+
if (unionGadget) {
137+
insertUnionThetaLong(dstMem, thetaLong);
138+
}
135139
//@formatter:on
136140

137141
//clear hash table area

src/main/java/com/yahoo/sketches/theta/ForwardCompatibility.java

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,6 @@ static final CompactSketch heapify1to3(final Memory srcMem, final long seed) {
5959

6060
final long[] compactOrderedCache = new long[curCount];
6161
srcMem.getLongArray(24, compactOrderedCache, 0, curCount);
62-
6362
return HeapCompactOrderedSketch
6463
.compact(compactOrderedCache, false, seedHash, curCount, thetaLong);
6564
}
@@ -93,11 +92,10 @@ static final CompactSketch heapify2to3(final Memory srcMem, final long seed) {
9392
validateInputSize(reqBytesIn, memCap);
9493

9594
final long thetaLong = (mdLongs < 3) ? Long.MAX_VALUE : srcMem.getLong(THETA_LONG);
96-
final boolean empty = (srcMem.getByte(FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0;
97-
95+
boolean empty = (srcMem.getByte(FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0;
96+
empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE); //force true
9897
final long[] compactOrderedCache = new long[curCount];
9998
srcMem.getLongArray(mdLongs << 3, compactOrderedCache, 0, curCount);
100-
10199
return HeapCompactOrderedSketch
102100
.compact(compactOrderedCache, empty, seedHash, curCount, thetaLong);
103101
}

0 commit comments

Comments
 (0)