Skip to content

Commit 9de80d7

Browse files
committed
Fixed error in Tuple Intersection.update(Sketch, S summary), line 184.
Also added self return to UpdatableSummary<U> update(U value). This simplifies user code. Also fixed naming of update methods where they should have been deprecated.
1 parent ccfc449 commit 9de80d7

18 files changed

Lines changed: 928 additions & 871 deletions

src/main/java/org/apache/datasketches/theta/AnotB.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,20 +108,21 @@ public Family getFamily() {
108108
public abstract void notB(Sketch skB);
109109

110110
/**
111-
* Gets the result of the mutistep, stateful operation AnotB that have been executed with calls
111+
* Gets the result of the multistep, stateful operation AnotB that has been executed with calls
112112
* to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or
113113
* {@link #notB(org.apache.datasketches.theta.Sketch)}).
114114
*
115115
* @param reset If <i>true</i>, clears this operator to the empty state after this result is
116116
* returned. Set this to <i>false</i> if you wish to obtain an intermediate result.
117+
*
117118
* @return the result of this operation as an ordered, on-heap {@link CompactSketch}.
118119
*/
119120
public abstract CompactSketch getResult(boolean reset);
120121

121122
/**
122-
* Gets the result of this stateful set operation as a CompactSketch of the form based on
123-
* the input arguments.
124-
* The stateful input operations are {@link #setA(Sketch)} and {@link #notB(Sketch)}.
123+
* Gets the result of the multistep, stateful operation AnotB that has been executed with calls
124+
* to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or
125+
* {@link #notB(org.apache.datasketches.theta.Sketch)}).
125126
*
126127
* @param dstOrdered If <i>true</i>, the result will be an ordered {@link CompactSketch}.
127128
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>.
@@ -132,7 +133,7 @@ public Family getFamily() {
132133
* @param reset If <i>true</i>, clears this operator to the empty state after this result is
133134
* returned. Set this to <i>false</i> if you wish to obtain an intermediate result.
134135
*
135-
* @return the result of this operation as a {@link CompactSketch} of the chosen form.
136+
* @return the result of this operation as a {@link CompactSketch} in the given dstMem.
136137
*/
137138
public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem, boolean reset);
138139

src/main/java/org/apache/datasketches/theta/Intersection.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,19 @@ public Family getFamily() {
5050

5151
/**
5252
* Gets the result of this operation as an ordered CompactSketch on the Java heap.
53-
* The {@link #intersect(Sketch)} method must have been called at least once.
53+
* This does not disturb the underlying data structure of this intersection.
54+
* The {@link #intersect(Sketch)} method must have been called at least once, otherwise an
55+
* exception will be thrown. This is because a virgin Intersection object represents the
56+
* Universal Set, which has an infinite number of values.
5457
* @return the result of this operation as an ordered CompactSketch on the Java heap
5558
*/
5659
public CompactSketch getResult() {
5760
return getResult(true, null);
5861
}
5962

6063
/**
61-
* Gets the result of this operation as a CompactSketch of the chosen form.
64+
* Gets the result of this operation as a CompactSketch in the given dstMem.
65+
* This does not disturb the underlying data structure of this intersection.
6266
* The {@link #intersect(Sketch)} method must have been called at least once, otherwise an
6367
* exception will be thrown. This is because a virgin Intersection object represents the
6468
* Universal Set, which has an infinite number of values.
@@ -70,14 +74,14 @@ public CompactSketch getResult() {
7074
*
7175
* <p>Presenting an intersection with a null argument will throw an exception.</p>
7276
*
73-
*
7477
* @param dstOrdered
7578
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
7679
*
7780
* @param dstMem
7881
* <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
7982
*
80-
* @return the result of this operation as a CompactSketch of the chosen form
83+
* @return the result of this operation as a CompactSketch stored in the given dstMem,
84+
* which can be either on or off-heap.
8185
*/
8286
public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem);
8387

@@ -90,7 +94,7 @@ public CompactSketch getResult() {
9094
/**
9195
* Resets this Intersection for stateful operations only.
9296
* The seed remains intact, otherwise reverts to
93-
* the Universal Set, theta of 1.0 and empty = false.
97+
* the Universal Set: theta = 1.0, no retained data and empty = false.
9498
*/
9599
public abstract void reset();
96100

src/main/java/org/apache/datasketches/theta/IntersectionImpl.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ static IntersectionImpl wrapInstance(
219219
@Override
220220
public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered,
221221
final WritableMemory dstMem) {
222-
if ((wmem_ != null) && readOnly_) { throw new SketchesReadOnlyException(); }
222+
if (wmem_ != null && readOnly_) { throw new SketchesReadOnlyException(); }
223223
hardReset();
224224
intersect(a);
225225
intersect(b);
@@ -233,7 +233,7 @@ public void intersect(final Sketch sketchIn) {
233233
if (sketchIn == null) {
234234
throw new SketchesArgumentException("Intersection argument must not be null.");
235235
}
236-
if ((wmem_ != null) && readOnly_) { throw new SketchesReadOnlyException(); }
236+
if (wmem_ != null && readOnly_) { throw new SketchesReadOnlyException(); }
237237
if (empty_ || sketchIn.isEmpty()) { //empty rule
238238
//Because of the def of null above and the Empty Rule (which is OR), empty_ must be true.
239239
//Whatever the current internal state, we make our local empty.
@@ -262,14 +262,14 @@ public void intersect(final Sketch sketchIn) {
262262
final int sketchInEntries = sketchIn.getRetainedEntries(true);
263263

264264
//states 1,2,3,6
265-
if ((curCount_ == 0) || (sketchInEntries == 0)) {
265+
if (curCount_ == 0 || sketchInEntries == 0) {
266266
curCount_ = 0;
267267
if (wmem_ != null) { insertCurCount(wmem_, 0); }
268268
hashTable_ = null; //No need for a HT. Don't bother clearing mem if valid
269269
} //end of states 1,2,3,6
270270

271271
// state 5
272-
else if ((curCount_ < 0) && (sketchInEntries > 0)) {
272+
else if (curCount_ < 0 && sketchInEntries > 0) {
273273
curCount_ = sketchIn.getRetainedEntries(true);
274274
final int requiredLgArrLongs = minLgHashTableSize(curCount_, REBUILD_THRESHOLD);
275275
final int priorLgArrLongs = lgArrLongs_; //prior only used in error message
@@ -295,7 +295,7 @@ else if ((curCount_ < 0) && (sketchInEntries > 0)) {
295295
} //end of state 5
296296

297297
//state 7
298-
else if ((curCount_ > 0) && (sketchInEntries > 0)) {
298+
else if (curCount_ > 0 && sketchInEntries > 0) {
299299
//Sets resulting hashTable, curCount and adjusts lgArrLongs
300300
performIntersect(sketchIn);
301301
} //end of state 7
@@ -339,6 +339,16 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds
339339
dstMem, compactCache);
340340
}
341341

342+
@Override
343+
public boolean hasResult() {
344+
return wmem_ != null ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0;
345+
}
346+
347+
@Override
348+
public boolean isSameResource(final Memory that) {
349+
return wmem_ != null ? wmem_.isSameResource(that) : false;
350+
}
351+
342352
@Override
343353
public void reset() {
344354
hardReset();
@@ -347,7 +357,7 @@ public void reset() {
347357
@Override
348358
public byte[] toByteArray() {
349359
final int preBytes = CONST_PREAMBLE_LONGS << 3;
350-
final int dataBytes = (curCount_ > 0) ? 8 << lgArrLongs_ : 0;
360+
final int dataBytes = curCount_ > 0 ? 8 << lgArrLongs_ : 0;
351361
final byte[] byteArrOut = new byte[preBytes + dataBytes];
352362
if (wmem_ != null) {
353363
wmem_.getByteArray(0, byteArrOut, 0, preBytes + dataBytes);
@@ -376,16 +386,6 @@ public byte[] toByteArray() {
376386
return byteArrOut;
377387
}
378388

379-
@Override
380-
public boolean hasResult() {
381-
return (wmem_ != null) ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0;
382-
}
383-
384-
@Override
385-
public boolean isSameResource(final Memory that) {
386-
return (wmem_ != null) ? wmem_.isSameResource(that) : false;
387-
}
388-
389389
//restricted
390390

391391
/**
@@ -405,7 +405,7 @@ boolean isEmpty() {
405405
@Override
406406
long[] getCache() {
407407
if (wmem_ == null) {
408-
return (hashTable_ != null) ? hashTable_ : new long[0];
408+
return hashTable_ != null ? hashTable_ : new long[0];
409409
}
410410
//Direct
411411
final int arrLongs = 1 << lgArrLongs_;
@@ -426,7 +426,7 @@ long getThetaLong() {
426426

427427
private void performIntersect(final Sketch sketchIn) {
428428
// curCount and input data are nonzero, match against HT
429-
assert ((curCount_ > 0) && (!empty_));
429+
assert curCount_ > 0 && !empty_;
430430
final long[] cacheIn = sketchIn.getCache();
431431
final int arrLongsIn = cacheIn.length;
432432
final long[] hashTable;
@@ -458,7 +458,7 @@ private void performIntersect(final Sketch sketchIn) {
458458
//either unordered compact or hash table
459459
for (int i = 0; i < arrLongsIn; i++ ) {
460460
final long hashIn = cacheIn[i];
461-
if ((hashIn <= 0L) || (hashIn >= thetaLong_)) { continue; }
461+
if (hashIn <= 0L || hashIn >= thetaLong_) { continue; }
462462
final int foundIdx = hashSearch(hashTable, lgArrLongs_, hashIn);
463463
if (foundIdx == -1) { continue; }
464464
matchSet[matchSetCount++] = hashIn;
@@ -505,7 +505,7 @@ private void moveDataToTgt(final long[] arr, final int count) {
505505
tmpCnt++;
506506
}
507507
}
508-
assert (tmpCnt == count) : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count;
508+
assert tmpCnt == count : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count;
509509
}
510510

511511
private void hardReset() {

src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ public final class JaccardSimilarity {
5454
*/
5555
public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) {
5656
//Corner case checks
57-
if ((sketchA == null) || (sketchB == null)) { return ZEROS.clone(); }
57+
if (sketchA == null || sketchB == null) { return ZEROS.clone(); }
5858
if (sketchA == sketchB) { return ONES.clone(); }
5959
if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); }
6060
if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); }
@@ -68,17 +68,17 @@ public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) {
6868
final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK);
6969
final Union union =
7070
SetOperation.builder().setNominalEntries(newK).buildUnion();
71-
union.update(sketchA);
72-
union.update(sketchB);
71+
union.union(sketchA);
72+
union.union(sketchB);
7373
final Sketch unionAB = union.getResult(false, null);
7474
final long thetaLongUAB = unionAB.getThetaLong();
7575
final long thetaLongA = sketchA.getThetaLong();
7676
final long thetaLongB = sketchB.getThetaLong();
7777
final int countUAB = unionAB.getRetainedEntries(true);
7878

7979
//Check for identical data
80-
if ((countUAB == countA) && (countUAB == countB)
81-
&& (thetaLongUAB == thetaLongA) && (thetaLongUAB == thetaLongB)) {
80+
if (countUAB == countA && countUAB == countB
81+
&& thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
8282
return ONES.clone();
8383
}
8484

@@ -105,7 +105,7 @@ public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) {
105105
*/
106106
public static boolean exactlyEqual(final Sketch sketchA, final Sketch sketchB) {
107107
//Corner case checks
108-
if ((sketchA == null) || (sketchB == null)) { return false; }
108+
if (sketchA == null || sketchB == null) { return false; }
109109
if (sketchA == sketchB) { return true; }
110110
if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; }
111111
if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; }
@@ -116,17 +116,17 @@ public static boolean exactlyEqual(final Sketch sketchA, final Sketch sketchB) {
116116
//Create the Union
117117
final Union union =
118118
SetOperation.builder().setNominalEntries(ceilingPowerOf2(countA + countB)).buildUnion();
119-
union.update(sketchA);
120-
union.update(sketchB);
119+
union.union(sketchA);
120+
union.union(sketchB);
121121
final Sketch unionAB = union.getResult();
122122
final long thetaLongUAB = unionAB.getThetaLong();
123123
final long thetaLongA = sketchA.getThetaLong();
124124
final long thetaLongB = sketchB.getThetaLong();
125125
final int countUAB = unionAB.getRetainedEntries(true);
126126

127127
//Check for identical counts and thetas
128-
if ((countUAB == countA) && (countUAB == countB)
129-
&& (thetaLongUAB == thetaLongA) && (thetaLongUAB == thetaLongB)) {
128+
if (countUAB == countA && countUAB == countB
129+
&& thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
130130
return true;
131131
}
132132
return false;

src/main/java/org/apache/datasketches/theta/Union.java

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,14 @@ public Family getFamily() {
3535
return Family.UNION;
3636
}
3737

38+
/**
39+
* Gets the result of this operation as an ordered CompactSketch on the Java heap.
40+
* This does not disturb the underlying data structure of the union.
41+
* Therefore, it is OK to continue updating the union after this operation.
42+
* @return the result of this operation as an ordered CompactSketch on the Java heap
43+
*/
44+
public abstract CompactSketch getResult();
45+
3846
/**
3947
* Gets the result of this operation as a CompactSketch of the chosen form.
4048
* This does not disturb the underlying data structure of the union.
@@ -50,14 +58,6 @@ public Family getFamily() {
5058
*/
5159
public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem);
5260

53-
/**
54-
* Gets the result of this operation as an ordered CompactSketch on the Java heap.
55-
* This does not disturb the underlying data structure of the union.
56-
* Therefore, it is OK to continue updating the union after this operation.
57-
* @return the result of this operation as an ordered CompactSketch on the Java heap
58-
*/
59-
public abstract CompactSketch getResult();
60-
6161
/**
6262
* Resets this Union. The seed remains intact, otherwise reverts back to its virgin state.
6363
*/
@@ -108,6 +108,20 @@ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstO
108108
*
109109
* @param sketchIn The incoming sketch.
110110
*/
111+
public abstract void union(Sketch sketchIn);
112+
113+
114+
/**
115+
* Perform a Union operation with <i>this</i> union and the given on-heap sketch of the Theta Family.
116+
* This method is not valid for the older SetSketch, which was prior to Open Source (August, 2015).
117+
*
118+
* <p>This method can be repeatedly called.
119+
* If the given sketch is null it is interpreted as an empty sketch.</p>
120+
*
121+
* @param sketchIn The incoming sketch.
122+
* @deprecated 2.0.0. Use {@link #union(Sketch)} instead.
123+
*/
124+
@Deprecated
111125
public abstract void update(Sketch sketchIn);
112126

113127
/**
@@ -120,6 +134,20 @@ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstO
120134
*
121135
* @param mem Memory image of sketch to be merged
122136
*/
137+
public abstract void union(Memory mem);
138+
139+
/**
140+
* Perform a Union operation with <i>this</i> union and the given Memory image of any sketch of the
141+
* Theta Family. The input image may be from earlier versions of the Theta Compact Sketch,
142+
* called the SetSketch (circa 2012), which predates Open Source; such images are compact and ordered.
143+
*
144+
* <p>This method can be repeatedly called.
145+
* If the given sketch is null it is interpreted as an empty sketch.</p>
146+
*
147+
* @param mem Memory image of sketch to be merged
148+
* @deprecated 2.0.0. Use {@link #union(Memory)} instead.
149+
*/
150+
@Deprecated
123151
public abstract void update(Memory mem);
124152

125153
/**

0 commit comments

Comments
 (0)