Skip to content

Commit 9f85d8c

Browse files
committed
Mostly corrected javadocs
1 parent 999aaee commit 9f85d8c

5 files changed

Lines changed: 100 additions & 83 deletions

File tree

src/main/java/org/apache/datasketches/theta/CompactSketch.java

Lines changed: 70 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -190,49 +190,53 @@ private static CompactSketch wrap(final MemorySegment srcSeg, final long seed, f
190190
}
191191
final short seedHash = Util.computeSeedHash(seed);
192192

193-
if (serVer == 4) {
194-
return DirectCompactCompressedSketch.wrapInstance(srcSeg,
195-
enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
196-
}
197-
else if (serVer == 3) {
198-
if (PreambleUtil.isEmptyFlag(srcSeg)) {
199-
return EmptyCompactSketch.getHeapInstance(srcSeg);
193+
switch (serVer) {
194+
case 1: {
195+
return ForwardCompatibility.heapify1to3(srcSeg, seedHash);
200196
}
201-
if (otherCheckForSingleItem(srcSeg)) {
202-
return SingleItemSketch.heapify(srcSeg, enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
197+
case 2: {
198+
return ForwardCompatibility.heapify2to3(srcSeg,
199+
enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
203200
}
204-
//not empty & not singleItem
205-
final int flags = extractFlags(srcSeg);
206-
final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
207-
if (!compactFlag) {
208-
throw new SketchesArgumentException(
209-
"Corrupted: COMPACT family sketch image must have compact flag set");
201+
case 3: {
202+
if (PreambleUtil.isEmptyFlag(srcSeg)) {
203+
return EmptyCompactSketch.getHeapInstance(srcSeg);
204+
}
205+
if (otherCheckForSingleItem(srcSeg)) {
206+
return SingleItemSketch.heapify(srcSeg, enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
207+
}
208+
//not empty & not singleItem
209+
final int flags = extractFlags(srcSeg);
210+
final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
211+
if (!compactFlag) {
212+
throw new SketchesArgumentException(
213+
"Corrupted: COMPACT family sketch image must have compact flag set");
214+
}
215+
final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
216+
if (!readOnly) {
217+
throw new SketchesArgumentException(
218+
"Corrupted: COMPACT family sketch image must have Read-Only flag set");
219+
}
220+
return DirectCompactSketch.wrapInstance(srcSeg,
221+
enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
210222
}
211-
final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
212-
if (!readOnly) {
223+
case 4: {
224+
return DirectCompactCompressedSketch.wrapInstance(srcSeg,
225+
enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
226+
}
227+
default: {
213228
throw new SketchesArgumentException(
214-
"Corrupted: COMPACT family sketch image must have Read-Only flag set");
229+
"Corrupted: Serialization Version " + serVer + " not recognized.");
215230
}
216-
return DirectCompactSketch.wrapInstance(srcSeg,
217-
enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
218-
} //end of serVer 3
219-
else if (serVer == 1) {
220-
return ForwardCompatibility.heapify1to3(srcSeg, seedHash);
221-
}
222-
else if (serVer == 2) {
223-
return ForwardCompatibility.heapify2to3(srcSeg,
224-
enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
225231
}
226-
throw new SketchesArgumentException(
227-
"Corrupted: Serialization Version " + serVer + " not recognized.");
228232
}
229233

230234
/**
231235
* Wrap takes the sketch image in the given byte array and refers to it directly.
232236
* There is no data copying onto the java heap.
233237
* The wrap operation enables fast read-only merging and access to all the public read-only API.
234238
*
235-
* <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
239+
* <p>Only "Direct" Serialization Versions 3 and 4 (i.e., OpenSource) sketches that have
236240
* been explicitly stored as direct sketches can be wrapped.
237241
* Wrapping earlier serial version sketches will result in a heapify operation.
238242
* These early versions were never designed to "wrap".</p>
@@ -242,7 +246,7 @@ else if (serVer == 2) {
242246
* This is actually faster and consumes less overall space.</p>
243247
*
244248
* <p>This method checks if the DEFAULT_UPDATE_SEED was used to create the source byte array image.
245-
* Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field,
249+
* Note that SerialVersion 1 (pre-open-source) sketches cannot be checked as they don't have a seedHash field,
246250
* so the resulting heapified CompactSketch will be given the hash of DEFAULT_UPDATE_SEED.</p>
247251
*
248252
* @param bytes a byte array image of a Sketch that was created using the DEFAULT_UPDATE_SEED.
@@ -258,7 +262,7 @@ public static CompactSketch wrap(final byte[] bytes) {
258262
* There is no data copying onto the java heap.
259263
* The wrap operation enables fast read-only merging and access to all the public read-only API.
260264
*
261-
* <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
265+
* <p>Only "Direct" Serialization Versions 3 and 4 (i.e., OpenSource) sketches that have
262266
* been explicitly stored as direct sketches can be wrapped.
263267
* Wrapping earlier serial version sketches will result in a heapify operation.
264268
* These early versions were never designed to "wrap".</p>
@@ -288,38 +292,46 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo
288292
throw new SketchesArgumentException("Corrupted: " + family + " is not Compact!");
289293
}
290294
final short seedHash = Util.computeSeedHash(seed);
291-
if (serVer == 4) {
292-
return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash);
293-
} else if (serVer == 3) {
294-
final int flags = bytes[FLAGS_BYTE];
295-
if ((flags & EMPTY_FLAG_MASK) > 0) {
296-
return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes));
295+
296+
switch (serVer) {
297+
case 1: {
298+
return ForwardCompatibility.heapify1to3(MemorySegment.ofArray(bytes), seedHash);
297299
}
298-
final int preLongs = bytes[PREAMBLE_LONGS_BYTE];
299-
if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) {
300-
return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
300+
case 2: {
301+
return ForwardCompatibility.heapify2to3(MemorySegment.ofArray(bytes),
302+
enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
301303
}
302-
//not empty & not singleItem
303-
final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
304-
if (!compactFlag) {
305-
throw new SketchesArgumentException(
306-
"Corrupted: COMPACT family sketch image must have compact flag set");
304+
case 3: {
305+
final int flags = bytes[FLAGS_BYTE];
306+
if ((flags & EMPTY_FLAG_MASK) > 0) {
307+
return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes));
308+
}
309+
final int preLongs = bytes[PREAMBLE_LONGS_BYTE];
310+
if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) {
311+
return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
312+
}
313+
//not empty & not singleItem
314+
final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
315+
if (!compactFlag) {
316+
throw new SketchesArgumentException(
317+
"Corrupted: COMPACT family sketch image must have compact flag set");
318+
}
319+
final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
320+
if (!readOnly) {
321+
throw new SketchesArgumentException(
322+
"Corrupted: COMPACT family sketch image must have Read-Only flag set");
323+
}
324+
return WrappedCompactSketch.wrapInstance(bytes,
325+
enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
326+
}
327+
case 4: {
328+
return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash);
307329
}
308-
final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
309-
if (!readOnly) {
330+
default: {
310331
throw new SketchesArgumentException(
311-
"Corrupted: COMPACT family sketch image must have Read-Only flag set");
332+
"Corrupted: Serialization Version " + serVer + " not recognized.");
312333
}
313-
return WrappedCompactSketch.wrapInstance(bytes,
314-
enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
315-
} else if (serVer == 1) {
316-
return ForwardCompatibility.heapify1to3(MemorySegment.ofArray(bytes), seedHash);
317-
} else if (serVer == 2) {
318-
return ForwardCompatibility.heapify2to3(MemorySegment.ofArray(bytes),
319-
enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
320334
}
321-
throw new SketchesArgumentException(
322-
"Corrupted: Serialization Version " + serVer + " not recognized.");
323335
}
324336

325337
//Sketch Overrides

src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@
5151
import org.apache.datasketches.thetacommon.ThetaUtil;
5252

5353
/**
54-
* The default Theta Sketch using the QuickSelect algorithm.
55-
* This is the read-only implementation with non-functional methods, which affect the state.
54+
* The read-only Theta Sketch using the QuickSelect algorithm.
5655
*
5756
* <p>This implementation uses data in a given MemorySegment that is owned and managed by the caller.
5857
* This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
@@ -65,17 +64,16 @@ class DirectQuickSelectSketchR extends UpdateSketch {
6564
static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space
6665
final long seed_; //provided, kept only on heap, never serialized.
6766
int hashTableThreshold_; //computed, kept only on heap, never serialized.
68-
MemorySegment wseg_; //A MemorySegment for child class, but no write methods here
67+
MemorySegment wseg_; //This reference is shared with the writable child class, but no write methods here
6968

70-
//only called by DirectQuickSelectSketch and below
69+
//only called by the writable DirectQuickSelectSketch and this class.
7170
DirectQuickSelectSketchR(final long seed, final MemorySegment wseg) {
7271
seed_ = seed;
7372
wseg_ = wseg;
7473
}
7574

7675
/**
77-
* Wrap a sketch around the given source MemorySegment containing sketch data that originated from
78-
* this sketch.
76+
* Wrap a sketch around the given source MemorySegment containing sketch data that originated from this sketch.
7977
* @param srcSeg the source MemorySegment.
8078
* The given MemorySegment object must be in hash table form and not read only.
8179
* @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>
@@ -89,8 +87,7 @@ static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final l
8987
UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs);
9088
checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs);
9189

92-
final DirectQuickSelectSketchR dqssr =
93-
new DirectQuickSelectSketchR(seed, srcSeg);
90+
final DirectQuickSelectSketchR dqssr = new DirectQuickSelectSketchR(seed, srcSeg);
9491
dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
9592
return dqssr;
9693
}

src/main/java/org/apache/datasketches/theta/Sketch.java

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public abstract class Sketch implements MemorySegmentStatus {
6363
* was used to create the source MemorySegment image.
6464
*
6565
* <p>For Compact Sketches this method assumes that the sketch image was created with the
66-
* correct hash seed, so it is not checked.</p>
66+
* correct hash seed, so it is not checked. SerialVersion 1 sketches (pre-open-source) cannot be checked.</p>
6767
*
6868
* @param srcSeg an image of a Sketch.
6969
*
@@ -83,8 +83,12 @@ public static Sketch heapify(final MemorySegment srcSeg) {
8383
*
8484
* <p>The resulting sketch will not retain any link to the source MemorySegment.</p>
8585
*
86-
* <p>For Update and Compact Sketches this method checks if the given expectedSeed was used to
87-
* create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.</p>
86+
* <p>For Update Sketches this method checks if the
87+
* <a href="{@docRoot}/resources/dictionary.html#defaultUpdateSeed">Default Update Seed</a>
88+
* was used to create the source MemorySegment image.</p>
89+
*
90+
* <p>For Compact Sketches this method assumes that the sketch image was created with the
91+
* correct hash seed, so it is not checked. SerialVersion 1 sketches (pre-open-source) cannot be checked.</p>
8892
*
8993
* @param srcSeg an image of a Sketch that was created using the given expectedSeed.
9094
* @param expectedSeed the seed used to validate the given MemorySegment image.
@@ -109,8 +113,7 @@ public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed
109113
* <p>Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have
110114
* been explicitly stored as direct sketches can be wrapped.
111115
* Wrapping earlier serial version sketches will result in an on-heap CompactSketch
112-
* where all data will be copied to the heap. These early versions were never designed to
113-
* "wrap".</p>
116+
* where all data will be copied to the heap. These early versions were never designed to "wrap".</p>
114117
*
115118
* <p>Wrapping any subclass of this class that is empty or contains only a single item will
116119
* result in on-heap equivalent forms of empty and single item sketch respectively.
@@ -121,10 +124,10 @@ public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed
121124
* was used to create the source MemorySegment image.
122125
*
123126
* <p>For Compact Sketches this method assumes that the sketch image was created with the
124-
* correct hash seed, so it is not checked.</p>
127+
* correct hash seed, so it is not checked. SerialVersion 1 (pre-open-source) sketches cannot be checked.</p>
125128
*
126-
* @param srcSeg an image of a Sketch.
127-
* @return a Sketch backed by the given MemorySegment
129+
* @param srcSeg a MemorySegment with an image of a Sketch.
130+
* @return a read-only Sketch backed by the given MemorySegment
128131
*/
129132
public static Sketch wrap(final MemorySegment srcSeg) {
130133
final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
@@ -154,20 +157,23 @@ public static Sketch wrap(final MemorySegment srcSeg) {
154157
* <p>Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have
155158
* been explicitly stored as direct sketches can be wrapped.
156159
* Wrapping earlier serial version sketches will result in an on-heap CompactSketch
157-
* where all data will be copied to the heap. These early versions were never designed to
158-
* "wrap".</p>
160+
* where all data will be copied to the heap. These early versions were never designed to "wrap".</p>
159161
*
160162
* <p>Wrapping any subclass of this class that is empty or contains only a single item will
161163
* result in on-heap equivalent forms of empty and single item sketch respectively.
162164
* This is actually faster and consumes less overall space.</p>
163165
*
164-
* <p>For Update and Compact Sketches this method checks if the given expectedSeed was used to
165-
* create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.</p>
166+
* <p>For Update Sketches this method checks if the
167+
* <a href="{@docRoot}/resources/dictionary.html#defaultUpdateSeed">Default Update Seed</a>
168+
* was used to create the source MemorySegment image.</p>
169+
*
170+
* <p>For Compact Sketches this method assumes that the sketch image was created with the
171+
* correct hash seed, so it is not checked. SerialVersion 1 (pre-open-source) sketches cannot be checked.</p>
166172
*
167173
* @param srcSeg a MemorySegment with an image of a Sketch.
168174
* @param expectedSeed the seed used to validate the given MemorySegment image.
169175
* <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
170-
* @return a UpdateSketch backed by the given MemorySegment except as above.
176+
* @return a read-only Sketch backed by the given MemorySegment.
171177
*/
172178
public static Sketch wrap(final MemorySegment srcSeg, final long expectedSeed) {
173179
final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
@@ -203,7 +209,7 @@ public static Sketch wrap(final MemorySegment srcSeg, final long expectedSeed) {
203209
* @return this sketch as an ordered CompactSketch.
204210
*/
205211
public CompactSketch compact() {
206-
return (this.isCompact()) ? (CompactSketch)this : compact(true, null);
212+
return isCompact() ? (CompactSketch)this : compact(true, null);
207213
}
208214

209215
/**

src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
import org.apache.datasketches.common.Util;
2929

3030
/**
31-
* Wrapper around a serialized compact compressed read-only sketch. It is not empty, not a single item.
31+
* A wrapper around a serialized compact compressed read-only sketch in the form of a byte array.
32+
* It is neither an empty nor a single-item sketch.
3233
*
3334
* <p>This sketch can only be associated with a Serialization Version 4 format binary image.</p>
3435
*/

src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@
3737
import org.apache.datasketches.common.Util;
3838

3939
/**
40-
* Wrapper around a serialized compact read-only sketch. It is not empty, not a single item.
40+
* A wrapper around a serialized compact read-only sketch in the form of a byte array.
41+
* It is neither an empty nor a single-item sketch.
4142
*
42-
* <p>This sketch can only be associated with a Serialization Version 3 format binary image.</p>
43+
* <p>This sketch can only be associated with a Serialization Version 3 binary image format.</p>
4344
*/
4445
class WrappedCompactSketch extends CompactSketch {
4546
final byte[] bytes_;

0 commit comments

Comments
 (0)