Skip to content

Commit 88d6037

Browse files
committed
Speed and code comment suggestions from GPT review. Fix typo.
1 parent 9f85d8c commit 88d6037

3 files changed

Lines changed: 48 additions & 58 deletions

File tree

src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,12 @@
6262
*/
6363
class DirectQuickSelectSketchR extends UpdateSketch {
6464
static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space
65-
final long seed_; //provided, kept only on heap, never serialized.
6665
int hashTableThreshold_; //computed, kept only on heap, never serialized.
6766
MemorySegment wseg_; //This reference is shared with the writable child class, but no write methods here
6867

6968
//only called by the writable DirectQuickSelectSketch and this class.
7069
DirectQuickSelectSketchR(final long seed, final MemorySegment wseg) {
71-
seed_ = seed;
70+
super(seed);
7271
wseg_ = wseg;
7372
}
7473

@@ -100,8 +99,8 @@ static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final l
10099
* @return instance of this sketch
101100
*/
102101
static DirectQuickSelectSketchR fastReadOnlyWrap(final MemorySegment srcSeg, final long seed) {
103-
final int lgNomLongs = srcSeg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF;
104-
final int lgArrLongs = srcSeg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
102+
final int lgNomLongs = srcSeg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF; //mask to byte
103+
final int lgArrLongs = srcSeg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; //mask to byte
105104

106105
final DirectQuickSelectSketchR dqss = new DirectQuickSelectSketchR(seed, srcSeg);
107106
dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
@@ -114,8 +113,8 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final MemorySegment srcSeg, fin
114113
public int getCurrentBytes() {
115114
//not compact
116115
final byte lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE);
117-
final int preLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
118-
return (preLongs + (1 << lgArrLongs)) << 3;
116+
final int preLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //mask to 6 bits
117+
return preLongs + (1 << lgArrLongs) << 3;
119118
}
120119

121120
@Override
@@ -127,7 +126,7 @@ public double getEstimate() {
127126

128127
@Override
129128
public Family getFamily() {
130-
final int familyID = wseg_.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
129+
final int familyID = wseg_.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; //mask to byte
131130
return Family.idToFamily(familyID);
132131
}
133132

@@ -143,7 +142,7 @@ public long getThetaLong() {
143142

144143
@Override
145144
public boolean hasMemorySegment() {
146-
return (wseg_ != null) && wseg_.scope().isAlive();
145+
return wseg_ != null && wseg_.scope().isAlive();
147146
}
148147

149148
@Override
@@ -196,11 +195,6 @@ public ResizeFactor getResizeFactor() {
196195
return ResizeFactor.getRF(getLgRF());
197196
}
198197

199-
@Override
200-
long getSeed() {
201-
return seed_;
202-
}
203-
204198
@Override
205199
public UpdateSketch rebuild() {
206200
throw new SketchesReadOnlyException();
@@ -215,8 +209,8 @@ public void reset() {
215209

216210
@Override
217211
long[] getCache() {
218-
final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
219-
final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
212+
final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; //mask to byte
213+
final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //mask to 6 bits
220214
final long[] cacheArr = new long[1 << lgArrLongs];
221215
MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, preambleLongs << 3, cacheArr, 0, 1 << lgArrLongs);
222216
return cacheArr;
@@ -254,11 +248,11 @@ boolean isOutOfSpace(final int numEntries) {
254248

255249
@Override
256250
int getLgArrLongs() {
257-
return wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
251+
return wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; //mask to byte
258252
}
259253

260254
int getLgRF() { //only Direct needs this
261-
return (wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3;
255+
return wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT & 0X3; //mask to 2 bits
262256
}
263257

264258
@Override
@@ -277,7 +271,7 @@ UpdateReturnState hashUpdate(final long hash) {
277271
protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
278272
//SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
279273
//but this allows us to tune these constants for different sketches.
280-
final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
274+
final double fraction = lgArrLongs <= lgNomLongs ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
281275
return (int) (fraction * (1 << lgArrLongs));
282276
}
283277

src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,12 @@
4949
*/
5050
abstract class HeapUpdateSketch extends UpdateSketch {
5151
final int lgNomLongs_;
52-
private final long seed_;
5352
private final float p_;
5453
private final ResizeFactor rf_;
5554

5655
HeapUpdateSketch(final int lgNomLongs, final long seed, final float p, final ResizeFactor rf) {
56+
super(seed);
5757
lgNomLongs_ = Math.max(lgNomLongs, ThetaUtil.MIN_LG_NOM_LONGS);
58-
seed_ = seed;
5958
p_ = p;
6059
rf_ = rf;
6160
}
@@ -66,7 +65,7 @@ abstract class HeapUpdateSketch extends UpdateSketch {
6665
public int getCurrentBytes() {
6766
final int preLongs = getCurrentPreambleLongs();
6867
final int dataLongs = getCurrentDataLongs();
69-
return (preLongs + dataLongs) << 3;
68+
return preLongs + dataLongs << 3;
7069
}
7170

7271
//UpdateSketch
@@ -86,11 +85,6 @@ public ResizeFactor getResizeFactor() {
8685
return rf_;
8786
}
8887

89-
@Override
90-
long getSeed() {
91-
return seed_;
92-
}
93-
9488
//restricted methods
9589

9690
@Override
@@ -102,14 +96,14 @@ short getSeedHash() {
10296
byte[] toByteArray(final int preLongs, final byte familyID) {
10397
if (isDirty()) { rebuild(); }
10498
checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries(true));
105-
final int preBytes = (preLongs << 3) & 0X3F; //24 bytes
99+
final int preBytes = preLongs << 3 & 0X3F; //24 bytes; mask to 6 bits
106100
final int dataBytes = getCurrentDataLongs() << 3;
107101
final byte[] byteArrOut = new byte[preBytes + dataBytes];
108102

109103
final MemorySegment segOut = MemorySegment.ofArray(byteArrOut);
110104

111105
//preamble first 8 bytes. Note: only compact can be reduced to 8 bytes.
112-
final int lgRf = getResizeFactor().lg() & 0x3;
106+
final int lgRf = getResizeFactor().lg() & 0x3; //mask to 2 bits
113107
insertPreLongs(segOut, preLongs); //byte 0 low 6 bits
114108
insertLgResizeFactor(segOut, lgRf); //byte 0 high 2 bits
115109
insertSerVer(segOut, SER_VER); //byte 1

src/main/java/org/apache/datasketches/theta/UpdateSketch.java

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,11 @@
6060
* @author Lee Rhodes
6161
*/
6262
public abstract class UpdateSketch extends Sketch {
63+
private final long seed_;
6364

64-
UpdateSketch() {}
65+
UpdateSketch(final long seed) {
66+
seed_ = seed; //kept only on heap, never serialized. Hoisted here for performance.
67+
}
6568

6669
/**
6770
* Wrap takes the writable sketch image in MemorySegment and refers to it directly. There is no data copying onto
@@ -91,17 +94,17 @@ public static UpdateSketch wrap(final MemorySegment srcWSeg) {
9194
* @return a UpdateSketch backed by the given MemorySegment
9295
*/
9396
public static UpdateSketch wrap(final MemorySegment srcWSeg, final long expectedSeed) {
94-
Objects.requireNonNull(srcWSeg, "Source MemorySeg e t must not be null");
97+
Objects.requireNonNull(srcWSeg, "Source MemorySegment must not be null");
9598
checkBounds(0, 24, srcWSeg.byteSize()); //need min 24 bytes
96-
final int preLongs = srcWSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
97-
final int serVer = srcWSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF;
98-
final int familyID = srcWSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
99+
final int preLongs = srcWSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //mask to 6 bits
100+
final int serVer = srcWSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; //mask to byte
101+
final int familyID = srcWSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; //mask to byte
99102
final Family family = Family.idToFamily(familyID);
100103
if (family != Family.QUICKSELECT) {
101104
throw new SketchesArgumentException(
102105
"A " + family + " sketch cannot be wrapped as an UpdateSketch.");
103106
}
104-
if ((serVer == 3) && (preLongs == 3)) {
107+
if (serVer == 3 && preLongs == 3) {
105108
return DirectQuickSelectSketch.writableWrap(srcWSeg, expectedSeed);
106109
} else {
107110
throw new SketchesArgumentException(
@@ -150,7 +153,7 @@ public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWS
150153
public int getCompactBytes() {
151154
final int preLongs = getCompactPreambleLongs();
152155
final int dataLongs = getRetainedEntries(true);
153-
return (preLongs + dataLongs) << 3;
156+
return preLongs + dataLongs << 3;
154157
}
155158

156159
@Override
@@ -160,7 +163,7 @@ int getCurrentDataLongs() {
160163

161164
@Override
162165
public boolean hasMemorySegment() {
163-
return ((this instanceof DirectQuickSelectSketchR) && ((DirectQuickSelectSketchR)this).hasMemorySegment());
166+
return this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).hasMemorySegment();
164167
}
165168

166169
@Override
@@ -170,7 +173,7 @@ public boolean isCompact() {
170173

171174
@Override
172175
public boolean isOffHeap() {
173-
return ((this instanceof DirectQuickSelectSketchR) && ((DirectQuickSelectSketchR)this).isOffHeap());
176+
return this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isOffHeap();
174177
}
175178

176179
@Override
@@ -180,7 +183,7 @@ public boolean isOrdered() {
180183

181184
@Override
182185
public boolean isSameResource(final MemorySegment that) {
183-
return (this instanceof final DirectQuickSelectSketchR dqssr) && dqssr.isSameResource(that);
186+
return this instanceof final DirectQuickSelectSketchR dqssr && dqssr.isSameResource(that);
184187
}
185188

186189
//UpdateSketch interface
@@ -210,7 +213,7 @@ public static final UpdateSketchBuilder builder() {
210213
* Gets the configured seed
211214
* @return the configured seed
212215
*/
213-
abstract long getSeed();
216+
public long getSeed() { return seed_; }
214217

215218
/**
216219
* Resets this sketch back to a virgin empty state.
@@ -232,8 +235,7 @@ public static final UpdateSketchBuilder builder() {
232235
* <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
233236
*/
234237
public UpdateReturnState update(final long datum) {
235-
final long[] data = { datum };
236-
return hashUpdate(hash(data, getSeed())[0] >>> 1);
238+
return hashUpdate(hash(datum, seed_)[0] >>> 1);
237239
}
238240

239241
/**
@@ -248,9 +250,9 @@ public UpdateReturnState update(final long datum) {
248250
* <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
249251
*/
250252
public UpdateReturnState update(final double datum) {
251-
final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0
252-
final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN & +/- infinity forms
253-
return hashUpdate(hash(data, getSeed())[0] >>> 1);
253+
final double d = datum == 0.0 ? 0.0 : datum; // canonicalize -0.0, 0.0
254+
final long data = Double.doubleToLongBits(d);// canonicalize all NaN & +/- infinity forms
255+
return hashUpdate(hash(data, seed_)[0] >>> 1);
254256
}
255257

256258
/**
@@ -267,11 +269,11 @@ public UpdateReturnState update(final double datum) {
267269
* <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
268270
*/
269271
public UpdateReturnState update(final String datum) {
270-
if ((datum == null) || datum.isEmpty()) {
272+
if (datum == null || datum.isEmpty()) {
271273
return RejectedNullOrEmpty;
272274
}
273275
final byte[] data = datum.getBytes(UTF_8);
274-
return hashUpdate(hash(data, getSeed())[0] >>> 1);
276+
return hashUpdate(hash(data, seed_)[0] >>> 1);
275277
}
276278

277279
/**
@@ -283,10 +285,10 @@ public UpdateReturnState update(final String datum) {
283285
* <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
284286
*/
285287
public UpdateReturnState update(final byte[] data) {
286-
if ((data == null) || (data.length == 0)) {
288+
if (data == null || data.length == 0) {
287289
return RejectedNullOrEmpty;
288290
}
289-
return hashUpdate(hash(data, getSeed())[0] >>> 1);
291+
return hashUpdate(hash(data, seed_)[0] >>> 1);
290292
}
291293

292294
/**
@@ -298,10 +300,10 @@ public UpdateReturnState update(final byte[] data) {
298300
* <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
299301
*/
300302
public UpdateReturnState update(final ByteBuffer buffer) {
301-
if ((buffer == null) || !buffer.hasRemaining()) {
303+
if (buffer == null || !buffer.hasRemaining()) {
302304
return RejectedNullOrEmpty;
303305
}
304-
return hashUpdate(hash(buffer, getSeed())[0] >>> 1);
306+
return hashUpdate(hash(buffer, seed_)[0] >>> 1);
305307
}
306308

307309
/**
@@ -316,10 +318,10 @@ public UpdateReturnState update(final ByteBuffer buffer) {
316318
* <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
317319
*/
318320
public UpdateReturnState update(final char[] data) {
319-
if ((data == null) || (data.length == 0)) {
321+
if (data == null || data.length == 0) {
320322
return RejectedNullOrEmpty;
321323
}
322-
return hashUpdate(hash(data, getSeed())[0] >>> 1);
324+
return hashUpdate(hash(data, seed_)[0] >>> 1);
323325
}
324326

325327
/**
@@ -331,10 +333,10 @@ public UpdateReturnState update(final char[] data) {
331333
* <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
332334
*/
333335
public UpdateReturnState update(final int[] data) {
334-
if ((data == null) || (data.length == 0)) {
336+
if (data == null || data.length == 0) {
335337
return RejectedNullOrEmpty;
336338
}
337-
return hashUpdate(hash(data, getSeed())[0] >>> 1);
339+
return hashUpdate(hash(data, seed_)[0] >>> 1);
338340
}
339341

340342
/**
@@ -346,10 +348,10 @@ public UpdateReturnState update(final int[] data) {
346348
* <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
347349
*/
348350
public UpdateReturnState update(final long[] data) {
349-
if ((data == null) || (data.length == 0)) {
351+
if (data == null || data.length == 0) {
350352
return RejectedNullOrEmpty;
351353
}
352-
return hashUpdate(hash(data, getSeed())[0] >>> 1);
354+
return hashUpdate(hash(data, seed_)[0] >>> 1);
353355
}
354356

355357
//restricted methods
@@ -455,7 +457,7 @@ static void checkSegIntegrity(final MemorySegment srcSeg, final long expectedSee
455457
final long thetaLong = extractThetaLong(srcSeg); //bytes 16-23
456458
final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE;
457459
//if (lgArrLongs <= lgNomLongs) the sketch is still resizing, thus theta cannot be < p.
458-
if ((lgArrLongs <= lgNomLongs) && (theta < p) ) {
460+
if (lgArrLongs <= lgNomLongs && theta < p ) {
459461
throw new SketchesArgumentException(
460462
"Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. "
461463
+ lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p);
@@ -477,7 +479,7 @@ static boolean isResizeFactorIncorrect(final MemorySegment srcSeg, final int lgN
477479
final int lgA = lgArrLongs;
478480
final int lgR = extractLgResizeFactor(srcSeg);
479481
if (lgR == 0) { return lgA != lgT; }
480-
return (((lgT - lgA) % lgR) != 0);
482+
return (lgT - lgA) % lgR != 0;
481483
}
482484

483485
}

0 commit comments

Comments
 (0)