Skip to content

Commit e4271bf

Browse files
committed
Preparatory work for generic items.
1. The UpdatableBitMask has been eliminated from the Flags field. This should make the Flags field identical to the one used in C++. 2. The documentation of the serialization formats has been significantly improved. See the docs for the KllPreambleUtil class. 3. I have reduced the dependence on the KllMemoryValidate class to only those uses that are actually required for validation. 4. More cleanup of fields and variables that are not actually used.
1 parent ee84291 commit e4271bf

14 files changed

Lines changed: 189 additions & 215 deletions

src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,13 @@
2222
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
2323
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL;
2424
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
25-
import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
2625
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK;
2726
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag;
2827
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM;
2928
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK;
3029
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN;
3130
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels;
3231
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID;
33-
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags;
3432
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK;
3533
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag;
3634
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM;
@@ -59,13 +57,12 @@
5957
class KllDirectDoublesSketch extends KllDoublesSketch {
6058

6159
/**
62-
* The constructor with Memory that can be off-heap.
60+
* The constructor with WritableMemory that can be off-heap.
6361
* @param wmem the current WritableMemory
6462
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
6563
* @param memVal the KllMemoryValidate object
6664
*/
67-
KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr,
68-
final KllMemoryValidate memVal) {
65+
KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) {
6966
super(wmem, memReqSvr);
7067
levelsArr = memVal.levelsArr;
7168
}
@@ -83,7 +80,6 @@ static KllDirectDoublesSketch newDirectInstance(final int k, final int m, final
8380
setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL);
8481
setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE);
8582
setMemoryFamilyID(dstMem, Family.KLL.getID());
86-
setMemoryFlags(dstMem, UPDATABLE_BIT_MASK);
8783
setMemoryK(dstMem, k);
8884
setMemoryM(dstMem, m);
8985
setMemoryN(dstMem, 0);

src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,13 @@
2222
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
2323
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL;
2424
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
25-
import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
2625
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK;
2726
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag;
2827
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM;
2928
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK;
3029
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN;
3130
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels;
3231
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID;
33-
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags;
3432
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK;
3533
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag;
3634
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM;
@@ -59,13 +57,12 @@
5957
class KllDirectFloatsSketch extends KllFloatsSketch {
6058

6159
/**
62-
* The constructor with Memory that can be off-heap.
60+
* The constructor with WritableMemory that can be off-heap.
6361
* @param wmem the current WritableMemory
6462
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
6563
* @param memVal the KllMemoryValidate object
6664
*/
67-
KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr,
68-
final KllMemoryValidate memVal) {
65+
KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) {
6966
super(wmem, memReqSvr);
7067
levelsArr = memVal.levelsArr;
7168
}
@@ -83,7 +80,6 @@ static KllDirectFloatsSketch newDirectInstance(final int k, final int m, final W
8380
setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL);
8481
setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE);
8582
setMemoryFamilyID(dstMem, Family.KLL.getID());
86-
setMemoryFlags(dstMem, UPDATABLE_BIT_MASK);
8783
setMemoryK(dstMem, k);
8884
setMemoryM(dstMem, m);
8985
setMemoryN(dstMem, 0);

src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,13 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, final KllSketch oth
4949
final int myMinK = mySketch.getMinK();
5050

5151
//update this sketch with level0 items from the other sketch
52-
5352
if (otherDblSk.isCompactSingleItem()) {
5453
updateDouble(mySketch, otherDblSk.getDoubleSingleItem());
5554
otherDoubleItemsArr = new double[0];
5655
} else {
5756
otherDoubleItemsArr = otherDblSk.getDoubleItemsArray();
5857
for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) {
59-
KllDoublesHelper.updateDouble(mySketch, otherDoubleItemsArr[i]);
58+
updateDouble(mySketch, otherDoubleItemsArr[i]);
6059
}
6160
}
6261
// after the level 0 update, we capture the state of levels and items arrays
@@ -68,7 +67,7 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, final KllSketch oth
6867
int[] myNewLevelsArr = myCurLevelsArr;
6968
double[] myNewDoubleItemsArr = myCurDoubleItemsArr;
7069

71-
if (otherNumLevels > 1 && !otherDblSk.isCompactSingleItem()) { //now merge other levels if they exist
70+
if (otherNumLevels > 1 && !otherDblSk.isCompactSingleItem()) { //now merge higher levels if they exist
7271
final int tmpSpaceNeeded = mySketch.getNumRetained()
7372
+ KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr);
7473
final double[] workbuf = new double[tmpSpaceNeeded];
@@ -115,7 +114,7 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, final KllSketch oth
115114
}
116115

117116
//MEMORY SPACE MANAGEMENT
118-
if (mySketch.updatableMemFormat) {
117+
if (mySketch.serialVersionUpdatable) {
119118
mySketch.wmem = KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewDoubleItemsArr.length);
120119
}
121120
}
@@ -358,7 +357,7 @@ private static void populateDoubleWorkArrays(
358357
worklevels[0] = 0;
359358

360359
// Note: the level zero data from "other" was already inserted into "self"
361-
final int selfPopZero = KllHelper.currentLevelSize(0, myCurNumLevels,myCurLevelsArr);
360+
final int selfPopZero = KllHelper.currentLevelSize(0, myCurNumLevels, myCurLevelsArr);
362361
System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[0], workbuf, worklevels[0], selfPopZero);
363362
worklevels[1] = worklevels[0] + selfPopZero;
364363

src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
import static java.lang.Math.max;
2323
import static java.lang.Math.min;
24-
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag;
25-
import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_BE_UPDATABLE_FORMAT;
24+
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
25+
import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
2626
import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY;
2727
import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow;
2828
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
@@ -61,60 +61,56 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou
6161
*/
6262
public static KllDoublesSketch heapify(final Memory srcMem) {
6363
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
64-
if (getMemoryUpdatableFormatFlag(srcMem)) { Error.kllSketchThrow(MUST_NOT_BE_UPDATABLE_FORMAT); }
6564
return KllHeapDoublesSketch.heapifyImpl(srcMem);
6665
}
6766

6867
/**
69-
* Create a new direct instance of this sketch with a given <em>k</em>.
70-
* @param k parameter that controls size of the sketch and accuracy of estimates.
68+
* Create a new direct instance of this sketch with the default <em>k</em>.
69+
* The default <em>k</em> = 200 results in a normalized rank error of about
70+
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
7171
* @param dstMem the given destination WritableMemory object for use by the sketch
7272
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
7373
* @return a new direct instance of this sketch
7474
*/
7575
public static KllDoublesSketch newDirectInstance(
76-
final int k,
7776
final WritableMemory dstMem,
7877
final MemoryRequestServer memReqSvr) {
79-
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
80-
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
81-
return KllDirectDoublesSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
78+
return newDirectInstance(DEFAULT_K, dstMem, memReqSvr);
8279
}
83-
80+
8481
/**
85-
* Create a new direct instance of this sketch with the default <em>k</em>.
86-
* The default <em>k</em> = 200 results in a normalized rank error of about
87-
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
82+
* Create a new direct instance of this sketch with a given <em>k</em>.
83+
* @param k parameter that controls size of the sketch and accuracy of estimates.
8884
* @param dstMem the given destination WritableMemory object for use by the sketch
8985
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
9086
* @return a new direct instance of this sketch
9187
*/
9288
public static KllDoublesSketch newDirectInstance(
89+
final int k,
9390
final WritableMemory dstMem,
9491
final MemoryRequestServer memReqSvr) {
9592
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
9693
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
97-
return KllDirectDoublesSketch.newDirectInstance(DEFAULT_K, DEFAULT_M, dstMem, memReqSvr);
94+
return KllDirectDoublesSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
9895
}
9996

10097
/**
10198
* Create a new heap instance of this sketch with the default <em>k = 200</em>.
10299
* The default <em>k</em> = 200 results in a normalized rank error of about
103100
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
104-
* This will have a rank error of about 1.65%.
105-
* @return new KllDoublesSketch on the heap.
101+
* @return new KllDoublesSketch on the Java heap.
106102
*/
107-
public static KllDoublesSketch newHeapInstance() {
108-
return new KllHeapDoublesSketch(DEFAULT_K, DEFAULT_M);
103+
public static KllDoublesSketch newHeapInstance() {
104+
return newHeapInstance(DEFAULT_K);
109105
}
110106

111107
/**
112108
* Create a new heap instance of this sketch with a given parameter <em>k</em>.
113-
* <em>k</em> can be between DEFAULT_M and 65535, inclusive.
109+
* <em>k</em> can be between 8, inclusive, and 65535, inclusive.
114110
* The default <em>k</em> = 200 results in a normalized rank error of about
115111
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
116112
* @param k parameter that controls size of the sketch and accuracy of estimates.
117-
* @return new KllDoublesSketch on the heap.
113+
* @return new KllDoublesSketch on the Java heap.
118114
*/
119115
public static KllDoublesSketch newHeapInstance(final int k) {
120116
return new KllHeapDoublesSketch(k, DEFAULT_M);
@@ -129,7 +125,7 @@ public static KllDoublesSketch newHeapInstance(final int k) {
129125
public static KllDoublesSketch wrap(final Memory srcMem) {
130126
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
131127
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH);
132-
if (memVal.updatableMemFormat) {
128+
if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE) {
133129
return new KllDirectDoublesSketch((WritableMemory) srcMem, null, memVal);
134130
} else {
135131
return new KllDirectCompactDoublesSketch(srcMem, memVal);
@@ -148,10 +144,8 @@ public static KllDoublesSketch writableWrap(
148144
final MemoryRequestServer memReqSvr) {
149145
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
150146
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH);
151-
if (memVal.updatableMemFormat) {
152-
if (!memVal.readOnly) {
153-
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
154-
}
147+
if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE && !srcMem.isReadOnly()) {
148+
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
155149
return new KllDirectDoublesSketch(srcMem, memReqSvr, memVal);
156150
} else {
157151
return new KllDirectCompactDoublesSketch(srcMem, memVal);

src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ static void mergeFloatImpl(final KllFloatsSketch mySketch, final KllSketch other
114114
}
115115

116116
//MEMORY SPACE MANAGEMENT
117-
if (mySketch.updatableMemFormat) {
117+
if (mySketch.serialVersionUpdatable) {
118118
mySketch.wmem = KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewFloatItemsArr.length);
119119
}
120120
}

src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
import static java.lang.Math.max;
2323
import static java.lang.Math.min;
24-
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag;
25-
import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_BE_UPDATABLE_FORMAT;
24+
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
25+
import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
2626
import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY;
2727
import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow;
2828
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
@@ -61,60 +61,56 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa
6161
*/
6262
public static KllFloatsSketch heapify(final Memory srcMem) {
6363
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
64-
if (getMemoryUpdatableFormatFlag(srcMem)) { Error.kllSketchThrow(MUST_NOT_BE_UPDATABLE_FORMAT); }
6564
return KllHeapFloatsSketch.heapifyImpl(srcMem);
6665
}
6766

6867
/**
69-
* Create a new direct instance of this sketch with a given <em>k</em>.
70-
* @param k parameter that controls size of the sketch and accuracy of estimates.
68+
* Create a new direct instance of this sketch with the default <em>k</em>.
69+
* The default <em>k</em> = 200 results in a normalized rank error of about
70+
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
7171
* @param dstMem the given destination WritableMemory object for use by the sketch
7272
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
7373
* @return a new direct instance of this sketch
7474
*/
7575
public static KllFloatsSketch newDirectInstance(
76-
final int k,
7776
final WritableMemory dstMem,
7877
final MemoryRequestServer memReqSvr) {
79-
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
80-
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
81-
return KllDirectFloatsSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
78+
return newDirectInstance(DEFAULT_K, dstMem, memReqSvr);
8279
}
83-
80+
8481
/**
85-
* Create a new direct instance of this sketch with the default <em>k</em>.
86-
* The default <em>k</em> = 200 results in a normalized rank error of about
87-
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
82+
* Create a new direct instance of this sketch with a given <em>k</em>.
83+
* @param k parameter that controls size of the sketch and accuracy of estimates.
8884
* @param dstMem the given destination WritableMemory object for use by the sketch
8985
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
9086
* @return a new direct instance of this sketch
9187
*/
9288
public static KllFloatsSketch newDirectInstance(
89+
final int k,
9390
final WritableMemory dstMem,
9491
final MemoryRequestServer memReqSvr) {
9592
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
9693
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
97-
return KllDirectFloatsSketch.newDirectInstance(DEFAULT_K, DEFAULT_M, dstMem, memReqSvr);
94+
return KllDirectFloatsSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
9895
}
9996

10097
/**
10198
* Create a new heap instance of this sketch with the default <em>k = 200</em>.
10299
* The default <em>k</em> = 200 results in a normalized rank error of about
103100
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
104-
* This will have a rank error of about 1.65%.
105-
* @return new KllFloatsSketch on the heap.
101+
* @return new KllFloatsSketch on the Java heap.
106102
*/
107103
public static KllFloatsSketch newHeapInstance() {
108-
return new KllHeapFloatsSketch(DEFAULT_K, DEFAULT_M);
104+
return newHeapInstance(DEFAULT_K);
109105
}
110106

111107
/**
112108
* Create a new heap instance of this sketch with a given parameter <em>k</em>.
113-
* <em>k</em> can be between DEFAULT_M and 65535, inclusive.
109+
* <em>k</em> can be between 8, inclusive, and 65535, inclusive.
114110
* The default <em>k</em> = 200 results in a normalized rank error of about
115111
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
116112
* @param k parameter that controls size of the sketch and accuracy of estimates.
117-
* @return new KllFloatsSketch on the heap.
113+
* @return new KllFloatsSketch on the Java heap.
118114
*/
119115
public static KllFloatsSketch newHeapInstance(final int k) {
120116
return new KllHeapFloatsSketch(k, DEFAULT_M);
@@ -129,7 +125,7 @@ public static KllFloatsSketch newHeapInstance(final int k) {
129125
public static KllFloatsSketch wrap(final Memory srcMem) {
130126
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
131127
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH);
132-
if (memVal.updatableMemFormat) {
128+
if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE) {
133129
return new KllDirectFloatsSketch((WritableMemory) srcMem, null, memVal);
134130
} else {
135131
return new KllDirectCompactFloatsSketch(srcMem, memVal);
@@ -148,10 +144,8 @@ public static KllFloatsSketch writableWrap(
148144
final MemoryRequestServer memReqSvr) {
149145
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
150146
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH);
151-
if (memVal.updatableMemFormat) {
152-
if (!memVal.readOnly) {
147+
if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE && !srcMem.isReadOnly()) {
153148
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
154-
}
155149
return new KllDirectFloatsSketch(srcMem, memReqSvr, memVal);
156150
} else {
157151
return new KllDirectCompactFloatsSketch(srcMem, memVal);

0 commit comments

Comments
 (0)