Skip to content

Commit df2237b

Browse files
Merge pull request #197 from DataSketches/RemoveOldEpsFromK
Removed old EpsilonFromK model
2 parents 64f250a + fef7500 commit df2237b

11 files changed

Lines changed: 117 additions & 251 deletions

File tree

src/main/java/com/yahoo/sketches/kll/KllFloatsSketch.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,7 @@ private KllFloatsSketch(final Memory mem) {
230230
}
231231

232232
private KllFloatsSketch(final int k, final int m) {
233-
if ((k < MIN_K) || (k > MAX_K)) {
234-
throw new SketchesArgumentException("K must be >= " + MIN_K + " and < " + MAX_K + ": " + k);
235-
}
233+
checkK(k);
236234
k_ = k;
237235
m_ = m;
238236
numLevels_ = 1;
@@ -577,6 +575,7 @@ public static double getNormalizedRankError(final int k) {
577575
}
578576

579577
/**
578+
* Gets the normalized rank error given k and pmf.
580579
* Static method version of the {@link #getNormalizedRankError(boolean)}.
581580
* @param k the configuration parameter
582581
* @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function.
@@ -603,8 +602,11 @@ public static double getNormalizedRankError(final int k, final boolean pmf) {
603602
* @return the value of <i>k</i> given a value of epsilon.
604603
* @see KllFloatsSketch
605604
*/
605+
// constants were derived as the best fit to 99 percentile empirically measured max error in
606+
// thousands of trials
606607
public static int getKFromEpsilon(final double epsilon, final boolean pmf) {
607-
final double eps = max(epsilon, 4.7E-5);
608+
//Ensure that eps is >= the lowest possible eps given MAX_K and pmf=false.
609+
final double eps = max(epsilon, 4.7634E-5);
608610
final double kdbl = pmf
609611
? exp(log(2.446 / eps) / 0.9433)
610612
: exp(log(2.296 / eps) / 0.9723);
@@ -780,6 +782,17 @@ public static KllFloatsSketch heapify(final Memory mem) {
780782
return new KllFloatsSketch(mem);
781783
}
782784

785+
/**
786+
* Checks the validity of the given value k
787+
* @param k must be greater than 7 and less than 65536.
788+
*/
789+
static void checkK(final int k) {
790+
if ((k < MIN_K) || (k > MAX_K)) {
791+
throw new SketchesArgumentException(
792+
"K must be >= " + MIN_K + " and <= " + MAX_K + ": " + k);
793+
}
794+
}
795+
783796
private KllFloatsQuantileCalculator getQuantileCalculator() {
784797
sortLevelZero(); // sort in the sketch to reuse if possible
785798
return new KllFloatsQuantileCalculator(items_, levels_, numLevels_, n_);

src/main/java/com/yahoo/sketches/quantiles/DoublesSketch.java

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,8 @@
77

88
import static com.yahoo.sketches.Util.ceilingPowerOf2;
99
import static com.yahoo.sketches.quantiles.Util.checkIsCompactMemory;
10-
import static java.lang.Math.abs;
11-
import static java.lang.Math.ceil;
12-
import static java.lang.Math.exp;
13-
import static java.lang.Math.log;
1410
import static java.lang.Math.max;
1511
import static java.lang.Math.min;
16-
import static java.lang.Math.pow;
17-
import static java.lang.Math.round;
1812

1913
import java.util.Random;
2014

@@ -131,6 +125,7 @@ public abstract class DoublesSketch {
131125
static final int DOUBLES_SER_VER = 3;
132126
static final int MAX_PRELONGS = Family.QUANTILES.getMaxPreLongs();
133127
static final int MIN_K = 2;
128+
static final int MAX_K = 1 << 15;
134129

135130
/**
136131
* Parameter that controls space usage of sketch and accuracy of estimates.
@@ -225,7 +220,7 @@ public double getQuantile(final double fraction) {
225220
* exists with a confidence of at least 99%. Returns NaN if the sketch is empty.
226221
*/
227222
public double getQuantileUpperBound(final double fraction) {
228-
return getQuantile(min(1.0, fraction + getNormalizedRankError(k_, false)));
223+
return getQuantile(min(1.0, fraction + Util.getNormalizedRankError(k_, false)));
229224
}
230225

231226
/**
@@ -236,7 +231,7 @@ public double getQuantileUpperBound(final double fraction) {
236231
* exists with a confidence of at least 99%. Returns NaN if the sketch is empty.
237232
*/
238233
public double getQuantileLowerBound(final double fraction) {
239-
return getQuantile(max(0, fraction - getNormalizedRankError(k_, false)));
234+
return getQuantile(max(0, fraction - Util.getNormalizedRankError(k_, false)));
240235
}
241236

242237
/**
@@ -441,7 +436,7 @@ public int getK() {
441436
*/
442437
@Deprecated
443438
public double getNormalizedRankError() {
444-
return getNormalizedRankError(getK());
439+
return Util.getNormalizedRankError(getK(), true);
445440
}
446441

447442
/**
@@ -452,7 +447,7 @@ public double getNormalizedRankError() {
452447
* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
453448
*/
454449
public double getNormalizedRankError(final boolean pmf) {
455-
return getNormalizedRankError(k_, pmf);
450+
return Util.getNormalizedRankError(k_, pmf);
456451
}
457452

458453
/**
@@ -463,10 +458,11 @@ public double getNormalizedRankError(final boolean pmf) {
463458
*/
464459
@Deprecated
465460
public static double getNormalizedRankError(final int k) {
466-
return Util.EpsilonFromK.getAdjustedEpsilon(k);
461+
return Util.getNormalizedRankError(k, true);
467462
}
468463

469464
/**
465+
* Gets the normalized rank error given k and pmf.
470466
* Static method version of the {@link #getNormalizedRankError(boolean)}.
471467
* @param k the configuration parameter
472468
* @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function.
@@ -475,12 +471,8 @@ public static double getNormalizedRankError(final int k) {
475471
* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
476472
* @see KllFloatsSketch
477473
*/
478-
// constants were derived as the best fit to 99 percentile empirically measured max error in
479-
// thousands of trials
480474
public static double getNormalizedRankError(final int k, final boolean pmf) {
481-
return pmf
482-
? 1.74289590045312415 / pow(k, 0.954048085817058)
483-
: 1.74289590045312415 / pow(k, 0.954048085817058); ///TODO
475+
return Util.getNormalizedRankError(k, pmf);
484476
}
485477

486478
/**
@@ -494,14 +486,7 @@ public static double getNormalizedRankError(final int k, final boolean pmf) {
494486
* @see KllFloatsSketch
495487
*/
496488
public static int getKFromEpsilon(final double epsilon, final boolean pmf) {
497-
final double eps = max(epsilon, 4.7E-5);
498-
final double kdbl = pmf
499-
? exp(log(1.74289590045312415 / eps) / 0.954048085817058)
500-
: exp(log(1.74289590045312415 / eps) / 0.954048085817058);//TODO
501-
final double krnd = round(kdbl);
502-
final double del = abs(krnd - kdbl);
503-
final int k = (int) ((del < 1E-6) ? krnd : ceil(kdbl));
504-
return k; //
489+
return Util.getKFromEpsilon(epsilon, pmf);
505490
}
506491

507492
/**

src/main/java/com/yahoo/sketches/quantiles/DoublesUnionImpl.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
* @author Kevin Lang
1818
*/
1919
final class DoublesUnionImpl extends DoublesUnionImplR {
20+
2021
private DoublesUnionImpl(final int maxK) {
2122
super(maxK);
2223
}
@@ -77,13 +78,8 @@ static DoublesUnionImpl heapifyInstance(final DoublesSketch sketch) {
7778
* @return a DoublesUnion object
7879
*/
7980
static DoublesUnionImpl heapifyInstance(final Memory srcMem) {
80-
final int preLongs = srcMem.getByte(PreambleUtil.PREAMBLE_LONGS_BYTE) & 0xFF;
81-
final int k = srcMem.getShort(PreambleUtil.K_SHORT) & 0xFFFF;
82-
final HeapUpdateDoublesSketch sketch = (preLongs == 1)
83-
? HeapUpdateDoublesSketch.newInstance(k)
84-
: HeapUpdateDoublesSketch.heapifyInstance(srcMem);
85-
final DoublesUnionImpl union = new DoublesUnionImpl(k);
86-
union.maxK_ = k;
81+
final HeapUpdateDoublesSketch sketch = HeapUpdateDoublesSketch.heapifyInstance(srcMem);
82+
final DoublesUnionImpl union = new DoublesUnionImpl(sketch.getK());
8783
union.gadget_ = sketch;
8884
return union;
8985
}
@@ -98,9 +94,7 @@ static DoublesUnionImpl heapifyInstance(final Memory srcMem) {
9894
*/
9995
static DoublesUnionImpl wrapInstance(final WritableMemory mem) {
10096
final DirectUpdateDoublesSketch sketch = DirectUpdateDoublesSketch.wrapInstance(mem);
101-
final int k = sketch.getK();
102-
final DoublesUnionImpl union = new DoublesUnionImpl(k);
103-
union.maxK_ = k;
97+
final DoublesUnionImpl union = new DoublesUnionImpl(sketch.getK());
10498
union.gadget_ = sketch;
10599
return union;
106100
}

src/main/java/com/yahoo/sketches/quantiles/DoublesUtil.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ private static String getSummary(final DoublesSketch sk) {
111111
final String retItemsStr = String.format("%,d", sk.getRetainedItems());
112112
final String cmptBytesStr = String.format("%,d", sk.getCompactStorageBytes());
113113
final String updtBytesStr = String.format("%,d", sk.getUpdatableStorageBytes());
114-
final double eps = Util.EpsilonFromK.getAdjustedEpsilon(k);
114+
final double epsPmf = Util.getNormalizedRankError(k, true);
115+
final String epsPmfPctStr = String.format("%.3f%%", epsPmf * 100.0);
116+
final double eps = Util.getNormalizedRankError(k, false);
115117
final String epsPctStr = String.format("%.3f%%", eps * 100.0);
116118
final String memCap = sk.isDirect() ? Long.toString(sk.getMemory().getCapacity()) : "";
117119

@@ -133,6 +135,7 @@ private static String getSummary(final DoublesSketch sk) {
133135
sb.append(" Compact Storage Bytes : ").append(cmptBytesStr).append(LS);
134136
sb.append(" Updatable Storage Bytes : ").append(updtBytesStr).append(LS);
135137
sb.append(" Normalized Rank Error : ").append(epsPctStr).append(LS);
138+
sb.append(" Normalized Rank Error (PMF) : ").append(epsPmfPctStr).append(LS);
136139
sb.append(" Min Value : ")
137140
.append(String.format("%,.3f", sk.getMinValue())).append(LS);
138141
sb.append(" Max Value : ")
@@ -163,11 +166,11 @@ private static String getDataDetail(final DoublesSketch sketchIn) {
163166

164167
//output all the levels
165168
final int combBufSize = combBuf.length;
166-
if (n >= 2 * k) {
169+
if (n >= (2 * k)) {
167170
sb.append(" Valid | Level");
168171
for (int j = 2 * k; j < combBufSize; j++) { //output level data starting at 2K
169-
if (j % k == 0) { //start output of new level
170-
final int levelNum = j / k - 2;
172+
if ((j % k) == 0) { //start output of new level
173+
final int levelNum = (j / k) - 2;
171174
final String validLvl = ((1L << levelNum) & bitPattern) > 0 ? " T " : " F ";
172175
final String lvl = String.format("%5d", levelNum);
173176
sb.append(LS).append(" ").append(validLvl).append(" ").append(lvl).append(": ");

src/main/java/com/yahoo/sketches/quantiles/ItemsSketch.java

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,8 @@
1414
import static com.yahoo.sketches.quantiles.PreambleUtil.extractSerVer;
1515
import static com.yahoo.sketches.quantiles.Util.computeBaseBufferItems;
1616
import static com.yahoo.sketches.quantiles.Util.computeBitPattern;
17-
import static java.lang.Math.abs;
18-
import static java.lang.Math.ceil;
19-
import static java.lang.Math.exp;
20-
import static java.lang.Math.log;
2117
import static java.lang.Math.max;
2218
import static java.lang.Math.min;
23-
import static java.lang.Math.pow;
24-
import static java.lang.Math.round;
2519

2620
import java.lang.reflect.Array;
2721
import java.util.Arrays;
@@ -32,7 +26,6 @@
3226
import com.yahoo.memory.WritableMemory;
3327
import com.yahoo.sketches.ArrayOfItemsSerDe;
3428
import com.yahoo.sketches.SketchesArgumentException;
35-
import com.yahoo.sketches.kll.KllFloatsSketch;
3629

3730
/**
3831
* This is a stochastic streaming sketch that enables near-real time analysis of the
@@ -285,7 +278,7 @@ public T getQuantile(final double fraction) {
285278
* exists with a confidence of at least 99%. Returns NaN if the sketch is empty.
286279
*/
287280
public T getQuantileUpperBound(final double fraction) {
288-
return getQuantile(min(1.0, fraction + getNormalizedRankError(k_, false)));
281+
return getQuantile(min(1.0, fraction + Util.getNormalizedRankError(k_, false)));
289282
}
290283

291284
/**
@@ -296,7 +289,7 @@ public T getQuantileUpperBound(final double fraction) {
296289
* exists with a confidence of at least 99%. Returns NaN if the sketch is empty.
297290
*/
298291
public T getQuantileLowerBound(final double fraction) {
299-
return getQuantile(max(0, fraction - getNormalizedRankError(k_, false)));
292+
return getQuantile(max(0, fraction - Util.getNormalizedRankError(k_, false)));
300293
}
301294

302295
/**
@@ -499,7 +492,7 @@ public long getN() {
499492
*/
500493
@Deprecated
501494
public double getNormalizedRankError() {
502-
return getNormalizedRankError(getK());
495+
return Util.getNormalizedRankError(getK(), true);
503496
}
504497

505498
/**
@@ -510,7 +503,7 @@ public double getNormalizedRankError() {
510503
* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
511504
*/
512505
public double getNormalizedRankError(final boolean pmf) {
513-
return getNormalizedRankError(k_, pmf);
506+
return Util.getNormalizedRankError(k_, pmf);
514507
}
515508

516509
/**
@@ -521,24 +514,20 @@ public double getNormalizedRankError(final boolean pmf) {
521514
*/
522515
@Deprecated
523516
public static double getNormalizedRankError(final int k) {
524-
return getNormalizedRankError(k, true);
517+
return Util.getNormalizedRankError(k, true);
525518
}
526519

527520
/**
521+
* Gets the normalized rank error given k and pmf.
528522
* Static method version of the {@link #getNormalizedRankError(boolean)}.
529523
* @param k the configuration parameter
530524
* @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function.
531525
* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
532526
* @return if pmf is true, the normalized rank error for the getPMF() function.
533527
* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
534-
* @see KllFloatsSketch
535528
*/
536-
// constants were derived as the best fit to 99 percentile empirically measured max error in
537-
// thousands of trials
538529
public static double getNormalizedRankError(final int k, final boolean pmf) {
539-
return pmf
540-
? 1.74289590045312415 / pow(k, 0.954048085817058)
541-
: 1.74289590045312415 / pow(k, 0.954048085817058); //TODO
530+
return Util.getNormalizedRankError(k, pmf);
542531
}
543532

544533
/**
@@ -549,17 +538,9 @@ public static double getNormalizedRankError(final int k, final boolean pmf) {
549538
* returns the value of <em>k</em> assuming the input epsilon is the desired "single-sided"
550539
* epsilon for all the other queries.
551540
* @return the value of <i>k</i> given a value of epsilon.
552-
* @see KllFloatsSketch
553541
*/
554542
public static int getKFromEpsilon(final double epsilon, final boolean pmf) {
555-
final double eps = max(epsilon, 4.7E-5);
556-
final double kdbl = pmf
557-
? exp(log(1.74289590045312415 / eps) / 0.954048085817058)
558-
: exp(log(1.74289590045312415 / eps) / 0.954048085817058);//TODO
559-
final double krnd = round(kdbl);
560-
final double del = abs(krnd - kdbl);
561-
final int k = (int) ((del < 1E-6) ? krnd : ceil(kdbl));
562-
return k; //
543+
return Util.getKFromEpsilon(epsilon, pmf);
563544
}
564545

565546
/**

0 commit comments

Comments
 (0)