Skip to content

Commit e06ae7d

Browse files
authored
Merge pull request #475 from apache/Fixes_for_getPartitionBoundaries
Fixes for get partition boundaries
2 parents 007f35b + 51aef3c commit e06ae7d

75 files changed

Lines changed: 2489 additions & 1341 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,5 +154,5 @@ In Eclipse, open the project *Properties / Java Build Path / Module Dependencies
154154

155155
#### SpotBugs
156156

157-
* Make sure you configure SpotBugs with the /tools/FindBugsExcludeFilter.xml file. Otherwise, you will get a lot of false positive or low risk issues that we have examined and exliminated with this exclusion file.
157+
* Make sure you configure SpotBugs with the /tools/FindBugsExcludeFilter.xml file. Otherwise, you may get a lot of false positive or low risk issues that we have examined and eliminated with this exclusion file.
158158

pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,13 @@ under the License.
150150
<version>${testng.version}</version>
151151
<scope>test</scope>
152152
</dependency>
153+
<!--
154+
<dependency>
155+
<groupId>org.apache.datasketches</groupId>
156+
<artifactId>datasketches-java-common</artifactId>
157+
<version>1.0.0</version>
158+
</dependency>
159+
-->
153160
</dependencies>
154161

155162
<build>

src/main/java/org/apache/datasketches/common/Util.java

Lines changed: 33 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import static java.lang.Math.log;
2525
import static java.lang.Math.pow;
2626
import static java.lang.Math.round;
27+
import static java.util.Arrays.fill;
2728

2829
import java.util.Comparator;
2930

@@ -217,7 +218,7 @@ public static String nanoSecToString(final long nS) {
217218

218219
/**
219220
* Returns the given time in milliseconds formatted as Hours:Min:Sec.mSec
220-
* @param mS the given nanoseconds
221+
* @param mS the given milliseconds
221222
* @return the given time in milliseconds formatted as Hours:Min:Sec.mSec
222223
*/
223224
public static String milliSecToString(final long mS) {
@@ -244,40 +245,20 @@ public static String zeroPad(final String s, final int fieldLength) {
244245

245246
/**
246247
* Prepend or postpend the given string with the given character to fill the given field length.
247-
* If the given string is equal or greater than the given field length, it will be returned
248-
* without modification.
248+
* If the length of the given string is equal to or greater than the given field length, the string will be returned without modification.
249249
* @param s the given string
250250
* @param fieldLength the desired field length
251251
* @param padChar the desired pad character
252252
* @param postpend if true, append the pad characters to the end of the string; otherwise prepend them.
253-
* @return prepended or postpended given string with the given character to fill the given field
254-
* length.
253+
* @return prepended or postpended given string with the given character to fill the given field length.
255254
*/
256-
public static String characterPad(final String s, final int fieldLength, final char padChar,
257-
final boolean postpend) {
258-
final char[] chArr = s.toCharArray();
259-
final int sLen = chArr.length;
255+
public static String characterPad(final String s, final int fieldLength, final char padChar, final boolean postpend) {
256+
final int sLen = s.length();
260257
if (sLen < fieldLength) {
261-
final char[] out = new char[fieldLength];
262-
final int blanks = fieldLength - sLen;
263-
264-
if (postpend) {
265-
for (int i = 0; i < sLen; i++) {
266-
out[i] = chArr[i];
267-
}
268-
for (int i = sLen; i < fieldLength; i++) {
269-
out[i] = padChar;
270-
}
271-
} else { //prepend
272-
for (int i = 0; i < blanks; i++) {
273-
out[i] = padChar;
274-
}
275-
for (int i = blanks; i < fieldLength; i++) {
276-
out[i] = chArr[i - blanks];
277-
}
278-
}
279-
280-
return String.valueOf(out);
258+
final char[] cArr = new char[fieldLength - sLen];
259+
fill(cArr, padChar);
260+
final String addstr = String.valueOf(cArr);
261+
return (postpend) ? s.concat(addstr) : addstr.concat(s);
281262
}
282263
return s;
283264
}
@@ -376,8 +357,8 @@ public static int ceilingIntPowerOf2(final int n) {
376357
}
377358

378359
/**
379-
* Computes the long ceiling power of 2 within the range [1, 2^30]. This is the smallest positive power
380-
* of 2 that is equal to or greater than the given n and a mathematical integer.
360+
* Computes the long ceiling power of 2 within the range [1, 2^62]. This is the smallest positive power
361+
* of 2 that is equal to or greater than the given n and a mathematical long.
381362
*
382363
* <p>For:
383364
* <ul>
@@ -550,56 +531,60 @@ public static double powerSeriesNextDouble(final int ppb, final double curPoint,
550531
}
551532

552533
/**
553-
* Computes the ceiling power of given <i>base</i> and <i>n</i> as doubles.
554-
* This is the smallest positive power
555-
* of <i>base</i> that equal to or greater than the given <i>n</i> and equal to a mathematical integer.
534+
* Returns the ceiling of a given <i>n</i> given a <i>base</i>, where the ceiling is an integral power of the base.
535+
* This is the smallest positive power of <i>base</i> that is equal to or greater than the given <i>n</i>
536+
* and equal to a mathematical integer.
556537
* The result of this function is consistent with {@link #ceilingIntPowerOf2(int)} for values
557538
* less than one. I.e., if <i>n &lt; 1,</i> the result is 1.
558539
*
559-
* @param base The base in the expression &#8968;base<sup>n</sup>&#8969;.
540+
* <p>The formula is: <i>base<sup>ceiling(log<sub>base</sub>(x))</sup></i></p>
541+
*
542+
* @param base The number in the expression &#8968;base<sup>n</sup>&#8969;.
560543
* @param n The input argument.
561544
* @return the ceiling power of <i>base</i> as a double and equal to a mathematical integer.
562545
*/
563546
public static double ceilingPowerBaseOfDouble(final double base, final double n) {
564547
final double x = n < 1.0 ? 1.0 : n;
565-
return pow(base, ceil(logBaseOfX(base, x)));
548+
return Math.round(pow(base, ceil(logBaseOfX(base, x))));
566549
}
567550

568551
/**
569-
* Computes the floor power of given <i>base</i> and <i>n</i> as doubles.
570-
* This is the largest positive power
571-
* of <i>base</i> that equal to or less than the given n and equal to a mathematical integer.
552+
* Computes the floor of a given <i>n</i> given <i>base</i>, where the floor is an integral power of the base.
553+
* This is the largest positive power of <i>base</i> that is equal to or less than the given <i>n</i>
554+
* and equal to a mathematical integer.
572555
* The result of this function is consistent with {@link #floorPowerOf2(int)} for values
573556
* less than one. I.e., if <i>n &lt; 1,</i> the result is 1.
574557
*
575-
* @param base The base in the expression &#8970;base<sup>n</sup>&#8971;.
558+
* <p>The formula is: <i>base<sup>floor(log<sub>base</sub>(x))</sup></i></p>
559+
*
560+
* @param base The number in the expression &#8970;base<sup>n</sup>&#8971;.
576561
* @param n The input argument.
577562
* @return the floor power of <i>base</i> as a double and equal to a mathematical integer.
578563
*/
579564
public static double floorPowerBaseOfDouble(final double base, final double n) {
580565
final double x = n < 1.0 ? 1.0 : n;
581-
return pow(base, floor(logBaseOfX(base, x)));
566+
return Math.round(pow(base, floor(logBaseOfX(base, x))));
582567
}
583568

584569
// Logarithm related
585570

586571
/**
587-
* The log base 2 of the value
572+
* The log<sub>2</sub>(value)
588573
* @param value the given value
589-
* @return The log base 2 of the value
574+
* @return log<sub>2</sub>(value)
590575
*/
591576
public static double log2(final double value) {
592577
return log(value) / LOG2;
593578
}
594579

595580
/**
596-
* Returns the logarithm_logBase of x. Example: logB(2.0, x) = log(x) / log(2.0).
597-
* @param logBase the base of the logarithm used
581+
* Returns the log<sub>base</sub>(x). Example, if base = 2.0: logBaseOfX(2.0, x) = log(x) / log(2.0).
582+
* @param base The number in the expression log(x) / log(base).
598583
* @param x the given value
599-
* @return the logarithm_logBase of x: Example: logB(2.0, x) = log(x) / log(2.0).
584+
* @return the log<sub>base</sub>(x)
600585
*/
601-
public static double logBaseOfX(final double logBase, final double x) {
602-
return log(x) / log(logBase);
586+
public static double logBaseOfX(final double base, final double x) {
587+
return log(x) / log(base);
603588
}
604589

605590
/**

src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import static org.apache.datasketches.common.ByteArrayUtil.putDoubleLE;
2525
import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE;
2626
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
27-
import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks;
2827

2928
import java.util.Objects;
3029

@@ -175,21 +174,6 @@ public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria
175174
return kllDoublesSV.getCDF(splitPoints, searchCrit);
176175
}
177176

178-
@Override
179-
public DoublesPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted,
180-
final QuantileSearchCriteria searchCrit) {
181-
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
182-
final double[] ranks = equallyWeightedRanks(numEquallyWeighted);
183-
final double[] boundaries = getQuantiles(ranks, searchCrit);
184-
boundaries[0] = getMinItem();
185-
boundaries[boundaries.length - 1] = getMaxItem();
186-
final DoublesPartitionBoundaries dpb = new DoublesPartitionBoundaries();
187-
dpb.N = this.getN();
188-
dpb.ranks = ranks;
189-
dpb.boundaries = boundaries;
190-
return dpb;
191-
}
192-
193177
@Override
194178
public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
195179
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }

src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java

Lines changed: 2 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -24,55 +24,17 @@
2424
/**
2525
* Iterator over KllDoublesSketch. The order is not defined.
2626
*/
27-
public final class KllDoublesSketchIterator implements QuantilesDoublesSketchIterator {
27+
public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIterator {
2828
private final double[] quantiles;
29-
private final int[] levelsArr;
30-
private final int numLevels;
31-
private int level;
32-
private int index;
33-
private long weight;
34-
private boolean isInitialized;
3529

3630
KllDoublesSketchIterator(final double[] quantiles, final int[] levelsArr, final int numLevels) {
31+
super(levelsArr, numLevels);
3732
this.quantiles = quantiles;
38-
this.levelsArr = levelsArr;
39-
this.numLevels = numLevels;
40-
this.isInitialized = false;
4133
}
4234

4335
@Override
4436
public double getQuantile() {
4537
return quantiles[index];
4638
}
4739

48-
@Override
49-
public long getWeight() {
50-
return weight;
51-
}
52-
53-
@Override
54-
public boolean next() {
55-
if (!isInitialized) {
56-
level = 0;
57-
index = levelsArr[level];
58-
weight = 1;
59-
isInitialized = true;
60-
} else {
61-
index++;
62-
}
63-
if (index < levelsArr[level + 1]) {
64-
return true;
65-
}
66-
// go to the next non-empty level
67-
do {
68-
level++;
69-
if (level == numLevels) {
70-
return false; // run out of levels
71-
}
72-
weight *= 2;
73-
} while (levelsArr[level] == levelsArr[level + 1]);
74-
index = levelsArr[level];
75-
return true;
76-
}
77-
7840
}

src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@
2121

2222
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
2323
import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
24+
import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank;
2425

2526
import java.util.Arrays;
2627

2728
import org.apache.datasketches.common.SketchesArgumentException;
2829
import org.apache.datasketches.quantilescommon.DoublesSortedView;
30+
import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator;
2931
import org.apache.datasketches.quantilescommon.InequalitySearch;
3032
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
3133
import org.apache.datasketches.quantilescommon.QuantilesUtil;
@@ -39,32 +41,40 @@ public final class KllDoublesSketchSortedView implements DoublesSortedView {
3941
private final double[] quantiles;
4042
private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights
4143
private final long totalN;
44+
private final double maxItem;
45+
private final double minItem;
4246

4347
/**
4448
* Construct from elements for testing.
4549
* @param quantiles sorted array of quantiles
4650
* @param cumWeights sorted, monotonically increasing cumulative weights.
4751
* @param totalN the total number of items presented to the sketch.
4852
*/
49-
KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN) {
53+
KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN,
54+
final double maxItem, final double minItem) {
5055
this.quantiles = quantiles;
5156
this.cumWeights = cumWeights;
5257
this.totalN = totalN;
58+
this.maxItem = maxItem;
59+
this.minItem = minItem;
5360
}
5461

5562
/**
5663
* Constructs this Sorted View given the sketch
57-
* @param sk the given KllDoublesSketch.
64+
* @param sketch the given KllDoublesSketch.
5865
*/
59-
public KllDoublesSketchSortedView(final KllDoublesSketch sk) {
60-
this.totalN = sk.getN();
61-
final double[] srcQuantiles = sk.getDoubleItemsArray();
62-
final int[] srcLevels = sk.levelsArr;
63-
final int srcNumLevels = sk.getNumLevels();
64-
65-
if (!sk.isLevelZeroSorted()) {
66+
public KllDoublesSketchSortedView(final KllDoublesSketch sketch) {
67+
if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
68+
this.totalN = sketch.getN();
69+
this.maxItem = sketch.getMaxItem();
70+
this.minItem = sketch.getMinItem();
71+
final double[] srcQuantiles = sketch.getDoubleItemsArray();
72+
final int[] srcLevels = sketch.levelsArr;
73+
final int srcNumLevels = sketch.getNumLevels();
74+
75+
if (!sketch.isLevelZeroSorted()) {
6676
Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]);
67-
if (!sk.hasMemory()) { sk.setLevelZeroSorted(true); }
77+
if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); }
6878
}
6979

7080
final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage
@@ -78,17 +88,31 @@ public long[] getCumulativeWeights() {
7888
return cumWeights.clone();
7989
}
8090

91+
@Override
92+
public double getMaxItem() {
93+
return maxItem;
94+
}
95+
96+
@Override
97+
public double getMinItem() {
98+
return minItem;
99+
}
100+
101+
@Override
102+
public long getN() {
103+
return totalN;
104+
}
105+
81106
@Override
82107
public double getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
83108
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
84109
QuantilesUtil.checkNormalizedRankBounds(rank);
85110
final int len = cumWeights.length;
86-
final long naturalRank = (searchCrit == INCLUSIVE)
87-
? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN);
111+
final double naturalRank = getNaturalRank(rank, totalN, searchCrit);
88112
final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT;
89113
final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit);
90114
if (index == -1) {
91-
return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0;
115+
return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0;
92116
}
93117
return quantiles[index];
94118
}
@@ -116,8 +140,8 @@ public boolean isEmpty() {
116140
}
117141

118142
@Override
119-
public KllDoublesSketchSortedViewIterator iterator() {
120-
return new KllDoublesSketchSortedViewIterator(quantiles, cumWeights);
143+
public DoublesSortedViewIterator iterator() {
144+
return new DoublesSortedViewIterator(quantiles, cumWeights);
121145
}
122146

123147
//restricted methods

0 commit comments

Comments
 (0)