Skip to content

Commit 97552af

Browse files
committed
Merge branch 'master' into 1.2.X-incubating
2 parents bcefa34 + 43888f9 commit 97552af

6 files changed

Lines changed: 114 additions & 26 deletions

File tree

src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -172,11 +172,6 @@ private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES,
172172
count = mem.getInt(offset);
173173
offset += Integer.BYTES;
174174
}
175-
// if (version == serialVersionWithSummaryFactoryUID) {
176-
// final DeserializeResult<SummaryFactory<S>> factoryResult =
177-
// SerializerDeserializer.deserializeFromMemory(mem, offset);
178-
// offset += factoryResult.getSize();
179-
// }
180175
final int currentCapacity = 1 << lgCurrentCapacity_;
181176
keys_ = new long[currentCapacity];
182177
for (int i = 0; i < count; i++) {

src/main/java/org/apache/datasketches/tuple/Union.java

Lines changed: 56 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,13 @@
1919

2020
package org.apache.datasketches.tuple;
2121

22+
import static java.lang.Math.min;
2223
import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
2324

25+
import java.lang.reflect.Array;
26+
27+
import org.apache.datasketches.QuickSelect;
28+
2429
/**
2530
* Compute a union of two or more tuple sketches.
2631
* A new instance represents an empty set.
@@ -29,10 +34,10 @@
2934
* @param <S> Type of Summary
3035
*/
3136
public class Union<S extends Summary> {
32-
private final int nomEntries_;
3337
private final SummarySetOperations<S> summarySetOps_;
3438
private QuickSelectSketch<S> sketch_;
3539
private long theta_; // need to maintain outside of the sketch
40+
private boolean isEmpty_;
3641

3742
/**
3843
* Creates new instance with default nominal entries
@@ -49,42 +54,83 @@ public Union(final SummarySetOperations<S> summarySetOps) {
4954
* @param summarySetOps instance of SummarySetOperations
5055
*/
5156
public Union(final int nomEntries, final SummarySetOperations<S> summarySetOps) {
52-
nomEntries_ = nomEntries;
5357
summarySetOps_ = summarySetOps;
54-
sketch_ = new QuickSelectSketch<S>(nomEntries, null);
58+
sketch_ = new QuickSelectSketch<>(nomEntries, null);
5559
theta_ = sketch_.getThetaLong();
60+
isEmpty_ = true;
5661
}
5762

5863
/**
5964
* Updates the internal set by adding entries from the given sketch
6065
* @param sketchIn input sketch to add to the internal set
6166
*/
6267
public void update(final Sketch<S> sketchIn) {
63-
if (sketchIn == null || sketchIn.isEmpty()) { return; }
68+
if ((sketchIn == null) || sketchIn.isEmpty()) { return; }
69+
isEmpty_ = false;
6470
if (sketchIn.theta_ < theta_) { theta_ = sketchIn.theta_; }
6571
final SketchIterator<S> it = sketchIn.iterator();
6672
while (it.next()) {
6773
sketch_.merge(it.getKey(), it.getSummary(), summarySetOps_);
6874
}
75+
if (sketch_.theta_ < theta_) {
76+
theta_ = sketch_.theta_;
77+
}
6978
}
7079

7180
/**
7281
* Gets the internal set as a CompactSketch
7382
* @return result of the unions so far
7483
*/
84+
@SuppressWarnings("unchecked")
7585
public CompactSketch<S> getResult() {
76-
sketch_.trim();
77-
if (theta_ < sketch_.theta_) {
78-
sketch_.setThetaLong(theta_);
79-
sketch_.rebuild();
86+
if (isEmpty_) {
87+
return sketch_.compact();
88+
}
89+
if ((theta_ >= sketch_.theta_) && (sketch_.getRetainedEntries() <= sketch_.getNominalEntries())) {
90+
return sketch_.compact();
91+
}
92+
long theta = min(theta_, sketch_.theta_);
93+
94+
int num = 0;
95+
{
96+
final SketchIterator<S> it = sketch_.iterator();
97+
while (it.next()) {
98+
if (it.getKey() < theta) { num++; }
99+
}
80100
}
81-
return sketch_.compact();
101+
if (num == 0) {
102+
return new CompactSketch<>(null, null, theta, isEmpty_);
103+
}
104+
if (num > sketch_.getNominalEntries()) {
105+
final long[] keys = new long[num]; // temporary since the order will be destroyed by quick select
106+
final SketchIterator<S> it = sketch_.iterator();
107+
int i = 0;
108+
while (it.next()) {
109+
if (it.getKey() < theta) { keys[i++] = it.getKey(); }
110+
}
111+
theta = QuickSelect.select(keys, 0, num - 1, sketch_.getNominalEntries());
112+
num = sketch_.getNominalEntries();
113+
}
114+
final long[] keys = new long[num];
115+
final S[] summaries = (S[]) Array.newInstance(sketch_.summaries_.getClass().getComponentType(), num);
116+
final SketchIterator<S> it = sketch_.iterator();
117+
int i = 0;
118+
while (it.next()) {
119+
if (it.getKey() < theta) {
120+
keys[i] = it.getKey();
121+
summaries[i] = (S) it.getSummary().copy();
122+
i++;
123+
}
124+
}
125+
return new CompactSketch<>(keys, summaries, theta, isEmpty_);
82126
}
83127

84128
/**
85129
* Resets the internal set to the initial state, which represents an empty set
86130
*/
87131
public void reset() {
88-
sketch_ = new QuickSelectSketch<S>(nomEntries_, null);
132+
sketch_.reset();
133+
theta_ = sketch_.getThetaLong();
134+
isEmpty_ = true;
89135
}
90136
}

src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,8 +55,8 @@ public class UpdatableSketch<U, S extends UpdatableSummary<U>> extends QuickSele
5555
* <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability</a>
5656
* @param summaryFactory An instance of a SummaryFactory.
5757
*/
58-
public UpdatableSketch(final int nomEntries, final int lgResizeFactor, final float samplingProbability,
59-
final SummaryFactory<S> summaryFactory) {
58+
public UpdatableSketch(final int nomEntries, final int lgResizeFactor,
59+
final float samplingProbability, final SummaryFactory<S> summaryFactory) {
6060
super(nomEntries, lgResizeFactor, samplingProbability, summaryFactory);
6161
}
6262

src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,26 @@ public class DoubleSketch extends UpdatableSketch<Double, DoubleSummary> {
3535
* @param mode The DoubleSummary mode to be used
3636
*/
3737
public DoubleSketch(final int lgK, final DoubleSummary.Mode mode) {
38-
super(1 << lgK, ResizeFactor.X8.ordinal(), 1.0F, new DoubleSummaryFactory(mode));
38+
this(lgK, ResizeFactor.X8.ordinal(), 1.0F, mode);
39+
}
40+
41+
/**
42+
* Creates this sketch with the following parameters:
43+
* @param lgK Log_base2 of <i>Nominal Entries</i>.
44+
* @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
45+
* <pre>
46+
* 0 - no resizing (max size allocated),
47+
* 1 - double internal hash table each time it reaches a threshold
48+
* 2 - grow four times
49+
* 3 - grow eight times (default)
50+
* </pre>
51+
* @param samplingProbability
52+
* <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability</a>
53+
* @param mode The DoubleSummary mode to be used
54+
*/
55+
public DoubleSketch(final int lgK, final int lgResizeFactor, final float samplingProbability,
56+
final DoubleSummary.Mode mode) {
57+
super(1 << lgK, lgResizeFactor, samplingProbability, new DoubleSummaryFactory(mode));
3958
}
4059

4160
/**

src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSketch.java

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,26 @@ public class IntegerSketch extends UpdatableSketch<Integer, IntegerSummary> {
3535
* @param mode The IntegerSummary mode to be used
3636
*/
3737
public IntegerSketch(final int lgK, final IntegerSummary.Mode mode) {
38-
super(1 << lgK, ResizeFactor.X8.ordinal(), 1.0F, new IntegerSummaryFactory(mode));
38+
this(lgK, ResizeFactor.X8.ordinal(), 1.0F, mode);
39+
}
40+
41+
/**
42+
* Creates this sketch with the following parameters:
43+
* @param lgK Log_base2 of <i>Nominal Entries</i>.
44+
* @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
45+
* <pre>
46+
* 0 - no resizing (max size allocated),
47+
* 1 - double internal hash table each time it reaches a threshold
48+
* 2 - grow four times
49+
* 3 - grow eight times (default)
50+
* </pre>
51+
* @param samplingProbability
52+
* <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability</a>
53+
* @param mode The IntegerSummary mode to be used
54+
*/
55+
public IntegerSketch(final int lgK, final int lgResizeFactor, final float samplingProbability,
56+
final IntegerSummary.Mode mode) {
57+
super(1 << lgK, lgResizeFactor, samplingProbability, new IntegerSummaryFactory(mode));
3958
}
4059

4160
/**

src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -230,13 +230,6 @@ public void updatesOfAllKeyTypes() {
230230
Assert.assertEquals(sketch.getEstimate(), 6.0);
231231
}
232232

233-
// @Test
234-
// public void updateDoubleSummary() {
235-
// DoubleSummary ds = new DoubleSummary();
236-
// ds.update(1.0);
237-
// Assert.assertEquals(ds.getValue(), 1.0);
238-
// }
239-
240233
@Test
241234
public void doubleSummaryDefaultSumMode() {
242235
UpdatableSketch<Double, DoubleSummary> sketch =
@@ -402,6 +395,22 @@ public void serializeDeserializeSampling() throws Exception {
402395
Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta());
403396
}
404397

398+
@Test
399+
public void unionEmptySampling() {
400+
UpdatableSketch<Double, DoubleSummary> sketch =
401+
new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).setSamplingProbability(0.01f).build();
402+
sketch.update(1, 1.0);
403+
Assert.assertEquals(sketch.getRetainedEntries(), 0); // not retained due to low sampling probability
404+
405+
Union<DoubleSummary> union = new Union<>(new DoubleSummarySetOperations(mode));
406+
union.update(sketch);
407+
CompactSketch<DoubleSummary> result = union.getResult();
408+
Assert.assertEquals(result.getRetainedEntries(), 0);
409+
Assert.assertFalse(result.isEmpty());
410+
Assert.assertTrue(result.isEstimationMode());
411+
Assert.assertEquals(result.getEstimate(), 0.0);
412+
}
413+
405414
@Test
406415
public void unionExactMode() {
407416
UpdatableSketch<Double, DoubleSummary> sketch1 =

0 commit comments

Comments
 (0)