Skip to content

Commit 13e5ef8

Browse files
authored
Merge pull request #279 from apache/BugFixPlusMisc
Fix Theta bug wrt SingleItemSketch merge from Memory.
2 parents 7505da3 + ed3c4f6 commit 13e5ef8

20 files changed

Lines changed: 931 additions & 120 deletions

src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ UpdateReturnState hashUpdate(final long hash) {
262262
* @return the hash table threshold
263263
*/
264264
static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
265-
//FindBugs may complain if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD, but this allows us
266-
// to tune these constants for different sketches.
265+
//FindBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
266+
//but this allows us to tune these constants for different sketches.
267267
final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : REBUILD_THRESHOLD;
268268
return (int) Math.floor(fraction * (1 << lgArrLongs));
269269
}

src/main/java/org/apache/datasketches/theta/UnionImpl.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,8 @@ private void processVer3(final Memory skMem) {
369369
// OR the above and the SI bit is set
370370
if (SingleItemSketch.testPre0SeedHash(skMem.getLong(0), seedHash_)) {
371371
final long hash = skMem.getLong(8);
372-
update(hash); //a hash < 1 will be rejected later
372+
//backdoor update, hash function is bypassed. A hash < 1 will be rejected later
373+
gadget_.hashUpdate(hash);
373374
return;
374375
}
375376
return; //empty

src/main/java/org/apache/datasketches/tuple/Sketch.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,14 @@ public double getTheta() {
116116
* @return a SketchIterator
117117
*/
118118
public SketchIterator<S> iterator() {
119-
return new SketchIterator<S>(keys_, summaries_);
119+
return new SketchIterator<>(keys_, summaries_);
120120
}
121121

122-
long getThetaLong() {
122+
/**
123+
* Returns Theta as a long
124+
* @return Theta as a long
125+
*/
126+
public long getThetaLong() {
123127
return theta_;
124128
}
125129

src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
public interface UpdatableSummary<U> extends Summary {
2727

2828
/**
29-
* This is to provide a method of updating summaries
29+
* This is to provide a method of updating summaries.
30+
* This should not be called by the user.
3031
* @param value update value
3132
*/
3233
public void update(U value);
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.tuple.adouble;
21+
22+
import org.apache.datasketches.ResizeFactor;
23+
import org.apache.datasketches.memory.Memory;
24+
import org.apache.datasketches.tuple.UpdatableSketch;
25+
26+
/**
27+
* @author Lee Rhodes
28+
*/
29+
public class DoubleSketch extends UpdatableSketch<Double, DoubleSummary> {
30+
31+
/**
32+
* Constructs this sketch with given <i>lgK</i>.
33+
* @param lgK Log_base2 of <i>Nominal Entries</i>.
34+
* <a href="{@docRoot}/resources/dictionary.html#nomEntries">See Nominal Entries</a>
35+
* @param mode The DoubleSummary mode to be used
36+
*/
37+
public DoubleSketch(final int lgK, final DoubleSummary.Mode mode) {
38+
super(1 << lgK, ResizeFactor.X8.ordinal(), 1.0F, new DoubleSummaryFactory(mode));
39+
}
40+
41+
/**
42+
* Constructs this sketch from a Memory image, which must be from a DoubleSketch, and
43+
* usually with data.
44+
* @param mem the given Memory
45+
* @param mode The DoubleSummary mode to be used
46+
*/
47+
public DoubleSketch(final Memory mem, final DoubleSummary.Mode mode) {
48+
super(mem, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode));
49+
}
50+
51+
@Override
52+
public void update(final String key, final Double value) {
53+
super.update(key, value);
54+
}
55+
56+
@Override
57+
public void update(final long key, final Double value) {
58+
super.update(key, value);
59+
}
60+
}

src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
* Supported modes: Sum, Min, Max, AlwaysOne. The default mode is Sum.
3232
*/
3333
public final class DoubleSummary implements UpdatableSummary<Double> {
34+
private double value_;
35+
private final Mode mode_;
3436

3537
/**
3638
* The aggregation modes for this Summary
@@ -48,6 +50,7 @@ public static enum Mode {
4850
* <p>New retained value = min(previous retained value, incoming value)</p>
4951
*/
5052
Min,
53+
5154
/**
5255
* The aggregation mode is the maximum function.
5356
* <p>New retained value = max(previous retained value, incoming value)</p>
@@ -58,27 +61,21 @@ public static enum Mode {
5861
* The aggregation mode is always one.
5962
* <p>New retained value = 1.0</p>
6063
*/
61-
AlwaysOne,
62-
63-
/**
64-
* The aggregation mode is increment by one.
65-
* <p>New retained value = previous retained value + 1.0</p>
66-
*/
67-
Increment
64+
AlwaysOne
6865
}
6966

70-
private double value_;
71-
private final Mode mode_;
72-
7367
/**
74-
* Creates an instance of DoubleSummary with zero starting value and default mode (Sum)
68+
* Creates an instance of DoubleSummary with a given starting value and mode
69+
* @param value starting value
70+
* @param mode update mode
7571
*/
76-
public DoubleSummary() {
77-
this(0, Mode.Sum);
72+
private DoubleSummary(final double value, final Mode mode) {
73+
value_ = value;
74+
mode_ = mode;
7875
}
7976

8077
/**
81-
* Creates an instance of DoubleSummary with a starting value and a given mode (Sum)
78+
* Creates an instance of DoubleSummary with a given mode.
8279
* @param mode update mode
8380
*/
8481
public DoubleSummary(final Mode mode) {
@@ -96,21 +93,9 @@ public DoubleSummary(final Mode mode) {
9693
case AlwaysOne:
9794
value_ = 1.0;
9895
break;
99-
case Increment:
100-
value_ = 0;
10196
}
10297
}
10398

104-
/**
105-
* Creates an instance of DoubleSummary with a given starting value and mode
106-
* @param value starting value
107-
* @param mode update mode
108-
*/
109-
public DoubleSummary(final double value, final Mode mode) {
110-
value_ = value;
111-
mode_ = mode;
112-
}
113-
11499
@Override
115100
public void update(final Double value) {
116101
switch (mode_) {
@@ -126,8 +111,6 @@ public void update(final Double value) {
126111
case AlwaysOne:
127112
value_ = 1.0;
128113
break;
129-
case Increment:
130-
value_++;
131114
}
132115
}
133116

src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public final class DoubleSummaryFactory implements SummaryFactory<DoubleSummary>
3131
/**
3232
* Creates an instance of DoubleSummaryFactory with default mode
3333
*/
34+
@Deprecated
3435
public DoubleSummaryFactory() {
3536
summaryMode_ = DoubleSummary.Mode.Sum;
3637
}

src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,12 @@ public final class DoubleSummarySetOperations implements SummarySetOperations<Do
3030

3131
private final Mode summaryMode_;
3232

33+
//TODO see IntegerSummarySetOperations for better model
34+
3335
/**
3436
* Creates an instance with default mode.
3537
*/
38+
@Deprecated
3639
public DoubleSummarySetOperations() {
3740
summaryMode_ = DoubleSummary.Mode.Sum;
3841
}
@@ -53,13 +56,14 @@ public DoubleSummary union(final DoubleSummary a, final DoubleSummary b) {
5356
return result;
5457
}
5558

56-
@Override
59+
5760
/**
5861
* Intersection is not well defined or even meaningful between numeric values.
5962
* Nevertheless, this can be defined to be just a different type of aggregation.
6063
* In this case it is defined to be the same as union. It can be overridden to
6164
* be a more meaningful operation.
6265
*/
66+
@Override
6367
public DoubleSummary intersection(final DoubleSummary a, final DoubleSummary b) {
6468
return union(a, b);
6569
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.tuple.aninteger;
21+
22+
import org.apache.datasketches.ResizeFactor;
23+
import org.apache.datasketches.memory.Memory;
24+
import org.apache.datasketches.tuple.UpdatableSketch;
25+
26+
/**
27+
* @author Lee Rhodes
28+
*/
29+
public class IntegerSketch extends UpdatableSketch<Integer, IntegerSummary> {
30+
31+
/**
32+
* Constructs this sketch with given <i>lgK</i>.
33+
* @param lgK Log_base2 of <i>Nominal Entries</i>.
34+
* <a href="{@docRoot}/resources/dictionary.html#nomEntries">See Nominal Entries</a>
35+
* @param mode The IntegerSummary mode to be used
36+
*/
37+
public IntegerSketch(final int lgK, final IntegerSummary.Mode mode) {
38+
super(1 << lgK, ResizeFactor.X8.ordinal(), 1.0F, new IntegerSummaryFactory(mode));
39+
}
40+
41+
/**
42+
* Constructs this sketch from a Memory image, which must be from an IntegerSketch, and
43+
* usually with data.
44+
* @param mem the given Memory
45+
* @param mode The IntegerSummary mode to be used
46+
*/
47+
public IntegerSketch(final Memory mem, final IntegerSummary.Mode mode) {
48+
super(mem, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode));
49+
}
50+
51+
@Override
52+
public void update(final String key, final Integer value) {
53+
super.update(key, value);
54+
}
55+
56+
@Override
57+
public void update(final long key, final Integer value) {
58+
super.update(key, value);
59+
}
60+
61+
}

0 commit comments

Comments
 (0)