Skip to content

Commit 8e3be7c

Browse files
committed
Merge pull request #17 from DataSketches/ExtendedUnion
Extended union
2 parents 8bb2e0e + 94d3571 commit 8e3be7c

6 files changed

Lines changed: 194 additions & 6 deletions

File tree

src/main/java/com/yahoo/sketches/theta/DirectUnion.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,37 @@ else if (serVer == 3) {
147147
}
148148
else throw new IllegalArgumentException("SerVer is unknown: "+serVer);
149149
}
150+
151+
@Override
152+
public void update(long datum) {
153+
gadget_.update(datum);
154+
}
155+
156+
@Override
157+
public void update(double datum) {
158+
gadget_.update(datum);
159+
}
160+
161+
@Override
162+
public void update(String datum) {
163+
gadget_.update(datum);
164+
}
165+
166+
@Override
167+
public void update(byte[] data) {
168+
gadget_.update(data);
169+
}
170+
171+
@Override
172+
public void update(int[] data) {
173+
gadget_.update(data);
174+
}
175+
176+
@Override
177+
public void update(long[] data) {
178+
gadget_.update(data);
179+
}
180+
150181
//must trust seed, no seedhash. No p, can't be empty, can only be compact, ordered, size > 24
151182
private void processVer1(Memory skMem) {
152183
//unionEmpty_ flag is merged with the gadget

src/main/java/com/yahoo/sketches/theta/HeapUnion.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ class HeapUnion extends SetOperation implements Union {
2525
private long unionThetaLong_;
2626
private boolean unionEmpty_;
2727

28-
2928
/**
3029
* Construct a new Union SetOperation on the java heap. Called by SetOperation.Builder.
3130
*
@@ -129,6 +128,36 @@ else if (serVer == 3) {
129128
else throw new IllegalArgumentException("SerVer is unknown: "+serVer);
130129
}
131130

131+
@Override
132+
public void update(long datum) {
133+
gadget_.update(datum);
134+
}
135+
136+
@Override
137+
public void update(double datum) {
138+
gadget_.update(datum);
139+
}
140+
141+
@Override
142+
public void update(String datum) {
143+
gadget_.update(datum);
144+
}
145+
146+
@Override
147+
public void update(byte[] data) {
148+
gadget_.update(data);
149+
}
150+
151+
@Override
152+
public void update(int[] data) {
153+
gadget_.update(data);
154+
}
155+
156+
@Override
157+
public void update(long[] data) {
158+
gadget_.update(data);
159+
}
160+
132161
//must trust seed, no seedhash. No p, can't be empty, can only be compact, ordered, size > 24
133162
private void processVer1(Memory skMem) {
134163
unionEmpty_ = false; //Empty rule: AND the empty states

src/main/java/com/yahoo/sketches/theta/Union.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,57 @@ public interface Union {
3131
*/
3232
void update(Memory mem);
3333

34+
/**
35+
* Present this union with a long.
36+
*
37+
* @param datum The given long datum.
38+
*/
39+
public void update(long datum);
40+
41+
/**
42+
* Present this union with the given double (or float) datum.
43+
* The double will be converted to a long using Double.doubleToLongBits(datum),
44+
* which normalizes all NaN values to a single NaN representation.
45+
* Plus and minus zero will be normalized to plus zero.
46+
* The special floating-point values NaN and +/- Infinity are treated as distinct.
47+
*
48+
* @param datum The given double datum.
49+
*/
50+
public void update(double datum);
51+
52+
/**
53+
* Present this union with the given String.
54+
* The string is converted to a byte array using UTF-8 encoding.
55+
* If the string is null or empty no update attempt is made and the method returns.
56+
*
57+
* @param datum The given String.
58+
*/
59+
public void update(String datum);
60+
61+
/**
62+
* Present this union with the given byte array.
63+
* If the byte array is null or empty no update attempt is made and the method returns.
64+
*
65+
* @param data The given byte array.
66+
*/
67+
public void update(byte[] data);
68+
69+
/**
70+
* Present this union with the given integer array.
71+
* If the integer array is null or empty no update attempt is made and the method returns.
72+
*
73+
* @param data The given int array.
74+
*/
75+
public void update(int[] data);
76+
77+
/**
78+
* Present this union with the given long array.
79+
* If the long array is null or empty no update attempt is made and the method returns.
80+
*
81+
* @param data The given long array.
82+
*/
83+
public void update(long[] data);
84+
3485
/**
3586
* Gets the result of this operation as a CompactSketch of the chosen form
3687
* @param dstOrdered

src/main/javadoc/resources/dictionary.html

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ <h3><a name="accuracy">Sketch Accuracy</a></h3>
1010
About sketch accuracy...
1111

1212
<h3><a name="alphaTCF">Alpha TCF</a></h3>
13-
The Alpha Theta Choosing Function (TCF) and the theory behind it is fully described in this
14-
<a href="ThetaSketchFramework.pdf">paper</a>.
13+
The Alpha Theta Choosing Function (TCF) and the theory behind it are fully described in the
14+
<a href="https://github.com/DataSketches/DataSketches.github.io/blob/master/docs/ThetaSketchFramework.pdf">Theta Sketch Framework</a> paper.
1515
The alpha algorithm is optimized for speed and accuracy in a real-time sketch
1616
building / estimating environment.
1717
<p>One of the properties of the Alpha Algorithm used for cache management within a sketch is that
@@ -189,8 +189,8 @@ <h3><a name="seedHash">Seed Hash</a></h3>
189189
or (2) that, when deserializing or wrapping a sketch image, the caller has the correct seed.
190190

191191
<h3><a name="tcf">Theta Choosing Function (TCF)</a></h3>
192-
The Theta Choosing Function (TCF) and the theory behind it is fully described in this
193-
<a href="ThetaSketchFramework.pdf">paper</a>.
192+
The Theta Choosing Function (TCF) and the theory behind it are fully described in the
193+
<a href="https://github.com/DataSketches/DataSketches.github.io/blob/master/docs/ThetaSketchFramework.pdf">Theta Sketch Framework</a> paper.
194194

195195
<h3><a name="theta">Theta, &theta;</a></h3>
196196
Refers to the mathematical random variable &theta; that represents the current probability
@@ -209,7 +209,7 @@ <h3><a name="thetaSketch">Theta Sketch Framework</a></h3>
209209
This framework also enables the sketches to share estimation, upper and lower bounds algorithms and
210210
a common serialization data structure.
211211
The Theta Sketch Framework, Theta Choosing Functions and the theory behind them are fully described
212-
in this <a href="ThetaSketchFramework.pdf">paper</a>.
212+
in the <a href="https://github.com/DataSketches/DataSketches.github.io/blob/master/docs/ThetaSketchFramework.pdf">Theta Sketch Framework</a> paper.
213213

214214
<h3><a name="updateReturnState">Update Return State</a></h3>
215215
Provides useful detail for sketch characterization and debugging. It is not required that any of

src/test/java/com/yahoo/sketches/theta/DirectUnionTest.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,45 @@ public void printlnTest() {
612612
println("PRINTING: "+this.getClass().getName());
613613
}
614614

615+
@Test
616+
public void checkPrimitiveUpdates() {
617+
int k = 32;
618+
Memory uMem = new NativeMemory(new byte[getMaxUnionBytes(k)]);
619+
Union union = SetOperation.builder().initMemory(uMem).buildUnion(k);
620+
621+
union.update(1L);
622+
union.update(1.5); //#1 double
623+
union.update(0.0);
624+
union.update(-0.0);
625+
String s = null;
626+
union.update(s); //null string
627+
s = "";
628+
union.update(s); //empty string
629+
s = "String";
630+
union.update(s); //#2 actual string
631+
byte[] byteArr = null;
632+
union.update(byteArr); //null byte[]
633+
byteArr = new byte[0];
634+
union.update(byteArr); //empty byte[]
635+
byteArr = "Byte Array".getBytes();
636+
union.update(byteArr); //#3 actual byte[]
637+
int[] intArr = null;
638+
union.update(intArr); //null int[]
639+
intArr = new int[0];
640+
union.update(intArr); //empty int[]
641+
int[] intArr2 = { 1, 2, 3, 4, 5 };
642+
union.update(intArr2); //#4 actual int[]
643+
long[] longArr = null;
644+
union.update(longArr); //null long[]
645+
longArr = new long[0];
646+
union.update(longArr); //empty long[]
647+
long[] longArr2 = { 6, 7, 8, 9 };
648+
union.update(longArr2); //#5 actual long[]
649+
CompactSketch comp = union.getResult();
650+
double est = comp.getEstimate();
651+
assertEquals(est, 7.0, 0.0);
652+
}
653+
615654
/**
616655
* @param s value to print
617656
*/

src/test/java/com/yahoo/sketches/theta/HeapUnionTest.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,44 @@ public void checkGetResult() {
491491
assertEquals(csk.getCurrentBytes(true), 8);
492492
}
493493

494+
@Test
495+
public void checkPrimitiveUpdates() {
496+
int k = 32;
497+
Union union = Sketches.setOperationBuilder().buildUnion(k);
498+
499+
union.update(1L);
500+
union.update(1.5); //#1 double
501+
union.update(0.0);
502+
union.update(-0.0);
503+
String s = null;
504+
union.update(s); //null string
505+
s = "";
506+
union.update(s); //empty string
507+
s = "String";
508+
union.update(s); //#2 actual string
509+
byte[] byteArr = null;
510+
union.update(byteArr); //null byte[]
511+
byteArr = new byte[0];
512+
union.update(byteArr); //empty byte[]
513+
byteArr = "Byte Array".getBytes();
514+
union.update(byteArr); //#3 actual byte[]
515+
int[] intArr = null;
516+
union.update(intArr); //null int[]
517+
intArr = new int[0];
518+
union.update(intArr); //empty int[]
519+
int[] intArr2 = { 1, 2, 3, 4, 5 };
520+
union.update(intArr2); //#4 actual int[]
521+
long[] longArr = null;
522+
union.update(longArr); //null long[]
523+
longArr = new long[0];
524+
union.update(longArr); //empty long[]
525+
long[] longArr2 = { 6, 7, 8, 9 };
526+
union.update(longArr2); //#5 actual long[]
527+
CompactSketch comp = union.getResult();
528+
double est = comp.getEstimate();
529+
assertEquals(est, 7.0, 0.0);
530+
}
531+
494532
//used by DirectUnionTest as well
495533
public static void testAllCompactForms(Union union, double expected, double toll) {
496534
double compEst1, compEst2;

0 commit comments

Comments
 (0)