1919
2020package org .apache .datasketches .tuple ;
2121
22+ import static java .lang .Math .min ;
2223import static org .apache .datasketches .Util .DEFAULT_NOMINAL_ENTRIES ;
2324
25+ import java .lang .reflect .Array ;
26+
27+ import org .apache .datasketches .QuickSelect ;
28+
2429/**
2530 * Compute a union of two or more tuple sketches.
2631 * A new instance represents an empty set.
2934 * @param <S> Type of Summary
3035 */
3136public class Union <S extends Summary > {
32- private final int nomEntries_ ;
3337 private final SummarySetOperations <S > summarySetOps_ ;
3438 private QuickSelectSketch <S > sketch_ ;
3539 private long theta_ ; // need to maintain outside of the sketch
40+ private boolean isEmpty_ ;
3641
3742 /**
3843 * Creates new instance with default nominal entries
@@ -49,42 +54,83 @@ public Union(final SummarySetOperations<S> summarySetOps) {
4954 * @param summarySetOps instance of SummarySetOperations
5055 */
public Union(final int nomEntries, final SummarySetOperations<S> summarySetOps) {
  summarySetOps_ = summarySetOps;
  sketch_ = new QuickSelectSketch<>(nomEntries, null);
  // Track theta separately, starting from the freshly created sketch's value.
  theta_ = sketch_.getThetaLong();
  // Nothing has been merged yet, so the union represents an empty set.
  isEmpty_ = true;
}
5762
5863 /**
5964 * Updates the internal set by adding entries from the given sketch
6065 * @param sketchIn input sketch to add to the internal set
6166 */
6267 public void update (final Sketch <S > sketchIn ) {
63- if (sketchIn == null || sketchIn .isEmpty ()) { return ; }
68+ if ((sketchIn == null ) || sketchIn .isEmpty ()) { return ; }
69+ isEmpty_ = false ;
6470 if (sketchIn .theta_ < theta_ ) { theta_ = sketchIn .theta_ ; }
6571 final SketchIterator <S > it = sketchIn .iterator ();
6672 while (it .next ()) {
6773 sketch_ .merge (it .getKey (), it .getSummary (), summarySetOps_ );
6874 }
75+ if (sketch_ .theta_ < theta_ ) {
76+ theta_ = sketch_ .theta_ ;
77+ }
6978 }
7079
7180 /**
7281 * Gets the internal set as a CompactSketch
7382 * @return result of the unions so far
7483 */
84+ @ SuppressWarnings ("unchecked" )
7585 public CompactSketch <S > getResult () {
76- sketch_ .trim ();
77- if (theta_ < sketch_ .theta_ ) {
78- sketch_ .setThetaLong (theta_ );
79- sketch_ .rebuild ();
86+ if (isEmpty_ ) {
87+ return sketch_ .compact ();
88+ }
89+ if ((theta_ >= sketch_ .theta_ ) && (sketch_ .getRetainedEntries () <= sketch_ .getNominalEntries ())) {
90+ return sketch_ .compact ();
91+ }
92+ long theta = min (theta_ , sketch_ .theta_ );
93+
94+ int num = 0 ;
95+ {
96+ final SketchIterator <S > it = sketch_ .iterator ();
97+ while (it .next ()) {
98+ if (it .getKey () < theta ) { num ++; }
99+ }
80100 }
81- return sketch_ .compact ();
101+ if (num == 0 ) {
102+ return new CompactSketch <>(null , null , theta , isEmpty_ );
103+ }
104+ if (num > sketch_ .getNominalEntries ()) {
105+ final long [] keys = new long [num ]; // temporary since the order will be destroyed by quick select
106+ final SketchIterator <S > it = sketch_ .iterator ();
107+ int i = 0 ;
108+ while (it .next ()) {
109+ if (it .getKey () < theta ) { keys [i ++] = it .getKey (); }
110+ }
111+ theta = QuickSelect .select (keys , 0 , num - 1 , sketch_ .getNominalEntries ());
112+ num = sketch_ .getNominalEntries ();
113+ }
114+ final long [] keys = new long [num ];
115+ final S [] summaries = (S []) Array .newInstance (sketch_ .summaries_ .getClass ().getComponentType (), num );
116+ final SketchIterator <S > it = sketch_ .iterator ();
117+ int i = 0 ;
118+ while (it .next ()) {
119+ if (it .getKey () < theta ) {
120+ keys [i ] = it .getKey ();
121+ summaries [i ] = (S ) it .getSummary ().copy ();
122+ i ++;
123+ }
124+ }
125+ return new CompactSketch <>(keys , summaries , theta , isEmpty_ );
82126 }
83127
84128 /**
85129 * Resets the internal set to the initial state, which represents an empty set
86130 */
87131 public void reset () {
88- sketch_ = new QuickSelectSketch <S >(nomEntries_ , null );
132+ sketch_ .reset ();
133+ theta_ = sketch_ .getThetaLong ();
134+ isEmpty_ = true ;
89135 }
90136}
0 commit comments