@@ -29,6 +29,7 @@ import (
2929// Union computes the union of Tuple sketches.
3030type Union [S Summary ] struct {
3131 policy Policy [S ]
32+ applyFunc func (S , S ) S
3233 hashtable * hashtable [S ]
3334 entryLessFunc func (a , b entry [S ]) int
3435 theta uint64
@@ -43,6 +44,20 @@ type unionOptions struct {
4344 rf theta.ResizeFactor
4445}
4546
47+ func (o * unionOptions ) Validate () error {
48+ if o .lgK < theta .MinLgK {
49+ return fmt .Errorf ("lgK must not be less than %d: %d" , theta .MinLgK , o .lgK )
50+ }
51+ if o .lgK > theta .MaxLgK {
52+ return fmt .Errorf ("lgK must not be greater than %d: %d" , theta .MaxLgK , o .lgK )
53+ }
54+ if o .p <= 0 || o .p > 1 {
55+ return errors .New ("sampling probability must be between 0 and 1" )
56+ }
57+
58+ return nil
59+ }
60+
// UnionOptionFunc is a functional option for building a Union: it receives a
// pointer to the unionOptions being assembled and may modify it in place. The
// union constructors invoke each supplied option before validating the result.
4661type UnionOptionFunc func (* unionOptions )
4762
4863// WithUnionLgK sets log2(k), where k is a nominal number of entries in the union
@@ -89,14 +104,49 @@ func NewUnion[S Summary](policy Policy[S], opts ...UnionOptionFunc) (*Union[S],
89104 opt (options )
90105 }
91106
92- if options .lgK < theta . MinLgK {
93- return nil , fmt . Errorf ( "lgK must not be less than %d: %d" , theta . MinLgK , options . lgK )
107+ if err := options .Validate (); err != nil {
108+ return nil , err
94109 }
95- if options .lgK > theta .MaxLgK {
96- return nil , fmt .Errorf ("lgK must not be greater than %d: %d" , theta .MaxLgK , options .lgK )
110+
111+ options .lgCurSize = startingSubMultiple (options .lgK + 1 , theta .MinLgK , uint8 (options .rf ))
112+ options .theta = startingThetaFromP (options .p )
113+
114+ table := newHashtable [S ](
115+ options .lgCurSize , options .lgK , options .rf , options .p , options .theta , options .seed , true ,
116+ )
117+
118+ return & Union [S ]{
119+ hashtable : table ,
120+ policy : policy ,
121+ entryLessFunc : func (a , b entry [S ]) int {
122+ if a .Hash < b .Hash {
123+ return - 1
124+ } else if a .Hash > b .Hash {
125+ return 1
126+ }
127+ return 0
128+ },
129+ theta : table .theta ,
130+ }, nil
131+ }
132+
133+ // NewUnionWithSummaryMergeFunc creates a new union that uses a function to merge summaries.
134+ // This is useful for value-type summaries where Policy.Apply cannot mutate the internal summary.
135+ func NewUnionWithSummaryMergeFunc [S Summary ](
136+ applyFunc func (S , S ) S , opts ... UnionOptionFunc ,
137+ ) (* Union [S ], error ) {
138+ options := & unionOptions {
139+ lgK : theta .DefaultLgK ,
140+ rf : theta .DefaultResizeFactor ,
141+ p : 1.0 ,
142+ seed : theta .DefaultSeed ,
143+ }
144+ for _ , opt := range opts {
145+ opt (options )
97146 }
98- if options .p <= 0 || options .p > 1 {
99- return nil , errors .New ("sampling probability must be between 0 and 1" )
147+
148+ if err := options .Validate (); err != nil {
149+ return nil , err
100150 }
101151
102152 options .lgCurSize = startingSubMultiple (options .lgK + 1 , theta .MinLgK , uint8 (options .rf ))
@@ -108,7 +158,7 @@ func NewUnion[S Summary](policy Policy[S], opts ...UnionOptionFunc) (*Union[S],
108158
109159 return & Union [S ]{
110160 hashtable : table ,
111- policy : policy ,
161+ applyFunc : applyFunc ,
112162 entryLessFunc : func (a , b entry [S ]) int {
113163 if a .Hash < b .Hash {
114164 return - 1
@@ -156,7 +206,11 @@ func (u *Union[S]) Update(sketch Sketch[S]) error {
156206 return err
157207 }
158208
159- u .policy .Apply (u .hashtable .entries [index ].Summary , summary )
209+ if u .applyFunc != nil {
210+ u .hashtable .entries [index ].Summary = u .applyFunc (u .hashtable .entries [index ].Summary , summary )
211+ } else {
212+ u .policy .Apply (u .hashtable .entries [index ].Summary , summary )
213+ }
160214 } else {
161215 // For ordered sketches, we can break early
162216 if sketch .IsOrdered () {
0 commit comments