@@ -3,45 +3,17 @@ package action
33import (
44 "context"
55 "fmt"
6- "strings"
76 "time"
87
98 pb "buf.build/gen/go/gotocompany/proton/protocolbuffers/go/gotocompany/raccoon/v1beta1"
10- "github.com/spf13/cast"
11- "google.golang.org/protobuf/reflect/protoreflect"
129
1310 "github.com/goto/raccoon/config"
1411 "github.com/goto/raccoon/ingestionrule/action/dedup/cache"
15- "github.com/goto/raccoon/ingestionrule/action/dedup/protoutil"
1612 "github.com/goto/raccoon/ingestionrule/action/dedup/schemaregistry"
1713 "github.com/goto/raccoon/logger"
1814 "github.com/goto/raccoon/metrics"
1915)
2016
21- const (
22- metricNameEventDeserializationError = "event_deserialization_error"
23- metricNameEventDeserializationLatency = "event_deserialization_latency"
24- metricNameEventDuplicateCheckerLatency = "event_duplicate_checker_latency"
25- )
26-
27- const (
28- reasonProtoClassNotFound = "proto class not found"
29- reasonStencilParseError = "stencil parse error"
30- reasonPublisherNotFound = "publisher not found"
31-
32- reasonUserIDNotFound = "userID not found"
33- reasonUserIDTypeInvalid = "userID type invalid"
34-
35- reasonSessionIDNotFound = "sessionID not found"
36- reasonSessionIDTypeInvalid = "sessionID type invalid"
37-
38- reasonEventNameNotFound = "event_name not found"
39- reasonEventNameTypeInvalid = "event_name type invalid"
40-
41- reasonEventTimestampNotFound = "event_timestamp not found"
42- reasonEventTimestampTypeInvalid = "event_timestamp type invalid"
43- )
44-
4517// DuplicateChecker defines the capability to verify event uniqueness.
4618type DuplicateChecker interface {
4719 AreDuplicates (ctx context.Context , events []cache.EventMetadata ) ([]bool , error )
@@ -51,8 +23,6 @@ type DuplicateChecker interface {
5123
5224// processState holds the state of each event being processed.
5325type processState struct {
54- // event is the original event being processed.
55- event * pb.Event
5626 // isValid indicates whether the event has valid metadata and should be checked for duplication.
5727 isValid bool
5828}
@@ -91,23 +61,29 @@ func (d *Dedup) Apply(ctx context.Context, events []*pb.Event, connGroup string)
9161
9262 for i , event := range events {
9363 startDeserialize := time .Now ()
94- meta , err := d . extractMetadata (event , connGroup )
64+ meta , err := ExtractMetadata (event , connGroup , config . PolicyCfg . PublisherMapping , config . EventDistribution . PublisherPattern , d . stencil )
9565 metrics .Timing (metricNameEventDeserializationLatency , time .Since (startDeserialize ).Milliseconds (), fmt .Sprintf ("conn_group=%s" , connGroup ))
9666
9767 if err != nil {
9868 logger .Errorf ("dedup: failed to extract metadata: %v" , err )
99- states [i ] = processState {event : event , isValid : false }
69+ metrics .Increment (metricNameEventDeserializationError , fmt .Sprintf ("conn_group=%s,reason=%s,event_type=%s,product=%s,event_name=%s" , connGroup , getErrorReason (err ), event .Type , event .Product , event .EventName ))
70+ states [i ] = processState {isValid : false }
10071 continue
10172 }
10273
10374 if meta .EventGUID == "" || meta .Publisher == "" {
10475 logger .Errorf ("dedup: missing metadata fields: %+v for conn_group=%s,product=%s,event_name=%s" , meta , connGroup , event .Product , event .EventName )
105- states [i ] = processState {event : event , isValid : false }
76+ states [i ] = processState {isValid : false }
10677 continue
10778 }
10879
109- states [i ] = processState {event : event , isValid : true }
110- metadataBatch = append (metadataBatch , meta )
80+ states [i ] = processState {isValid : true }
81+ metadataBatch = append (metadataBatch , cache.EventMetadata {
82+ Publisher : meta .Publisher ,
83+ EventGUID : meta .EventGUID ,
84+ EventName : meta .EventName ,
85+ Product : meta .Product ,
86+ })
11187 }
11288
11389 var isDuplicateResults []bool
@@ -122,9 +98,10 @@ func (d *Dedup) Apply(ctx context.Context, events []*pb.Event, connGroup string)
12298 uniqueEvents := make ([]* pb.Event , 0 , len (events ))
12399 resultIdx := 0 // Tracks our position in the isDuplicateResults slice
124100
125- for _ , state := range states {
101+ for i , state := range states {
102+ event := events [i ]
126103 if ! state .isValid {
127- uniqueEvents = append (uniqueEvents , state . event )
104+ uniqueEvents = append (uniqueEvents , event )
128105 continue
129106 }
130107
@@ -133,86 +110,22 @@ func (d *Dedup) Apply(ctx context.Context, events []*pb.Event, connGroup string)
133110 logger .Errorf ("dedup: cache batch verification failed, bypassing filter: %v" , cacheErr )
134111 }
135112
136- uniqueEvents = append (uniqueEvents , state . event )
113+ uniqueEvents = append (uniqueEvents , event )
137114 resultIdx ++
138115 continue
139116 }
140117
141118 isDuplicate := isDuplicateResults [resultIdx ]
119+ meta := metadataBatch [resultIdx ]
142120 resultIdx ++
143121
144122 if isDuplicate {
145- metrics .Increment (metricEventLossCount , fmt .Sprintf ("reason=DEDUP_POLICY,event_name=%s,product=%s,conn_group=%s,event_type=%s" , state . event . EventName , state . event . Product , connGroup , state . event .Type ))
123+ metrics .Increment (metricEventLossCount , fmt .Sprintf ("reason=DEDUP_POLICY,event_name=%s,product=%s,conn_group=%s,event_type=%s" , meta . EventName , meta . Product , connGroup , event .Type ))
146124 continue
147125 }
148126
149- uniqueEvents = append (uniqueEvents , state . event )
127+ uniqueEvents = append (uniqueEvents , event )
150128 }
151129
152130 return uniqueEvents
153131}
154-
155- // extractMetadata deserializes dynamic protobuf payloads using Stencil and handles identity field extractions.
156- func (d * Dedup ) extractMetadata (event * pb.Event , connGroup string ) (cache.EventMetadata , error ) {
157- protoClass , ok := config .DedupCfg .ProtoClassNameMapping [event .Type ]
158- if ! ok {
159- metrics .Increment (metricNameEventDeserializationError ,
160- fmt .Sprintf ("conn_group=%s,reason=%s,event_type=%s,product=%s,event_name=%s" , connGroup , reasonProtoClassNotFound , event .Type , event .Product , event .EventName ))
161- return cache.EventMetadata {}, fmt .Errorf ("failed to find proto class for conn_group=%s,event_type=%s,product=%s,event_name=%s" , connGroup , event .Type , event .Product , event .EventName )
162- }
163-
164- publisher , ok := config .PolicyCfg .PublisherMapping [connGroup ]
165- if ! ok {
166- metrics .Increment (metricNameEventDeserializationError ,
167- fmt .Sprintf ("conn_group=%s,reason=%s,event_type=%s,product=%s,event_name=%s" , connGroup , reasonPublisherNotFound , event .Type , event .Product , event .EventName ))
168- return cache.EventMetadata {}, fmt .Errorf ("failed to publisher for conn_group=%s,event_type=%s,product=%s,event_name=%s" , connGroup , event .Type , event .Product , event .EventName )
169- }
170-
171- parsedMsg , err := d .stencil .Client .Parse (protoClass , event .EventBytes )
172- if err != nil {
173- metrics .Increment (metricNameEventDeserializationError ,
174- fmt .Sprintf ("conn_group=%s,reason=%s,event_type=%s,product=%s,event_name=%s" , connGroup , reasonStencilParseError , event .Type , event .Product , event .EventName ))
175- return cache.EventMetadata {}, fmt .Errorf ("failed to parse proto class for conn_group=%s,event_type=%s,product=%s,event_name=%s" , connGroup , event .Type , event .Product , event .EventName )
176- }
177-
178- ref := parsedMsg .ProtoReflect ()
179-
180- const protoFieldEventGUID = "meta.event_guid"
181-
182- eventGUID , err := d .getStringField (ref , protoFieldEventGUID , connGroup , event , protoFieldEventGUID , reasonEventNameNotFound , reasonEventNameTypeInvalid )
183- if err != nil {
184- return cache.EventMetadata {}, err
185- }
186-
187- return cache.EventMetadata {
188- EventGUID : eventGUID ,
189- Publisher : publisher ,
190- }, nil
191- }
192-
193- // getStringField is a helper function to safely extract, convert to string, and handle error telemetry for identifier fields.
194- func (d * Dedup ) getStringField (
195- ref protoreflect.Message ,
196- path string ,
197- connGroup string ,
198- event * pb.Event ,
199- fieldName string ,
200- reasonNotFound string ,
201- reasonTypeInvalid string ,
202- ) (string , error ) {
203- rawVal , ok := protoutil .GetFieldValue (ref , strings .Split (path , "." ))
204- if ! ok {
205- metrics .Increment (metricNameEventDeserializationError ,
206- fmt .Sprintf ("conn_group=%s,reason=%s,event_type=%s,product=%s,event_name=%s" , connGroup , reasonNotFound , event .Type , event .Product , event .EventName ))
207- return "" , fmt .Errorf ("failed to find %s for conn_group=%s,event_type=%s,product=%s,event_name=%s" , fieldName , connGroup , event .Type , event .Product , event .EventName )
208- }
209-
210- val , err := cast .ToStringE (rawVal )
211- if err != nil {
212- metrics .Increment (metricNameEventDeserializationError ,
213- fmt .Sprintf ("conn_group=%s,reason=%s,event_type=%s,product=%s,event_name=%s" , connGroup , reasonTypeInvalid , event .Type , event .Product , event .EventName ))
214- return "" , fmt .Errorf ("%s field type is not convertible to string for conn_group=%s,event_type=%s,product=%s,event_name=%s: %w" , fieldName , connGroup , event .Type , event .Product , event .EventName , err )
215- }
216-
217- return val , nil
218- }
0 commit comments