-
Notifications
You must be signed in to change notification settings - Fork 827
Log duplicated activity events #6813
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -529,6 +529,14 @@ func (e *mutableStateBuilder) FlushBufferedEvents() error { | |
|
||
newCommittedEvents = e.trimEventsAfterWorkflowClose(newCommittedEvents) | ||
e.hBuilder.history = newCommittedEvents | ||
|
||
// adding logs to help identify duplicate activity task events | ||
// duplicated activity events can cause DecisionTaskFailed events with cause UNHANDLED_DECISION | ||
// and cause workflow to be stuck in decision task failed state | ||
// TODO: remove this logging once the root cause is identified and fixed, | ||
// or replace it with deduplication of the duplicated events | ||
e.logDuplicatedActivityEvents() | ||
|
||
// make sure all new committed events have correct EventID | ||
e.assignEventIDToBufferedEvents() | ||
if err := e.assignTaskIDToEvents(); err != nil { | ||
|
@@ -2253,6 +2261,65 @@ func (e *mutableStateBuilder) logDataInconsistency() { | |
tag.WorkflowRunID(runID), | ||
) | ||
} | ||
func (e *mutableStateBuilder) logDuplicatedActivityEvents() { | ||
type activityTaskUniqueEventParams struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can't compare events directly, so this struct pulls out the attributes that together guarantee an activity event's uniqueness. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure this will work: I think equality in maps is compared by pointer, not by value (let's confirm I'm not lying). Let's start with a unit test to validate that this works as expected. I think this will probably not catch duplicates in its current state, unless you squash the key into something more comparable, like a string. Ensuring that the log is called is a bit annoying, so changing the check to just return a bool is one easy solution, e.g.:
For the duplicate check, my vague memory was that you were looking to check whether the scheduledEventID was duplicated? That's just a primitive type, so I think it's possible to do a simple map check on it. I would start there; if you want to check other fields there are a few solutions, but I'm not entirely sure what it means for the attempt to be duplicated, for example. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, it seems I am a bit wrong and clearly don't understand map keys well enough. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, you're correct and I'm wrong (https://go.dev/ref/spec#Comparison_operators), (demonstrated here)
So I take back what I said, but I agree with Tim's point about a unit test, both to check the behavior and to catch NPE problems when dereferencing |
||
eventType types.EventType | ||
scheduledEventID int64 | ||
attempt int32 | ||
startedEventID int64 | ||
} | ||
|
||
activityTaskUniqueEvents := make(map[activityTaskUniqueEventParams]struct{}) | ||
|
||
checkActivityTaskEventUniqueness := func(event *types.HistoryEvent) { | ||
uniqueEventParams := activityTaskUniqueEventParams{ | ||
eventType: event.GetEventType(), | ||
} | ||
|
||
var scheduledEventID int64 | ||
|
||
switch event.GetEventType() { | ||
case types.EventTypeActivityTaskStarted: | ||
scheduledEventID = event.ActivityTaskStartedEventAttributes.GetScheduledEventID() | ||
uniqueEventParams.scheduledEventID = scheduledEventID | ||
uniqueEventParams.attempt = event.ActivityTaskStartedEventAttributes.Attempt | ||
case types.EventTypeActivityTaskCompleted: | ||
scheduledEventID = event.ActivityTaskCompletedEventAttributes.GetScheduledEventID() | ||
uniqueEventParams.scheduledEventID = scheduledEventID | ||
uniqueEventParams.startedEventID = event.ActivityTaskCompletedEventAttributes.GetStartedEventID() | ||
case types.EventTypeActivityTaskFailed: | ||
scheduledEventID = event.ActivityTaskFailedEventAttributes.GetScheduledEventID() | ||
uniqueEventParams.scheduledEventID = scheduledEventID | ||
uniqueEventParams.startedEventID = event.ActivityTaskFailedEventAttributes.GetStartedEventID() | ||
case types.EventTypeActivityTaskCanceled: | ||
scheduledEventID = event.ActivityTaskCanceledEventAttributes.GetScheduledEventID() | ||
uniqueEventParams.scheduledEventID = scheduledEventID | ||
uniqueEventParams.startedEventID = event.ActivityTaskCanceledEventAttributes.StartedEventID | ||
case types.EventTypeActivityTaskTimedOut: | ||
scheduledEventID = event.ActivityTaskTimedOutEventAttributes.GetScheduledEventID() | ||
uniqueEventParams.scheduledEventID = scheduledEventID | ||
uniqueEventParams.startedEventID = event.ActivityTaskTimedOutEventAttributes.StartedEventID | ||
default: | ||
return | ||
} | ||
|
||
if _, ok := activityTaskUniqueEvents[uniqueEventParams]; ok { | ||
e.logger.Error("Duplicate activity task event found", | ||
tag.WorkflowDomainName(e.GetDomainEntry().GetInfo().Name), | ||
tag.WorkflowID(e.GetExecutionInfo().WorkflowID), | ||
tag.WorkflowRunID(e.GetExecutionInfo().RunID), | ||
tag.WorkflowScheduleID(scheduledEventID), | ||
tag.WorkflowEventType(event.GetEventType().String()), | ||
) | ||
} else { | ||
activityTaskUniqueEvents[uniqueEventParams] = struct{}{} | ||
} | ||
} | ||
|
||
for _, event := range e.hBuilder.history { | ||
checkActivityTaskEventUniqueness(event) | ||
} | ||
} | ||
|
||
func mergeMapOfByteArray( | ||
current map[string][]byte, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: could we have some unit tests for those cases?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I'll add that to show how it works