@@ -86,6 +86,10 @@ type (
86
86
cancelActivityStateMachine struct {
87
87
* commandStateMachineBase
88
88
attributes * commandpb.RequestCancelActivityTaskCommandAttributes
89
+
90
+ // Value of commandsHelper.nextCommandEventIDResetCounter recorded when this
91
+ // command incremented commandsHelper.commandsCancelledDuringWFCancellation.
92
+ cancelledOnEventIDResetCounter uint64
89
93
}
90
94
91
95
timerCommandStateMachine struct {
@@ -96,6 +100,10 @@ type (
96
100
cancelTimerCommandStateMachine struct {
97
101
* commandStateMachineBase
98
102
attributes * commandpb.CancelTimerCommandAttributes
103
+
104
+ // Value of commandsHelper.nextCommandEventIDResetCounter recorded when this
105
+ // command incremented commandsHelper.commandsCancelledDuringWFCancellation.
106
+ cancelledOnEventIDResetCounter uint64
99
107
}
100
108
101
109
childWorkflowCommandStateMachine struct {
@@ -137,6 +145,12 @@ type (
137
145
versionMarkerLookup map [int64 ]string
138
146
commandsCancelledDuringWFCancellation int64
139
147
workflowExecutionIsCancelling bool
148
+
149
+ // Incremented every time nextCommandEventID and
150
+ // commandsCancelledDuringWFCancellation are reset (i.e. on new workflow
151
+ // task). Overflow won't ever happen, but technically the way this value is
152
+ // compared is safe for overflow wrap around.
153
+ nextCommandEventIDResetCounter uint64
140
154
}
141
155
142
156
// panic when command state machine is in illegal state
@@ -528,6 +542,10 @@ func (d *activityCommandStateMachine) cancel() {
528
542
}
529
543
cancelCmd := d .helper .newCancelActivityStateMachine (attribs )
530
544
d .helper .addCommand (cancelCmd )
545
+ // Record the event ID reset counter in effect when this increment was
546
+ // performed, so that a later decrement is applied only if no reset has
547
+ // happened since.
548
+ cancelCmd .cancelledOnEventIDResetCounter = d .helper .nextCommandEventIDResetCounter
531
549
}
532
550
533
551
d .commandStateMachineBase .cancel ()
@@ -541,6 +559,10 @@ func (d *timerCommandStateMachine) cancel() {
541
559
}
542
560
cancelCmd := d .helper .newCancelTimerCommandStateMachine (attribs )
543
561
d .helper .addCommand (cancelCmd )
562
+ // Record the event ID reset counter in effect when this increment was
563
+ // performed, so that a later decrement is applied only if no reset has
564
+ // happened since.
565
+ cancelCmd .cancelledOnEventIDResetCounter = d .helper .nextCommandEventIDResetCounter
544
566
}
545
567
546
568
d .commandStateMachineBase .cancel ()
@@ -824,6 +846,9 @@ func (h *commandsHelper) setCurrentWorkflowTaskStartedEventID(workflowTaskStarte
824
846
// execution as those canceled command events will show up *after* the workflow task completed event.
825
847
h .nextCommandEventID = workflowTaskStartedEventID + 2 + h .commandsCancelledDuringWFCancellation
826
848
h .commandsCancelledDuringWFCancellation = 0
849
+ // We must change the counter here so that others who mutate
850
+ // commandsCancelledDuringWFCancellation know it has since been reset
851
+ h .nextCommandEventIDResetCounter ++
827
852
}
828
853
829
854
func (h * commandsHelper ) getNextID () int64 {
@@ -877,14 +902,26 @@ func (h *commandsHelper) addCommand(command commandStateMachine) {
877
902
// might be in the same workflow task. In practice this only seems to happen during unhandled command events.
878
903
func (h * commandsHelper ) removeCancelOfResolvedCommand (commandID commandID ) {
879
904
// Ensure this isn't misused for non-cancel commands
880
- if commandID .commandType != commandTypeCancelTimer {
881
- panic ("removeCancelOfResolvedCommand should only be called for cancel timer" )
905
+ if commandID .commandType != commandTypeCancelTimer && commandID . commandType != commandTypeRequestCancelActivityTask {
906
+ panic ("removeCancelOfResolvedCommand should only be called for cancel timer / activity " )
882
907
}
883
908
orderedCmdEl , ok := h .commands [commandID ]
884
909
if ok {
885
910
delete (h .commands , commandID )
886
- h .orderedCommands .Remove (orderedCmdEl )
887
- h .commandsCancelledDuringWFCancellation --
911
+ command := h .orderedCommands .Remove (orderedCmdEl )
912
+ // commandsCancelledDuringWFCancellation may have been reset since this
913
+ // cancel command recorded its reset counter. Only decrement when that
914
+ // recorded counter still equals the helper's current reset counter.
915
+ switch command := command .(type ) {
916
+ case * cancelActivityStateMachine :
917
+ if command .cancelledOnEventIDResetCounter == h .nextCommandEventIDResetCounter {
918
+ h .commandsCancelledDuringWFCancellation --
919
+ }
920
+ case * cancelTimerCommandStateMachine :
921
+ if command .cancelledOnEventIDResetCounter == h .nextCommandEventIDResetCounter {
922
+ h .commandsCancelledDuringWFCancellation --
923
+ }
924
+ }
888
925
}
889
926
}
}
890
927
@@ -916,6 +953,10 @@ func (h *commandsHelper) requestCancelActivityTask(activityID string) commandSta
916
953
917
954
func (h * commandsHelper ) handleActivityTaskClosed (activityID string , scheduledEventID int64 ) commandStateMachine {
918
955
command := h .getCommand (makeCommandID (commandTypeActivity , activityID ))
956
+ // If, for whatever reason, we were going to send an activity cancel request, don't do that anymore
957
+ // since we already know the activity is resolved.
958
+ possibleCancelID := makeCommandID (commandTypeRequestCancelActivityTask , activityID )
959
+ h .removeCancelOfResolvedCommand (possibleCancelID )
919
960
command .handleCompletionEvent ()
920
961
delete (h .scheduledEventIDToActivityID , scheduledEventID )
921
962
return command
0 commit comments