@@ -112,6 +112,13 @@ func (rm *Manager) DeriveChecker(resourceGroupName, originalSQL, sqlDigest, plan
112112 return NewChecker (rm , resourceGroupName , group .RunawaySettings , originalSQL , sqlDigest , planDigest , startTime )
113113}
114114
115+ func (r * Checker ) isMarkedByIdentifyInRunawaySettings () bool {
116+ if r == nil {
117+ return false
118+ }
119+ return r .markedByIdentifyInRunawaySettings .Load ()
120+ }
121+
115122// BeforeExecutor checks whether query is in watch list before executing and after compiling.
116123func (r * Checker ) BeforeExecutor () (string , error ) {
117124 if r == nil {
@@ -149,34 +156,43 @@ func (r *Checker) BeforeExecutor() (string, error) {
149156 return "" , nil
150157 case rmpb .RunawayAction_SwitchGroup :
151158 // Return the switch group name to switch the resource group before executing.
152- return switchGroupName , nil
159+ return r . checkSwitchGroupName ( switchGroupName ) , nil
153160 default :
154161 // Continue to examine other convicts.
155162 }
156163 }
157164 return "" , nil
158165}
159166
167+ func (r * Checker ) checkSwitchGroupName (groupName string ) string {
168+ if len (groupName ) == 0 {
169+ return ""
170+ }
171+ group , err := r .manager .ResourceGroupCtl .GetResourceGroup (groupName )
172+ if err != nil || group == nil {
173+ logutil .BgLogger ().Debug ("invalid switch resource group" , zap .String ("switch-group-name" , groupName ), zap .Error (err ))
174+ return ""
175+ }
176+ return groupName
177+ }
178+
160179// BeforeCopRequest checks runaway and modifies the request if necessary before sending coprocessor request.
161180func (r * Checker ) BeforeCopRequest (req * tikvrpc.Request ) error {
162181 if r == nil {
163182 return nil
164183 }
165- // If the group settings are not available, and it's not marked by watch, skip this part.
166- if r .settings == nil && ! r .markedByQueryWatchRule {
167- return nil
168- }
169- // If it's marked by watch and the action is cooldown, override the priority,
184+ // If it's marked by watch and the action is cooldown, override the priority.
185+ // Other watch actions should already been handled in `BeforeExecutor` before.
170186 if r .markedByQueryWatchRule && r .watchAction == rmpb .RunawayAction_CoolDown {
171187 req .ResourceControlContext .OverridePriority = 1 // set priority to lowest
172188 }
173- // If group settings are available and the query is not marked by a rule,
174- // verify if it matches any rules in the settings.
175- if r .settings != nil && ! r .markedByIdentifyInRunawaySettings .Load () {
189+ // If group settings are available, verify if it matches any rules in the settings.
190+ if r .settings != nil {
176191 now := time .Now ()
177- // only check time and need to ensure deadline existed .
192+ // Check and ensure the deadline exists .
178193 exceedCause := r .exceedsThresholds (now , nil , 0 )
179- if exceedCause == "" { // only set timeout when the query is not runaway.
194+ // Only set timeout when the query has not been marked as runaway yet.
195+ if ! r .isMarkedByIdentifyInRunawaySettings () && len (exceedCause ) == 0 {
180196 if r .settings .Action == rmpb .RunawayAction_Kill {
181197 until := r .deadline .Sub (now )
182198 // if the execution time is close to the threshold, set a timeout
@@ -186,7 +202,8 @@ func (r *Checker) BeforeCopRequest(req *tikvrpc.Request) error {
186202 }
187203 return nil
188204 }
189- // execution time exceeds the threshold, mark the query as runaway
205+ // Try to mark the query as runaway, it's safe to call this method concurrently.
206+ // So it's possible that the query has already been marked as runaway in `CheckThresholds`.
190207 r .markRunawayByIdentifyInRunawaySettings (& now , exceedCause )
191208 // Take action if needed.
192209 switch r .settings .Action {
@@ -196,7 +213,9 @@ func (r *Checker) BeforeCopRequest(req *tikvrpc.Request) error {
196213 req .ResourceControlContext .OverridePriority = 1 // set priority to lowest
197214 return nil
198215 case rmpb .RunawayAction_SwitchGroup :
199- req .ResourceControlContext .ResourceGroupName = r .settings .SwitchGroupName
216+ if switchGroupName := r .checkSwitchGroupName (r .settings .SwitchGroupName ); len (switchGroupName ) != 0 {
217+ req .ResourceControlContext .ResourceGroupName = switchGroupName
218+ }
200219 return nil
201220 default :
202221 return nil
@@ -214,7 +233,7 @@ func (r *Checker) CheckAction() rmpb.RunawayAction {
214233 if r .markedByQueryWatchRule {
215234 return r .watchAction
216235 }
217- if r .markedByIdentifyInRunawaySettings . Load () {
236+ if r .isMarkedByIdentifyInRunawaySettings () {
218237 return r .settings .Action
219238 }
220239 return rmpb .RunawayAction_NoneAction
@@ -227,7 +246,7 @@ func (r *Checker) CheckRuleKillAction() (string, bool) {
227246 return "" , false
228247 }
229248 // If the group settings are available, and it's not marked by rule, check the execution time.
230- if r .settings != nil && ! r .markedByIdentifyInRunawaySettings . Load () {
249+ if r .settings != nil && ! r .isMarkedByIdentifyInRunawaySettings () {
231250 now := time .Now ()
232251 exceedCause := r .exceedsThresholds (now , nil , 0 )
233252 if exceedCause == "" {
@@ -249,15 +268,14 @@ func (r *Checker) markQuarantine(now *time.Time, exceedCause string) {
249268 r .settings .Action , r .settings .SwitchGroupName , ttl , now , exceedCause )
250269}
251270
252- func (r * Checker ) markRunawayByIdentifyInRunawaySettings (now * time.Time , exceedCause string ) bool {
271+ func (r * Checker ) markRunawayByIdentifyInRunawaySettings (now * time.Time , exceedCause string ) {
253272 swapped := r .markedByIdentifyInRunawaySettings .CompareAndSwap (false , true )
254273 if swapped {
255274 r .markRunaway ("identify" , r .settings .Action , r .settings .SwitchGroupName , now , exceedCause )
256275 if ! r .markedByQueryWatchRule {
257276 r .markQuarantine (now , exceedCause )
258277 }
259278 }
260- return swapped
261279}
262280
263281func (r * Checker ) markRunawayByQueryWatchRule (action rmpb.RunawayAction , switchGroupName , exceedCause string ) {
@@ -320,7 +338,7 @@ func (r *Checker) CheckThresholds(ruDetail *util.RUDetails, processKeys int64, e
320338 processKeys = int64 (100 )
321339 }
322340 })
323- if r .settings == nil || r . settings . Action != rmpb . RunawayAction_Kill {
341+ if r .settings == nil {
324342 return err
325343 }
326344
@@ -333,18 +351,15 @@ func (r *Checker) CheckThresholds(ruDetail *util.RUDetails, processKeys int64, e
333351 atomic .AddInt64 (& r .totalProcessedKeys , processKeys )
334352 totalProcessedKeys := atomic .LoadInt64 (& r .totalProcessedKeys )
335353 exceedCause := r .exceedsThresholds (checkTime , ruDetail , totalProcessedKeys )
336- if ! r .markedByIdentifyInRunawaySettings .Load () {
337- if exceedCause != "" && r .markRunawayByIdentifyInRunawaySettings (& now , exceedCause ) {
338- if r .markRunawayByIdentifyInRunawaySettings (& now , exceedCause ) {
339- return exeerrors .ErrResourceGroupQueryRunawayInterrupted .FastGenByArgs (exceedCause )
340- }
341- }
354+ // No need to mark as runaway if the query is not exceeded any threshold.
355+ if len (exceedCause ) == 0 {
356+ return err
342357 }
343- // Due to concurrency, check again.
344- if r .markedByIdentifyInRunawaySettings .Load () {
358+ r .markRunawayByIdentifyInRunawaySettings (& now , exceedCause )
359+ // Other actions will be handled in `BeforeCopRequest` since they need to modify the request.
360+ if r .settings .Action == rmpb .RunawayAction_Kill {
345361 return exeerrors .ErrResourceGroupQueryRunawayInterrupted .FastGenByArgs (exceedCause )
346362 }
347-
348363 return err
349364}
350365
0 commit comments