Skip to content

Commit a2adcb6

Browse files
committed
refactor(queuev2): distinguish benign vs real shadow mismatches
ExtraInCache (tasks in cache not in DB) is benign given task independence. It now logs at Info without incrementing the mismatch counter, so it is still observable without polluting the Warn metric used for alerting. MissingFromCache and NextKeyMismatch are real divergences: they log at Warn and increment the counter as before. shadowMismatch.dbTaskCount and shadowMismatch.cacheTaskCount moved into the mismatch tag set so they are only emitted when there is something to act on. The match path logs a bare Debug with no extra fields. Signed-off-by: Seva Kaloshin <seva.kaloshin@gmail.com>
1 parent aac7aea commit a2adcb6

1 file changed

Lines changed: 14 additions & 11 deletions

File tree

service/history/queuev2/queue_reader_cached.go

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -620,9 +620,7 @@ type findMismatchesInShadowResult struct {
620620
// from the cache snapshot (after filtering benign eviction and inject races).
621621
MissingFromCache []persistence.HistoryTaskKey
622622
// ExtraInCache contains task keys present in the cache snapshot but absent
623-
// from the DB response. These are typically benign (Inject races, tasks
624-
// written to cache before the DB read completes) but are still reported as
625-
// mismatches for observability via shadowMismatch.extraInCache in the log.
623+
// from the DB response.
626624
ExtraInCache []persistence.HistoryTaskKey
627625
// NextKeyMismatch is true when the cache and DB disagree on the next-page
628626
// boundary key, meaning a subsequent GetTask would start at different points.
@@ -639,28 +637,33 @@ func (q *cachedQueueReader) reportShadowComparison(
639637
dbResp *GetTaskResponse,
640638
logTags []tag.Tag,
641639
) {
642-
comparisonTags := append(logTags,
643-
tag.Dynamic("dbTaskCount", len(dbResp.Tasks)),
644-
tag.Dynamic("cacheTaskCount", len(cacheResp.Tasks)),
645-
)
646640
if !result.HasMismatches {
647-
q.logger.Debug("shadow comparison matched", comparisonTags...)
641+
q.logger.Debug("shadow comparison matched")
648642
return
649643
}
650644

651-
q.metrics.IncCounter(metrics.CachedQueueMismatchCounter)
652-
mismatchTags := append(comparisonTags,
645+
mismatchTags := append(logTags,
653646
tag.Dynamic("shadowMismatch.missingFromCache", result.MissingFromCache),
654647
tag.Dynamic("shadowMismatch.extraInCache", result.ExtraInCache),
655648
tag.Dynamic("shadowMismatch.nextKeyMismatch", result.NextKeyMismatch),
649+
tag.Dynamic("shadowMismatch.dbTaskCount", len(dbResp.Tasks)),
650+
tag.Dynamic("shadowMismatch.cacheTaskCount", len(cacheResp.Tasks)),
656651
)
657652
if cacheResp.Progress != nil {
658653
mismatchTags = append(mismatchTags, tag.Dynamic("shadowMismatch.cacheNextKey", cacheResp.Progress.NextTaskKey))
659654
}
660655
if dbResp.Progress != nil {
661656
mismatchTags = append(mismatchTags, tag.Dynamic("shadowMismatch.dbNextKey", dbResp.Progress.NextTaskKey))
662657
}
663-
q.logger.Warn("shadow comparison mismatch", mismatchTags...)
658+
659+
// NextKeyMismatch is a meaningful divergence even when task sets match, so count it as a mismatch.
660+
// ExtraInCache is benign given task independence, so it is logged at Info for observability but not counted.
661+
if result.NextKeyMismatch || len(result.MissingFromCache) > 0 {
662+
q.metrics.IncCounter(metrics.CachedQueueMismatchCounter)
663+
q.logger.Warn("shadow comparison mismatch", mismatchTags...)
664+
} else {
665+
q.logger.Info("shadow comparison mismatch (only extra tasks in cache)", mismatchTags...)
666+
}
664667
}
665668

666669
// findMismatchesInShadow compares a cache snapshot response against the DB

0 commit comments

Comments
 (0)