diff --git a/src/sentry/tasks/seer/night_shift/agentic_triage.py b/src/sentry/tasks/seer/night_shift/agentic_triage.py index 41541d0c94ef23..9ff7fe507a8419 100644 --- a/src/sentry/tasks/seer/night_shift/agentic_triage.py +++ b/src/sentry/tasks/seer/night_shift/agentic_triage.py @@ -19,6 +19,7 @@ fixability_score_strategy, priority_label, ) +from sentry.tasks.seer.night_shift.skip_cache import mark_skipped from sentry.tasks.seer.night_shift.triage_tools import ( get_event_details_agentic_triage, get_issue_details_agentic_triage, @@ -191,6 +192,10 @@ def _triage_candidates( }, ) + for v in triage_response.verdicts: + if v.group_id in groups_by_id and v.action == TriageAction.SKIP: + mark_skipped(v.group_id) + return [ TriageResult(group=groups_by_id[v.group_id], action=v.action, reason=v.reason) for v in triage_response.verdicts diff --git a/src/sentry/tasks/seer/night_shift/simple_triage.py b/src/sentry/tasks/seer/night_shift/simple_triage.py index eaad266cf4ca28..d746e64af2e8f9 100644 --- a/src/sentry/tasks/seer/night_shift/simple_triage.py +++ b/src/sentry/tasks/seer/night_shift/simple_triage.py @@ -13,6 +13,7 @@ from sentry.seer.autofix.utils import is_issue_category_eligible from sentry.snuba.referrer import Referrer from sentry.tasks.seer.night_shift.models import TriageAction, TriageResult +from sentry.tasks.seer.night_shift.skip_cache import recently_skipped from sentry.types.group import PriorityLevel logger = logging.getLogger("sentry.tasks.seer.night_shift") @@ -65,16 +66,22 @@ def fixability_score_strategy( referrer=Referrer.SEER_NIGHT_SHIFT_FIXABILITY_SCORE_STRATEGY.value, ) + skipped_ids = recently_skipped(g.id for g in result.results) + logger.info( "night_shift.search_results", extra={ "num_projects": len(projects), "num_results": len(result.results), + "num_skip_filtered": len(skipped_ids), + "num_kept_after_skip_filter": len(result.results) - len(skipped_ids), }, ) candidates: list[ScoredCandidate] = [] for group in result.results: + if group.id in skipped_ids: + continue if not is_issue_category_eligible(group): continue diff --git a/src/sentry/tasks/seer/night_shift/skip_cache.py b/src/sentry/tasks/seer/night_shift/skip_cache.py new file mode 100644 index 00000000000000..dd150dfeb63edb --- /dev/null +++ b/src/sentry/tasks/seer/night_shift/skip_cache.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import logging +from collections.abc import Iterable +from datetime import timedelta + +from redis.client import StrictRedis +from rediscluster import RedisCluster + +from sentry.utils.redis import redis_clusters + +logger = logging.getLogger(__name__) + +# Padded past 3 days so nightly-run jitter can't expire a key right at the +# 3-day boundary; guarantees the next 3 nightly runs suppress the issue. +SKIP_TTL_SECONDS = int(timedelta(days=3, hours=12).total_seconds()) +KEY_PREFIX = "seer:night-shift:skip:" + + +def mark_skipped(group_id: int) -> None: + try: + _client().set(key(group_id), "1", ex=SKIP_TTL_SECONDS) + except Exception: + logger.exception( + "seer.night_shift.skip_cache.mark_skipped_failed", + extra={"group_id": group_id}, + ) + + +def recently_skipped(group_ids: Iterable[int]) -> set[int]: + ids = list(group_ids) + if not ids: + return set() + + try: + pipeline = _client().pipeline() + for gid in ids: + pipeline.get(key(gid)) + values = pipeline.execute() + except Exception: + logger.exception("seer.night_shift.skip_cache.recently_skipped_failed") + return set() + + return {gid for gid, val in zip(ids, values) if val is not None} + + +def key(group_id: int) -> str: + return f"{KEY_PREFIX}{group_id}" + + +def _client() -> RedisCluster[str] | StrictRedis[str]: + return redis_clusters.get("default") diff --git a/tests/sentry/tasks/seer/night_shift/test_skip_cache.py b/tests/sentry/tasks/seer/night_shift/test_skip_cache.py new file mode 100644 index 00000000000000..0d969d5afe8f1c --- /dev/null +++ b/tests/sentry/tasks/seer/night_shift/test_skip_cache.py @@ -0,0 +1,65 @@ +from datetime import timedelta +from unittest.mock import patch + +from sentry.tasks.seer.night_shift.skip_cache import ( + SKIP_TTL_SECONDS, + key, + mark_skipped, + recently_skipped, +) +from sentry.utils.redis import redis_clusters + + +def _delete(group_id: int) -> None: + redis_clusters.get("default").delete(key(group_id)) + + +def test_mark_then_recently_skipped_returns_id() -> None: + try: + mark_skipped(101) + assert recently_skipped([101]) == {101} + finally: + _delete(101) + + +def test_recently_skipped_returns_only_marked_ids() -> None: + try: + mark_skipped(202) + assert recently_skipped([202, 203, 204]) == {202} + finally: + _delete(202) + + +def test_recently_skipped_empty_input() -> None: + assert recently_skipped([]) == set() + + +def test_ttl_padded_past_three_days() -> None: + try: + mark_skipped(305) + ttl = redis_clusters.get("default").ttl(key(305)) + assert int(timedelta(days=3).total_seconds()) < ttl <= SKIP_TTL_SECONDS + finally: + _delete(305) + + +def test_deleted_key_no_longer_recently_skipped() -> None: + mark_skipped(406) + _delete(406) + assert recently_skipped([406]) == set() + + +def test_mark_skipped_swallows_redis_errors() -> None: + with patch( + "sentry.tasks.seer.night_shift.skip_cache._client", + side_effect=ConnectionError("redis down"), + ): + mark_skipped(501) + + +def test_recently_skipped_returns_empty_on_redis_errors() -> None: + with patch( + "sentry.tasks.seer.night_shift.skip_cache._client", + side_effect=ConnectionError("redis down"), + ): + assert recently_skipped([601, 602]) == set() diff --git a/tests/sentry/tasks/seer/test_night_shift.py b/tests/sentry/tasks/seer/test_night_shift.py index 2ddc6f464b7725..f3b8a24ddb69a8 100644 --- a/tests/sentry/tasks/seer/test_night_shift.py +++ b/tests/sentry/tasks/seer/test_night_shift.py @@ -17,9 +17,12 @@ ) from sentry.tasks.seer.night_shift.models import TriageAction from sentry.tasks.seer.night_shift.simple_triage import fixability_score_strategy +from sentry.tasks.seer.night_shift.skip_cache import key as skip_cache_key +from sentry.tasks.seer.night_shift.skip_cache import mark_skipped from sentry.testutils.cases import SnubaTestCase, TestCase from sentry.testutils.helpers.datetime import before_now from sentry.testutils.pytest.fixtures import django_db_all +from sentry.utils.redis import redis_clusters class FakeExplorerClient: @@ -500,16 +503,42 @@ def test_skips_autofix_for_skip_candidates(self) -> None: project, "skip-me", seer_fixability_score=0.9, times_seen=5 ) - with self._patched_night_shift([(group.id, "skip")]) as (mock_trigger, mock_logger): + with ( + self._patched_night_shift([(group.id, "skip")]) as (mock_trigger, mock_logger), + patch("sentry.tasks.seer.night_shift.agentic_triage.mark_skipped") as mock_mark_skipped, + ): run_night_shift_for_org(org.id) mock_trigger.assert_not_called() log_calls = [call.args[0] for call in mock_logger.info.call_args_list] assert "night_shift.no_fixable_candidates" in log_calls + mock_mark_skipped.assert_called_once_with(group.id) run = SeerNightShiftRun.objects.get(organization=org) assert not SeerNightShiftRunResult.objects.filter(run=run).exists() + def test_filters_recently_skipped_groups(self) -> None: + org = self.create_organization() + project = self.create_project(organization=org) + self._make_eligible(project) + + skipped_group = self._store_event_and_update_group( + project, "already-skipped", seer_fixability_score=0.9, times_seen=5 + ) + other_group = self._store_event_and_update_group( + project, "fresh", seer_fixability_score=0.9, times_seen=5 + ) + + mark_skipped(skipped_group.id) + try: + with self._patched_night_shift([(other_group.id, "autofix")]) as (mock_trigger, _): + run_night_shift_for_org(org.id) + finally: + redis_clusters.get("default").delete(skip_cache_key(skipped_group.id)) + + mock_trigger.assert_called_once() + assert mock_trigger.call_args.kwargs["group"].id == other_group.id + def test_skips_autofix_when_no_seer_quota(self) -> None: org = self.create_organization() project = self.create_project(organization=org)