[queue alert] Close issue if too many comments #6714

Open · wants to merge 6 commits into main
tools/torchci/check_alerts.py (4 additions, 3 deletions)
@@ -363,10 +363,11 @@ def create_issue(issue: Dict, dry_run: bool) -> Dict:
print(f"Creating issue with content:{os.linesep}{issue}")
if dry_run:
print("NOTE: Dry run activated, not doing any real work")
return
return {"number": -1, "closed": False, "body": ""}
r = requests.post(CREATE_ISSUE_URL, json=issue, headers=headers)
r.raise_for_status()
return {"number": r.json()["number"], "closed": False}
res = r.json()
return {"number": res["number"], "closed": False, "body": res["body"]}


def fetch_hud_data(repo: str, branch: str) -> Any:
@@ -574,7 +575,7 @@ def check_for_recurrently_failing_jobs_alert(
         new_issue = create_issue(
             generate_failed_job_issue(repo=repo, branch=branch, failed_jobs=[]), dry_run
         )
-        existing_alerts.push(new_issue)
+        existing_alerts.append(new_issue)
 
     # Always favor the most recent issue, close all other ones
     existing_issue = existing_alerts[-1]
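
Two notes on the check_alerts.py hunks. The dry-run branch of create_issue now returns a sentinel dict instead of None, because callers append the result to their alert lists and then index into it; the body field is also echoed back, presumably so callers can inspect what the created issue says. The second hunk fixes an outright bug: Python lists have no push method, so existing_alerts.push(new_issue) raised AttributeError, and append is the correct call. A minimal sketch of the resulting create_issue contract (the non-dry-run branch below is an illustrative stand-in, not the torchci source):

# Minimal sketch of the create_issue contract after this change; the
# non-dry-run branch is an illustrative stand-in for the real API call.
from typing import Any, Dict


def create_issue(issue: Dict[str, Any], dry_run: bool) -> Dict[str, Any]:
    if dry_run:
        # Sentinel dict: downstream code can index into the result
        # without special-casing dry runs.
        return {"number": -1, "closed": False, "body": ""}
    # The real function POSTs to the GitHub API and echoes back the
    # created issue's number and body.
    return {"number": 1, "closed": False, "body": issue.get("body", "")}


existing_alerts = [create_issue({"title": "queue alert", "body": ""}, dry_run=True)]
print(existing_alerts[-1]["number"])  # -1, and the indexing does not crash
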
tools/torchci/queue_alert.py (12 additions, 2 deletions)
@@ -6,7 +6,13 @@

 import requests
 from setuptools import distutils  # type: ignore[import]
-from torchci.check_alerts import clear_alerts, create_issue, fetch_alerts, update_issue
+from torchci.check_alerts import (
+    clear_alerts,
+    close_if_too_many_comments,
+    create_issue,
+    fetch_alerts,
+    update_issue,
+)
 
 
 REPO_ROOT = Path(__file__).resolve().parent.parent.parent
@@ -101,12 +107,16 @@ def queuing_alert(dry_run: bool) -> None:
         clear_alerts(existing_alerts, dry_run=dry_run)
         return
 
+    existing_alerts = [
+        x for x in existing_alerts if not close_if_too_many_comments(x, dry_run)
+    ]
+
     if len(existing_alerts) == 0:
         # Generate a blank issue if there are no issues with the label and
         # re-fetch the issues so we can post an update comment, which will
         # trigger a more informative workchat ping
         new_issue = create_issue(gen_issue([]), dry_run)
-        existing_alerts.push(new_issue)
+        existing_alerts.append(new_issue)
 
     # Favor the most recent issue and close the rest
     existing_issue = existing_alerts[-1]
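
close_if_too_many_comments and SOFT_COMMENT_THRESHOLD come from check_alerts.py and are not part of this diff, so only their contract is visible here: the helper returns True (closing the issue as a side effect) when an alert has accumulated too many comments, and the list comprehension drops such alerts from existing_alerts. A plausible sketch of that contract, inferred from the call site above and the test below; the threshold value and the closing mechanics are assumptions:

# Plausible sketch of the imported helper; the threshold value and the
# closing mechanics are assumptions, not the torchci source.
from typing import Any, Dict

SOFT_COMMENT_THRESHOLD = 20  # illustrative value only


def close_if_too_many_comments(issue: Dict[str, Any], dry_run: bool) -> bool:
    """Return True when the issue is too noisy to keep, closing it as a
    side effect so the caller can drop it from its list of live alerts."""
    total = issue.get("comments", {}).get("totalCount", 0)
    if total <= SOFT_COMMENT_THRESHOLD:
        return False
    if not dry_run:
        # The real helper would close the issue through the GitHub API,
        # e.g. by PATCHing the issue with {"state": "closed"}.
        pass
    return True

When the only live alert is filtered out this way, existing_alerts becomes empty and the len(existing_alerts) == 0 branch files a fresh issue, so the alert survives the rollover.
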
tools/torchci/tests/test_check_alerts.py (51 additions, 1 deletion)
@@ -2,18 +2,21 @@
 from datetime import datetime, timedelta, timezone
 from typing import Any, Dict, List
 from unittest import main, TestCase
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 from torchci.check_alerts import (
     check_for_no_flaky_tests_alert,
+    close_if_too_many_comments,
     fetch_alerts_filter,
     filter_job_names,
     gen_update_comment,
     generate_no_flaky_tests_issue,
     handle_flaky_tests_alert,
     JobStatus,
     PYTORCH_ALERT_LABEL,
+    SOFT_COMMENT_THRESHOLD,
 )
+from torchci.queue_alert import gen_issue, QueueInfo, queuing_alert
 
 
 JOB_NAME = "periodic / linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck / test (default, 2, 2, linux.4xlarge.nvidia.gpu)"
@@ -356,5 +359,52 @@ def test_fetch_alert(self, mocked_alerts):
             self.assertListEqual(alerts, case["expected"])
 
 
+class TestQueueAlert(TestCase):
+    @patch("torchci.queue_alert.update_issue")
+    @patch("torchci.queue_alert.create_issue")
+    @patch("torchci.queue_alert.close_if_too_many_comments")
+    @patch("torchci.queue_alert.fetch_alerts")
+    @patch("torchci.queue_alert.filter_long_queues")
+    @patch("torchci.queue_alert.requests.get")
+    def test_close_if_too_many_comments(
+        self, mock_get, mock_filter, mock_fetch, mock_close, mock_create, mock_update
+    ):
+        # Test that an issue with too many comments is closed and a new
+        # one is opened
+
+        # Set up a mock response for API calls
+        mock_get_response = MagicMock()
+        mock_get_response.json.return_value = [{"mock": "data"}]
+        mock_get.return_value = mock_get_response
+
+        # Set up queues that need an alert
+        queue_info = QueueInfo("linux.gpu.nvidia", 100, 5.0)
+        mock_filter.return_value = [queue_info]
+
+        # Set up the scenario: an existing alert that has too many comments
+        existing_issue = {
+            "number": 123,
+            "closed": False,
+            "body": "- linux.gpu.nvidia, 80 machines, 4.5 hours",
+            "comments": {"totalCount": SOFT_COMMENT_THRESHOLD + 1},
+        }
+        mock_fetch.return_value = [existing_issue]
+
+        # Make close_if_too_many_comments return True to simulate closing
+        mock_close.return_value = True
+
+        # Set up create_issue to return a new issue
+        new_issue = {"number": 456, "closed": False, "body": ""}
+        mock_create.return_value = new_issue
+
+        # Run the function under test
+        queuing_alert(dry_run=False)
+
+        # Verify we closed the old issue and created a new one
+        mock_close.assert_called_with(existing_issue, False)
+        mock_create.assert_called_once()
+        mock_update.assert_called_once()
+
+
 if __name__ == "__main__":
     main()
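
Two notes on the new test. @patch decorators apply bottom-up, so the mock arguments arrive in reverse decorator order: mock_get (the innermost patch, torchci.queue_alert.requests.get) comes first and mock_update (the outermost) comes last. To run just this case locally, something along these lines should work, assuming the torchci package resolves, e.g. with tools/ on PYTHONPATH (an assumption about the local setup, not stated in the PR):

python -m unittest torchci.tests.test_check_alerts.TestQueueAlert.test_close_if_too_many_comments -v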