[queue alert] Close issue if too many comments #6714

Open · wants to merge 6 commits into main
tools/torchci/check_alerts.py (4 additions, 3 deletions)
@@ -363,10 +363,11 @@ def create_issue(issue: Dict, dry_run: bool) -> Dict:
print(f"Creating issue with content:{os.linesep}{issue}")
if dry_run:
print("NOTE: Dry run activated, not doing any real work")
return
return {"number": -1, "closed": False, "body": ""}
r = requests.post(CREATE_ISSUE_URL, json=issue, headers=headers)
r.raise_for_status()
return {"number": r.json()["number"], "closed": False}
res = r.json()
return {"number": res["number"], "closed": False, "body": res["body"]}


def fetch_hud_data(repo: str, branch: str) -> Any:
@@ -574,7 +575,7 @@ def check_for_recurrently_failing_jobs_alert(
         new_issue = create_issue(
             generate_failed_job_issue(repo=repo, branch=branch, failed_jobs=[]), dry_run
         )
-        existing_alerts.push(new_issue)
+        existing_alerts.append(new_issue)
 
     # Always favor the most recent issue, close all other ones
     existing_issue = existing_alerts[-1]
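
Two notes on the check_alerts.py hunks. The dry-run branch of create_issue now returns a sentinel dict instead of None, because callers append the result to their alert lists and then index into it; the body field is also echoed back, presumably so callers can inspect what the created issue says. The second hunk fixes an outright bug: Python lists have no push method, so existing_alerts.push(new_issue) raised AttributeError, and append is the correct call. A minimal sketch of the resulting create_issue contract (the non-dry-run branch below is an illustrative stand-in, not the torchci source):

# Minimal sketch of the create_issue contract after this change; the
# non-dry-run branch is an illustrative stand-in for the real API call.
from typing import Any, Dict


def create_issue(issue: Dict[str, Any], dry_run: bool) -> Dict[str, Any]:
    if dry_run:
        # Sentinel dict: downstream code can index into the result
        # without special-casing dry runs.
        return {"number": -1, "closed": False, "body": ""}
    # The real function POSTs to the GitHub API and echoes back the
    # created issue's number and body.
    return {"number": 1, "closed": False, "body": issue.get("body", "")}


existing_alerts = [create_issue({"title": "queue alert", "body": ""}, dry_run=True)]
print(existing_alerts[-1]["number"])  # -1, and the indexing does not crash
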
tools/torchci/queue_alert.py (12 additions, 2 deletions)
@@ -6,7 +6,13 @@

 import requests
 from setuptools import distutils  # type: ignore[import]
-from torchci.check_alerts import clear_alerts, create_issue, fetch_alerts, update_issue
+from torchci.check_alerts import (
+    clear_alerts,
+    close_if_too_many_comments,
+    create_issue,
+    fetch_alerts,
+    update_issue,
+)
 
 
 REPO_ROOT = Path(__file__).resolve().parent.parent.parent
@@ -101,12 +107,16 @@ def queuing_alert(dry_run: bool) -> None:
         clear_alerts(existing_alerts, dry_run=dry_run)
         return
 
+    existing_alerts = [
+        x for x in existing_alerts if not close_if_too_many_comments(x, dry_run)
+    ]
+
     if len(existing_alerts) == 0:
         # Generate a blank issue if there are no issues with the label and
         # re-fetch the issues so we can post an update comment, which will
         # trigger a more informative workchat ping
         new_issue = create_issue(gen_issue([]), dry_run)
-        existing_alerts.push(new_issue)
+        existing_alerts.append(new_issue)
 
     # Favor the most recent issue and close the rest
     existing_issue = existing_alerts[-1]
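
close_if_too_many_comments and SOFT_COMMENT_THRESHOLD come from check_alerts.py and are not part of this diff, so only their contract is visible here: the helper returns True (closing the issue as a side effect) when an alert has accumulated too many comments, and the list comprehension drops such alerts from existing_alerts. A plausible sketch of that contract, inferred from the call site above and the test below; the threshold value and the closing mechanics are assumptions:

# Plausible sketch of the imported helper; the threshold value and the
# closing mechanics are assumptions, not the torchci source.
from typing import Any, Dict

SOFT_COMMENT_THRESHOLD = 20  # illustrative value only


def close_if_too_many_comments(issue: Dict[str, Any], dry_run: bool) -> bool:
    """Return True when the issue is too noisy to keep, closing it as a
    side effect so the caller can drop it from its list of live alerts."""
    total = issue.get("comments", {}).get("totalCount", 0)
    if total <= SOFT_COMMENT_THRESHOLD:
        return False
    if not dry_run:
        # The real helper would close the issue through the GitHub API,
        # e.g. by PATCHing the issue with {"state": "closed"}.
        pass
    return True

When the only live alert is filtered out this way, existing_alerts becomes empty and the len(existing_alerts) == 0 branch files a fresh issue, so the alert survives the rollover.
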
tools/torchci/tests/test_check_alerts.py (51 additions, 1 deletion)
@@ -2,18 +2,21 @@
 from datetime import datetime, timedelta, timezone
 from typing import Any, Dict, List
 from unittest import main, TestCase
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 from torchci.check_alerts import (
     check_for_no_flaky_tests_alert,
+    close_if_too_many_comments,
     fetch_alerts_filter,
     filter_job_names,
     gen_update_comment,
     generate_no_flaky_tests_issue,
     handle_flaky_tests_alert,
     JobStatus,
     PYTORCH_ALERT_LABEL,
+    SOFT_COMMENT_THRESHOLD,
 )
+from torchci.queue_alert import gen_issue, QueueInfo, queuing_alert
 
 
 JOB_NAME = "periodic / linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck / test (default, 2, 2, linux.4xlarge.nvidia.gpu)"
@@ -356,5 +359,52 @@ def test_fetch_alert(self, mocked_alerts):
             self.assertListEqual(alerts, case["expected"])
 
 
+class TestQueueAlert(TestCase):
+    @patch("torchci.queue_alert.update_issue")
+    @patch("torchci.queue_alert.create_issue")
+    @patch("torchci.queue_alert.close_if_too_many_comments")
+    @patch("torchci.queue_alert.fetch_alerts")
+    @patch("torchci.queue_alert.filter_long_queues")
+    @patch("torchci.queue_alert.requests.get")
+    def test_close_if_too_many_comments(
+        self, mock_get, mock_filter, mock_fetch, mock_close, mock_create, mock_update
+    ):
+        # Test that an issue with too many comments is closed and a new
+        # one is opened
+
+        # Set up a mock response for API calls
+        mock_get_response = MagicMock()
+        mock_get_response.json.return_value = [{"mock": "data"}]
+        mock_get.return_value = mock_get_response
+
+        # Set up queues that need an alert
+        queue_info = QueueInfo("linux.gpu.nvidia", 100, 5.0)
+        mock_filter.return_value = [queue_info]
+
+        # Set up the scenario: an existing alert that has too many comments
+        existing_issue = {
+            "number": 123,
+            "closed": False,
+            "body": "- linux.gpu.nvidia, 80 machines, 4.5 hours",
+            "comments": {"totalCount": SOFT_COMMENT_THRESHOLD + 1},
+        }
+        mock_fetch.return_value = [existing_issue]
+
+        # Make close_if_too_many_comments return True to simulate closing
+        mock_close.return_value = True
+
+        # Set up create_issue to return a new issue
+        new_issue = {"number": 456, "closed": False, "body": ""}
+        mock_create.return_value = new_issue
+
+        # Run the function under test
+        queuing_alert(dry_run=False)
+
+        # Verify we closed the old issue and created a new one
+        mock_close.assert_called_with(existing_issue, False)
+        mock_create.assert_called_once()
+        mock_update.assert_called_once()
+
+
 if __name__ == "__main__":
     main()
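
Two notes on the new test. @patch decorators apply bottom-up, so the mock arguments arrive in reverse decorator order: mock_get (the innermost patch, torchci.queue_alert.requests.get) comes first and mock_update (the outermost) comes last. To run just this case locally, something along these lines should work, assuming the torchci package resolves, e.g. with tools/ on PYTHONPATH (an assumption about the local setup, not stated in the PR):

python -m unittest torchci.tests.test_check_alerts.TestQueueAlert.test_close_if_too_many_comments -v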