Skip to content

Commit 250f2c5

Browse files
authored
Merge pull request #5138 from grafana/dev
v1.10.3
2 parents a6f281a + 2545bf8 commit 250f2c5

27 files changed

+1000
-26
lines changed

.github/workflows/add-to-docs-project.yml

-15
This file was deleted.

engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_policy_snapshot.py

+29
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
from apps.alerts.models.escalation_policy import EscalationPolicy
1313
from apps.alerts.tasks import (
1414
custom_webhook_result,
15+
declare_incident,
1516
notify_all_task,
1617
notify_group_task,
1718
notify_user_task,
1819
resolve_by_last_step_task,
1920
)
21+
from apps.alerts.utils import is_declare_incident_step_enabled
2022
from apps.schedules.ical_utils import list_users_to_notify_from_ical
2123
from apps.user_management.models import User
2224

@@ -136,6 +138,7 @@ def execute(self, alert_group: "AlertGroup", reason) -> StepExecutionResultData:
136138
EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: self._escalation_step_notify_if_num_alerts_in_time_window,
137139
EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS: self._escalation_step_notify_multiple_users,
138140
EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT: self._escalation_step_notify_multiple_users,
141+
EscalationPolicy.STEP_DECLARE_INCIDENT: self._escalation_step_declare_incident,
139142
None: self._escalation_step_not_configured,
140143
}
141144
result = action_map[self.step](alert_group, reason)
@@ -410,6 +413,32 @@ def _escalation_step_notify_team_members(self, alert_group: "AlertGroup", reason
410413

411414
self._execute_tasks(tasks)
412415

416+
def _escalation_step_declare_incident(self, alert_group: "AlertGroup", _reason: str) -> None:
    """
    Handle the "Declare Incident" escalation step for ``alert_group``.

    When the step is not enabled for the organization that owns the alert group's channel,
    an escalation-failed log record is saved and no task is created. Otherwise a
    ``declare_incident`` task signature is built and handed to ``self._execute_tasks``.

    :param alert_group: alert group the escalation step is executed for
    :param _reason: unused; accepted so the handler matches the other step handlers' call signature
    """
    organization = alert_group.channel.organization
    if not is_declare_incident_step_enabled(organization=organization):
        # Step is disabled: record why the escalation step was not executed and stop here.
        failure_record = AlertGroupLogRecord(
            type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
            alert_group=alert_group,
            reason="Declare Incident step is not enabled",
            escalation_policy=self.escalation_policy,
            escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
            escalation_policy_step=self.step,
        )
        failure_record.save()
        return

    # The signature is immutable, i.e. (per Celery semantics) its arguments are fixed here
    # and are not extended with the result of a preceding task.
    declare_incident_signature = declare_incident.signature(
        args=(alert_group.pk,),
        kwargs={"escalation_policy_pk": self.id, "severity": self.severity},
        immutable=True,
    )
    self._execute_tasks([declare_incident_signature])
441+
413442
def _escalation_step_notify_if_time(self, alert_group: "AlertGroup", _reason: str) -> StepExecutionResultData:
414443
eta = None
415444

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Generated by Django 4.2.15 on 2024-10-04 16:38
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
6+
7+
class Migration(migrations.Migration):
    """Create the table backing the ``RelatedIncident`` model."""

    # Must run after the migrations that define the referenced Organization,
    # AlertGroup and ChannelFilter tables in their current state.
    dependencies = [
        ("user_management", "0022_alter_team_unique_together"),
        ("alerts", "0059_escalationpolicy_severity_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="RelatedIncident",
            fields=[
                (
                    "id",
                    models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID"),
                ),
                ("incident_id", models.CharField(db_index=True, max_length=50)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("is_active", models.BooleanField(default=True)),
                (
                    "attached_alert_groups",
                    models.ManyToManyField(related_name="related_incidents", to="alerts.alertgroup"),
                ),
                (
                    # Nullable: the incident row is kept (with NULL) when its channel filter is deleted.
                    "channel_filter",
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="related_incidents",
                        to="alerts.channelfilter",
                    ),
                ),
                (
                    # Incident rows are removed together with their organization.
                    "organization",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="related_incidents",
                        to="user_management.organization",
                    ),
                ),
            ],
            options={
                # An incident id may appear only once per organization.
                "unique_together": {("organization", "incident_id")},
            },
        ),
    ]

engine/apps/alerts/models/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .grafana_alerting_contact_point import GrafanaAlertingContactPoint # noqa: F401
1414
from .invitation import Invitation # noqa: F401
1515
from .maintainable_object import MaintainableObject # noqa: F401
16+
from .related_incident import RelatedIncident # noqa: F401
1617
from .resolution_note import ResolutionNote, ResolutionNoteSlackMessage # noqa: F401
1718
from .user_has_notification import UserHasNotification # noqa: F401
1819
from .user_notification_bundle import BundledNotification, UserNotificationBundle # noqa: F401

engine/apps/alerts/models/alert_group.py

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
AlertGroupLogRecord,
4545
AlertReceiveChannel,
4646
BundledNotification,
47+
RelatedIncident,
4748
ResolutionNote,
4849
ResolutionNoteSlackMessage,
4950
)
@@ -193,6 +194,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
193194
acknowledged_by_user: typing.Optional["User"]
194195
alerts: "RelatedManager['Alert']"
195196
bundled_notifications: "RelatedManager['BundledNotification']"
197+
related_incidents: "RelatedManager['RelatedIncident']"
196198
dependent_alert_groups: "RelatedManager['AlertGroup']"
197199
channel: "AlertReceiveChannel"
198200
log_records: "RelatedManager['AlertGroupLogRecord']"

engine/apps/alerts/models/alert_group_log_record.py

+52-3
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,24 @@
1111

1212
from apps.alerts import tasks
1313
from apps.alerts.constants import ActionSource
14+
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
1415
from apps.alerts.utils import render_relative_timeline
1516
from apps.slack.slack_formatter import SlackFormatter
1617
from common.utils import clean_markup
1718

1819
if typing.TYPE_CHECKING:
1920
from apps.alerts.models import AlertGroup, CustomButton, EscalationPolicy, Invitation
20-
from apps.user_management.models import User
21+
from apps.user_management.models import Organization, User
2122

2223
logger = logging.getLogger(__name__)
2324
logger.setLevel(logging.DEBUG)
2425

2526

27+
class RelatedIncidentData(typing.TypedDict):
    """Rendered incident details attached to a log record (link and title)."""

    # URL of the incident page; None when no incident id is available to build a link from
    incident_link: typing.Optional[str]
    # Human-readable incident title
    incident_title: str
30+
31+
2632
class AlertGroupLogRecord(models.Model):
2733
alert_group: "AlertGroup"
2834
author: typing.Optional["User"]
@@ -161,7 +167,9 @@ class AlertGroupLogRecord(models.Model):
161167
ERROR_ESCALATION_TRIGGER_CUSTOM_WEBHOOK_ERROR,
162168
ERROR_ESCALATION_NOTIFY_TEAM_MEMBERS_STEP_IS_NOT_CONFIGURED,
163169
ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED,
164-
) = range(20)
170+
ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
171+
ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED,
172+
) = range(22)
165173

166174
type = models.IntegerField(choices=TYPE_CHOICES)
167175

@@ -225,7 +233,14 @@ class AlertGroupLogRecord(models.Model):
225233
escalation_policy_step = models.IntegerField(null=True, default=None)
226234
step_specific_info = JSONField(null=True, default=None)
227235

228-
STEP_SPECIFIC_INFO_KEYS = ["schedule_name", "custom_button_name", "usergroup_handle", "source_integration_name"]
236+
STEP_SPECIFIC_INFO_KEYS = [
237+
"schedule_name",
238+
"custom_button_name",
239+
"usergroup_handle",
240+
"source_integration_name",
241+
"incident_id",
242+
"incident_title",
243+
]
229244

230245
def _make_log_line_link(self, url, title, html=False, for_slack=False, substitute_with_tag=False):
231246
if html and url:
@@ -244,6 +259,7 @@ def render_log_line_json(self):
244259
author = self.author.short(organization) if self.author is not None else None
245260
escalation_chain = self.alert_group.channel_filter.escalation_chain if self.alert_group.channel_filter else None
246261
step_info = self.get_step_specific_info()
262+
related_incident = self.render_incident_data_from_step_info(organization, step_info)
247263
escalation_chain_data = (
248264
{
249265
"pk": escalation_chain.public_primary_key,
@@ -280,6 +296,7 @@ def render_log_line_json(self):
280296
"type": self.type,
281297
"created_at": created_at,
282298
"author": author,
299+
"incident": related_incident,
283300
"escalation_chain": escalation_chain_data,
284301
"schedule": schedule,
285302
"webhook": webhook,
@@ -425,6 +442,14 @@ def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_
425442
result += f'triggered step "Notify on-call from Schedule {schedule_text}{important_text}"'
426443
elif escalation_policy_step == EscalationPolicy.STEP_REPEAT_ESCALATION_N_TIMES:
427444
result += "escalation started from the beginning"
445+
elif escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT:
446+
organization = self.alert_group.channel.organization
447+
incident_data = self.render_incident_data_from_step_info(organization, step_specific_info)
448+
incident_link = incident_data["incident_link"]
449+
incident_title = incident_data["incident_title"]
450+
tag = "related_incident" if substitute_with_tag else False
451+
incident_text = self._make_log_line_link(incident_link, incident_title, html, for_slack, tag)
452+
result += self.reason + f": {incident_text}"
428453
else:
429454
result += f'triggered step "{EscalationPolicy.get_step_display_name(escalation_policy_step)}"'
430455
elif self.type == AlertGroupLogRecord.TYPE_SILENCE:
@@ -640,8 +665,32 @@ def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_
640665
result += f"failed to notify User Group{usergroup_handle_text} in Slack"
641666
elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED:
642667
result += 'skipped escalation step "Trigger Outgoing Webhook" because it is disabled'
668+
elif (
669+
self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED
670+
):
671+
result += 'skipped escalation step "Declare Incident": step is not enabled'
672+
elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED:
673+
result += "failed to declare an Incident"
674+
if self.reason:
675+
result += f": {self.reason}"
643676
return result
644677

678+
def render_incident_data_from_step_info(
    self, organization: "Organization", step_specific_info: dict
) -> RelatedIncidentData | None:
    """
    Build incident link/title data from a log record's step-specific info.

    :param organization: organization passed to ``get_incident_url`` to build the incident link
    :param step_specific_info: step info mapping; may be empty or falsy
    :return: ``None`` when ``step_specific_info`` is falsy or lacks either the ``incident_title``
        or the ``incident_id`` key; otherwise a dict with ``incident_link`` (``None`` if the stored
        incident id is falsy) and ``incident_title`` (``DEFAULT_BACKUP_TITLE`` if the stored title
        is falsy).
    """
    # Imported locally rather than at module level (presumably to avoid a circular import).
    from apps.alerts.models.related_incident import get_incident_url

    required_keys = ["incident_title", "incident_id"]
    if not step_specific_info or any(key not in step_specific_info for key in required_keys):
        return None

    incident_id = step_specific_info["incident_id"]
    if incident_id:
        incident_link = get_incident_url(organization, incident_id)
    else:
        incident_link = None

    incident_title = step_specific_info["incident_title"] or DEFAULT_BACKUP_TITLE
    return {"incident_link": incident_link, "incident_title": incident_title}
693+
645694
def get_step_specific_info(self):
646695
step_specific_info = None
647696
# in some cases step_specific_info was saved with using json.dumps

engine/apps/alerts/models/escalation_policy.py

+3
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ class EscalationPolicy(OrderedModel):
9292
STEP_NOTIFY_IF_TIME,
9393
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
9494
STEP_REPEAT_ESCALATION_N_TIMES,
95+
STEP_DECLARE_INCIDENT,
9596
]
9697
# Steps can be stored in db while interacting with internal api
9798
# Includes important versions of default steps
@@ -218,6 +219,7 @@ class EscalationPolicy(OrderedModel):
218219
STEP_NOTIFY_IF_TIME,
219220
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
220221
STEP_REPEAT_ESCALATION_N_TIMES,
222+
STEP_DECLARE_INCIDENT,
221223
]
222224

223225
PUBLIC_STEP_CHOICES_MAP = {
@@ -239,6 +241,7 @@ class EscalationPolicy(OrderedModel):
239241
STEP_NOTIFY_IF_TIME: "notify_if_time_from_to",
240242
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: "notify_if_num_alerts_in_window",
241243
STEP_REPEAT_ESCALATION_N_TIMES: "repeat_escalation",
244+
STEP_DECLARE_INCIDENT: "declare_incident",
242245
}
243246

244247
public_primary_key = models.CharField(
engine/apps/alerts/models/related_incident.py

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import typing
2+
from urllib.parse import urljoin
3+
4+
from django.db import models
5+
6+
from common.constants.plugin_ids import PluginID
7+
8+
if typing.TYPE_CHECKING:
9+
from django.db.models.manager import RelatedManager
10+
11+
from apps.alerts.models import AlertGroup, ChannelFilter
12+
from apps.user_management.models import Organization
13+
14+
15+
def get_incident_url(organization, incident_id) -> str:
    """
    Build the URL of an incident page under the organization's Grafana URL.

    :param organization: object exposing ``grafana_url`` (base URL of the Grafana instance)
    :param incident_id: identifier of the incident, placed at the end of the URL path
    :return: ``<grafana_url>/a/<incident plugin id>/incidents/<incident_id>``; when
        ``grafana_url`` is empty, only the relative path is returned
    """
    base_url = organization.grafana_url or ""
    # urljoin() resolves a relative path against the *directory* of the base URL, so a base
    # without a trailing slash ("https://host/grafana") would lose its last path segment.
    # Normalizing to a trailing slash keeps Grafana instances served under a sub-path working.
    if base_url and not base_url.endswith("/"):
        base_url += "/"
    return urljoin(base_url, f"a/{PluginID.INCIDENT}/incidents/{incident_id}")
17+
18+
19+
class RelatedIncident(models.Model):
    """
    An incident, identified by ``incident_id`` within an organization, together with
    the alert groups attached to it.
    """

    # Type hints for related objects (for static type checkers only)
    attached_alert_groups: "RelatedManager['AlertGroup']"
    channel_filter: typing.Optional["ChannelFilter"]
    organization: "Organization"

    # Identifier of the incident; unique per organization (see Meta.unique_together)
    incident_id = models.CharField(db_index=True, max_length=50)
    # Deleting the organization deletes its related incidents
    organization = models.ForeignKey(
        "user_management.Organization",
        related_name="related_incidents",
        on_delete=models.CASCADE,
    )
    # Nullable: set to NULL when the channel filter is deleted
    channel_filter = models.ForeignKey(
        "alerts.ChannelFilter",
        related_name="related_incidents",
        null=True,
        on_delete=models.SET_NULL,
    )
    created_at = models.DateTimeField(auto_now_add=True)
    is_active = models.BooleanField(default=True)

    # Alert groups attached to this incident; reverse accessor is AlertGroup.related_incidents
    attached_alert_groups = models.ManyToManyField(
        "alerts.AlertGroup",
        related_name="related_incidents",
    )

    class Meta:
        unique_together = ("organization", "incident_id")

    def get_incident_link(self) -> str:
        """Return the URL of this incident as built by ``get_incident_url``."""
        return get_incident_url(self.organization, self.incident_id)

engine/apps/alerts/tasks/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
)
66
from .check_escalation_finished import check_escalation_finished_task # noqa: F401
77
from .custom_webhook_result import custom_webhook_result # noqa: F401
8+
from .declare_incident import declare_incident # noqa: F401
89
from .delete_alert_group import delete_alert_group # noqa: F401
910
from .delete_alert_group import finish_delete_alert_group # noqa: F401
1011
from .delete_alert_group import send_alert_group_signal_for_delete # noqa: F401

0 commit comments

Comments
 (0)