Skip to content

Commit 802c6c3

Browse files
author
Kroner
committed
add alertmanager
1 parent 274b412 commit 802c6c3

File tree

8 files changed

+148
-6
lines changed

8 files changed

+148
-6
lines changed

.github/workflows/main.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@ jobs:
2121
POSTGRES_DB=test
2222
POSTGRES_HOST=postgres
2323
POSTGRES_PORT=5432
24-
# Добавьте другие переменные, если нужно
2524
EOF
26-
cat .env # Для проверки (опционально)
2725
2826
- name: Install Docker Compose (v2)
2927
run: |
@@ -96,4 +94,6 @@ jobs:
9694
pwd
9795
mkdir -p roles/app/defaults
9896
echo -e "\npostgres_password: ${{ secrets.POSTGRES_PASS }}" >> roles/app/defaults/main.yaml
97+
echo -e "\ntelegram_bot_token: ${{ secrets.BOT_TOKEN}}" >> roles/app/defaults/main.yaml
98+
echo -e "\ntelegram_chat_id: ${{ secrets.BOT_CHAT_ID }}" >> roles/app/defaults/main.yaml
9999
ansible-playbook playbook.yaml
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
route:
2+
group_wait: 30s
3+
group_interval: 5m
4+
repeat_interval: 1h
5+
6+
# Дефолтный получатель (если ни один route не сработал)
7+
receiver: 'telegram-warning'
8+
9+
# Правила маршрутизации (проверяются сверху вниз!)
10+
routes:
11+
- match:
12+
severity: 'critical'
13+
receiver: 'telegram-critical'
14+
continue: true
15+
16+
- match:
17+
severity: 'warning'
18+
receiver: 'telegram-warning'
19+
20+
receivers:
21+
- name: 'telegram-critical'
22+
telegram_configs:
23+
- api_url: "https://api.telegram.org"
24+
bot_token: "$TELEGRAM_BOT_TOKEN"
25+
chat_id: $TELEGRAM_CHAT_ID
26+
message: "🚨 CRITICAL: {{ .CommonAnnotations.summary }}"
27+
parse_mode: "HTML"
28+
29+
- name: 'telegram-warning'
30+
telegram_configs:
31+
- api_url: "https://api.telegram.org"
32+
bot_token: "$TELEGRAM_BOT_TOKEN"
33+
chat_id: $TELEGRAM_CHAT_ID
34+
message: |  # rendered against group-level template data: use .CommonLabels (there is no top-level .Labels)
35+
{{ if eq .CommonLabels.severity "warning" }}
36+
⚠️ WARNING: {{ .CommonAnnotations.summary }}
37+
{{ else }}
38+
ℹ️ INFO: {{ .CommonAnnotations.summary }}
39+
{{ end }}
40+
parse_mode: "HTML"

ansible/defaults/main.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
postgres_password: "postgres"
1+
postgres_password: "postgres"
2+
telegram_bot_token: "none"
3+
telegram_chat_id: "12345678"

ansible/roles/app/tasks/main.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,32 @@
2121
POSTGRES_DB=messages
2222
POSTGRES_HOST=postgres
2323
POSTGRES_PORT=5432
24+
TELEGRAM_BOT_TOKEN={{ telegram_bot_token }}
25+
TELEGRAM_CHAT_ID={{ telegram_chat_id }}
2426
mode: '0644'
2527

28+
- name: Ensure alertmanager directory exists
29+
ansible.builtin.file:
30+
path: "{{ app_dir }}/alertmanager"
31+
state: directory
32+
mode: '0755'
33+
34+
- name: Install gettext for envsubst
35+
become: true  # canonical boolean instead of the YAML 1.1 truthy "yes"
36+
ansible.builtin.apt:  # fully qualified module name, matching the other tasks in this file
37+
name: gettext-base
38+
state: present
39+
when: ansible_os_family == 'Debian'
40+
41+
- name: Generate alertmanager.yml from template
42+
ansible.builtin.shell: |
43+
# Substitute only the two Telegram variables so any other "$" text in the template is left untouched.
envsubst '$TELEGRAM_BOT_TOKEN $TELEGRAM_CHAT_ID' < "{{ app_dir }}/alertmanager/alertmanager.yml.template" > "{{ app_dir }}/alertmanager/alertmanager.yml"
44+
environment:
45+
TELEGRAM_BOT_TOKEN: "{{ telegram_bot_token }}"
46+
TELEGRAM_CHAT_ID: "{{ telegram_chat_id }}"
47+
args:
48+
executable: /bin/sh
49+
2650
- name: Run Docker Compose
2751
command: docker compose up -d --force-recreate
2852
args:

docker-compose.yaml

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ services:
3838
ports:
3939
- "9090:9090" # Веб-интерфейс Prometheus
4040
volumes:
41-
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml # Конфиг
41+
- ./prometheus/:/etc/prometheus/ # Mounts the whole prometheus/ directory: main config plus alert and record rule files
4242
networks:
4343
- app-network
4444
depends_on:
@@ -56,7 +56,36 @@ services:
5656
depends_on:
5757
- prometheus # Grafana зависит от Prometheus
5858

59+
# Добавляем node_exporter
60+
node_exporter:
61+
image: prom/node-exporter:latest
62+
container_name: node_exporter
63+
volumes:
64+
- /proc:/host/proc:ro
65+
- /sys:/host/sys:ro
66+
- /:/rootfs:ro
67+
command:
68+
- '--path.procfs=/host/proc'
69+
- '--path.sysfs=/host/sys'
70+
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'  # current name of the deprecated ignored-mount-points flag; "$$" is Compose escaping for "$"
71+
ports:
72+
- '9100:9100'
73+
networks:
74+
- app-network
75+
pid: "host" # Important for access to system metrics
5976

77+
alertmanager:
78+
container_name: alertmanager
79+
image: prom/alertmanager:latest
80+
volumes:
81+
- ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
82+
ports:
83+
- "9093:9093"
84+
networks:
85+
- app-network
86+
depends_on:
87+
- prometheus
88+
6089
# Объявляем сети и тома
6190
volumes:
6291
postgres_data: # Том для данных Postgres (не удаляется при перезапуске)

prometheus/alert.rules.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
groups:
2+
- name: host_alerts
3+
rules:
4+
- alert: HighCPU
5+
expr: instance:node_cpu_usage:percentage > 90
6+
for: 20s
7+
labels:
8+
severity: "critical"
9+
annotations:
10+
summary: "High CPU usage ({{ $value }}%) on {{ $labels.instance }}"
11+
description: "CPU превысил 90% более чем на 20 секунд."  # keep in sync with this rule's "for: 20s"
12+
13+
- alert: HighMemory
14+
expr: instance:node_memory_usage:percentage > 85
15+
for: 2m
16+
labels:
17+
severity: "warning"
18+
annotations:
19+
summary: "High RAM usage ({{ $value }}%) on {{ $labels.instance }}"

prometheus/prometheus.yml

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,22 @@
11
global:
2-
scrape_interval: 15s # Частота сбора метрик
2+
scrape_interval: 15s # How often to scrape metrics
3+
evaluation_interval: 20s # How often to evaluate rules (alert/record)
4+
5+
rule_files:
6+
- '/etc/prometheus/record.rules.yml' # Record Rules
7+
- '/etc/prometheus/alert.rules.yml'
8+
9+
alerting:
10+
alertmanagers:
11+
- static_configs:
12+
- targets: ['alertmanager:9093']
313

414
scrape_configs:
515
- job_name: "app"
616
static_configs:
7-
- targets: ["app:8080"] # Имя сервиса из docker-compose + порт метрик
17+
- targets: ["app:8080"]
18+
19+
- job_name: 'node_exporter'
20+
scrape_interval: 5s
21+
static_configs:
22+
- targets: ['node_exporter:9100']

prometheus/record.rules.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
groups:
2+
- name: host_metrics
3+
interval: 15s
4+
rules:
5+
- record: instance:node_cpu_usage:percentage
6+
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)
7+
labels:
8+
metric_type: "cpu"
9+
10+
- record: instance:node_memory_usage:percentage
11+
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100
12+
labels:
13+
metric_type: "memory"

0 commit comments

Comments
 (0)