File tree Expand file tree Collapse file tree 8 files changed +148
-6
lines changed Expand file tree Collapse file tree 8 files changed +148
-6
lines changed Original file line number Diff line number Diff line change 2121 POSTGRES_DB=test
2222 POSTGRES_HOST=postgres
2323 POSTGRES_PORT=5432
24- # Добавьте другие переменные, если нужно
2524 EOF
26- cat .env # Для проверки (опционально)
2725
2826 - name : Install Docker Compose (v2)
2927 run : |
9694 pwd
9795 mkdir -p roles/app/defaults
9896 echo -e "\npostgres_password: ${{ secrets.POSTGRES_PASS }}" >> roles/app/defaults/main.yaml
97+ echo -e "\ntelegram_bot_token: ${{ secrets.BOT_TOKEN}}" >> roles/app/defaults/main.yaml
98+ echo -e "\ntelegram_chat_id: ${{ secrets.BOT_CHAT_ID }}" >> roles/app/defaults/main.yaml
9999 ansible-playbook playbook.yaml
# Alertmanager configuration template.
# The $TELEGRAM_BOT_TOKEN / $TELEGRAM_CHAT_ID placeholders are shell-style
# variables; presumably this file is rendered with envsubst before
# Alertmanager loads it — confirm against the deployment tasks.
route:
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h

  # Default receiver, used when none of the child routes below match.
  receiver: 'telegram-warning'

  # Child routes are evaluated top to bottom.
  routes:
    - match:
        severity: 'critical'
      receiver: 'telegram-critical'
      # Keep evaluating the sibling routes after this one matches.
      continue: true

    - match:
        severity: 'warning'
      receiver: 'telegram-warning'

receivers:
  - name: 'telegram-critical'
    telegram_configs:
      - api_url: "https://api.telegram.org"
        bot_token: "$TELEGRAM_BOT_TOKEN"
        # Deliberately unquoted: after substitution this must be a YAML
        # integer (see the Alertmanager telegram_config reference).
        chat_id: $TELEGRAM_CHAT_ID
        message: "🚨 CRITICAL: {{ .CommonAnnotations.summary }}"
        parse_mode: "HTML"

  - name: 'telegram-warning'
    telegram_configs:
      - api_url: "https://api.telegram.org"
        bot_token: "$TELEGRAM_BOT_TOKEN"
        # Deliberately unquoted: must render as a YAML integer.
        chat_id: $TELEGRAM_CHAT_ID
        # The notification template root exposes CommonLabels / GroupLabels,
        # not Labels (Labels exists only on the individual entries of
        # .Alerts), so the severity is read from .CommonLabels. The "-" trim
        # markers keep the rendered message free of stray blank lines.
        message: |-
          {{ if eq .CommonLabels.severity "warning" -}}
          ⚠️ WARNING: {{ .CommonAnnotations.summary }}
          {{- else -}}
          ℹ️ INFO: {{ .CommonAnnotations.summary }}
          {{- end }}
        parse_mode: "HTML"
# Default role variables. These are placeholder values only; real secrets are
# expected to be supplied at deploy time (presumably by the CI workflow —
# confirm), never committed here.
postgres_password: "postgres"
telegram_bot_token: "none"
telegram_chat_id: "12345678"
Original file line number Diff line number Diff line change 2121 POSTGRES_DB=messages
2222 POSTGRES_HOST=postgres
2323 POSTGRES_PORT=5432
24+ TELEGRAM_BOT_TOKEN={{ telegram_bot_token }}
25+ TELEGRAM_CHAT_ID={{ telegram_chat_id }}
2426 mode : ' 0644'
2527
# Create the directory under app_dir that holds the Alertmanager config files.
- name: Ensure alertmanager directory exists
  ansible.builtin.file:
    path: "{{ app_dir }}/alertmanager"
    state: directory
    # Quoted so the mode is passed as the string '0755', not a parsed integer.
    mode: '0755'
33+
# Install the gettext-base package, which this play uses for envsubst.
# Only runs on Debian-family hosts, where apt is available.
- name: Install gettext for envsubst
  become: true
  ansible.builtin.apt:
    name: gettext-base
    state: present
  when: ansible_os_family == 'Debian'
40+
# Render alertmanager.yml from its template by substituting the Telegram
# credentials supplied through the task environment.
- name: Generate alertmanager.yml from template
  # The explicit variable list limits envsubst to these two placeholders, so
  # any other "$..." text in the template is left untouched.
  ansible.builtin.shell: |
    envsubst '$TELEGRAM_BOT_TOKEN $TELEGRAM_CHAT_ID' \
      < "{{ app_dir }}/alertmanager/alertmanager.yml.template" \
      > "{{ app_dir }}/alertmanager/alertmanager.yml"
  environment:
    TELEGRAM_BOT_TOKEN: "{{ telegram_bot_token }}"
    TELEGRAM_CHAT_ID: "{{ telegram_chat_id }}"
  args:
    executable: /bin/sh
  # The output file is rewritten on every run, so always report a change.
  changed_when: true
49+
2650- name : Run Docker Compose
2751 command : docker compose up -d --force-recreate
2852 args :
Original file line number Diff line number Diff line change @@ -38,7 +38,7 @@ services:
3838 ports :
3939 - " 9090:9090" # Веб-интерфейс Prometheus
4040 volumes :
41- - ./prometheus/prometheus.yml :/etc/prometheus/prometheus.yml # Конфиг
41+ - ./prometheus/:/etc/prometheus/ # Конфиr|Алерты
4242 networks :
4343 - app-network
4444 depends_on :
@@ -56,7 +56,36 @@ services:
5656 depends_on :
5757 - prometheus # Grafana зависит от Prometheus
5858
# node_exporter service (an entry of the compose file's `services:` mapping).
# Host /proc, /sys and / are mounted read-only and passed to the exporter via
# the --path.* flags below.
node_exporter:
  image: prom/node-exporter:latest
  container_name: node_exporter
  volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/rootfs:ro
  command:
    - '--path.procfs=/host/proc'
    - '--path.sysfs=/host/sys'
    # Point the exporter at the mounted host root; without this flag the
    # /rootfs mount above is never used.
    - '--path.rootfs=/rootfs'
    # `mount-points-exclude` is the current name of the deprecated
    # `ignored-mount-points` flag. `$$` is Compose escaping for a literal `$`.
    - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
  ports:
    - '9100:9100'
  networks:
    - app-network
  pid: "host"  # Needed for access to host system metrics
5976
# Alertmanager service (an entry of the compose file's `services:` mapping).
alertmanager:
  container_name: alertmanager
  image: prom/alertmanager:latest
  volumes:
    # Config file from ./alertmanager on the host; mounted read-only because
    # the container only needs to read it.
    - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
  ports:
    # Quoted so the mapping is always parsed as a string.
    - "9093:9093"
  networks:
    - app-network
  depends_on:
    - prometheus
88+
6089# Объявляем сети и тома
6190volumes :
6291 postgres_data : # Том для данных Postgres (не удаляется при перезапуске)
# Prometheus alerting rules for host-level metrics.
# Both expressions reference recorded series (instance:node_*:percentage),
# which must be defined by a recording-rules file loaded alongside this one.
groups:
  - name: host_alerts
    rules:
      - alert: HighCPU
        expr: instance:node_cpu_usage:percentage > 90
        # Hold time aligned with the description text below and with the
        # HighMemory rule (was 20s, which contradicted the "2 minutes" text).
        for: 2m
        labels:
          severity: "critical"
        annotations:
          summary: "High CPU usage ({{ $value }}%) on {{ $labels.instance }}"
          description: "CPU превысил 90% более чем на 2 минут."

      - alert: HighMemory
        expr: instance:node_memory_usage:percentage > 85
        for: 2m
        labels:
          severity: "warning"
        annotations:
          summary: "High RAM usage ({{ $value }}%) on {{ $labels.instance }}"
# Prometheus server configuration (post-change state of the diff).
global:
  scrape_interval: 15s  # How often to scrape metrics
  evaluation_interval: 20s  # How often to evaluate rules (alert/record)

# Rule files are referenced by their path inside the Prometheus container.
rule_files:
  - '/etc/prometheus/record.rules.yml'  # Recording rules
  - '/etc/prometheus/alert.rules.yml'  # Alerting rules

# Firing alerts are sent to the Alertmanager instance at alertmanager:9093.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']

scrape_configs:
  - job_name: "app"
    static_configs:
      - targets: ["app:8080"]

  - job_name: 'node_exporter'
    # Overrides the global 15s scrape interval for this job.
    scrape_interval: 5s
    static_configs:
      - targets: ['node_exporter:9100']
# Prometheus recording rules that precompute host CPU and memory usage
# percentages from node_exporter metrics.
groups:
  - name: host_metrics
    interval: 15s
    rules:
      # CPU usage %: 100 minus the per-instance average idle rate over 1m.
      - record: instance:node_cpu_usage:percentage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)
        labels:
          metric_type: "cpu"

      # Memory usage %: (total - available) / total.
      - record: instance:node_memory_usage:percentage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100
        labels:
          metric_type: "memory"
You can’t perform that action at this time.
0 commit comments