Skip to content

Commit 521b670

Browse files
author
Federico Ceratto
committed
Add replication monitoring
1 parent c19497f commit 521b670

File tree

6 files changed

+105
-0
lines changed

6 files changed

+105
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
# Deployed by ansible
# See roles/node_exporter/files/db_replication_mon_active
# active database --> standby
#
# Once per second, sample two round-trip times and publish them in $FN:
#   db_replication_rtt  RTT that `ss -i` reports for an established TCP
#                       connection towards 37.218.242.175
#   ping_rtt            fifth '/'-separated field of ping's final summary
#                       line for a single echo request to 10.1.0.1
#                       (the average RTT on iputils ping)
#
# Errors are reported as RTT=0
#
# NOTE(review): this is the script for the *active* host, yet both samples are
# labelled role="standby", exactly like the standby script. Confirm whether
# role="active" was intended before changing it; the label is left untouched.

set -ux
FN=/run/nodeexp/db_replication_socket.prom

# Only a plain decimal number is ever published. Anything else (empty output,
# an error message, several values at once) is replaced by 0 so that the
# generated file always has exactly one numeric sample per metric.
NUMBER_RE='^[0-9]+(\.[0-9]+)?$'

while true; do
  # ss emits one "rtt:<rtt>/<rttvar>" token per matching socket. Keep only the
  # first one: several established connections to the peer would otherwise
  # yield a multi-line value and a malformed metrics file.
  socket_rtt=$(ss -ntpi state established 'dst 37.218.242.175' | tr -s " " "\n" | grep -m1 '^rtt:' | cut -c5- | cut -d'/' -f1)
  [[ $socket_rtt =~ $NUMBER_RE ]] || socket_rtt=0

  # cut -s drops lines without a '/' delimiter; without it a failed ping could
  # leak its "... packets transmitted ..." summary line into the metric value.
  ping_rtt=$(ping -w 1 -c 1 10.1.0.1 | tail -n1 | cut -s -d'/' -f5)
  [[ $ping_rtt =~ $NUMBER_RE ]] || ping_rtt=0

  # Write to a per-process temporary file next to $FN, then rename it, so a
  # reader never observes a partially written file.
  tmp_file="$FN.$$"
  cat <<EOF > "$tmp_file"
db_replication_rtt{role="standby"} $socket_rtt
ping_rtt{role="standby"} $ping_rtt
EOF
  mv -- "$tmp_file" "$FN"
  sleep 1
done
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
# Deployed by ansible
# See roles/node_exporter/files/db_replication_mon_standby
# standby database --> active
#
# Once per second, sample two round-trip times and publish them in $FN:
#   db_replication_rtt  RTT that `ss -i` reports for an established TCP
#                       connection towards 37.218.240.56, destination port 5432
#   ping_rtt            fifth '/'-separated field of ping's final summary
#                       line for a single echo request to 10.1.0.2
#                       (the average RTT on iputils ping)
#
# Errors are reported as RTT=0

set -ux
FN=/run/nodeexp/db_replication_socket.prom

# Only a plain decimal number is ever published. Anything else (empty output,
# an error message, several values at once) is replaced by 0 so that the
# generated file always has exactly one numeric sample per metric.
NUMBER_RE='^[0-9]+(\.[0-9]+)?$'

while true; do
  # ss emits one "rtt:<rtt>/<rttvar>" token per matching socket. Keep only the
  # first one: several established connections to the peer would otherwise
  # yield a multi-line value and a malformed metrics file.
  socket_rtt=$(ss -ntpi state established 'dst 37.218.240.56 dport = 5432' | tr -s " " "\n" | grep -m1 '^rtt:' | cut -c5- | cut -d'/' -f1)
  [[ $socket_rtt =~ $NUMBER_RE ]] || socket_rtt=0

  # cut -s drops lines without a '/' delimiter; without it a failed ping could
  # leak its "... packets transmitted ..." summary line into the metric value.
  ping_rtt=$(ping -w 1 -c 1 10.1.0.2 | tail -n1 | cut -s -d'/' -f5)
  [[ $ping_rtt =~ $NUMBER_RE ]] || ping_rtt=0

  # Write to a per-process temporary file next to $FN, then rename it, so a
  # reader never observes a partially written file.
  tmp_file="$FN.$$"
  cat <<EOF > "$tmp_file"
db_replication_rtt{role="standby"} $socket_rtt
ping_rtt{role="standby"} $ping_rtt
EOF
  mv -- "$tmp_file" "$FN"
  sleep 1
done

ansible/roles/node_exporter/handlers/main.yml

+8
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,22 @@
33
service:
44
name: node_exporter
55
state: restarted
6+
67
# Handler: restart the seeksample service.
- name: restart seeksample
  service:
    state: restarted
    name: seeksample

# Handler: restart the tcpmetrics service.
- name: restart tcpmetrics
  service:
    state: restarted
    name: tcpmetrics

# Handler: restart the DB replication monitor (db_replication_mon.service).
- name: restart db_replication_mon
  service:
    state: restarted
    name: db_replication_mon

21+
1422
- name: restart collector-metrics
1523
service:
1624
name: collector-metrics

ansible/roles/node_exporter/tasks/main.yml

+20
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,26 @@
8989
dest: /etc/systemd/system/tcpmetrics.service
9090
when: "'have_tcpmetrics' in group_names"
9191

92+
# Hosts in the db_active group get the active-side monitor script, installed
# read-only and executable at /srv/db_replication_mon.
# NOTE(review): no handler is notified here, so a changed script only takes
# effect the next time db_replication_mon is restarted.
- name: Install db_replication_mon on active DB
  copy:
    src: db_replication_mon_active
    dest: /srv/db_replication_mon
    mode: "0555"
  when: "'db_active' in group_names"
  tags: db_replication_mon

96+
97+
# Hosts in the db_standby group get the standby-side monitor script, installed
# read-only and executable at the same path as on the active host.
# NOTE(review): no handler is notified here, so a changed script only takes
# effect the next time db_replication_mon is restarted.
- name: Install db_replication_mon on standby DB
  copy:
    src: db_replication_mon_standby
    dest: /srv/db_replication_mon
    mode: "0555"
  when: "'db_standby' in group_names"
  tags: db_replication_mon

101+
102+
# Render the systemd unit that runs /srv/db_replication_mon. The script is
# installed on both the active and the standby database host, so the unit must
# be installed on both groups as well (the condition previously tested
# 'db_active' twice and therefore never matched standby hosts).
- name: Install db_replication_mon systemd service file
  notify:
    - restart db_replication_mon
    - systemctl daemon-reload
  template:
    src: db_replication_mon.service
    dest: /etc/systemd/system/db_replication_mon.service
  when: "'db_active' in group_names or 'db_standby' in group_names"
  tags: db_replication_mon

111+
92112
- name: Install collector-metrics systemd service file
93113
notify:
94114
- restart collector-metrics
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Deployed by ansible
# see roles/node_exporter/templates/db_replication_mon.service
[Unit]
Description=DB replication monitor

[Service]
# The monitor runs as root.
Group=root
User=root

# Make sure the metrics directory exists before the monitor starts.
# NOTE(review): the monitor scripts write to /run/nodeexp; confirm that
# node_exporter_textfiles_path resolves to that same directory.
ExecStartPre=/usr/bin/install --owner root --group root --mode 0755 --directory {{ node_exporter_textfiles_path }}
ExecStart=/srv/db_replication_mon

# Always bring the monitor back if it exits; on stop only the main process
# is signalled.
Restart=always
KillMode=process

# Hardening
# NOTE(review): the monitor shells out to ping and `ss -p`; confirm both still
# work with only CAP_SETUID/CAP_SETGID in the bounding set and
# NoNewPrivileges=yes.
CapabilityBoundingSet=CAP_SETUID CAP_SETGID
NoNewPrivileges=yes
SystemCallFilter=~@clock @debug @cpu-emulation @keyring @module @mount @obsolete @raw-io @reboot @swap
PrivateDevices=yes
PrivateTmp=yes
ProtectHome=yes
ProtectKernelModules=yes
ProtectKernelTunables=yes
ProtectSystem=full

[Install]
WantedBy=multi-user.target

ansible/roles/plpsql/README.adoc

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
PostgreSQL metadb database
2+
3+
Deploys a temporary traffic forwarder using a VPN
4+
HKG -> mia-ps-test.ooni.nu -> AMS
5+
6+
Runs db_replication_mon.service on the HKG and AMS DB hosts to monitor the replication

0 commit comments

Comments
 (0)