Skip to content

Commit 4569350

Browse files
author
xNetVN
committed
chore(config): resolve merge conflicts and update example config
Merged restart_command documentation and examples; updated config samples and docs as part of analysis report.
2 parents 7f64afa + 0446775 commit 4569350

File tree

8 files changed

+220
-25
lines changed

8 files changed

+220
-25
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
### Key Features
1414

1515
- **Automatic Recovery**: Auto-restart services when failures are detected
16+
- 🔧 **Sequential Restart Commands**: Support list-based restart commands and scripts
1617
- 📊 **Comprehensive Monitoring**: CPU, RAM, disk space, and critical services
1718
- 🌐 **HTTP/HTTPS Health Checks**: Detect connection errors, 4xx/5xx, timeouts, slow responses
1819
- 📧 **Multi-channel Notifications**: Email, Telegram, Slack, Discord, Webhook
@@ -80,6 +81,10 @@ The daemon loads an optional `.env` file from:
8081
Use `/opt/xnetvn_monitord/config/.env.example` as a template and copy it to
8182
`.env` without committing secrets.
8283

84+
The installer and auto-updater refresh `/opt/xnetvn_monitord/config/main.example.yaml`
85+
and `/opt/xnetvn_monitord/config/.env.example` on each install/upgrade, without
86+
overwriting `/opt/xnetvn_monitord/config/main.yaml` or `/opt/xnetvn_monitord/config/.env`.
87+
8388
You can also use systemd `EnvironmentFile` entries when running via systemd:
8489

8590
When using `${VAR}` in `config/main.yaml`, define environment variables via a
@@ -114,6 +119,11 @@ general:
114119
service_monitor:
115120
enabled: true
116121
action_on_failure: "restart_and_notify"
122+
services:
123+
- name: "nginx"
124+
restart_command:
125+
- "systemctl restart nginx"
126+
- "bash /opt/xnetvn_monitord/scripts/custom-restart.sh"
117127

118128
resource_monitor:
119129
enabled: true

config/main.example.yaml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,9 @@ service_monitor:
140140
# - check_command/check_timeout: Custom command and timeout (custom_command only)
141141
# - url/http_method/headers/timeout_seconds/expected_status_codes/
142142
# max_response_time_ms/verify_tls: HTTP/HTTPS health checks
143-
# - restart_command: Recovery command (string or list executed in order)
143+
# - restart_command: Recovery command to execute on failure.
144+
# Supports a single command string or a list of commands executed in order.
145+
# External scripts are supported (bash/php/python, etc.).
144146
# - pre_restart_hook/post_restart_hook: Optional hooks before/after recovery
145147
# - check_interval/action_cooldown: Per-service overrides for frequency/cooldown
146148
# - critical/description: Notification metadata
@@ -158,10 +160,11 @@ service_monitor:
158160
service_name: "nginx"
159161
# Optional regex to match multiple systemd units
160162
# service_name_pattern: "php.*-fpm\.service"
161-
# Restart command to execute when failure detected
162-
restart_command:
163+
# Example: execute multiple commands in order
164+
restart_command:
163165
- "systemctl restart nginx"
164-
#- "bash /path/to/scripts/custom-restart.sh" # Custom script
166+
#- "systemctl restart php*-fpm"
167+
#- "bash /opt/xnetvn_monitord/scripts/custom-restart.sh" # Custom script
165168
# Per-service check interval override
166169
check_interval:
167170
value: 30
@@ -183,7 +186,8 @@ service_monitor:
183186
enabled: false
184187
check_method: "process_regex"
185188
process_pattern: "php-fpm.*master"
186-
restart_command: "systemctl restart php*-fpm"
189+
restart_command:
190+
- "systemctl restart php*-fpm"
187191
critical: true
188192
description: "PHP-FPM master process"
189193

docs/en/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
## Key Features
1212

1313
- Service monitoring (systemd/OpenRC/SysV/process/regex/custom/HTTP/HTTPS).
14+
- Sequential restart commands via list-based `restart_command`.
1415
- Per-service check intervals and action cooldowns.
1516
- Pre- and post-recovery notifications.
1617
- CPU/RAM/Disk threshold monitoring.
@@ -63,6 +64,17 @@ Key sections:
6364
- `resource_monitor`: CPU/RAM/Disk thresholds and recovery actions
6465
- `notifications`: Email/Telegram and rate limits
6566

67+
Example restart command list:
68+
69+
```yaml
70+
service_monitor:
71+
services:
72+
- name: "nginx"
73+
restart_command:
74+
- "systemctl restart nginx"
75+
- "bash /opt/xnetvn_monitord/scripts/custom-restart.sh"
76+
```
77+
6678
## Environment Variables (.env + systemd)
6779
6880
The daemon loads an optional `.env` file from:
@@ -74,6 +86,10 @@ The daemon loads an optional `.env` file from:
7486
Use `/opt/xnetvn_monitord/config/.env.example` as a template and copy it to
7587
`.env` without committing secrets.
7688
89+
The installer and auto-updater refresh `/opt/xnetvn_monitord/config/main.example.yaml`
90+
and `/opt/xnetvn_monitord/config/.env.example` on every install/upgrade, without
91+
overwriting `/opt/xnetvn_monitord/config/main.yaml` or `/opt/xnetvn_monitord/config/.env`.
92+
7793
When using `${VAR}` in `config/main.yaml`, define environment variables via a
7894
systemd EnvironmentFile:
7995

scripts/install.sh

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,16 @@ copy_files() {
124124
exit 1
125125
fi
126126

127+
if [ -f "$SCRIPT_DIR/config/main.example.yaml" ]; then
128+
cp "$SCRIPT_DIR/config/main.example.yaml" "$CONFIG_DIR/main.example.yaml"
129+
log_info "Configuration example file refreshed"
130+
else
131+
log_warning "Configuration example file not found in repository"
132+
fi
133+
127134
if [ -f "$SCRIPT_DIR/config/.env.example" ]; then
128-
if [ -f "$CONFIG_DIR/.env.example" ]; then
129-
log_warning "Environment example already exists: $CONFIG_DIR/.env.example"
130-
else
131-
cp "$SCRIPT_DIR/config/.env.example" "$CONFIG_DIR/.env.example"
132-
log_info "Environment example file copied"
133-
fi
135+
cp "$SCRIPT_DIR/config/.env.example" "$CONFIG_DIR/.env.example"
136+
log_info "Environment example file refreshed"
134137
else
135138
log_warning "Environment example file not found in repository"
136139
fi

src/xnetvn_monitord/monitors/service_monitor.py

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -919,15 +919,32 @@ def _restart_service(self, service_config: Dict) -> bool:
919919
subprocess.run(pre_hook, shell=True, timeout=30)
920920

921921
# Restart the service
922-
logger.info(f"Executing restart command for {service_name}: {resolved_command}")
923922
if isinstance(resolved_command, list):
924-
result = subprocess.run(
923+
for command in resolved_command:
924+
logger.info(
925+
"Executing restart command for %s: %s",
926+
service_name,
927+
command,
928+
)
929+
result = subprocess.run(
930+
command,
931+
shell=True,
932+
capture_output=True,
933+
text=True,
934+
timeout=60,
935+
)
936+
if result.returncode != 0:
937+
logger.warning(
938+
"Restart command returned non-zero for %s: %s",
939+
service_name,
940+
(result.stderr or result.stdout or "").strip(),
941+
)
942+
else:
943+
logger.info(
944+
"Executing restart command for %s: %s",
945+
service_name,
925946
resolved_command,
926-
capture_output=True,
927-
text=True,
928-
timeout=60,
929947
)
930-
else:
931948
result = subprocess.run(
932949
resolved_command,
933950
shell=True,
@@ -957,28 +974,53 @@ def _restart_service(self, service_config: Dict) -> bool:
957974
logger.error(f"Error restarting service {service_name}: {str(e)}", exc_info=True)
958975
return False
959976

960-
def _resolve_restart_command(self, restart_command: Optional[str], service_config: Dict) -> Optional[Any]:
977+
def _resolve_restart_command(self, restart_command: Optional[Any], service_config: Dict) -> Optional[Any]:
961978
"""Resolve restart command based on available service manager.
962979
963980
Args:
964981
restart_command: Explicit restart command from configuration.
965982
service_config: Service configuration dictionary.
966983
967984
Returns:
968-
Command string or list, or None if not resolvable.
985+
Command string, list of commands, or None if not resolvable.
969986
"""
970987
service_name = service_config.get("service_name") or service_config.get("name")
971988
if not restart_command:
972989
if service_name:
973990
return self.service_manager.build_restart_command(service_name)
974991
return None
975992

976-
command_value = restart_command.strip()
977-
if command_value.startswith("systemctl") and not self.service_manager.is_systemd:
978-
if service_name:
979-
return self.service_manager.build_restart_command(service_name)
993+
if isinstance(restart_command, list):
994+
normalized_commands = [
995+
str(command).strip()
996+
for command in restart_command
997+
if str(command).strip()
998+
]
999+
if not normalized_commands:
1000+
if service_name:
1001+
return self.service_manager.build_restart_command(service_name)
1002+
return None
1003+
return normalized_commands
9801004

981-
return restart_command
1005+
if isinstance(restart_command, str):
1006+
command_value = restart_command.strip()
1007+
if not command_value:
1008+
if service_name:
1009+
return self.service_manager.build_restart_command(service_name)
1010+
return None
1011+
if command_value.startswith("systemctl") and not self.service_manager.is_systemd:
1012+
if service_name:
1013+
return self.service_manager.build_restart_command(service_name)
1014+
return command_value
1015+
1016+
logger.warning(
1017+
"Unsupported restart_command type for %s: %s",
1018+
service_name,
1019+
type(restart_command).__name__,
1020+
)
1021+
if service_name:
1022+
return self.service_manager.build_restart_command(service_name)
1023+
return None
9821024

9831025
def reset_restart_history(self) -> None:
9841026
"""Reset all restart history and cooldown trackers."""

src/xnetvn_monitord/utils/update_checker.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,8 @@ def apply_update(self, tarball_url: str) -> bool:
344344
logger.error("No extracted release directory found")
345345
return False
346346

347-
source_dir = extracted_dirs[0] / "src" / "xnetvn_monitord"
347+
release_root = extracted_dirs[0]
348+
source_dir = release_root / "src" / "xnetvn_monitord"
348349
if not source_dir.exists():
349350
logger.error("Release source directory not found: %s", source_dir)
350351
return False
@@ -361,6 +362,24 @@ def apply_update(self, tarball_url: str) -> bool:
361362

362363
shutil.rmtree(target_dir)
363364
shutil.copytree(source_dir, target_dir)
365+
366+
config_dir = self.install_dir / "config"
367+
config_dir.mkdir(parents=True, exist_ok=True)
368+
release_config_dir = release_root / "config"
369+
if release_config_dir.exists():
370+
example_config = release_config_dir / "main.example.yaml"
371+
if example_config.exists():
372+
shutil.copy2(example_config, config_dir / "main.example.yaml")
373+
else:
374+
logger.warning("Release missing main.example.yaml")
375+
376+
env_example = release_config_dir / ".env.example"
377+
if env_example.exists():
378+
shutil.copy2(env_example, config_dir / ".env.example")
379+
else:
380+
logger.warning("Release missing .env.example")
381+
else:
382+
logger.warning("Release missing config directory")
364383
except Exception as exc:
365384
logger.error("Failed to apply update: %s", exc)
366385
return False

tests/unit/test_service_monitor.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,6 +1390,47 @@ def test_should_restart_service_with_hooks(self, mocker):
13901390

13911391
assert mock_run.call_count >= 2
13921392

1393+
def test_should_execute_restart_command_sequence(self, mocker):
1394+
"""Test restart executes a list of commands sequentially."""
1395+
mock_run = mocker.patch("subprocess.run")
1396+
mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="")
1397+
mocker.patch("time.sleep")
1398+
1399+
monitor = ServiceMonitor({"enabled": True, "restart_wait_time": 0})
1400+
service_config = {
1401+
"name": "nginx",
1402+
"restart_command": [
1403+
"systemctl restart nginx",
1404+
"bash /opt/xnetvn_monitord/scripts/custom-restart.sh",
1405+
],
1406+
"check_method": "systemctl",
1407+
"service_name": "nginx",
1408+
}
1409+
1410+
mocker.patch.object(monitor, "_check_service", return_value={"running": True})
1411+
1412+
assert monitor._restart_service(service_config) is True
1413+
1414+
mock_run.assert_has_calls(
1415+
[
1416+
call(
1417+
"systemctl restart nginx",
1418+
shell=True,
1419+
capture_output=True,
1420+
text=True,
1421+
timeout=60,
1422+
),
1423+
call(
1424+
"bash /opt/xnetvn_monitord/scripts/custom-restart.sh",
1425+
shell=True,
1426+
capture_output=True,
1427+
text=True,
1428+
timeout=60,
1429+
),
1430+
],
1431+
any_order=False,
1432+
)
1433+
13931434
def test_should_execute_post_restart_hook_and_check_status(self, mocker):
13941435
"""Test post-restart hook execution and status verification."""
13951436
mock_run = mocker.patch("subprocess.run")

tests/unit/test_update_checker.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from __future__ import annotations
1818

1919
import json
20+
import shutil
21+
import tarfile
2022
from pathlib import Path
2123

2224
import pytest
@@ -158,3 +160,61 @@ def test_should_handle_fetch_failure(self, tmp_path, monkeypatch) -> None:
158160
assert result.checked is True
159161
assert result.update_available is False
160162
assert "Failed to fetch" in result.message
163+
164+
165+
class TestUpdateCheckerApplyUpdate:
166+
"""Tests for applying updates and refreshing example files."""
167+
168+
def test_should_refresh_example_files_without_overwriting_user_config(
169+
self, tmp_path, monkeypatch
170+
) -> None:
171+
"""Ensure example files are refreshed while user config stays intact."""
172+
install_dir = tmp_path / "install"
173+
install_dir.mkdir()
174+
175+
target_dir = install_dir / "xnetvn_monitord"
176+
target_dir.mkdir()
177+
(target_dir / "old.txt").write_text("old")
178+
179+
config_dir = install_dir / "config"
180+
config_dir.mkdir()
181+
(config_dir / "main.yaml").write_text("user-config")
182+
(config_dir / ".env").write_text("SECRET=1")
183+
(config_dir / "main.example.yaml").write_text("old example")
184+
(config_dir / ".env.example").write_text("old env")
185+
186+
package_root = tmp_path / "package" / "xnetvn_monitord-1.1.0"
187+
source_dir = package_root / "src" / "xnetvn_monitord"
188+
source_dir.mkdir(parents=True)
189+
(source_dir / "new.txt").write_text("new")
190+
191+
release_config = package_root / "config"
192+
release_config.mkdir(parents=True)
193+
(release_config / "main.example.yaml").write_text("new example")
194+
(release_config / ".env.example").write_text("new env")
195+
196+
tarball_path = tmp_path / "release.tar.gz"
197+
with tarfile.open(tarball_path, "w:gz") as tar_handle:
198+
tar_handle.add(package_root, arcname=package_root.name)
199+
200+
def _fake_urlretrieve(url: str, filename: str) -> None:
201+
shutil.copy(tarball_path, filename)
202+
203+
monkeypatch.setattr(
204+
"xnetvn_monitord.utils.update_checker.request.urlretrieve",
205+
_fake_urlretrieve,
206+
)
207+
208+
state_file = tmp_path / "state.json"
209+
checker = UpdateChecker(
210+
_build_config(state_file),
211+
current_version="1.0.0",
212+
install_dir=install_dir,
213+
)
214+
215+
assert checker.apply_update("https://example.com/release.tar.gz") is True
216+
assert (install_dir / "xnetvn_monitord" / "new.txt").read_text() == "new"
217+
assert (config_dir / "main.example.yaml").read_text() == "new example"
218+
assert (config_dir / ".env.example").read_text() == "new env"
219+
assert (config_dir / "main.yaml").read_text() == "user-config"
220+
assert (config_dir / ".env").read_text() == "SECRET=1"

0 commit comments

Comments
 (0)