diff --git a/.env b/.env new file mode 100644 index 0000000..bb99cbe --- /dev/null +++ b/.env @@ -0,0 +1,3 @@ +CONFIG_TIMEOUT = 10 # Timeout for requests +MONITOR_INTERVAL = 15 # Seconds +LOG_FILE = "monitor.log" \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..fc14588 --- /dev/null +++ b/README.md @@ -0,0 +1,54 @@ +Health Monitor +This is a Python-based HTTP endpoint health monitoring tool that periodically checks the availability of a list of configured URLs and logs their status. +This will create a monitor.log file, which keeps the status reports for the endpoints in a structured format. + + +Requirements +Python 3.7+ +pip installed + +Project Structure +. +├── main.py # Main script +├── endpoints.yaml # Configuration file for endpoints +├── .env # Environment variables +├── monitor.log # Rotating log file (will be auto-generated) +└── README.md # Project documentation (this file) + + + +Install dependencies by running: + +pip3 install -r requirements.txt + + + +Run the script with the command below: + +python main.py endpoints.yaml + +You’ll see output like: + + +Request to https://example.com/api took 0.231 seconds 200 +Request to https://api.example.org/post took 0.450 seconds 500 + +Logs +Logs are written to the file specified in .env (default: monitor.log) + +Old logs are rotated daily and kept for 7 days + +Stopping the Monitor +Just press Ctrl + C. The script will exit gracefully and log the shutdown. + +Tips +Place main.py, .env, and endpoints.yaml in the same directory + +Use JSON payloads for POST/PUT requests + +Use curl or Postman to validate endpoint formats before monitoring + +License +MIT License – free to use and modify. + +Let me know if you want a pre-filled repo template or a Dockerfile to containerize it! 
\ No newline at end of file diff --git a/endpoints.yaml b/endpoints.yaml new file mode 100644 index 0000000..e6fa742 --- /dev/null +++ b/endpoints.yaml @@ -0,0 +1,22 @@ +- body: '{"foo":"bar"}' + headers: + content-type: application/json + method: POST + name: sample body up + url: https://reqres.in/api/users + +- name: sample index up + url: https://jsonplaceholder.typicode.com/posts/1 + +- name: Current IP address + url: https://ipinfo.io/ip + +- body: "{}" + headers: + content-type: application/json + method: POST + name: sample body down + url: https://mock.codes/503 + +- name: sample error down + url: https://mock.codes/500 \ No newline at end of file diff --git a/main.py b/main.py index e3f2bef..6a9a7aa 100644 --- a/main.py +++ b/main.py @@ -1,52 +1,131 @@ import yaml import requests import time +import logging +import traceback +from urllib.parse import urlparse from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from logging.handlers import TimedRotatingFileHandler +from dotenv import load_dotenv +import os -# Function to load configuration from the YAML file +load_dotenv() + +CONFIG_TIMEOUT = int(os.getenv("CONFIG_TIMEOUT", 10)) +MONITOR_INTERVAL = int(os.getenv("MONITOR_INTERVAL", 15)) +LOG_FILE = os.getenv("LOG_FILE", "monitor.log") + + +# Set up rotating log handler +def setup_logging(): + logger = logging.getLogger("HealthMonitor") + logger.setLevel(logging.INFO) + handler = TimedRotatingFileHandler(LOG_FILE, when="midnight", backupCount=7) + formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + return logger + + +logger = setup_logging() + + +# Load config from YAML def load_config(file_path): - with open(file_path, 'r') as file: - return yaml.safe_load(file) + try: + with open(file_path, 'r') as file: + return yaml.safe_load(file) + except Exception as e: + logger.error(f"Failed to load config file: {e}, Please check 
yaml file should exist in the same root location where the python script is present") + raise + -# Function to perform health checks +# Perform individual health check def check_health(endpoint): - url = endpoint['url'] - method = endpoint.get('method') - headers = endpoint.get('headers') - body = endpoint.get('body') + url = endpoint.get('url') + method = endpoint.get('method', 'GET').upper() + headers = endpoint.get('headers', {}) + body = endpoint.get('body', None) + + if not url: + logger.warning("Skipping endpoint with missing URL.") + return None, "INVALID" try: - response = requests.request(method, url, headers=headers, json=body) - if 200 <= response.status_code < 300: - return "UP" - else: - return "DOWN" - except requests.RequestException: - return "DOWN" - -# Main function to monitor endpoints + response = requests.request(method, url, headers=headers, json=body, timeout=CONFIG_TIMEOUT) + print(f"Request to {url} took {response.elapsed.total_seconds()} seconds", response.status_code) + status = "UP" if 200 <= response.status_code < 300 else "DOWN" + logger.info(f"[{url}] Status: {status} (HTTP {response.status_code})") + return url, status + except requests.Timeout: + logger.warning(f"[{url}] Request timed out.") + return url, "DOWN" + except requests.RequestException as e: + logger.error(f"[{url}] Request failed: {e}") + return url, "DOWN" + except Exception: + logger.error(f"[{url}] Unexpected error:\n{traceback.format_exc()}") + return url, "DOWN" + + +# Extract domain from URL +def extract_domain(url): + try: + return urlparse(url).netloc + except Exception: + return "unknown" + + +# Main monitoring loop def monitor_endpoints(file_path): config = load_config(file_path) + + # Initialize availability stats domain_stats = defaultdict(lambda: {"up": 0, "total": 0}) while True: - for endpoint in config: - domain = endpoint["url"].split("//")[-1].split("/")[0] - result = check_health(endpoint) + start_time = time.time() - domain_stats[domain]["total"] += 1 
- if result == "UP": - domain_stats[domain]["up"] += 1 + with ThreadPoolExecutor(max_workers=10) as executor: + future_to_endpoint = { + executor.submit(check_health, endpoint): endpoint for endpoint in config + } - # Log cumulative availability percentages + for future in as_completed(future_to_endpoint): + url, status = future.result() + if url is None: + continue + domain = extract_domain(url) + domain_stats[domain]["total"] += 1 + if status == "UP": + domain_stats[domain]["up"] += 1 + + # Log availability per domain + # ...existing code... + # Log availability per domain for domain, stats in domain_stats.items(): - availability = round(100 * stats["up"] / stats["total"]) - print(f"{domain} has {availability}% availability percentage") + total = stats["total"] + up = stats["up"] + availability = round(100 * up / total) if total > 0 else 0 + + if availability == 100: + status = "UP" + elif availability == 0: + status = "DOWN" + else: + status = "PARTIAL" + + logger.info(f" - {domain:<35} ➤ {availability:>3}% {status}") + + logger.info("\n✅ Monitoring cycle complete.\n" + "-" * 60 + "\n") + + elapsed = time.time() - start_time + sleep_time = max(0, MONITOR_INTERVAL - elapsed) + time.sleep(sleep_time) - print("---") - time.sleep(15) -# Entry point of the program +# Entry point if __name__ == "__main__": import sys @@ -58,4 +137,5 @@ def monitor_endpoints(file_path): try: monitor_endpoints(config_file) except KeyboardInterrupt: - print("\nMonitoring stopped by user.") \ No newline at end of file + logger.info("Monitoring stopped by user.") + print("\nMonitoring stopped.") \ No newline at end of file diff --git a/monitor.log b/monitor.log new file mode 100644 index 0000000..a4f85ad --- /dev/null +++ b/monitor.log @@ -0,0 +1,92 @@ +2025-04-22 09:41:05,412 - INFO - [https://ipinfo.io/ip] Status: UP (HTTP 200) +2025-04-22 09:41:05,450 - INFO - [https://jsonplaceholder.typicode.com/posts/1] Status: UP (HTTP 200) +2025-04-22 09:41:05,507 - INFO - [https://mock.codes/503] 
Status: DOWN (HTTP 503) +2025-04-22 09:41:05,513 - INFO - [https://mock.codes/500] Status: DOWN (HTTP 500) +2025-04-22 09:41:05,587 - INFO - [https://reqres.in/api/users] Status: DOWN (HTTP 400) +2025-04-22 09:41:05,589 - INFO - - ipinfo.io ➤ 100% UP +2025-04-22 09:41:05,589 - INFO - - jsonplaceholder.typicode.com ➤ 100% UP +2025-04-22 09:41:05,589 - INFO - - mock.codes ➤ 0% DOWN +2025-04-22 09:41:05,589 - INFO - - reqres.in ➤ 0% DOWN +2025-04-22 09:41:05,589 - INFO - +✅ Monitoring cycle complete. +------------------------------------------------------------ + +2025-04-22 09:41:20,396 - INFO - [https://ipinfo.io/ip] Status: UP (HTTP 200) +2025-04-22 09:41:20,403 - INFO - [https://mock.codes/500] Status: DOWN (HTTP 500) +2025-04-22 09:41:20,437 - INFO - [https://mock.codes/503] Status: DOWN (HTTP 503) +2025-04-22 09:41:20,445 - INFO - [https://jsonplaceholder.typicode.com/posts/1] Status: UP (HTTP 200) +2025-04-22 09:41:20,554 - INFO - [https://reqres.in/api/users] Status: DOWN (HTTP 400) +2025-04-22 09:41:20,556 - INFO - - ipinfo.io ➤ 100% UP +2025-04-22 09:41:20,556 - INFO - - jsonplaceholder.typicode.com ➤ 100% UP +2025-04-22 09:41:20,556 - INFO - - mock.codes ➤ 0% DOWN +2025-04-22 09:41:20,556 - INFO - - reqres.in ➤ 0% DOWN +2025-04-22 09:41:20,556 - INFO - +✅ Monitoring cycle complete. 
+------------------------------------------------------------ + +2025-04-22 09:41:35,400 - INFO - [https://ipinfo.io/ip] Status: UP (HTTP 200) +2025-04-22 09:41:35,424 - INFO - [https://mock.codes/503] Status: DOWN (HTTP 503) +2025-04-22 09:41:35,428 - INFO - [https://mock.codes/500] Status: DOWN (HTTP 500) +2025-04-22 09:41:35,433 - INFO - [https://jsonplaceholder.typicode.com/posts/1] Status: UP (HTTP 200) +2025-04-22 09:41:35,654 - INFO - [https://reqres.in/api/users] Status: DOWN (HTTP 400) +2025-04-22 09:41:35,654 - INFO - - ipinfo.io ➤ 100% UP +2025-04-22 09:41:35,654 - INFO - - jsonplaceholder.typicode.com ➤ 100% UP +2025-04-22 09:41:35,654 - INFO - - mock.codes ➤ 0% DOWN +2025-04-22 09:41:35,654 - INFO - - reqres.in ➤ 0% DOWN +2025-04-22 09:41:35,654 - INFO - +✅ Monitoring cycle complete. +------------------------------------------------------------ + +2025-04-22 09:41:50,405 - INFO - [https://ipinfo.io/ip] Status: UP (HTTP 200) +2025-04-22 09:41:50,428 - INFO - [https://jsonplaceholder.typicode.com/posts/1] Status: UP (HTTP 200) +2025-04-22 09:41:50,447 - INFO - [https://mock.codes/500] Status: DOWN (HTTP 500) +2025-04-22 09:41:50,449 - INFO - [https://mock.codes/503] Status: DOWN (HTTP 503) +2025-04-22 09:41:50,645 - INFO - [https://reqres.in/api/users] Status: DOWN (HTTP 400) +2025-04-22 09:41:50,647 - INFO - - ipinfo.io ➤ 100% UP +2025-04-22 09:41:50,648 - INFO - - jsonplaceholder.typicode.com ➤ 100% UP +2025-04-22 09:41:50,648 - INFO - - mock.codes ➤ 0% DOWN +2025-04-22 09:41:50,648 - INFO - - reqres.in ➤ 0% DOWN +2025-04-22 09:41:50,648 - INFO - +✅ Monitoring cycle complete. 
+------------------------------------------------------------ + +2025-04-22 09:42:05,420 - INFO - [https://ipinfo.io/ip] Status: UP (HTTP 200) +2025-04-22 09:42:05,426 - INFO - [https://mock.codes/503] Status: DOWN (HTTP 503) +2025-04-22 09:42:05,454 - INFO - [https://mock.codes/500] Status: DOWN (HTTP 500) +2025-04-22 09:42:05,460 - INFO - [https://jsonplaceholder.typicode.com/posts/1] Status: UP (HTTP 200) +2025-04-22 09:42:05,583 - INFO - [https://reqres.in/api/users] Status: DOWN (HTTP 400) +2025-04-22 09:42:05,584 - INFO - - ipinfo.io ➤ 100% UP +2025-04-22 09:42:05,584 - INFO - - jsonplaceholder.typicode.com ➤ 100% UP +2025-04-22 09:42:05,584 - INFO - - mock.codes ➤ 0% DOWN +2025-04-22 09:42:05,584 - INFO - - reqres.in ➤ 0% DOWN +2025-04-22 09:42:05,584 - INFO - +✅ Monitoring cycle complete. +------------------------------------------------------------ + +2025-04-22 09:42:20,412 - INFO - [https://mock.codes/500] Status: DOWN (HTTP 500) +2025-04-22 09:42:20,413 - INFO - [https://mock.codes/503] Status: DOWN (HTTP 503) +2025-04-22 09:42:20,419 - INFO - [https://ipinfo.io/ip] Status: UP (HTTP 200) +2025-04-22 09:42:20,449 - INFO - [https://jsonplaceholder.typicode.com/posts/1] Status: UP (HTTP 200) +2025-04-22 09:42:20,723 - INFO - [https://reqres.in/api/users] Status: DOWN (HTTP 400) +2025-04-22 09:42:20,724 - INFO - - ipinfo.io ➤ 100% UP +2025-04-22 09:42:20,725 - INFO - - jsonplaceholder.typicode.com ➤ 100% UP +2025-04-22 09:42:20,725 - INFO - - mock.codes ➤ 0% DOWN +2025-04-22 09:42:20,725 - INFO - - reqres.in ➤ 0% DOWN +2025-04-22 09:42:20,725 - INFO - +✅ Monitoring cycle complete. 
+------------------------------------------------------------ + +2025-04-22 09:42:35,415 - INFO - [https://ipinfo.io/ip] Status: UP (HTTP 200) +2025-04-22 09:42:35,422 - INFO - [https://mock.codes/503] Status: DOWN (HTTP 503) +2025-04-22 09:42:35,422 - INFO - [https://mock.codes/500] Status: DOWN (HTTP 500) +2025-04-22 09:42:35,469 - INFO - [https://jsonplaceholder.typicode.com/posts/1] Status: UP (HTTP 200) +2025-04-22 09:42:35,712 - INFO - [https://reqres.in/api/users] Status: DOWN (HTTP 400) +2025-04-22 09:42:35,714 - INFO - - ipinfo.io ➤ 100% UP +2025-04-22 09:42:35,715 - INFO - - jsonplaceholder.typicode.com ➤ 100% UP +2025-04-22 09:42:35,715 - INFO - - mock.codes ➤ 0% DOWN +2025-04-22 09:42:35,715 - INFO - - reqres.in ➤ 0% DOWN +2025-04-22 09:42:35,715 - INFO - +✅ Monitoring cycle complete. +------------------------------------------------------------ + +2025-04-22 09:42:44,429 - INFO - Monitoring stopped by user. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b2ca1c5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +PyYAML>=6.0 +requests>=2.31.0 +dotenv \ No newline at end of file diff --git a/sample.yaml b/sample.yaml deleted file mode 100644 index b95f5a5..0000000 --- a/sample.yaml +++ /dev/null @@ -1,16 +0,0 @@ -- body: '{"foo":"bar"}' - headers: - content-type: application/json - method: POST - name: sample body up - url: https://dev-sre-take-home-exercise-rubric.us-east-1.recruiting-public.fetchrewards.com/body -- name: sample index up - url: https://dev-sre-take-home-exercise-rubric.us-east-1.recruiting-public.fetchrewards.com/ -- body: "{}" - headers: - content-type: application/json - method: POST - name: sample body down - url: https://dev-sre-take-home-exercise-rubric.us-east-1.recruiting-public.fetchrewards.com/body -- name: sample error down - url: https://dev-sre-take-home-exercise-rubric.us-east-1.recruiting-public.fetchrewards.com/error \ No newline at end of file diff --git 
a/screenshots/monitor.log.output.png b/screenshots/monitor.log.output.png new file mode 100644 index 0000000..77819b9 Binary files /dev/null and b/screenshots/monitor.log.output.png differ diff --git a/screenshots/requirements.png b/screenshots/requirements.png new file mode 100644 index 0000000..59dd318 Binary files /dev/null and b/screenshots/requirements.png differ diff --git a/screenshots/runscript.png b/screenshots/runscript.png new file mode 100644 index 0000000..2843b08 Binary files /dev/null and b/screenshots/runscript.png differ