#!/usr/bin/env python3
"""
Backuply Health Check Plugin for Icinga

Description:
    - Checks if backup servers are configured
    - Verifies backup jobs exist
    - Monitors recent task execution (--chours / --whours)
    - Detects failed backup tasks

Usage:
    ./check_backuply.py [--chours 24] [--whours 12]

Exit Codes:
    0 OK
    1 WARNING
    2 CRITICAL
    3 UNKNOWN
"""

import argparse
import contextlib
import json
import os
import shutil
import sqlite3
import subprocess
import sys
import time

# Plugin exit states, as listed in the module docstring.
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

# Default on-disk locations read by the checks.
SERVERS_CONFIG_PATH = "/var/backuply/conf/backup_servers.json"
JOBS_CONFIG_PATH = "/var/backuply/conf/backup.json"
TASKS_DB_PATH = "/var/backuply/db/tasks.db"
CPANEL_BINARY_PATH = "/usr/local/cpanel/cpanel"

RECENT_TASKS_QUERY = """
    SELECT actid, uuid, action, status_txt, status, progress,
           created, started, updated, ended
    FROM tasks
    WHERE created > ?
      AND action LIKE '%creating_backup_%'
    ORDER BY created DESC
"""


def _load_json_config(config_path, label):
    """Load a JSON config file, printing a CRITICAL line on failure.

    Args:
        config_path: Path of the JSON file to read.
        label: Lower-case description used in messages (e.g. "backup jobs").

    Returns:
        A ``(data, status)`` tuple. ``status`` is ``STATE_OK`` and ``data`` is
        the parsed JSON on success; otherwise ``status`` is ``STATE_CRITICAL``
        and ``data`` is ``None``. A tuple is used because JSON ``null`` also
        parses to ``None``.
    """
    if not os.path.exists(config_path):
        print(f"CRITICAL: {label.capitalize()} config not found at {config_path}")
        return None, STATE_CRITICAL

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
    except (ValueError, OSError) as e:
        print(f"CRITICAL: Failed to read {label} config: {e}")
        return None, STATE_CRITICAL

    # Falsy values fall through to the caller's "none configured" branch;
    # a truthy scalar cannot be counted or iterated, so reject it here.
    if data and not isinstance(data, (list, dict)):
        print(
            f"CRITICAL: Failed to read {label} config: "
            f"expected a JSON list or object, got {type(data).__name__}"
        )
        return None, STATE_CRITICAL

    return data, STATE_OK


def check_backup_servers(config_path=SERVERS_CONFIG_PATH):
    """Check if backup servers are configured.

    Args:
        config_path: JSON file listing the backup servers.

    Returns:
        ``0`` when at least one entry exists, ``2`` when the file is missing,
        unreadable, malformed or empty.
    """
    backup_servers, status = _load_json_config(config_path, "backup servers")
    if status != STATE_OK:
        return status

    if not backup_servers:
        print("CRITICAL: No backup servers configured")
        return STATE_CRITICAL

    print(f"OK: {len(backup_servers)} backup server(s) configured")
    return STATE_OK


def check_backup_jobs(config_path=JOBS_CONFIG_PATH):
    """Check if backup jobs are configured and at least one is active.

    A job counts as active when its ``schedule_status`` equals ``1``.

    Args:
        config_path: JSON file describing the backup jobs.

    Returns:
        ``0`` when an active job exists, ``1`` when jobs exist but none is
        active, ``2`` when the file is missing, unreadable, malformed or empty.
    """
    backup_jobs, status = _load_json_config(config_path, "backup jobs")
    if status != STATE_OK:
        return status

    if not backup_jobs:
        print("CRITICAL: No backup jobs configured")
        return STATE_CRITICAL

    # NOTE(review): the on-disk schema is not visible here. A list of job
    # objects is handled as before; an object is assumed to map ids to job
    # objects (confirm). Entries that are not objects are ignored rather than
    # crashing on ``.get``.
    jobs = backup_jobs.values() if isinstance(backup_jobs, dict) else backup_jobs
    active_jobs = [
        job for job in jobs
        if isinstance(job, dict) and job.get("schedule_status") == 1
    ]

    if not active_jobs:
        print("WARNING: No active backup jobs found")
        return STATE_WARNING

    print(f"OK: {len(active_jobs)} active backup job(s) configured")
    return STATE_OK


def _get_current_users_count():
    """Return the user count reported by ``whmapi1``, or ``None`` if unknown.

    Best effort: the count is only queried when the cPanel binary exists, and
    any failure to run the command or parse its output yields ``None``.
    """
    if not os.path.exists(CPANEL_BINARY_PATH):
        return None

    try:
        # Argument list instead of a shell string: no shell is needed here.
        process = subprocess.run(
            ["whmapi1", "--output=jsonpretty", "get_current_users_count"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        data = json.loads(process.stdout)
        return int(data["data"]["users"])
    except (OSError, ValueError, KeyError, TypeError):
        return None


def check_recent_tasks(argv=None, db_path=TASKS_DB_PATH):
    """Check recent task execution and detect failures from SQLite database.

    Backup tasks created within the last ``--chours`` hours are read from the
    ``tasks`` table. Every task whose ``status`` is not ``1`` is treated as
    failed/running. Such a task created within the last ``--whours`` hours is
    CRITICAL; one created earlier (but still inside ``--chours``) is WARNING.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
            Unknown arguments are ignored.
        db_path: Path of the SQLite tasks database.

    Returns:
        ``0`` OK, ``1`` WARNING (database missing or older failures),
        ``2`` CRITICAL (query error, recent failures, or no tasks although
        users exist), ``3`` UNKNOWN (invalid command-line arguments).
    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--chours", type=int, default=24,
                        help="Critical hours threshold (default: 24)")
    parser.add_argument("--whours", type=int, default=12,
                        help="Warning hours threshold (default: 12)")
    try:
        args, _ = parser.parse_known_args(argv)
    except SystemExit:
        # argparse exits with status 2, which a monitoring system would read
        # as CRITICAL; a usage error is reported as UNKNOWN instead.
        print("UNKNOWN: Invalid command-line arguments")
        return STATE_UNKNOWN

    critical_hours = args.chours
    warning_hours = args.whours

    if not os.path.exists(db_path):
        print(f"WARNING: Database not found at {db_path}")
        return STATE_WARNING

    now = int(time.time())
    critical_threshold = now - (critical_hours * 3600)
    warning_threshold = now - (warning_hours * 3600)

    try:
        # closing() guarantees the connection is released even when the
        # query itself raises.
        with contextlib.closing(sqlite3.connect(db_path)) as conn:
            recent_tasks = conn.execute(
                RECENT_TASKS_QUERY, (critical_threshold,)
            ).fetchall()
    except sqlite3.Error as e:
        print(f"CRITICAL: Failed to query database: {e}")
        return STATE_CRITICAL

    users_count = _get_current_users_count()

    if not recent_tasks and users_count is not None and users_count > 0:
        print(f"CRITICAL: No tasks executed in last {critical_hours}h but {users_count} user(s) exist")
        return STATE_CRITICAL

    failed_tasks = [
        {
            'id': actid,
            'uuid': uuid,
            'created': created,
            'status': status,
            'status_txt': status_txt or f'Status {status}',
            'task': action or 'Unknown',
            'progress': progress or 0,
        }
        for actid, uuid, action, status_txt, status, progress, created, _, _, _
        in recent_tasks
        if status != 1
    ]

    critical_failures = [t for t in failed_tasks if t['created'] > warning_threshold]
    warning_failures = [
        t for t in failed_tasks
        if warning_threshold >= t['created'] > critical_threshold
    ]

    if critical_failures:
        # The query only reaches back critical_hours, so the effective window
        # for this bucket is the smaller of the two thresholds.
        recent_window = min(warning_hours, critical_hours)
        failed_tasks_info = ", ".join(
            f"{t['task']}({t['status_txt']})" for t in critical_failures[:3]
        )
        print(f"CRITICAL: {len(critical_failures)} failed/running task(s) in last {recent_window}h: {failed_tasks_info}")
        return STATE_CRITICAL

    if warning_failures:
        failed_tasks_info = ", ".join(
            f"{t['task']}({t['status_txt']})" for t in warning_failures[:3]
        )
        print(f"WARNING: {len(warning_failures)} failed/running task(s) between {warning_hours}h and {critical_hours}h ago: {failed_tasks_info}")
        return STATE_WARNING

    users_info = f', {users_count} users' if users_count is not None else ', no users'
    print(f"OK: {len(recent_tasks)} task(s) in last {critical_hours}h, no failures detected{users_info}")
    return STATE_OK


def main():
    """Run every check in order and return the first non-OK exit state.

    Returns ``0`` immediately when the ``backuply`` executable is not on
    ``PATH``, and ``3`` (UNKNOWN) when a check raises unexpectedly.
    """
    if not shutil.which("backuply"):
        print("OK: Backuply is not installed on this server")
        return STATE_OK

    try:
        for check_fn in (check_backup_servers, check_backup_jobs, check_recent_tasks):
            result = check_fn()
            if result != STATE_OK:
                return result
    except Exception as e:  # top-level boundary: a traceback would exit 1 (WARNING)
        print(f"UNKNOWN: Unexpected error while running checks: {e}")
        return STATE_UNKNOWN

    return STATE_OK


if __name__ == "__main__":
    sys.exit(main())