#!/bin/bash
#
# check_a2_cagefs_503_errors
#
# Detects the CageFS failure mode where sites return HTTP 503 until CageFS is
# remounted, and resets CageFS when it is found.
#
# Flow:
#   1. Do nothing (no output, exit 0) unless `cagefsctl` is on PATH.
#   2. Look for a trigger: either a "reached the limit on mounts" line in the
#      system log within the last 15 minutes, or `su` failing for every
#      cPanel user.
#   3. Only when triggered, probe every domain reported by whmapi1. If at
#      least one answers 503: log it, print the critical status line, reset
#      CageFS and exit 2. Otherwise print the OK status line and exit 0.
#
# Output: a single status line on stdout whose leading number equals the exit
# code (0 = OK, 2 = critical). Details are appended to $log_file.

log_file="/var/log/check_503.log"
messages_file="/var/log/messages"
cagefsctl_bin="/usr/sbin/cagefsctl"
mount_error_window=900 # seconds; the 15-minute look-back for mount errors

msg_ok="0 check_503 - OK"
msg_crit="2 check_503 - Possible 503 errors situation, resetting CageFS."

error_count=0 # number of domains that answered 503 (set by count_503_domains)

#######################################
# Append a timestamped line to the log file.
# Globals:   log_file (read)
# Arguments: $1 - message text
#######################################
log_message() {
  local message=$1
  printf '%s %s\n' "$(date '+%b %d %T')" "$message" >> "$log_file"
}

#######################################
# Check whether the most recent "reached the limit on mounts" line in the
# system log is at most $mount_error_window seconds old.
# Globals:   messages_file, mount_error_window (read)
# Returns:   0 if a recent mount error exists, 1 otherwise
#######################################
check_mount_error() {
  local last_line stamp event_epoch now_epoch age

  [[ -r "$messages_file" ]] || return 1

  last_line=$(grep -F -- "reached the limit on mounts" "$messages_file" | tail -n 1)
  [[ -n "$last_line" ]] || return 1

  # The first three fields are the syslog timestamp ("Mon DD HH:MM:SS").
  stamp=$(awk '{print $1, $2, $3}' <<< "$last_line")

  # An unparsable timestamp must not be treated as a match.
  event_epoch=$(date -d "$stamp" +%s 2> /dev/null) || return 1
  now_epoch=$(date +%s)

  # The timestamp has no year, so date assumes the current one. A line written
  # late in December and read in January therefore lands in the future;
  # re-read it as belonging to the previous year.
  if ((event_epoch > now_epoch)); then
    event_epoch=$(date -d "$stamp $(($(date +%Y) - 1))" +%s 2> /dev/null) || return 1
  fi

  age=$((now_epoch - event_epoch))
  if ((age >= 0 && age <= mount_error_window)); then
    log_message "Mount errors found in system log within last 15 minutes"
    return 0
  fi
  return 1
}

#######################################
# Try a no-op `su` as every cPanel user (root excluded).
# Returns:   0 only if there is at least one user and su failed for all of
#            them; 1 otherwise (including when the user list is empty).
#######################################
check_su_error() {
  local -a cp_users=()
  local cp_user
  local failed=0

  # -x makes the filter exact so that only the literal user "root" (and blank
  # lines) are dropped, not every name that merely contains "root".
  mapfile -t cp_users < <(
    whmapi1 list_users --output=jsonpretty \
      | jq -r '.data.users[]' \
      | grep -vx -e root -e ''
  )
  ((${#cp_users[@]} > 0)) || return 1

  for cp_user in "${cp_users[@]}"; do
    if ! su - "$cp_user" -s /bin/sh -c "exit" &> /dev/null; then
      failed=$((failed + 1))
    fi
  done

  if ((failed == ${#cp_users[@]})); then
    log_message "su attempt failed for $failed/${#cp_users[@]} users."
    return 0
  fi
  return 1
}

#######################################
# Print one "<domain> <ipv4>" pair per line for every domain known to WHM.
# A single whmapi1 call serves the whole run.
#######################################
list_domain_ips() {
  whmapi1 --output=jsonpretty get_domain_info \
    | jq -r '.data.domains[] | "\(.domain) \(.ipv4)"'
}

#######################################
# Fetch the final HTTP status code for a domain, connecting to the given IP
# instead of whatever DNS returns.
# Arguments: $1 - domain name, $2 - IPv4 address to connect to
# Outputs:   the status code on stdout
#######################################
probe_status() {
  local domain=$1
  local ip=$2
  local status
  # Both port rules are supplied so that a followed redirect to HTTPS on the
  # same host is also sent to $ip.
  local -a curl_opts=(
    --connect-timeout 2 -sL -o /dev/null -w '%{http_code}'
    --connect-to "${domain}:80:${ip}"
    --connect-to "${domain}:443:${ip}"
  )

  status=$(curl "${curl_opts[@]}" "http://${domain}")
  if [[ "$status" == "301" ]]; then
    # Still a redirect after following: ask the HTTPS site directly.
    status=$(curl "${curl_opts[@]}" "https://${domain}")
  fi
  printf '%s' "$status"
}

#######################################
# Probe every domain and count those answering 503, logging each one.
# Globals:   error_count (written)
#######################################
count_503_domains() {
  local -a pairs=()
  local pair domain ip status

  error_count=0
  mapfile -t pairs < <(list_domain_ips)

  for pair in "${pairs[@]}"; do
    [[ -n "$pair" ]] || continue
    read -r domain ip <<< "$pair"
    status=$(probe_status "$domain" "$ip")
    if [[ "$status" == "503" ]]; then
      error_count=$((error_count + 1))
      log_message "$domain showing 503 error."
    fi
  done
}

#######################################
# Restart CageFS and re-apply it to all users.
# Command output goes to the log so stdout stays a single status line.
# Globals:   cagefsctl_bin, log_file (read)
#######################################
reset_cagefs() {
  {
    systemctl restart cagefs
    "$cagefsctl_bin" --disable-all
    "$cagefsctl_bin" --enable-all
  } >> "$log_file" 2>&1
}

#######################################
# Run the check and exit with the matching status (0 = OK, 2 = critical).
# Globals:   msg_ok, msg_crit, error_count (read)
#######################################
run_check() {
  if ! check_mount_error && ! check_su_error; then
    printf '%s\n' "$msg_ok"
    exit 0
  fi

  count_503_domains
  if ((error_count == 0)); then
    printf '%s\n' "$msg_ok"
    exit 0
  fi

  log_message "$msg_crit"
  printf '%s\n' "$msg_crit"
  # The reset has to happen before exiting, otherwise it never runs.
  reset_cagefs
  exit 2
}

main() {
  # Hosts without CageFS are out of scope: stay silent and succeed.
  command -v cagefsctl &> /dev/null || return 0
  run_check
}

main "$@"