#!/bin/bash
#
# File: vz7_check_a2_backups-check.sh
#
# Nag monitoring script for vz7 nodes
# To monitor vm backups
# Ref SYSENG-1670, SYSENG-1822, SYSENG-3159, SYSENG-3288, SYSENG-3818
#
# Output : a single line "vz7-backups-check - (<backup server>) <status>".
# Exit   : 0 = OK, 1 = warning (also used when the backup list is empty),
#          2 = critical.
#
# NOTE: strict mode (set -e / pipefail) is intentionally not enabled; many of
# the grep/zgrep pipelines below legitimately return non-zero when nothing
# matches.

now=$(date +%s)
fivedaysago=$((now - 432000))
tendaysago=$((now - 864000))
suspended_vms_present=0

# Maximum recovery time for suspended vms in number of days.
# If this is x, vms suspended more than x amount of days would only be ignored
# by the check for x+1 days when unsuspended.
suspended_vms_max_recovery=10
suspended_vms_max_ignore=$((suspended_vms_max_recovery + 1))

pbackupcache="/var/cache/prlctl_backup_list.cache"

if [[ -f /etc/vz/backup_location ]]; then
  backup_server=$(< /etc/vz/backup_location)
else
  backup_server=$(awk -F: '/^10.*backup-local/ {print $1}' /etc/fstab)
fi

declare -A total_snapshots_critical=() total_snapshots_warning=()
declare -A recent_snapshots_critical=() recent_snapshots_warning=()
declare -A skipped_vms_count=()
declare -a vms_to_skip_from_check=()

# awk helper shared by every awk program that converts a date token to epoch
# seconds. Compared with an inline `cmd | getline`:
#   * the pipe is closed after each read, so a repeated date string is
#     re-evaluated instead of hitting EOF and reusing the previous row's value;
#   * results are memoised per token, so each distinct date forks `date` once;
#   * the token is single-quoted for the shell and rejected if it contains a
#     single quote itself (\047), so log content cannot inject commands.
# Returns an empty string when the token cannot be converted.
awk_epoch_fn='
function epoch(d,    cmd, ts) {
  if (d in epoch_cache) return epoch_cache[d]
  ts = ""
  if (!index(d, "\047")) {
    cmd = "date -d \047" d "\047 +%s"
    cmd | getline ts
    close(cmd)
  }
  epoch_cache[d] = ts
  return ts
}
'

# Print the third column of the container's row in `prlctl list` (the value the
# status messages label each container with).
container_ip() {
  prlctl list "$1" | awk '/^{/ {print $3}'
}

# Relaxed limits for unmanaged vz7 nodes
if hostname | grep -qE "vz[7m]-(t)?vu[0-9]"; then
  recent_snapshots_critical_limit=1
  recent_snapshots_warning_limit=2
  total_snapshots_critical_limit=3
  total_snapshots_warning_limit=5
else
  recent_snapshots_critical_limit=2
  recent_snapshots_warning_limit=3
  total_snapshots_critical_limit=5
  total_snapshots_warning_limit=7
fi

# Keep track of suspended vms by adding a log with their ctids and making a
# touch file to keep track of when it was last logged so we don't log every run.
# The marker is only created when it is missing; touching it unconditionally
# would reset its mtime on every run and the log entry below would never fire.
date_check="/var/log/suspended_vms"
if [[ ! -e "${date_check}" ]]; then
  touch "${date_check}"
fi
# NOTE(review): `-mtime +1` matches only once the marker is at least two full
# days old; confirm that is the intended logging cadence.
if [[ -n "$(find "${date_check}" -mtime +1)" ]]; then
  date > "${date_check}"
  suspended_now=$(prlctl list -a | awk '/suspended/ {printf "%s ", $1}')
  printf '%s suspended vms skipped: %s\n' \
    "$(date '+%Y-%m-%d %H:%M:%S')" "${suspended_now}" >> /var/log/vzbackup.log
fi

# Search the 2 most recent vzctl logs for new vms (vms created within the last
# 10 days). Each new container is emitted as "{<id>}", one per line.
new_containers=""
# ls errors are silenced on purpose: "no vzctl log yet" simply means there are
# no new containers to exclude.
mapfile -t recent_vzctl_logs < <(ls -1t /var/log/vzctl.log* 2>/dev/null | head -2)
if (( ${#recent_vzctl_logs[@]} > 0 )); then
  new_containers=$(awk -v cutoff="${tendaysago}" "${awk_epoch_fn}"'
    /Creating.*Container/ {
      created = epoch($1)
      if (created + 0 > cutoff + 0) print "{" $5 "}"
    }' "${recent_vzctl_logs[@]}")
fi

# Get backup list from cache, or generate if missing (very slow)
if [[ -s "${pbackupcache}" ]]; then
  backuplisting=$(< "${pbackupcache}")
elif [[ -f /etc/vz/backup_location ]]; then
  # New Virtuozzo Remote backup location defined
  backuplisting=$(prlctl backup-list -s "root@${backup_server}" \
    --backup-path "/backup/$(hostname -s)" | tee "${pbackupcache}")
else
  backuplisting=$(prlctl backup-list | tee "${pbackupcache}")
fi

if [[ -z "${backuplisting}" ]]; then
  printf 'vz7-backups-check - (%s) prlctl backup-list empty\n' "${backup_server}"
  exit 1
fi

# Sql query to get vms created by a2hosting.com email addresses and hostname
# containing either 'test', 'example' or 'syseng' strings OR the email address
# itself containing 'test'
testvmquery="select v.hostname from vps v join users u join servers s on v.uid=u.uid and v.serid=s.serid where u.email like '%@a2hosting.com' and (v.hostname like '%test%' or v.hostname like '%example%' or u.email like '%test%' or v.hostname like '%syseng%');"
mysql_rootpass=$(awk -F"'" '/dbpass/ {print $4}' /usr/local/virtualizor/universal.php)
# Hostnames are joined with "|" so the result can be used as one grep -E
# alternation. stderr is discarded so mysql's command-line password warning
# never reaches the check output.
# NOTE(review): the password is visible in the process list while mysql runs.
testvms=$(/usr/local/emps/bin/mysql virtualizor -p"${mysql_rootpass}" -BNe "${testvmquery}" 2>/dev/null \
  | paste -sd'|' -)

# If there is at least one test vm, get a list of test vm uuids enclosed with
# curly brackets, one each line
if [[ -n "${testvms}" ]]; then
  testuuids=$(prlctl list -o uuid,hostname | grep -E "${testvms}" | awk '{print $1}')
# If there are no test vms, set testuuids to some string which doesnt match the
# uuid of any vm
else
  testuuids="null"
fi

if [[ -s /opt/ignorevmbackup ]]; then
  ignoredvmuuids=$(< /opt/ignorevmbackup)
else
  ignoredvmuuids="null"
fi

# Get the skipped vms with the number of days they were skipped from the
# vzbackup logs ("<count> <vm>" per line)
skipped_vms_with_count=$(nice -n19 zgrep -i "suspended vms skipped" /var/log/vzbackup.log* \
  | awk -F"skipped: " '{print $2}' \
  | tr ' ' '\n' \
  | sort -n \
  | uniq -c \
  | awk 'NF==2')

if [[ -n "${skipped_vms_with_count}" ]]; then
  # If there is atleast one skipped vm that is running now, set
  # suspended_vms_present=1
  for vm in $(awk '{print $2}' <<< "${skipped_vms_with_count}"); do
    if prlctl status "${vm}" 2>/dev/null | grep -iqw "running"; then
      suspended_vms_present=1
      break
    fi
  done

  if (( suspended_vms_present == 1 )); then
    # For each vm that was skipped, get the skipped vm ctid and number of times
    # it was skipped into an associative array
    while read -r skip_count skipped_vm; do
      skipped_vms_count["${skipped_vm}"]=${skip_count}
    done <<< "${skipped_vms_with_count}"

    # For each vm that was skipped, get the last date on which it was skipped
    # and see if enough days has gone past since that day
    for vm in "${!skipped_vms_count[@]}"; do
      # Every matching log line is considered (no -m1) and the newest
      # timestamp wins; stopping at the first match per file would yield the
      # oldest entry of each log instead of the last time the vm was seen.
      last_seen_suspended=$(zgrep -ih "suspended vms skipped.*${vm}" /var/log/vzbackup.log* \
        | tr -d '[' \
        | awk "${awk_epoch_fn}"'
            { seen = epoch($1); if (seen + 0 > latest + 0) latest = seen }
            END { print latest }')

      if (( skipped_vms_count[${vm}] < suspended_vms_max_recovery )); then
        x=$((skipped_vms_count[${vm}] + 1))
      else
        x=${suspended_vms_max_ignore}
      fi
      x_days_ago=$(date --date "${x} days ago" "+%s")

      # If x days has not gone past since the vm was last seen suspended, add
      # the vm to vms_to_skip_from_check array
      if [[ -n "${last_seen_suspended}" ]] && (( x_days_ago < last_seen_suspended )); then
        vms_to_skip_from_check+=( "{${vm}}" )
      fi
    done
  fi
fi

for container in $(prlctl list | awk '!/UUID/ {print $1}'); do
  # If vms in new containers list or in test vm uuids list, skip
  if printf '%s\n' "${new_containers}" "${testuuids}" "${ignoredvmuuids}" \
      | grep -qFw -- "${container}"; then
    continue
  elif (( suspended_vms_present == 1 )) \
      && printf '%s\n' "${vms_to_skip_from_check[@]}" | grep -qFw -- "${container}"; then
    continue
  fi

  # One "<uuid> <epoch>" line per backup of this container; the epoch is
  # missing when the date column could not be converted.
  snapshots=$(awk -v cont="${container}" "${awk_epoch_fn}"'
    $1 == cont { print $1, epoch($4) }' <<< "${backuplisting}")

  total_snapshots=$(awk 'NF == 2 { n++ } END { print n + 0 }' <<< "${snapshots}")
  if (( total_snapshots < total_snapshots_critical_limit )); then
    total_snapshots_critical["${container}"]="${total_snapshots}"
  elif (( total_snapshots < total_snapshots_warning_limit )); then
    total_snapshots_warning["${container}"]="${total_snapshots}"
  fi

  recent_snapshots=$(awk -v fda="${fivedaysago}" \
    'NF == 2 && $2 + 0 > fda + 0 { n++ } END { print n + 0 }' <<< "${snapshots}")
  if (( recent_snapshots < recent_snapshots_critical_limit )); then
    recent_snapshots_critical["${container}"]="${recent_snapshots}"
  elif (( recent_snapshots < recent_snapshots_warning_limit )); then
    recent_snapshots_warning["${container}"]="${recent_snapshots}"
  fi
done

# Report. Within a severity level the "Total" list takes precedence: the
# "Recent" list is only printed when the "Total" list is empty.
if (( ${#total_snapshots_critical[@]} > 0 || ${#recent_snapshots_critical[@]} > 0 )); then
  if (( ${#total_snapshots_critical[@]} > 0 )); then
    msg="Total Snapshots Critical:"
    for container in "${!total_snapshots_critical[@]}"; do
      msg="${msg} $(container_ip "${container}"):${total_snapshots_critical[${container}]}"
    done
  else
    msg="Recent Snapshots Critical:"
    for container in "${!recent_snapshots_critical[@]}"; do
      msg="${msg} $(container_ip "${container}"):${recent_snapshots_critical[${container}]}"
    done
  fi
  printf 'vz7-backups-check - (%s) %s\n' "${backup_server}" "${msg}"
  exit 2
elif (( ${#total_snapshots_warning[@]} > 0 || ${#recent_snapshots_warning[@]} > 0 )); then
  if (( ${#total_snapshots_warning[@]} > 0 )); then
    msg="Total Snapshots Warning:"
    for container in "${!total_snapshots_warning[@]}"; do
      msg="${msg} $(container_ip "${container}"):${total_snapshots_warning[${container}]}"
    done
  else
    msg="Recent Snapshots Warning:"
    for container in "${!recent_snapshots_warning[@]}"; do
      msg="${msg} $(container_ip "${container}"):${recent_snapshots_warning[${container}]}"
    done
  fi
  printf 'vz7-backups-check - (%s) %s\n' "${backup_server}" "${msg}"
  exit 1
else
  printf 'vz7-backups-check - (%s) OK\n' "${backup_server}"
  exit 0
fi