#!/bin/bash
#
# check_qps_dnsdist.sh
#
# Icinga check for DNS servers running DNSdist together with
# dnsdist-query.py (run via cron), to detect QPS spikes.
#
# The check looks for the most recent "QPS spike detected!" entry in the
# query monitor log and goes CRITICAL when that entry is younger than the
# look-back window.
#
# Usage:
#   check_qps_dnsdist.sh [AGE_MINUTES]
#
# Arguments:
#   AGE_MINUTES  Look-back window in minutes (non-negative integer).
#                Defaults to 15.
#
# Exit codes (Nagios/Icinga plugin convention):
#   0  OK       - no spike logged, or the last spike is outside the window
#   2  CRITICAL - a spike was logged inside the window
#   3  UNKNOWN  - bad argument, log file missing/unreadable, or the spike
#                 timestamp could not be parsed
#
# NOTE: `set -e` is deliberately not used. An unexpected command failure
# would exit with status 1, which Icinga interprets as WARNING.
set -u

# Config defaults
readonly LOG_FILE="/var/log/dnsdist/query_monitor.log"
readonly DEFAULT_AGE_MINUTES=15
readonly SPIKE_MARKER="QPS spike detected!"
# Currently unused by this check; kept as a record of the log marker.
# shellcheck disable=SC2034
readonly REST_MARKER="Traffic spike is distributed"
readonly DOMAIN_MARKER="High query domain"
# Number of log lines (starting at the spike line itself) that are scanned
# for domain details.
readonly CONTEXT_LINES=10

# CLI override
AGE_MINUTES=${1:-${DEFAULT_AGE_MINUTES}}

# Validate before doing arithmetic: bash evaluates variable contents inside
# $(( )) as an expression, so unvalidated input could error out or be
# evaluated in unexpected ways.
if [[ ! "${AGE_MINUTES}" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Invalid age in minutes (expected a non-negative integer): ${AGE_MINUTES}"
  exit 3
fi

# 10# forces base 10 so that values such as "08" are not rejected as octal.
MAX_AGE_SECONDS=$((10#${AGE_MINUTES} * 60))

# Check log file exists
if [[ ! -f "${LOG_FILE}" ]]; then
  echo "UNKNOWN - Log file not found: ${LOG_FILE}"
  exit 3
fi

# Without this check an unreadable file would make grep fail silently and
# the check would wrongly report "no spike logged".
if [[ ! -r "${LOG_FILE}" ]]; then
  echo "UNKNOWN - Log file not readable: ${LOG_FILE}"
  exit 3
fi

# Get last spike line; grep -n prefixes it with "<line number>:".
last_spike_line=$(grep -nF -- "${SPIKE_MARKER}" "${LOG_FILE}" | tail -n 1)

if [[ -z "${last_spike_line}" ]]; then
  echo "OK - No QPS spike logged"
  exit 0
fi

# Split "<line number>:<log line>" at the first colon.
spike_line_no=${last_spike_line%%:*}
spike_log_line=${last_spike_line#*:}

# The first two space-separated fields of the log line are taken as the
# timestamp (presumably "<date> <time>" - confirm against the log format
# written by dnsdist-query.py).
spike_ts=$(cut -d' ' -f1,2 <<<"${spike_log_line}")

# `date -d` is a GNU date feature.
spike_epoch=$(date -d "${spike_ts}" +%s 2>/dev/null)

# A timestamp that cannot be parsed means the age of the spike is unknown;
# report that instead of silently claiming everything is fine.
if [[ -z "${spike_epoch}" ]]; then
  echo "UNKNOWN - Cannot parse timestamp of last spike entry: ${spike_ts}"
  exit 3
fi

now_epoch=$(date +%s)

# Check if spike is within time window
if ((now_epoch - spike_epoch > MAX_AGE_SECONDS)); then
  echo "OK - No spike detected in the last ${AGE_MINUTES} minute(s)"
  exit 0
fi

# Pull associated domain details from the spike line and the lines that
# follow it. Everything from the third ':'-separated field onwards of each
# matching line is reported.
# NOTE(review): the field offset depends on the exact log line layout -
# confirm against real log output.
domains=$(
  tail -n +"${spike_line_no}" "${LOG_FILE}" \
    | head -n "${CONTEXT_LINES}" \
    | grep -F -- "${DOMAIN_MARKER}" \
    | cut -d':' -f3-
)

if [[ -n "${domains}" ]]; then
  # printf '%s' keeps log-derived text literal (echo -e would interpret
  # backslash sequences contained in logged domain names).
  printf '%s\n%s\n' \
    "CRITICAL - QPS spike with dominant domain(s) in past ${AGE_MINUTES} minutes:" \
    "${domains}"
else
  echo "CRITICAL - QPS spike detected in past ${AGE_MINUTES} minutes (distributed: 'Rest' traffic)"
fi

exit 2