diff --git a/monitoring/node-exporter-compute5-compose.yml b/monitoring/node-exporter-compute5-compose.yml
new file mode 100644
index 0000000..791fcc1
--- /dev/null
+++ b/monitoring/node-exporter-compute5-compose.yml
@@ -0,0 +1,21 @@
+# node-exporter - Host metrics exporter for Prometheus
+# Host: compute5 (192.168.99.196), Port: 9100
+
+services:
+  node-exporter:
+    image: prom/node-exporter:latest
+    container_name: node-exporter
+    restart: unless-stopped
+    pid: host
+    network_mode: host
+    volumes:
+      - /proc:/host/proc:ro
+      - /sys:/host/sys:ro
+      - /:/rootfs:ro
+      - /var/lib/node_exporter/textfile:/textfile:ro
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.sysfs=/host/sys'
+      - '--path.rootfs=/rootfs'
+      - '--collector.textfile.directory=/textfile'
+      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
diff --git a/monitoring/postfix-queue-check.sh b/monitoring/postfix-queue-check.sh
new file mode 100755
index 0000000..893a011
--- /dev/null
+++ b/monitoring/postfix-queue-check.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Postfix queue depth textfile metric — runs every 5 min via root crontab.
+#
+# Publishes postfix_queue_size and postfix_queue_check_last_run_seconds to
+# TEXTFILE in the Prometheus text format for node-exporter's textfile
+# collector. Exits 0 on success, 1 if the metrics file cannot be written.
+
+TEXTFILE="/var/lib/node_exporter/textfile/postfix_queue.prom"
+HOST_LABEL=$(hostname -s)
+
+# Stage the file next to its destination: a same-filesystem mv is an atomic
+# rename, so the collector never sees a partially written file. The staging
+# name must not end in .prom, otherwise the collector would read it as well.
+mkdir -p "$(dirname "$TEXTFILE")" || exit 1
+TMPFILE=$(mktemp "${TEXTFILE}.XXXXXX") || exit 1
+trap 'rm -f "$TMPFILE"' EXIT
+
+QUEUE_SIZE=0
+if command -v mailq >/dev/null 2>&1; then
+  # The pattern is meant to match the mailq lines that begin with a queue ID.
+  # grep -c still prints 0 when nothing matches but exits 1, so the fallback
+  # only re-asserts 0; LC_ALL=C keeps the A-F range byte-wise.
+  QUEUE_SIZE=$(mailq 2>/dev/null | LC_ALL=C grep -c '^[0-9A-F]' 2>/dev/null) || QUEUE_SIZE=0
+fi
+
+cat > "$TMPFILE" << EOF || exit 1
+# HELP postfix_queue_size Number of messages in postfix mail queue
+# TYPE postfix_queue_size gauge
+postfix_queue_size{host="${HOST_LABEL}"} ${QUEUE_SIZE}
+# HELP postfix_queue_check_last_run_seconds Unix timestamp of last queue check
+# TYPE postfix_queue_check_last_run_seconds gauge
+postfix_queue_check_last_run_seconds $(date +%s)
+EOF
+
+# mktemp creates the file with mode 0600; make it world-readable before the
+# rename so the published file is readable from the moment it appears.
+chmod 644 "$TMPFILE" || exit 1
+mv "$TMPFILE" "$TEXTFILE" || exit 1
+trap - EXIT
+exit 0