From 38fa22d444736d75d9c770a11d3320b7cc9ab64a Mon Sep 17 00:00:00 2001
From: tommy
Date: Wed, 6 May 2026 06:05:50 -0500
Subject: [PATCH] monitoring: add quarterly UPS self-test script (Phase 4B)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ups-quarterly-test.sh:
- Runs test.battery.start.quick on cyberpower1 then cyberpower2
- 120s wait between tests (allow recharge)
- Logs pass/fail to syslog via logger -t ups-quarterly-test
- Password stored in single-quoted variable to prevent shell expansion
- Deployed on beast (/usr/local/bin/), cron: first Sunday of Jan/Apr/Jul/Oct 02:00

Manual run 2026-05-06:
cyberpower1: Done and passed (charge 97% post-test, recharged normally)
cyberpower2: Done and passed (charge 100%)

Grafana alerts (in grafana.db):
cfl8lrs1mxnnka UPS Battery Charge Low (<80%) — break-tested pending ✓
afl8lrs4mbaioa UPS On Battery (power outage) — break-tested fired ✓

Note: nut_battery_test_result not exposed by nut-exporter v1.2.1.
Pass/fail tracked via syslog only for now. Adding to Phase 5 if exporter
gains test-result metric support.
---
 monitoring/ups-quarterly-test.sh | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100755 monitoring/ups-quarterly-test.sh

diff --git a/monitoring/ups-quarterly-test.sh b/monitoring/ups-quarterly-test.sh
new file mode 100755
index 0000000..38b1aed
--- /dev/null
+++ b/monitoring/ups-quarterly-test.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Quarterly UPS battery self-test — runs via cron first Sunday of Jan/Apr/Jul/Oct
+#
+# For each UPS: start a quick battery self-test with upscmd, wait, then read
+# ups.test.result with upsc. Progress and the overall PASS/FAIL verdict are
+# written to syslog under the tag ups-quarterly-test.
+# Exit status: 0 when every UPS reports 'Done and passed', 1 otherwise.
+
+# NOTE(review): this credential is committed in plaintext and is handed to
+# upscmd on its command line; rotate it and load it from a root-only file.
+NUT_PASS='Sparky$100'
+NUT_USER='upsadmin'
+NUT_HOST='localhost'
+
+readonly LOG_TAG='ups-quarterly-test'
+readonly WAIT_SECS=120
+readonly PASSED='Done and passed'
+
+# Run the quick self-test on one UPS and log its result.
+# Arguments: $1 - UPS name as configured in NUT, $2 - label for the log line
+# Returns:   0 if upscmd succeeded and the UPS reports a pass, 1 otherwise
+run_test() {
+  local ups=$1 label=$2
+  local rc result
+
+  logger -t "$LOG_TAG" "testing $ups ($label)"
+  upscmd -u "$NUT_USER" -p "$NUT_PASS" "${ups}@${NUT_HOST}" test.battery.start.quick 2>&1 \
+    | logger -t "$LOG_TAG"
+  # Exit status of upscmd, not of logger; must be read right after the pipeline.
+  rc=${PIPESTATUS[0]}
+  if (( rc != 0 )); then
+    # upscmd reported failure, so ups.test.result may still hold the outcome of
+    # an earlier run; reading it now could report a stale pass.
+    logger -t "$LOG_TAG" "${ups}: test.battery.start.quick failed (upscmd exit $rc)"
+    return 1
+  fi
+
+  # Wait before reading the result; this also spaces the two tests apart.
+  sleep "$WAIT_SECS"
+
+  # stderr is discarded; an empty result simply fails the comparison below.
+  result=$(upsc "${ups}@${NUT_HOST}" ups.test.result 2>/dev/null)
+  logger -t "$LOG_TAG" "$ups result: $result"
+  [[ "$result" == "$PASSED" ]]
+}
+
+logger -t "$LOG_TAG" 'starting quarterly battery self-test'
+
+failed=0
+run_test cyberpower1 Tower || failed=1
+run_test cyberpower2 Rackmount || failed=1
+
+if (( failed == 0 )); then
+  logger -t "$LOG_TAG" 'PASS: both UPS batteries healthy'
+else
+  logger -t "$LOG_TAG" 'FAIL: one or more UPS batteries need attention'
+  exit 1
+fi