traefik: sync dynamic_conf.yml and add drift-check cron

node02 was missing two blocks from node01 (canonical):
- strip-trailing-dot-speedtest middleware (regex redirect for speedtest.goattw.net. URLs)
- speedtest-trailing-dot router (catches trailing-dot Host header variant)

crowdsecLapiHost intentionally differs: node01 uses Docker service name
(crowdsec:8080, container on same host); node02 points to node01 IP
(192.168.99.186:8081, node02 has no local CrowdSec instance).

Added traefik-drift-check.sh — runs daily at 06:00 on ansible-control,
diffs both configs (excluding known crowdsecLapiHost difference),
posts to ntfy homelab-alerts on unexpected divergence.

Traefik hot-reloaded on node02 via SIGHUP — no restart required.
This commit is contained in:
tommy
2026-05-05 20:17:17 -05:00
parent 23194ed22a
commit 7fac4fc9c7
3 changed files with 1473 additions and 0 deletions

51
traefik/traefik-drift-check.sh Executable file
View File

@@ -0,0 +1,51 @@
#!/bin/bash
# Traefik dynamic_conf.yml drift check.
#
# Fetches dynamic_conf.yml from node01 and node02 over SSH, drops every line
# containing the known intentional difference (crowdsecLapiHost), diffs what
# is left, and posts an alert to ntfy when anything else differs.
#
# Meant to run unattended (cron) on ansible-control.
#
# Exit status:
#   0  configs match (ignoring crowdsecLapiHost lines)
#   1  drift detected, or a config could not be fetched or compared

# Every failure below is handled explicitly, so `set -e` is deliberately not
# used; `-u` only guards against typos in variable names.
set -u

readonly NODE01="tommy@192.168.99.186"
readonly NODE02="tommy@192.168.99.187"
readonly CONF_PATH="/home/tommy/traefik/dynamic_conf.yml"
readonly NTFY_URL="https://ntfy.goattw.net/homelab-alerts"
readonly KNOWN_DIFF_PATTERN="crowdsecLapiHost"
readonly LOG_TAG="traefik-drift-check"
readonly SSH_CONNECT_TIMEOUT=10  # seconds
readonly CURL_MAX_TIME=30        # seconds; keeps a dead ntfy from hanging the job
readonly DIFF_SUMMARY_LINES=20   # diff lines included in the alert body

# Writes one message to syslog under the script's tag.
log() {
  logger -t "$LOG_TAG" "$*"
}

# Posts a high-priority warning to ntfy.
# Arguments: $1 - notification title, $2 - notification body
# Returns:   curl's exit status (non-zero on transport or HTTP error)
notify() {
  local title=$1 body=$2
  curl -sf --max-time "$CURL_MAX_TIME" \
    -H "Priority: high" \
    -H "Tags: warning" \
    -H "Title: ${title}" \
    -d "$body" \
    "$NTFY_URL"
}

# Prints the remote config file on stdout.
# Arguments: $1 - ssh destination (user@host)
# Returns:   ssh's exit status
fetch_conf() {
  local host=$1
  # BatchMode=yes: never prompt for a password/passphrase in an unattended run.
  # NOTE(review): StrictHostKeyChecking=no disables host-key verification;
  # consider pinning the node keys in known_hosts and removing it.
  ssh -o BatchMode=yes \
    -o StrictHostKeyChecking=no \
    -o ConnectTimeout="$SSH_CONNECT_TIMEOUT" \
    "$host" "cat $CONF_PATH" 2>/dev/null
}

# Filters stdin, removing every line that contains the known intentional
# difference so it never shows up in the diff.
strip_known_diff() {
  grep -vF -- "$KNOWN_DIFF_PATTERN"
}

main() {
  local node01_content node02_content
  local node01_status=0 node02_status=0
  local failed=""

  log "starting drift check"

  node01_content=$(fetch_conf "$NODE01") || node01_status=$?
  node02_content=$(fetch_conf "$NODE02") || node02_status=$?

  # A non-zero ssh status may come with partial output, so the status is
  # checked as well as emptiness; an empty config is still treated as a failure.
  if (( node01_status != 0 )) || [[ -z "$node01_content" ]]; then
    failed="node01 (ssh exit ${node01_status})"
  fi
  if (( node02_status != 0 )) || [[ -z "$node02_content" ]]; then
    failed="${failed:+${failed}, }node02 (ssh exit ${node02_status})"
  fi

  if [[ -n "$failed" ]]; then
    log "ERROR: could not fetch config from: ${failed}"
    notify "Traefik drift-check: fetch failed" \
      "Could not fetch dynamic_conf.yml over SSH from: ${failed}" \
      || log "WARNING: could not deliver fetch-failure alert to ntfy"
    return 1
  fi

  # Diff with the known intentional line filtered out of both sides.
  # diff exits 0 when identical, 1 when different, >1 on trouble.
  local unexpected_diff diff_status=0
  unexpected_diff=$(diff \
    <(strip_known_diff <<<"$node01_content") \
    <(strip_known_diff <<<"$node02_content") 2>/dev/null) || diff_status=$?

  case "$diff_status" in
    0)
      log "no unexpected drift — configs match (crowdsecLapiHost difference expected)"
      return 0
      ;;
    1)
      log "DRIFT DETECTED — unexpected differences found"
      local diff_summary
      diff_summary=$(head -n "$DIFF_SUMMARY_LINES" <<<"$unexpected_diff")
      if notify "Traefik config drift detected" \
        "dynamic_conf.yml differs between node01 and node02 (beyond crowdsecLapiHost).
${diff_summary}
Fix: sync node02 from node01 canonical."; then
        log "alert sent to ntfy"
      else
        log "WARNING: could not deliver drift alert to ntfy"
      fi
      return 1
      ;;
    *)
      # Do not report "no drift" when the comparison itself failed.
      log "ERROR: diff failed with exit status ${diff_status}"
      notify "Traefik drift-check: compare failed" \
        "diff exited with status ${diff_status} while comparing dynamic_conf.yml" \
        || log "WARNING: could not deliver compare-failure alert to ntfy"
      return 1
      ;;
  esac
}

main "$@"