ntfy-alerts: init
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
./modules/no-rgb.nix
|
||||
./modules/security.nix
|
||||
./modules/arr-init.nix
|
||||
./modules/ntfy-alerts.nix
|
||||
|
||||
./services/postgresql.nix
|
||||
./services/jellyfin.nix
|
||||
@@ -64,6 +65,7 @@
|
||||
./services/syncthing.nix
|
||||
|
||||
./services/ntfy.nix
|
||||
./services/ntfy-alerts.nix
|
||||
];
|
||||
|
||||
services.kmscon.enable = true;
|
||||
|
||||
@@ -65,5 +65,20 @@
|
||||
owner = "root";
|
||||
group = "root";
|
||||
};
|
||||
|
||||
# Secrets read by the ntfy-alerts service configuration (topic name and auth token).
# Both are decrypted as root-owned files with mode 0400.
ntfy-alerts-topic = {
  owner = "root";
  group = "root";
  mode = "0400";
  file = ../secrets/ntfy-alerts-topic.age;
};

ntfy-alerts-token = {
  owner = "root";
  group = "root";
  mode = "0400";
  file = ../secrets/ntfy-alerts-token.age;
};
|
||||
};
|
||||
}
|
||||
|
||||
121
modules/ntfy-alerts.nix
Normal file
121
modules/ntfy-alerts.nix
Normal file
@@ -0,0 +1,121 @@
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
|
||||
cfg = config.services.ntfyAlerts;
|
||||
|
||||
curl = "${pkgs.curl}/bin/curl";
|
||||
hostname = config.networking.hostName;
|
||||
|
||||
# Bash fragment that fills the AUTH_ARGS array with a Bearer header.
# Expands to the empty string when no tokenFile is configured; the fragment
# itself leaves AUTH_ARGS untouched if the file is missing or empty.
authCurlArgs = lib.optionalString (cfg.tokenFile != null) ''
  if [ -f "${cfg.tokenFile}" ]; then
    TOKEN=$(cat "${cfg.tokenFile}" 2>/dev/null || echo "")
    if [ -n "$TOKEN" ]; then
      AUTH_ARGS=(-H "Authorization: Bearer $TOKEN")
    fi
  fi
'';
|
||||
|
||||
# Systemd failure alert script.
#
# Invoked by ntfy-alert@.service with the failed unit's name as $1. It reads
# the topic from cfg.topicFile, collects the last 15 journal lines of the
# unit and POSTs them to "<serverUrl>/<topic>". Delivery is best-effort: a
# failing curl does not fail the script, but a missing/empty topic does.
systemdAlertScript = pkgs.writeShellScript "ntfy-systemd-alert" ''
  set -euo pipefail

  UNIT_NAME="$1"
  SERVER_URL="${cfg.serverUrl}"

  # The trailing "|| true" matters: with errexit + pipefail a failing cat
  # would abort the script right here, before the explicit check below can
  # report what went wrong.
  TOPIC=$(cat "${cfg.topicFile}" 2>/dev/null | tr -d '[:space:]' || true)
  if [ -z "$TOPIC" ]; then
    echo "ERROR: Could not read topic from ${cfg.topicFile}" >&2
    exit 1
  fi

  # Get journal output for context
  JOURNAL_OUTPUT=$(${pkgs.systemd}/bin/journalctl -u "$UNIT_NAME" -n 15 --no-pager 2>/dev/null || echo "No journal output available")

  # Build auth args (stays empty when no token is configured or readable)
  AUTH_ARGS=()
  ${authCurlArgs}

  # Send notification. --data-raw posts the text verbatim; plain -d would
  # treat a leading "@" in the journal text as a file name to read.
  ${curl} -sf --max-time 15 -X POST \
    "$SERVER_URL/$TOPIC" \
    -H "Title: [${hostname}] Service failed: $UNIT_NAME" \
    -H "Priority: high" \
    -H "Tags: warning" \
    "''${AUTH_ARGS[@]}" \
    --data-raw "$JOURNAL_OUTPUT" || true
'';
|
||||
|
||||
in
|
||||
{
|
||||
# Option declarations for the ntfy alerting module.
options.services.ntfyAlerts =
  let
    inherit (lib) mkEnableOption mkOption types;
  in
  {
    enable = mkEnableOption "ntfy push notifications for system alerts";

    # Base URL only; the topic is appended by the alert script.
    serverUrl = mkOption {
      type = types.str;
      example = "https://ntfy.example.com";
      description = "The ntfy server URL (e.g. https://ntfy.example.com)";
    };

    # The topic is read from a file at runtime rather than stored as an option value.
    topicFile = mkOption {
      type = types.path;
      example = "/run/agenix/ntfy-alerts-topic";
      description = "Path to a file containing the ntfy topic name to publish alerts to.";
    };

    # Optional: null (the default) means no Authorization header is sent.
    tokenFile = mkOption {
      type = types.nullOr types.path;
      default = null;
      example = "/run/secrets/ntfy-token";
      description = ''
        Path to a file containing the ntfy auth token.
        If set, uses Authorization: Bearer header for authentication.
      '';
    };
  };
|
||||
|
||||
config = lib.mkIf cfg.enable {
  # Template unit: ntfy-alert@<name>.service runs the alert script with the
  # instance string (%i) as the name of the unit that failed.
  systemd.services."ntfy-alert@" = {
    description = "Send ntfy notification for failed service %i";

    # Presumably meant to keep the notifier from alerting on its own failure.
    # NOTE(review): the service.d drop-in below is parsed after the main unit
    # file and may re-add the dependency — confirm against systemd.unit(5).
    unitConfig.OnFailure = lib.mkForce "";

    serviceConfig.Type = "oneshot";
    serviceConfig.ExecStart = "${systemdAlertScript} %i";
    serviceConfig.TimeoutSec = 30;
  };

  # Global OnFailure drop-in for all services.
  # NOTE(review): %p is only the unit-name prefix, so an instance such as
  # foo@bar.service would presumably be reported as "foo" — confirm whether
  # the full name (%N) is wanted.
  systemd.packages = [
    (pkgs.writeTextDir "etc/systemd/system/service.d/onfailure.conf" ''
      [Unit]
      OnFailure=ntfy-alert@%p.service
    '')
  ];

  # ZED (ZFS Event Daemon) ntfy notification settings.
  # The $(cat …) values are stored as literal text here; they presumably get
  # expanded when the generated zed.rc is evaluated by a shell — confirm.
  services.zfs.zed.enableMail = false;
  services.zfs.zed.settings = {
    ZED_NTFY_URL = cfg.serverUrl;
    ZED_NTFY_TOPIC = "$(cat ${cfg.topicFile} | tr -d '[:space:]')";
    ZED_NTFY_ACCESS_TOKEN = lib.mkIf (cfg.tokenFile != null) "$(cat ${cfg.tokenFile})";
    ZED_NOTIFY_VERBOSE = true;
  };
};
|
||||
}
|
||||
BIN
secrets/ntfy-alerts-token.age
Normal file
BIN
secrets/ntfy-alerts-token.age
Normal file
Binary file not shown.
BIN
secrets/ntfy-alerts-topic.age
Normal file
BIN
secrets/ntfy-alerts-topic.age
Normal file
Binary file not shown.
10
services/ntfy-alerts.nix
Normal file
10
services/ntfy-alerts.nix
Normal file
@@ -0,0 +1,10 @@
|
||||
# Host-level wiring for the ntfyAlerts module: points it at this deployment's
# ntfy domain and at the age-managed topic and token secret files.
{ config, service_configs, ... }:
let
  secrets = config.age.secrets;
in
{
  services.ntfyAlerts = {
    enable = true;
    serverUrl = "https://${service_configs.ntfy.domain}";

    topicFile = secrets.ntfy-alerts-topic.path;
    tokenFile = secrets.ntfy-alerts-token.path;
  };
}
|
||||
174
tests/ntfy-alerts.nix
Normal file
174
tests/ntfy-alerts.nix
Normal file
@@ -0,0 +1,174 @@
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
|
||||
testPkgs = pkgs.appendOverlays [ (import ../modules/overlays.nix) ];
|
||||
in
|
||||
testPkgs.testers.runNixOSTest {
|
||||
name = "ntfy-alerts";
|
||||
|
||||
nodes.machine =
|
||||
{ pkgs, ... }:
|
||||
{
|
||||
imports = [
|
||||
../modules/ntfy-alerts.nix
|
||||
];
|
||||
|
||||
system.stateVersion = config.system.stateVersion;
|
||||
|
||||
virtualisation.memorySize = 2048;
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
curl
|
||||
jq
|
||||
];
|
||||
|
||||
# Create test topic file
|
||||
systemd.tmpfiles.rules = [
|
||||
"f /run/ntfy-test-topic 0644 root root - test-alerts"
|
||||
];
|
||||
|
||||
# Mock ntfy server that records POST requests.
#
# Listens on port 8080, answers every GET/POST with 200 "Ok" and appends each
# POST (path, headers, body, timestamp) to the JSON list in
# /tmp/ntfy-requests.json, which the test script inspects.
systemd.services.mock-ntfy =
  let
    mockNtfyScript = pkgs.writeScript "mock-ntfy.py" ''
      import json
      import os
      from http.server import HTTPServer, BaseHTTPRequestHandler
      from datetime import datetime

      REQUESTS_FILE = "/tmp/ntfy-requests.json"


      class MockNtfy(BaseHTTPRequestHandler):
          def _respond(self, code=200, body=b"Ok"):
              self.send_response(code)
              self.send_header("Content-Type", "application/json")
              self.end_headers()
              self.wfile.write(body if isinstance(body, bytes) else body.encode())

          def do_GET(self):
              # Used by the test only as a readiness probe; nothing is recorded.
              self._respond()

          def do_POST(self):
              content_length = int(self.headers.get("Content-Length", 0))
              raw = self.rfile.read(content_length) if content_length > 0 else b""
              # Journal text is not guaranteed to be valid UTF-8; never crash on it.
              body = raw.decode("utf-8", errors="replace")

              request_data = {
                  "timestamp": datetime.now().isoformat(),
                  "path": self.path,
                  "headers": dict(self.headers),
                  "body": body,
              }

              # Load existing requests; a missing or corrupt file starts a new list.
              # Only I/O and JSON decoding errors are tolerated here.
              try:
                  with open(REQUESTS_FILE, "r") as f:
                      requests = json.load(f)
              except (OSError, ValueError):
                  requests = []

              requests.append(request_data)

              # Write to a sibling temp file and rename it into place so that a
              # concurrent reader never observes a half-written JSON document.
              tmp_path = REQUESTS_FILE + ".tmp"
              with open(tmp_path, "w") as f:
                  json.dump(requests, f, indent=2)
              os.replace(tmp_path, REQUESTS_FILE)

              self._respond()

          def log_message(self, format, *args):
              # Silence the default per-request logging.
              pass


      HTTPServer(("0.0.0.0", 8080), MockNtfy).serve_forever()
    '';
  in
  {
    description = "Mock ntfy server";
    wantedBy = [ "multi-user.target" ];
    before = [ "ntfy-alert@test-fail.service" ];
    serviceConfig = {
      ExecStart = "${pkgs.python3}/bin/python3 ${mockNtfyScript}";
      Type = "simple";
    };
  };
|
||||
|
||||
# Oneshot unit whose only command is `false`, so every start attempt fails.
systemd.services.test-fail.description = "Test service that fails";
systemd.services.test-fail.serviceConfig = {
  ExecStart = "${pkgs.coreutils}/bin/false";
  Type = "oneshot";
};
|
||||
|
||||
# Point the module under test at the local mock server; no tokenFile is set,
# so the alert script sends no Authorization header.
services.ntfyAlerts.enable = true;
services.ntfyAlerts.serverUrl = "http://localhost:8080";
services.ntfyAlerts.topicFile = "/run/ntfy-test-topic";
|
||||
};
|
||||
|
||||
# Test driver script: fails test-fail.service twice and checks that the mock
# ntfy server records a matching notification each time.
testScript = ''
  import json

  start_all()

  # Wait for mock ntfy server to be ready
  machine.wait_for_unit("mock-ntfy.service")
  machine.wait_until_succeeds("curl -sf http://localhost:8080/", timeout=30)

  # Verify the ntfy-alert@ template service exists
  machine.succeed("systemctl list-unit-files | grep ntfy-alert@")

  # Verify the global OnFailure drop-in is configured
  machine.succeed("cat /etc/systemd/system/service.d/onfailure.conf | grep -q 'OnFailure=ntfy-alert@%p.service'")

  # Trigger the test-fail service (the start command itself is expected to fail)
  machine.succeed("systemctl start test-fail.service || true")

  # Poll until the mock server has recorded at least one request. jq only
  # succeeds on a complete, parseable file, so this replaces both the fixed
  # sleep and the bare existence check, and it cannot pass vacuously.
  machine.wait_until_succeeds("jq -e 'length >= 1' /tmp/ntfy-requests.json", timeout=30)

  # Verify the request content
  result = machine.succeed("cat /tmp/ntfy-requests.json")
  requests = json.loads(result)

  assert len(requests) >= 1, f"Expected at least 1 request, got {len(requests)}"

  # Check the first request
  req = requests[0]
  assert "/test-alerts" in req["path"], f"Expected path to contain /test-alerts, got {req['path']}"
  assert "Title" in req["headers"], "Expected Title header"
  assert "test-fail" in req["headers"]["Title"], f"Expected Title to contain 'test-fail', got {req['headers']['Title']}"
  assert req["headers"]["Priority"] == "high", f"Expected Priority 'high', got {req['headers'].get('Priority')}"
  assert req["headers"]["Tags"] == "warning", f"Expected Tags 'warning', got {req['headers'].get('Tags')}"

  print(f"Received notification: Title={req['headers']['Title']}, Body={req['body'][:100]}...")

  # Repeat test: a second failure must produce a fresh notification
  machine.succeed("rm /tmp/ntfy-requests.json")
  machine.succeed("systemctl reset-failed test-fail.service || true")
  machine.succeed("systemctl start test-fail.service || true")

  # Verify another notification was sent
  machine.wait_until_succeeds("jq -e 'length >= 1' /tmp/ntfy-requests.json", timeout=30)
  result = machine.succeed("cat /tmp/ntfy-requests.json")
  requests = json.loads(result)
  assert len(requests) >= 1, f"Expected at least 1 request after second failure, got {len(requests)}"

  print("All tests passed!")
'';
|
||||
}
|
||||
@@ -24,4 +24,6 @@ in
|
||||
|
||||
# arr tests
|
||||
arrInitTest = handleTest ./arr-init.nix;
|
||||
# ntfy alerts test
|
||||
ntfyAlertsTest = handleTest ./ntfy-alerts.nix;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user