From 8013435d999b9ecd7b157c565f61d7ca8fefe64e Mon Sep 17 00:00:00 2001
From: Simon Gardling
Date: Tue, 24 Feb 2026 14:43:15 -0500
Subject: [PATCH] ntfy-alerts: init

---
 configuration.nix             |   2 +
 modules/age-secrets.nix       |  15 +++
 modules/ntfy-alerts.nix       | 134 +++++++++++++++++++++++++
 secrets/ntfy-alerts-token.age | Bin 0 -> 266 bytes
 secrets/ntfy-alerts-topic.age | Bin 0 -> 254 bytes
 services/ntfy-alerts.nix      |  10 ++
 tests/ntfy-alerts.nix         | 174 ++++++++++++++++++++++++++++++++++
 tests/tests.nix               |   2 +
 8 files changed, 337 insertions(+)
 create mode 100644 modules/ntfy-alerts.nix
 create mode 100644 secrets/ntfy-alerts-token.age
 create mode 100644 secrets/ntfy-alerts-topic.age
 create mode 100644 services/ntfy-alerts.nix
 create mode 100644 tests/ntfy-alerts.nix

diff --git a/configuration.nix b/configuration.nix
index 1a7af64..aad090a 100644
--- a/configuration.nix
+++ b/configuration.nix
@@ -20,6 +20,7 @@
     ./modules/no-rgb.nix
     ./modules/security.nix
     ./modules/arr-init.nix
+    ./modules/ntfy-alerts.nix
 
     ./services/postgresql.nix
     ./services/jellyfin.nix
@@ -64,6 +65,7 @@
     ./services/syncthing.nix
 
     ./services/ntfy.nix
+    ./services/ntfy-alerts.nix
   ];
 
   services.kmscon.enable = true;
diff --git a/modules/age-secrets.nix b/modules/age-secrets.nix
index bdc56eb..fea3c05 100644
--- a/modules/age-secrets.nix
+++ b/modules/age-secrets.nix
@@ -65,5 +65,20 @@
       owner = "root";
       group = "root";
     };
+
+    # ntfy-alerts secrets
+    ntfy-alerts-topic = {
+      file = ../secrets/ntfy-alerts-topic.age;
+      mode = "0400";
+      owner = "root";
+      group = "root";
+    };
+
+    ntfy-alerts-token = {
+      file = ../secrets/ntfy-alerts-token.age;
+      mode = "0400";
+      owner = "root";
+      group = "root";
+    };
   };
 }
diff --git a/modules/ntfy-alerts.nix b/modules/ntfy-alerts.nix
new file mode 100644
index 0000000..6446e05
--- /dev/null
+++ b/modules/ntfy-alerts.nix
@@ -0,0 +1,134 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  cfg = config.services.ntfyAlerts;
+
+  curl = "${pkgs.curl}/bin/curl";
+  hostname = config.networking.hostName;
+
+  # Bash fragment spliced into the alert script. When tokenFile is set and
+  # readable it fills AUTH_ARGS with a Bearer header; otherwise it is empty.
+  authCurlArgs =
+    if cfg.tokenFile != null then
+      ''
+        if [ -f "${cfg.tokenFile}" ]; then
+          TOKEN=$(cat "${cfg.tokenFile}" 2>/dev/null || echo "")
+          if [ -n "$TOKEN" ]; then
+            AUTH_ARGS=(-H "Authorization: Bearer $TOKEN")
+          fi
+        fi
+      ''
+    else
+      "";
+
+  # Script run by ntfy-alert@<unit>.service. It posts the failed unit's name
+  # and its last 15 journal lines to "<serverUrl>/<topic>".
+  systemdAlertScript = pkgs.writeShellScript "ntfy-systemd-alert" ''
+    set -euo pipefail
+
+    UNIT_NAME="$1"
+    SERVER_URL="${cfg.serverUrl}"
+    # "|| true": with errexit and pipefail a failing cat would abort the
+    # script right here, before the explicit error below could be printed.
+    TOPIC=$(cat "${cfg.topicFile}" 2>/dev/null | tr -d '[:space:]' || true)
+    if [ -z "$TOPIC" ]; then
+      echo "ERROR: Could not read topic from ${cfg.topicFile}" >&2
+      exit 1
+    fi
+
+    # Get journal output for context
+    JOURNAL_OUTPUT=$(${pkgs.systemd}/bin/journalctl -u "$UNIT_NAME" -n 15 --no-pager 2>/dev/null || echo "No journal output available")
+
+    # Build auth args
+    AUTH_ARGS=()
+    ${authCurlArgs}
+
+    # Send notification; "|| true" keeps a delivery error from failing this unit
+    ${curl} -sf --max-time 15 -X POST \
+      "$SERVER_URL/$TOPIC" \
+      -H "Title: [${hostname}] Service failed: $UNIT_NAME" \
+      -H "Priority: high" \
+      -H "Tags: warning" \
+      "''${AUTH_ARGS[@]}" \
+      -d "$JOURNAL_OUTPUT" || true
+  '';
+
+in
+{
+  options.services.ntfyAlerts = {
+    enable = lib.mkEnableOption "ntfy push notifications for system alerts";
+
+    serverUrl = lib.mkOption {
+      type = lib.types.str;
+      description = "The ntfy server URL (e.g. https://ntfy.example.com)";
+      example = "https://ntfy.example.com";
+    };
+
+    topicFile = lib.mkOption {
+      type = lib.types.path;
+      description = "Path to a file containing the ntfy topic name to publish alerts to.";
+      example = "/run/agenix/ntfy-alerts-topic";
+    };
+
+    tokenFile = lib.mkOption {
+      type = lib.types.nullOr lib.types.path;
+      default = null;
+      description = ''
+        Path to a file containing the ntfy auth token.
+        If set, uses Authorization: Bearer header for authentication.
+      '';
+      example = "/run/secrets/ntfy-token";
+    };
+
+  };
+
+  config = lib.mkIf cfg.enable {
+    # Template unit started via OnFailure=; %i is the name passed by the drop-in below
+    systemd.services = {
+      "ntfy-alert@" = {
+        description = "Send ntfy notification for failed service %i";
+
+        unitConfig.OnFailure = lib.mkForce "";
+
+        serviceConfig = {
+          Type = "oneshot";
+          ExecStart = "${systemdAlertScript} %i";
+          TimeoutSec = 30;
+        };
+      };
+
+    };
+
+    # Global OnFailure drop-in for all services. %p is the unit name without
+    # its type suffix (for template instances: the part before the "@").
+    systemd.packages = [
+      (pkgs.writeTextDir "etc/systemd/system/service.d/onfailure.conf" ''
+        [Unit]
+        OnFailure=ntfy-alert@%p.service
+      '')
+      # systemd parses drop-ins after the main unit file, so the empty
+      # OnFailure= set on ntfy-alert@ above cannot cancel the global drop-in.
+      # A unit-specific drop-in outranks service.d/ and resets the list, so a
+      # failing alert never triggers another alert.
+      (pkgs.writeTextDir "etc/systemd/system/ntfy-alert@.service.d/onfailure.conf" ''
+        [Unit]
+        OnFailure=
+      '')
+    ];
+    # ZED (ZFS Event Daemon) ntfy notification settings
+    services.zfs.zed = {
+      enableMail = false;
+      settings = {
+        ZED_NTFY_URL = cfg.serverUrl;
+        ZED_NTFY_TOPIC = "$(cat ${cfg.topicFile} | tr -d '[:space:]')";
+        ZED_NTFY_ACCESS_TOKEN = lib.mkIf (cfg.tokenFile != null) "$(cat ${cfg.tokenFile})";
+        ZED_NOTIFY_VERBOSE = true;
+      };
+    };
+
+  };
+}
diff --git a/secrets/ntfy-alerts-token.age b/secrets/ntfy-alerts-token.age
new file mode 100644
index 0000000000000000000000000000000000000000..f3f31aea9463bc8d92251fcd2fec4e23a7102e20
GIT binary patch
literal 266
zcmZQ@_Y83kiVO&0xG`sKxN?h%L~f4M_r;v1d$zT&+$E5`C?V5jF1tSCN$&0vrO9=B
z?@RNG35eYe`#-6m^XR3D#3#ReUO9CfR6g)Zc|$v2IjhpQ$(wha+RKu1&u@lS*|{4F
zN;8ft>8gnZ9C>~AufV*Aza`WzeF&TUT844)wh66AXHA;Fz}~CPr{jk8JTosBy(3YR
zyf3ljUzJt5*%#1fBVTa2?D5S`hRVAgSHf;On_hbyv*+C1a_M(fmAXfkPExJx<*+T`
z?Wy?odXj_CxeAd(Tr;}E;ye?>U5uZv>Yg<3eDp-sdN+oIq?owqTV+QdY_Qn1>Bx(`
b7Yj7y<4gT#{*at&H=C#C%7G6l8#V#}lt6%B

literal 0
HcmV?d00001

diff --git a/secrets/ntfy-alerts-topic.age b/secrets/ntfy-alerts-topic.age
new file mode 100644
index 0000000000000000000000000000000000000000..2fc770fbbef742c789afc491d5ff04bbf238bc69
GIT binary patch
literal 254
zcmZQ@_Y83kiVO&0*c!KaUy|7piG9M~mgGh(+v>pAvf!UerPvJjt)}bq6u8Y_ZT#i9
z{6NKyla}v7VdfiMIs*PyR*gK3J@6@8&g?Okw@Or~HC9c2%J4uK8!DAH8QfBkoj9
ze?sO2v3dW^+J4{5GJNg-PWZ0<@^sGCm9|fGwtl)-^7_Ohi7L6Nf-kP@oFj8($|-xB
znFszeCtk|G{;1n%wwAHdSMS5S51)TNKjFN9!_(6VMFJ1q_swaWxF>qzs%6dpPH`1v
PFzhV!RS1{4RV)qw<<))X

literal 0
HcmV?d00001

diff --git a/services/ntfy-alerts.nix b/services/ntfy-alerts.nix
new file mode 100644
index 0000000..089b270
--- /dev/null
+++ b/services/ntfy-alerts.nix
@@ -0,0 +1,10 @@
+{ config, service_configs, ... }:
+{
+  services.ntfyAlerts = {
+    enable = true;
+    serverUrl = "https://${service_configs.ntfy.domain}";
+    topicFile = config.age.secrets.ntfy-alerts-topic.path;
+
+    tokenFile = config.age.secrets.ntfy-alerts-token.path;
+  };
+}
diff --git a/tests/ntfy-alerts.nix b/tests/ntfy-alerts.nix
new file mode 100644
index 0000000..c9b72cf
--- /dev/null
+++ b/tests/ntfy-alerts.nix
@@ -0,0 +1,174 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  testPkgs = pkgs.appendOverlays [ (import ../modules/overlays.nix) ];
+in
+testPkgs.testers.runNixOSTest {
+  name = "ntfy-alerts";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      imports = [
+        ../modules/ntfy-alerts.nix
+      ];
+
+      system.stateVersion = config.system.stateVersion;
+
+      virtualisation.memorySize = 2048;
+
+      environment.systemPackages = with pkgs; [
+        curl
+        jq
+      ];
+
+      # Create test topic file
+      systemd.tmpfiles.rules = [
+        "f /run/ntfy-test-topic 0644 root root - test-alerts"
+      ];
+
+      # Mock ntfy server: appends every POST it receives to /tmp/ntfy-requests.json
+      systemd.services.mock-ntfy =
+        let
+          mockNtfyScript = pkgs.writeScript "mock-ntfy.py" ''
+            import json
+            import os
+            from http.server import HTTPServer, BaseHTTPRequestHandler
+            from datetime import datetime
+
+            REQUESTS_FILE = "/tmp/ntfy-requests.json"
+
+            class MockNtfy(BaseHTTPRequestHandler):
+                def _respond(self, code=200, body=b"Ok"):
+                    self.send_response(code)
+                    self.send_header("Content-Type", "application/json")
+                    self.end_headers()
+                    self.wfile.write(body if isinstance(body, bytes) else body.encode())
+
+                def do_GET(self):
+                    self._respond()
+
+                def do_POST(self):
+                    content_length = int(self.headers.get("Content-Length", 0))
+                    body = self.rfile.read(content_length).decode() if content_length > 0 else ""
+
+                    request_data = {
+                        "timestamp": datetime.now().isoformat(),
+                        "path": self.path,
+                        "headers": dict(self.headers),
+                        "body": body,
+                    }
+
+                    # Load existing requests; an unreadable or corrupt file restarts the list
+                    requests = []
+                    if os.path.exists(REQUESTS_FILE):
+                        try:
+                            with open(REQUESTS_FILE, "r") as f:
+                                requests = json.load(f)
+                        except (OSError, ValueError):
+                            requests = []
+
+                    requests.append(request_data)
+
+                    with open(REQUESTS_FILE, "w") as f:
+                        json.dump(requests, f, indent=2)
+
+                    self._respond()
+
+                def log_message(self, format, *args):
+                    pass
+
+            HTTPServer(("0.0.0.0", 8080), MockNtfy).serve_forever()
+          '';
+        in
+        {
+          description = "Mock ntfy server";
+          wantedBy = [ "multi-user.target" ];
+          before = [ "ntfy-alert@test-fail.service" ];
+          serviceConfig = {
+            ExecStart = "${pkgs.python3}/bin/python3 ${mockNtfyScript}";
+            Type = "simple";
+          };
+        };
+
+      # Test service that will fail
+      systemd.services.test-fail = {
+        description = "Test service that fails";
+        serviceConfig = {
+          Type = "oneshot";
+          ExecStart = "${pkgs.coreutils}/bin/false";
+        };
+      };
+
+      # Configure ntfy-alerts to use mock server
+      services.ntfyAlerts = {
+        enable = true;
+        serverUrl = "http://localhost:8080";
+        topicFile = "/run/ntfy-test-topic";
+
+      };
+    };
+
+  testScript = ''
+    import json
+    import time
+
+    start_all()
+
+    # Wait for mock ntfy server to be ready
+    machine.wait_for_unit("mock-ntfy.service")
+    machine.wait_until_succeeds("curl -sf http://localhost:8080/", timeout=30)
+
+    # Verify the ntfy-alert@ template service exists
+    machine.succeed("systemctl list-unit-files | grep ntfy-alert@")
+
+    # Verify the global OnFailure drop-in is configured (fixed-string match)
+    machine.succeed("grep -qF 'OnFailure=ntfy-alert@%p.service' /etc/systemd/system/service.d/onfailure.conf")
+
+    # Trigger the test-fail service
+    machine.succeed("systemctl start test-fail.service || true")
+
+    # Wait a moment for the failure notification to be sent
+    time.sleep(2)
+
+    # Log-only status probe: "|| true" means this line can never fail the test
+    machine.succeed("systemctl is-active ntfy-alert@test-fail.service || systemctl is-failed ntfy-alert@test-fail.service || true")
+
+    # Wait until the mock server has recorded at least one complete POST request
+    machine.wait_until_succeeds("jq -e 'length >= 1' /tmp/ntfy-requests.json", timeout=30)
+
+    # Verify the request content
+    result = machine.succeed("cat /tmp/ntfy-requests.json")
+    requests = json.loads(result)
+
+    assert len(requests) >= 1, f"Expected at least 1 request, got {len(requests)}"
+
+    # Check the first request
+    req = requests[0]
+    assert "/test-alerts" in req["path"], f"Expected path to contain /test-alerts, got {req['path']}"
+    assert "Title" in req["headers"], "Expected Title header"
+    assert "test-fail" in req["headers"]["Title"], f"Expected Title to contain 'test-fail', got {req['headers']['Title']}"
+    assert req["headers"]["Priority"] == "high", f"Expected Priority 'high', got {req['headers'].get('Priority')}"
+    assert req["headers"]["Tags"] == "warning", f"Expected Tags 'warning', got {req['headers'].get('Tags')}"
+
+    print(f"Received notification: Title={req['headers']['Title']}, Body={req['body'][:100]}...")
+
+    # Idempotency test: trigger failure again
+    machine.succeed("rm /tmp/ntfy-requests.json")
+    machine.succeed("systemctl reset-failed test-fail.service || true")
+    machine.succeed("systemctl start test-fail.service || true")
+    time.sleep(2)
+
+    # Verify another notification was sent (wait for a complete, parseable record)
+    machine.wait_until_succeeds("jq -e 'length >= 1' /tmp/ntfy-requests.json", timeout=30)
+    result = machine.succeed("cat /tmp/ntfy-requests.json")
+    requests = json.loads(result)
+    assert len(requests) >= 1, f"Expected at least 1 request after second failure, got {len(requests)}"
+
+    print("All tests passed!")
+  '';
+}
diff --git a/tests/tests.nix b/tests/tests.nix
index 5b1e57b..295485d 100644
--- a/tests/tests.nix
+++ b/tests/tests.nix
@@ -24,4 +24,6 @@ in
 
   # arr tests
   arrInitTest = handleTest ./arr-init.nix;
+  # ntfy alerts test
+  ntfyAlertsTest = handleTest ./ntfy-alerts.nix;
 }