# NixOS module: push notifications to an ntfy server for
#   * failed systemd services (via a global OnFailure= drop-in), and
#   * ZFS events reported by ZED.
#
# The topic (and optional access token) are read from files at run time so
# that secrets never end up in the Nix store.
{ config, lib, pkgs, ... }:

let
  cfg = config.services.ntfyAlerts;
  curl = "${pkgs.curl}/bin/curl";
  hostname = config.networking.hostName;

  # File name shared by the global drop-in and the per-unit opt-outs.
  # systemd resolves drop-ins by file name: a file in `<unit>.service.d/`
  # takes precedence over a file of the same name in `service.d/`.
  dropInName = "onfailure.conf";

  # Bash fragment that fills the AUTH_ARGS array (declared by the caller)
  # with a bearer-token header when a readable, non-empty token file exists.
  # Expands to nothing when no token file is configured.
  authCurlArgs =
    if cfg.tokenFile == null then ""
    else ''
      if [ -f "${cfg.tokenFile}" ]; then
        TOKEN=$(cat "${cfg.tokenFile}" 2>/dev/null || echo "")
        if [ -n "$TOKEN" ]; then
          AUTH_ARGS=(-H "Authorization: Bearer $TOKEN")
        fi
      fi
    '';

  # Script run by ntfy-alert@<unit>.service.
  #   $1 - name of the unit that failed (the template instance, %i).
  # Exits 1 if the topic cannot be read; a failed HTTP request is ignored
  # on purpose (best effort).
  systemdAlertScript = pkgs.writeShellScript "ntfy-systemd-alert" ''
    set -euo pipefail

    UNIT_NAME="$1"
    SERVER_URL="${cfg.serverUrl}"

    # The trailing "|| true" is required: with errexit + pipefail a missing
    # or unreadable topic file would otherwise abort the script right here,
    # before the explicit error message below could be printed.
    TOPIC=$(cat "${cfg.topicFile}" 2>/dev/null | tr -d '[:space:]' || true)

    if [ -z "$TOPIC" ]; then
      echo "ERROR: Could not read topic from ${cfg.topicFile}" >&2
      exit 1
    fi

    # Last journal lines of the failed unit, sent as the message body.
    JOURNAL_OUTPUT=$(${pkgs.systemd}/bin/journalctl -u "$UNIT_NAME" -n 15 --no-pager 2>/dev/null || echo "No journal output available")

    # Optional authentication header (stays empty without a token).
    AUTH_ARGS=()
    ${authCurlArgs}

    # Send the notification; never fail the alert unit over a network error.
    ${curl} -sf --max-time 15 -X POST \
      "$SERVER_URL/$TOPIC" \
      -H "Title: [${hostname}] Service failed: $UNIT_NAME" \
      -H "Priority: high" \
      -H "Tags: warning" \
      "''${AUTH_ARGS[@]}" \
      -d "$JOURNAL_OUTPUT" || true
  '';

  # Global drop-in applied to every *.service unit.
  # NOTE: %p is the unit name prefix, so for a template instance such as
  # foo@bar.service the alert is raised for "foo" (instance part dropped).
  globalOnFailureDropIn =
    pkgs.writeTextDir "etc/systemd/system/service.d/${dropInName}" ''
      [Unit]
      OnFailure=ntfy-alert@%p.service
    '';

  # Per-unit opt-out from the global drop-in.
  # Setting OnFailure= in the unit file itself is not enough: systemd parses
  # drop-ins (including service.d/) after the main unit file, so the global
  # drop-in would add the dependency back. A same-named drop-in in the
  # unit's own directory replaces the global one for that unit.
  mkOptOutDropIn = unit:
    pkgs.writeTextDir "etc/systemd/system/${unit}.service.d/${dropInName}" ''
      # Replaces service.d/${dropInName} for this unit: no failure alert.
      [Unit]
      OnFailure=
    '';
in
{
  options.services.ntfyAlerts = {
    enable = lib.mkEnableOption "ntfy push notifications for system alerts";

    serverUrl = lib.mkOption {
      type = lib.types.str;
      description = "The ntfy server URL (e.g. https://ntfy.example.com)";
      example = "https://ntfy.example.com";
    };

    topicFile = lib.mkOption {
      type = lib.types.path;
      description = "Path to a file containing the ntfy topic name to publish alerts to.";
      example = "/run/agenix/ntfy-alerts-topic";
    };

    tokenFile = lib.mkOption {
      type = lib.types.nullOr lib.types.path;
      default = null;
      description = ''
        Path to a file containing the ntfy auth token.
        If set, uses Authorization: Bearer header for authentication.
      '';
      example = "/run/secrets/ntfy-token";
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services = {
      # Template unit started through OnFailure=; %i is the failed unit.
      "ntfy-alert@" = {
        description = "Send ntfy notification for failed service %i";
        # The alert unit must not alert about itself (see also the opt-out
        # drop-in in systemd.packages below).
        unitConfig.OnFailure = lib.mkForce "";
        serviceConfig = {
          Type = "oneshot";
          ExecStart = "${systemdAlertScript} %i";
          TimeoutSec = 30;
        };
      };

      # TODO: re-enable alerts for sanoid once its start-up hang is fixed.
      # sanoid's ExecStartPre runs `zfs allow`, which blocks on TXG sync; on
      # the hdds pool (slow spinning disks + large async frees) this causes
      # 30+ minute hangs and guaranteed timeouts, i.e. constant false alerts.
      "sanoid".unitConfig.OnFailure = lib.mkForce "";
    };

    # Global OnFailure drop-in for all services, plus the per-unit opt-outs
    # that actually keep it from applying to the units listed here.
    systemd.packages =
      [ globalOnFailureDropIn ]
      ++ map mkOptOutDropIn [ "ntfy-alert@" "sanoid" ];

    # ZED (ZFS Event Daemon) ntfy notification settings.
    # The "$(cat ...)" values are written into the generated ZED settings
    # verbatim, so the files are read when ZED loads them and the secrets
    # stay out of the Nix store.
    services.zfs.zed = {
      enableMail = false;
      settings = {
        ZED_NTFY_URL = cfg.serverUrl;
        ZED_NTFY_TOPIC = "$(cat ${cfg.topicFile} | tr -d '[:space:]')";
        ZED_NTFY_ACCESS_TOKEN = lib.mkIf (cfg.tokenFile != null) "$(cat ${cfg.tokenFile})";
        ZED_NOTIFY_VERBOSE = true;
      };
    };
  };
}