126 lines
3.4 KiB
Nix
126 lines
3.4 KiB
Nix
{
|
|
config,
|
|
lib,
|
|
pkgs,
|
|
...
|
|
}:
|
|
let
|
|
# Shorthand for the values of this module's own options.
cfg = config.services.ntfyAlerts;

# Host name of this machine; used as a prefix in the notification title.
hostname = config.networking.hostName;

# Absolute path to the curl binary so the alert script does not rely on PATH.
curl = "${pkgs.curl}/bin/curl";
|
|
|
|
# Shell fragment spliced into the alert script right after `AUTH_ARGS=()`.
# With a tokenFile configured it fills the AUTH_ARGS bash array with a Bearer
# authorization header, provided the file exists and yields a non-empty token.
# Without a tokenFile the fragment is the empty string and AUTH_ARGS stays empty.
authCurlArgs = lib.optionalString (cfg.tokenFile != null) ''
  if [ -f "${cfg.tokenFile}" ]; then
    TOKEN=$(cat "${cfg.tokenFile}" 2>/dev/null || echo "")
    if [ -n "$TOKEN" ]; then
      AUTH_ARGS=(-H "Authorization: Bearer $TOKEN")
    fi
  fi
'';
|
|
|
|
# Systemd failure alert script, executed by the ntfy-alert@ template unit.
#
# Arguments:
#   $1  name of the failed unit (the template instance, passed as %i).
#
# Posts the last 15 journal lines of that unit to "<serverUrl>/<topic>" with a
# high-priority title naming this host and the unit.
#
# Exit status: 1 when the topic cannot be read from topicFile. A failed HTTP
# request is deliberately ignored (`|| true`) so that an unreachable ntfy
# server does not make the alert unit itself fail.
systemdAlertScript = pkgs.writeShellScript "ntfy-systemd-alert" ''
  set -euo pipefail

  UNIT_NAME="$1"
  SERVER_URL="${cfg.serverUrl}"

  # The trailing `|| true` matters: with errexit + pipefail a failing `cat`
  # (missing or unreadable file) would otherwise abort the script on this
  # very line, and the explicit error message below would never be printed.
  TOPIC=$(cat "${cfg.topicFile}" 2>/dev/null | tr -d '[:space:]' || true)
  if [ -z "$TOPIC" ]; then
    echo "ERROR: Could not read topic from ${cfg.topicFile}" >&2
    exit 1
  fi

  # Get journal output for context; fall back to a placeholder body so the
  # notification is still sent when journalctl fails.
  JOURNAL_OUTPUT=$(${pkgs.systemd}/bin/journalctl -u "$UNIT_NAME" -n 15 --no-pager 2>/dev/null || echo "No journal output available")

  # Build auth args: stays an empty array unless the fragment below sets a
  # Bearer header from the configured token file.
  AUTH_ARGS=()
  ${authCurlArgs}

  # Send notification (best effort, see header comment).
  ${curl} -sf --max-time 15 -X POST \
    "$SERVER_URL/$TOPIC" \
    -H "Title: [${hostname}] Service failed: $UNIT_NAME" \
    -H "Priority: high" \
    -H "Tags: warning" \
    "''${AUTH_ARGS[@]}" \
    -d "$JOURNAL_OUTPUT" || true
'';
|
|
|
|
in
|
|
{
|
|
# Option declarations for services.ntfyAlerts.
options.services.ntfyAlerts =
  let
    inherit (lib) mkEnableOption mkOption types;
  in
  {
    # Master switch; all configuration in this module is gated on it.
    enable = mkEnableOption "ntfy push notifications for system alerts";

    # Base URL of the ntfy server; the topic is appended as a path segment.
    serverUrl = mkOption {
      type = types.str;
      example = "https://ntfy.example.com";
      description = "The ntfy server URL (e.g. https://ntfy.example.com)";
    };

    # The topic is read from a file at run time rather than set inline.
    topicFile = mkOption {
      type = types.path;
      example = "/run/agenix/ntfy-alerts-topic";
      description = "Path to a file containing the ntfy topic name to publish alerts to.";
    };

    # Optional; null (the default) means requests are sent unauthenticated.
    tokenFile = mkOption {
      type = types.nullOr types.path;
      default = null;
      example = "/run/secrets/ntfy-token";
      description = ''
        Path to a file containing the ntfy auth token.
        If set, uses Authorization: Bearer header for authentication.
      '';
    };
  };
|
|
|
|
config = lib.mkIf cfg.enable {
|
|
# Template unit that delivers the alert. The instance name (%i) is handed to
# the alert script as its only argument.
systemd.services."ntfy-alert@" = {
  description = "Send ntfy notification for failed service %i";

  # Blank OnFailure on the alert unit itself, intended to keep a failing alert
  # from triggering a further alert.
  # NOTE(review): systemd parses drop-ins after the main unit file, so confirm
  # that this empty assignment really wins over the global service.d drop-in.
  unitConfig.OnFailure = lib.mkForce "";

  serviceConfig.Type = "oneshot";
  serviceConfig.ExecStart = "${systemdAlertScript} %i";
  serviceConfig.TimeoutSec = 30;
};

# sanoid is opted out of failure alerts for now.
# TODO: its ExecStartPre runs `zfs allow`, which blocks on TXG sync; on the
# hdds pool (slow spinning disks + large async frees) that means 30+ minute
# hangs and guaranteed timeouts.
systemd.services.sanoid.unitConfig.OnFailure = lib.mkForce "";
|
|
|
|
# Global OnFailure hook: ships a service.d drop-in, i.e. one that is applied
# to every service unit, pointing OnFailure at the ntfy-alert@ template.
# %p is systemd's specifier for the prefix name of the unit being configured.
systemd.packages =
  let
    dropInPath = "etc/systemd/system/service.d/onfailure.conf";
    dropInText = "[Unit]\nOnFailure=ntfy-alert@%p.service\n";
  in
  [ (pkgs.writeTextDir dropInPath dropInText) ];
|
|
# ZED (ZFS Event Daemon) ntfy notification settings.
#
# Mail notifications are turned off in favor of ntfy. Topic and token are not
# embedded in the configuration; each value is a `$(cat ...)` command
# substitution that reads the corresponding file.
# NOTE(review): this relies on the generated ZED settings file being evaluated
# by a shell, so that the substitutions are expanded - confirm.
services.zfs.zed =
  let
    # Quote the file paths for the shell so that a path containing whitespace
    # or shell metacharacters is still passed to `cat` as one argument.
    topicPath = lib.escapeShellArg "${cfg.topicFile}";
    tokenPath = lib.escapeShellArg "${cfg.tokenFile}";
  in
  {
    enableMail = false;
    settings = {
      ZED_NTFY_URL = cfg.serverUrl;
      # Whitespace (including the trailing newline) is stripped from the topic.
      ZED_NTFY_TOPIC = "$(cat ${topicPath} | tr -d '[:space:]')";
      # Only defined when a token file is configured; tokenPath is never
      # evaluated otherwise.
      ZED_NTFY_ACCESS_TOKEN = lib.mkIf (cfg.tokenFile != null) "$(cat ${tokenPath})";
      ZED_NOTIFY_VERBOSE = true;
    };
  };
|
|
|
|
};
|
|
}
|