# NixOS module: runs a local llama.cpp server (Vulkan build) on 127.0.0.1:8012,
# serving a GGUF model fetched from Hugging Face.
{ config, pkgs, lib, hostname, inputs, ... }:
let
  # Adapted from: https://stackoverflow.com/a/42398526
  #
  # optimizeWithFlags pkg flags
  #   Returns `pkg` with every entry of `flags` (a list of strings) appended to
  #   the derivation's NIX_CFLAGS_COMPILE, keeping any flags already set.
  optimizeWithFlags = pkg: flags:
    lib.overrideDerivation pkg (old:
      let
        # `or ""` covers derivations that do not set the attribute at all;
        # toString accepts either a string or a list of strings.
        oldFlags = toString (old.NIX_CFLAGS_COMPILE or "");
        # Drop the old value when empty so the result has no stray spaces.
        allFlags = (lib.optional (oldFlags != "") oldFlags) ++ flags;
      in
      {
        NIX_CFLAGS_COMPILE = lib.concatStringsSep " " allFlags;
        # stdenv = pkgs.clang19Stdenv;
      });

  # optimizePackage pkg
  #   Rebuilds `pkg` with -O3 and Zen 5 (znver5) specific code generation.
  #   NOTE: defined for convenience but not currently applied to the
  #   llama-cpp package below.
  optimizePackage = pkg:
    optimizeWithFlags pkg [
      "-O3"
      "-march=znver5"
      "-mtune=znver5"
    ];
in
{
  services.llama-cpp = {
    enable = true;

    # Model file, pinned by hash so the fetch is reproducible.
    model = pkgs.fetchurl {
      url = "https://huggingface.co/bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF/resolve/main/Nanbeige_Nanbeige4-3B-Thinking-2511-IQ4_NL.gguf";
      sha256 = "c49cd4652cffc3da91986ac2c3dccdf9ab19afe007445f30fe83001eb29b44cf";
    };

    # Listen on loopback only.
    host = "127.0.0.1";
    port = 8012;

    package = pkgs.llama-cpp.override { vulkanSupport = true; };

    extraFlags = [
      # Context size
      "-c"
      "32768"
      # Number of model layers to offload to the GPU
      "-ngl"
      "99"
      # Name under which the model is exposed by the server
      "--alias"
      "local"
    ];
  };

  # Have to do this in order to get Vulkan to work: force DynamicUser off,
  # overriding whatever the llama-cpp service module sets.
  systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;
}