{ pkgs, service_configs, config, inputs, ... }:
let
  # Rebuild `pkg` with extra C/C++ compiler flags appended to its
  # NIX_CFLAGS_COMPILE, and force the clang 19 stdenv.
  # Adapted from: https://stackoverflow.com/a/42398526
  optimizeWithFlags = pkg: flags:
    pkgs.lib.overrideDerivation pkg (old: {
      # `old.NIX_CFLAGS_COMPILE or ""` defaults cleanly when the package
      # sets no flags of its own; concatStringsSep avoids the manual fold.
      NIX_CFLAGS_COMPILE =
        pkgs.lib.concatStringsSep " " ([ (old.NIX_CFLAGS_COMPILE or "") ] ++ flags);
      stdenv = pkgs.clang19Stdenv;
    });
in
{
  services.llama-cpp = {
    enable = true;

    # Gemma 3 12B instruct, Q4_0 quantization, fetched into the Nix store;
    # toString yields the store path string the service option expects.
    model = builtins.toString (pkgs.fetchurl {
      url = "https://huggingface.co/bartowski/google_gemma-3-12b-it-GGUF/resolve/main/google_gemma-3-12b-it-Q4_0.gguf";
      sha256 = "9a7b70be8727da9fb28523b35946dd42d4fe0f622cce03daa44fccff0775516d";
    });

    port = service_configs.ports.llama_cpp;
    host = "0.0.0.0"; # listen on all interfaces; access is fronted by Caddy below

    # Use the flake-provided llama.cpp, with AVX2 enabled in CMake and the
    # whole build tuned for Zen 2 via the compiler-flag override above.
    package = optimizeWithFlags
      (inputs.llamacpp.packages.${pkgs.system}.default.overrideAttrs (old: {
        cmakeFlags = old.cmakeFlags ++ [ "-DGGML_AVX2=ON" ];
      }))
      [ "-O3" "-march=znver2" "-mtune=znver2" ];

    extraFlags = [ "--flash-attn" ];
  };

  # Reverse-proxy the llama.cpp HTTP server behind Caddy; the auth snippet
  # (basic-auth directives) is read from the secrets directory at eval time.
  services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
    ${builtins.readFile ../secrets/caddy_auth}
    reverse_proxy :${builtins.toString config.services.llama-cpp.port}
  '';
}