# llama.cpp inference server fronted by Caddy (auth + reverse proxy).
{ pkgs, service_configs, config, inputs, optimizeWithFlags, ... }: {
  services.llama-cpp = {
    enable = true;

    # GGUF model fetched from Hugging Face; the store path is passed to llama.cpp as a string.
    model = builtins.toString (
      # pkgs.fetchurl {
      #   url = "https://huggingface.co/bartowski/google_gemma-3-12b-it-GGUF/resolve/main/google_gemma-3-12b-it-Q4_0.gguf";
      #   sha256 = "9a7b70be8727da9fb28523b35946dd42d4fe0f622cce03daa44fccff0775516d";
      # }
      pkgs.fetchurl {
        url = "https://huggingface.co/bartowski/mlabonne_gemma-3-27b-it-abliterated-GGUF/resolve/main/mlabonne_gemma-3-27b-it-abliterated-Q4_0.gguf";
        sha256 = "d47047ff6fabb02e8aa8bea1d3fd32a551382016bd7d91f45f74615ada670a21";
      }
    );

    port = service_configs.ports.llama_cpp;
    host = "0.0.0.0";

    # Build llama.cpp from the flake input with AVX2 enabled, then apply -O3 and
    # Zen 2 (znver2) compiler flags via the optimizeWithFlags helper.
    package = optimizeWithFlags
      (inputs.llamacpp.packages.${pkgs.system}.default.overrideAttrs (old: {
        cmakeFlags = old.cmakeFlags ++ [ "-DGGML_AVX2=ON" ];
      }))
      [ "-O3" "-march=znver2" "-mtune=znver2" ];

    extraFlags = [ "--flash-attn" ];
  };

  # Expose the server at llm.<domain> behind Caddy, with the auth snippet read
  # from the secrets file and requests proxied to the llama.cpp port.
  services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
    ${builtins.readFile ../secrets/caddy_auth}
    reverse_proxy :${builtins.toString config.services.llama-cpp.port}
  '';
}