From 2d47c441fefd563a70312034d55bd7de321cdc0a Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Mon, 31 Mar 2025 18:33:24 -0400 Subject: [PATCH] llm: use Q4_0 quants (faster) --- services/llama-cpp.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix index 008c904..59f67e3 100644 --- a/services/llama-cpp.nix +++ b/services/llama-cpp.nix @@ -26,8 +26,8 @@ in enable = true; model = builtins.toString ( pkgs.fetchurl { - url = "https://huggingface.co/bartowski/google_gemma-3-12b-it-GGUF/resolve/main/google_gemma-3-12b-it-IQ4_XS.gguf"; - sha256 = "aa7b7ae0b17931c379ede82da59b01f246046925aeb752af1ab4285a3b0d69db"; + url = "https://huggingface.co/bartowski/google_gemma-3-12b-it-GGUF/resolve/main/google_gemma-3-12b-it-Q4_0.gguf"; + sha256 = "9a7b70be8727da9fb28523b35946dd42d4fe0f622cce03daa44fccff0775516d"; } ); port = service_configs.ports.llama_cpp;