llm: use Q4_0 quants (faster)

2025-03-31 18:33:24 -04:00
parent c31635bdd7
commit 2d47c441fe
1 changed files with 2 additions and 2 deletions
--- a/services/llama-cpp.nix
+++ b/services/llama-cpp.nix
@@ -26,8 +26,8 @@ in
    enable = true;
    model = builtins.toString (
      pkgs.fetchurl {
-        url = "https://huggingface.co/bartowski/google_gemma-3-12b-it-GGUF/resolve/main/google_gemma-3-12b-it-IQ4_XS.gguf";
-        sha256 = "aa7b7ae0b17931c379ede82da59b01f246046925aeb752af1ab4285a3b0d69db";
+        url = "https://huggingface.co/bartowski/google_gemma-3-12b-it-GGUF/resolve/main/google_gemma-3-12b-it-Q4_0.gguf";
+        sha256 = "9a7b70be8727da9fb28523b35946dd42d4fe0f622cce03daa44fccff0775516d";
      }
    );
    port = service_configs.ports.llama_cpp;