From 7c217d6ead0c21a3b8e48efece648328afca318a Mon Sep 17 00:00:00 2001
From: Simon Gardling
Date: Wed, 28 May 2025 21:20:42 -0700
Subject: [PATCH] llama-cpp: use q8 quantization instead of q4

---
 services/llama-cpp.nix | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix
index cb8fdbc..86b4d85 100644
--- a/services/llama-cpp.nix
+++ b/services/llama-cpp.nix
@@ -12,8 +12,8 @@
     enable = true;
     model = builtins.toString (
      pkgs.fetchurl {
-        url = "https://huggingface.co/bartowski/nvidia_AceReason-Nemotron-7B-GGUF/resolve/main/nvidia_AceReason-Nemotron-7B-Q4_0.gguf";
-        sha256 = "27f93349ea88f3c84e53469288ac2ac3f5c985de9f8e00e275870e7e524bb3d8";
+        url = "https://huggingface.co/bartowski/nvidia_AceReason-Nemotron-7B-GGUF/resolve/main/nvidia_AceReason-Nemotron-7B-Q8_0.gguf";
+        sha256 = "0d5eb8b46490af7c097357cb20ad215ebfd30efacedac58bf68a8c7d84e996fc";
      }
    );
    port = service_configs.ports.llama_cpp;