llm: use Q4_0 quants (faster)
This commit is contained in:
parent
c31635bdd7
commit
2d47c441fe
@@ -26,8 +26,8 @@ in
|
|||||||
enable = true;
|
enable = true;
|
||||||
model = builtins.toString (
|
model = builtins.toString (
|
||||||
pkgs.fetchurl {
|
pkgs.fetchurl {
|
||||||
url = "https://huggingface.co/bartowski/google_gemma-3-12b-it-GGUF/resolve/main/google_gemma-3-12b-it-IQ4_XS.gguf";
|
url = "https://huggingface.co/bartowski/google_gemma-3-12b-it-GGUF/resolve/main/google_gemma-3-12b-it-Q4_0.gguf";
|
||||||
sha256 = "aa7b7ae0b17931c379ede82da59b01f246046925aeb752af1ab4285a3b0d69db";
|
sha256 = "9a7b70be8727da9fb28523b35946dd42d4fe0f622cce03daa44fccff0775516d";
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
port = service_configs.ports.llama_cpp;
|
port = service_configs.ports.llama_cpp;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user