61 lines
1.3 KiB
Nix
61 lines
1.3 KiB
Nix
# Arguments accepted by this configuration function. The trailing `...`
# lets callers pass additional arguments without causing an error.
{ config, pkgs, lib, hostname, inputs, ... }:
|
|
let
|
|
# Rebuild `pkg` with extra compiler flags appended to NIX_CFLAGS_COMPILE.
#
# Adapted from: https://stackoverflow.com/a/42398526
#
# Arguments:
#   pkg   - the derivation to override.
#   flags - list of flag strings, e.g. [ "-O3" ].
#
# Returns the result of `lib.overrideDerivation pkg`, whose
# NIX_CFLAGS_COMPILE is the derivation's existing value (if any) followed
# by `flags`, separated by single spaces.
optimizeWithFlags =
  pkg: flags:
  lib.overrideDerivation pkg (
    old:
    let
      # `toString` accepts a string, a list of strings, or null, whereas
      # plain "${...}" interpolation fails to evaluate on a list or null.
      existingFlags = toString (old.NIX_CFLAGS_COMPILE or "");
      extraFlags = lib.concatStringsSep " " flags;
      # Drop empty parts so no leading or doubled separator is emitted.
      nonEmptyParts = lib.filter (part: part != "") [
        existingFlags
        extraFlags
      ];
    in
    {
      NIX_CFLAGS_COMPILE = lib.concatStringsSep " " nonEmptyParts;
      # stdenv = pkgs.clang19Stdenv;
    }
  );
|
|
|
|
# Rebuild `pkg` through `optimizeWithFlags` with -O3 and with both
# -march and -mtune set to `znver5`.
optimizePackage =
  pkg:
  let
    znver5Flags = [
      "-O3"
      "-march=znver5"
      "-mtune=znver5"
    ];
  in
  optimizeWithFlags pkg znver5Flags;
|
|
in
|
|
{
  # llama.cpp server, bound to the loopback address on port 8012.
  services.llama-cpp = {
    enable = true;

    host = "127.0.0.1";
    port = 8012;

    # llama.cpp package with the Vulkan backend switched on.
    package = pkgs.llama-cpp.override { vulkanSupport = true; };

    # Model file, fetched from Hugging Face and pinned by its sha256.
    model = pkgs.fetchurl {
      url = "https://huggingface.co/bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF/resolve/main/Nanbeige_Nanbeige4-3B-Thinking-2511-IQ4_NL.gguf";
      sha256 = "c49cd4652cffc3da91986ac2c3dccdf9ab19afe007445f30fe83001eb29b44cf";
    };

    # Extra command-line arguments; each option is followed by its value.
    extraFlags = [
      # Context size
      "-c" "32768"

      # presumably the GPU layer-offload count — confirm against llama.cpp docs
      "-ngl" "99"

      "--alias" "local"
    ];
  };

  # Vulkan does not work for this service unless DynamicUser is turned off,
  # so force it to false here.
  # NOTE(review): if no explicit User= is configured for the unit, it will
  # presumably run as root once DynamicUser is off — confirm.
  systemd.services.llama-cpp.serviceConfig = {
    DynamicUser = lib.mkForce false;
  };
}
|