llama.cpp things

2025-12-24 03:00:28 -05:00
parent 8708d16dac
commit 9bb15ae4d2
3 changed files with 74 additions and 3 deletions


@@ -24,7 +24,7 @@
## Behavior
Do not be sycophantic in your responses.
Do not use emojis unless explicitly asked to. This includes in code.
ALWAYS test code before returning to the user with something you think works. Always verify your work.
Use Test Driven Development methodology.
## Nix
When using `nix build`, append `-L` to get better visibility into the logs.
@@ -39,8 +39,7 @@
      autoshare = false;
      autoupdate = true;
      agent = {
      };
      agent = { };
      provider = {
        openrouter = {
          models = {
@@ -52,6 +51,17 @@
            apiKey = "{file:${../secrets/openrouter_api_key}}";
          };
        };
        # Local llama-cpp server with OpenAI-compatible API
        "llama.cpp" = {
          npm = "@ai-sdk/openai-compatible";
          options = {
            baseURL = "http://127.0.0.1:8012/v1";
          };
          models = {
            "local" = { };
          };
        };
      };
    };
  };
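A possible follow-up, not part of this commit: the hardcoded baseURL duplicates the host and port that system/llama-cpp.nix sets below, so it could be derived from the service options instead. A minimal sketch, assuming `config.services.llama-cpp` is in scope in this module:

# Hypothetical: keep the URL in sync with services.llama-cpp below.
options = {
  baseURL = "http://${config.services.llama-cpp.host}:${toString config.services.llama-cpp.port}/v1";
};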


@@ -15,6 +15,7 @@
    # ./vm.nix
    ./steam.nix
    ./networking.nix
    ./llama-cpp.nix
    inputs.disko.nixosModules.disko
    inputs.lanzaboote.nixosModules.lanzaboote

system/llama-cpp.nix (new file, 60 additions)

@@ -0,0 +1,60 @@
{
  config,
  pkgs,
  lib,
  hostname,
  inputs,
  ...
}:
let
  # stolen from: https://stackoverflow.com/a/42398526
  optimizeWithFlags =
    pkg: flags:
    lib.overrideDerivation pkg (
      old:
      let
        newflags = lib.foldl' (acc: x: "${acc} ${x}") "" flags;
        oldflags = if (lib.hasAttr "NIX_CFLAGS_COMPILE" old) then "${old.NIX_CFLAGS_COMPILE}" else "";
      in
      {
        NIX_CFLAGS_COMPILE = "${oldflags} ${newflags}";
        # stdenv = pkgs.clang19Stdenv;
      }
    );
  optimizePackage =
    pkg:
    optimizeWithFlags pkg [
      "-O3"
      "-march=znver5"
      "-mtune=znver5"
    ];
in
{
  services.llama-cpp = {
    enable = true;
    model = pkgs.fetchurl {
      url = "https://huggingface.co/bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF/resolve/main/Nanbeige_Nanbeige4-3B-Thinking-2511-IQ4_NL.gguf";
      sha256 = "c49cd4652cffc3da91986ac2c3dccdf9ab19afe007445f30fe83001eb29b44cf";
    };
    host = "127.0.0.1";
    port = 8012;
    package = pkgs.llama-cpp.override { vulkanSupport = true; };
    extraFlags = [
      # Context size
      "-c"
      "32768"
      # Offload up to 99 layers to the GPU (effectively the whole model)
      "-ngl"
      "99"
      # Model name the server reports over its API; matches the "local" model above
      "--alias"
      "local"
    ];
  };
  # have to do this in order to get vulkan to work
  systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;
}
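Note that `optimizePackage` is defined in the `let` block above but never applied, so the znver5 flags do not affect the build as committed. A minimal sketch of wiring it in (untested; assumes the `overrideDerivation`-based helper composes cleanly with the `vulkanSupport` override):

# Hypothetical: build the Vulkan package with -O3/-march=znver5 as well.
package = optimizePackage (pkgs.llama-cpp.override { vulkanSupport = true; });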