llama.cpp things
@@ -24,7 +24,7 @@
## Behavior

Do not be sycophantic in your responses.

Do not use emojis unless explicitly asked to. This includes in code.

ALWAYS test code before handing the user something you think works. Always verify your work.

Use Test Driven Development methodology.

## Nix

When using `nix build`, append `-L` (short for `--print-build-logs`) to get better visibility into the logs.
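A quick usage sketch of that tip (the flake attribute is illustrative, not taken from this repo):

    # -L / --print-build-logs streams each derivation's full build log
    nix build .#nixosConfigurations.myhost.config.system.build.toplevel -L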
@@ -39,8 +39,7 @@

autoshare = false;
autoupdate = true;
- agent = {
- };
+ agent = { };
provider = {
  openrouter = {
    models = {
@@ -52,6 +51,17 @@
      apiKey = "{file:${../secrets/openrouter_api_key}}";
    };
  };
+ # Local llama-cpp server with OpenAI-compatible API
+ "llama.cpp" = {
+   npm = "@ai-sdk/openai-compatible";
+   options = {
+     baseURL = "http://127.0.0.1:8012/v1";
+   };
+
+   models = {
+     "local" = { };
+   };
+ };
  };
};
};
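The `"local"` model id here lines up with the `--alias local` flag passed to llama-server in `system/llama-cpp.nix` below, so opencode can address the model as `llama.cpp/local`. A hedged elaboration of the model entry (the `name` attribute is an assumption about opencode's model schema; the commit leaves the entry empty):

    models = {
      "local" = {
        # hypothetical display name, not set by this commit
        name = "Nanbeige4-3B-Thinking (llama.cpp)";
      };
    };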
@@ -15,6 +15,7 @@
  # ./vm.nix
  ./steam.nix
  ./networking.nix
+ ./llama-cpp.nix

  inputs.disko.nixosModules.disko
  inputs.lanzaboote.nixosModules.lanzaboote
system/llama-cpp.nix (new file, 60 lines)
@@ -0,0 +1,60 @@
{
  config,
  pkgs,
  lib,
  hostname,
  inputs,
  ...
}:
# (of these arguments, only pkgs and lib are actually referenced below)
let
  # stolen from: https://stackoverflow.com/a/42398526
  # Appends the given compiler flags to a package's NIX_CFLAGS_COMPILE,
  # preserving any flags the derivation already sets.
  optimizeWithFlags =
    pkg: flags:
    lib.overrideDerivation pkg (
      old:
      let
        newflags = lib.foldl' (acc: x: "${acc} ${x}") "" flags;
        oldflags = if (lib.hasAttr "NIX_CFLAGS_COMPILE" old) then "${old.NIX_CFLAGS_COMPILE}" else "";
      in
      {
        NIX_CFLAGS_COMPILE = "${oldflags} ${newflags}";
        # stdenv = pkgs.clang19Stdenv;
      }
    );
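  # Usage sketch (illustrative; this module never calls it directly):
  #   optimizeWithFlags pkgs.zlib [ "-O3" ]
  # returns a zlib build whose NIX_CFLAGS_COMPILE has " -O3" appended.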

  optimizePackage =
    pkg:
    optimizeWithFlags pkg [
      "-O3"
      # znver5 = AMD Zen 5; binaries built this way need a Zen 5 CPU to run
      "-march=znver5"
      "-mtune=znver5"
    ];
in
{
  services.llama-cpp = {
    enable = true;
    # Nanbeige4-3B-Thinking, IQ4_NL quantization, from bartowski's GGUF repo
    model = pkgs.fetchurl {
      url = "https://huggingface.co/bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF/resolve/main/Nanbeige_Nanbeige4-3B-Thinking-2511-IQ4_NL.gguf";
      sha256 = "c49cd4652cffc3da91986ac2c3dccdf9ab19afe007445f30fe83001eb29b44cf";
    };

    # host/port must match the baseURL of the "llama.cpp" provider above
    host = "127.0.0.1";
    port = 8012;
    package = pkgs.llama-cpp.override { vulkanSupport = true; };
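    # NB: optimizePackage is defined above but never applied; using it would
    # presumably look like this (untested sketch):
    #   package = optimizePackage (pkgs.llama-cpp.override { vulkanSupport = true; });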

    extraFlags = [
      # Context size
      "-c"
      "32768"

      # offload up to 99 layers to the GPU, i.e. the whole 3B model
      "-ngl"
      "99"

      # model name served over the API; matches the "local" model id in the
      # opencode provider config above
      "--alias"
      "local"
    ];
  };

  # have to do this in order to get vulkan to work
  # (presumably so the service user can access the GPU device nodes)
  systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;
}
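Once the service is up, a quick smoke test against the endpoint configured above (llama-server exposes the OpenAI-compatible model list, with the model appearing under its `local` alias):

    curl http://127.0.0.1:8012/v1/models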