Back up NixOS config · karashiiro.moe/nixos-config@9335f03

+506

1 changed file

expand all

+506

configuration.nix

# Edit this configuration file to define what should be installed on
# your system. Help is available in the configuration.nix(5) man page
# and in the NixOS manual (accessible by running 'nixos-help').
#
# Overview of this file:
#   * `let` bindings build the packages this host needs (CUDA llama.cpp,
#     large-model-proxy, the Discord bot) and render the proxy's JSON config.
#   * The module body configures the desktop, the NVIDIA stack, Docker,
#     binary caches, the user account, and the two long-running services
#     (large-model-proxy and discord-llm-demobot).

{ config, pkgs, lib, ... }:

let
  # Import unstable channel for latest llama.cpp
  unstable = import <nixpkgs-unstable> {
    config = {
      allowUnfree = true;
      cudaSupport = true;
    };
  };

  # Build llama.cpp with CUDA support
  llamaCppCuda = unstable.llama-cpp.override {
    cudaSupport = true;
  };

  # ============================================================================
  # Large Model Proxy - manages multiple LLM backends with automatic VRAM management
  # ============================================================================

  large-model-proxy = pkgs.buildGoModule rec {
    pname = "large-model-proxy";
    version = "0.7.1";

    src = pkgs.fetchFromGitHub {
      owner = "perk11";
      repo = "large-model-proxy";
      rev = version;
      hash = "sha256-FAu8YGJRH0V5kDCI5UezxE/A8N0XQI6c/jsqUvGBkzM=";
    };

    vendorHash = "sha256-zMAapi6RDlXM7ewk8+vzUQftxGUy6PfBB27RQEeM+3A=";

    # Skip tests that require network
    doCheck = false;

    meta = with lib; {
      description = "Proxy for managing multiple large language models with automatic VRAM management";
      homepage = "https://github.com/perk11/large-model-proxy";
      license = licenses.gpl2;
      mainProgram = "large-model-proxy";
    };
  };

  # Large Model Proxy configuration
  # Docs: https://github.com/perk11/large-model-proxy#configuration
  #
  # This attrset is serialised to JSON below (lmpConfigFile) and passed to the
  # proxy with `-c`. Each entry in `Services` pairs a public ListenPort with an
  # internal llama-server port (ProxyTargetPort / `--port`); keep the two in sync.
  lmpConfig = {
    OpenAiApi = {
      ListenPort = "7070"; # OpenAI-compatible API endpoint
    };
    ManagementApi = {
      ListenPort = "7071"; # Web UI for monitoring at http://localhost:7071
    };
    DefaultServiceUrl = "http://127.0.0.1:{{.PORT}}";
    ShutDownAfterInactivitySeconds = 300; # Shut down models after 5 min idle
    MaxTimeToWaitForServiceToCloseConnectionBeforeGivingUpSeconds = 120;

    # Adjust these to match your hardware
    ResourcesAvailable = {
      VRAM = 24000; # GPU VRAM in MB (e.g., RTX 3090 = 24000)
      RAM = 16000; # System RAM available for CPU inference in MB
    };

    Services = [
      {
        Name = "gpt-oss-20b-uwufied-v2";
        ListenPort = "8080";
        ProxyTargetHost = "127.0.0.1";
        ProxyTargetPort = "18080"; # Internal port for llama-server
        Command = "${llamaCppCuda}/bin/llama-server";
        Args = builtins.concatStringsSep " " [
          "-m /srv/gpt-oss-20b/merged_gguf/gpt-oss-20b-Q4_K_M-uwufied-v2.gguf"
          "--jinja"
          "-ngl 99"
          "--threads -1"
          "--ctx-size 131072"
          "-fa on"
          "--temp 1.0"
          "--top-p 1.0"
          "--top-k 0"
          "--host 127.0.0.1"
          "--port 18080"
          "--chat-template-kwargs '{\"model_identity\": \" \", \"reasoning_effort\": \"low\"}'"
        ];
        OpenAiApi = true;
        HealthcheckCommand = "curl --fail http://127.0.0.1:18080/health";
        HealthcheckIntervalMilliseconds = 500;
        RestartOnConnectionFailure = true;
        ResourceRequirements = {
          VRAM = 15000; # Estimated VRAM usage for this model in MB
        };
      }
      # Devstral 2 - 24B coding agent model from Mistral AI
      # (unsloth version has fixed system prompts!)
      # Download: huggingface-cli download unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF \
      #   --include "*Q4_K_XL*" \
      #   --local-dir /srv/devstral-2
      {
        Name = "devstral-2";
        ListenPort = "8081";
        ProxyTargetHost = "127.0.0.1";
        ProxyTargetPort = "18081";
        Command = "${llamaCppCuda}/bin/llama-server";
        Args = builtins.concatStringsSep " " [
          "-m /srv/devstral-2/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf"
          "--jinja" # Enable Jinja2 templates for chat formatting
          "-ngl 99" # Offload all layers to GPU
          "--threads -1" # Auto-detect threads
          "--ctx-size 98304" # 96k context
          "-fa on" # Flash attention
          "--cache-type-k q8_0" # Quantize K cache to save VRAM
          "--cache-type-v q8_0" # Quantize V cache to save VRAM
          "--host 127.0.0.1"
          "--port 18081"
        ];
        OpenAiApi = true;
        HealthcheckCommand = "curl --fail http://127.0.0.1:18081/health";
        HealthcheckIntervalMilliseconds = 500;
        RestartOnConnectionFailure = true;
        ResourceRequirements = {
          VRAM = 23000; # ~23GB for Q4_K_XL quantization at a 96k context
        };
      }
      # Add more models here - LMP will swap them based on VRAM availability
    ];
  };

  # Render the attrset above to a JSON file in the Nix store.
  lmpConfigFile = pkgs.writeText "large-model-proxy-config.json" (builtins.toJSON lmpConfig);

  # Set up Discord bot
  discord-llm-demobot-src = pkgs.fetchFromGitHub {
    owner = "karashiiro";
    repo = "discord-llm-demobot";
    rev = "f71bce043d7a6fa945dc8ec933615ac1663c10ab";
    sha256 = "09p0mwz1g3f9fnx5l8fizqxn6ai23gkbx5dxdvvh917g4vp3fj85";
  };

  discord-llm-demobot-pkg = pkgs.buildNpmPackage {
    pname = "discord-llm-demobot";
    version = "1.0.0";
    src = discord-llm-demobot-src;

    npmDepsHash = "sha256-SepfSwdRpl7eq4TwXusqtk2r3uNsL6IxnKCh1oIRga0=";

    # Build with the same Node.js that the installed wrappers execute, so
    # node_modules is produced by the interpreter that later runs it.
    nodejs = pkgs.nodejs_20;

    # The phases are overridden, so call the standard hooks explicitly.
    buildPhase = ''
      runHook preBuild
      npm run build
      runHook postBuild
    '';

    # Copies the compiled output plus node_modules into $out/lib and writes two
    # launcher scripts into $out/bin. The heredocs are unquoted on purpose:
    # $out is expanded at build time, while \$@ stays literal for run time.
    installPhase = ''
      runHook preInstall

      mkdir -p $out/{lib/discord-llm-demobot,bin}
      cp -r dist node_modules package.json $out/lib/discord-llm-demobot/

      cat > $out/bin/discord-llm-demobot <<EOF
      #!${pkgs.bash}/bin/bash
      exec ${pkgs.nodejs_20}/bin/node $out/lib/discord-llm-demobot/dist/index.js "\$@"
      EOF
      chmod +x $out/bin/discord-llm-demobot

      cat > $out/bin/discord-llm-demobot-deploy-commands <<EOF
      #!${pkgs.bash}/bin/bash
      exec ${pkgs.nodejs_20}/bin/node $out/lib/discord-llm-demobot/dist/deployCommands.js "\$@"
      EOF
      chmod +x $out/bin/discord-llm-demobot-deploy-commands

      runHook postInstall
    '';
  };
in
{
  imports =
    [ # Include the results of the hardware scan.
      ./hardware-configuration.nix
      # NOTE(review): this tracks the `main` branch, while the package source
      # (discord-llm-demobot-src) is pinned to a fixed rev; the module and the
      # package can drift apart. Consider adding a matching `rev` here.
      (builtins.fetchGit {
        url = "https://github.com/karashiiro/discord-llm-demobot";
        ref = "main";
      } + "/nixos-module.nix")
      # NOTE(review): unpinned tarball (no sha256); contents follow `master`.
      (fetchTarball "https://github.com/nix-community/nixos-vscode-server/tarball/master")
    ];

  # Bootloader.
  boot.loader.systemd-boot.enable = true;
  boot.loader.efi.canTouchEfiVariables = true;

  # Load NVIDIA kernel module early in boot (for containers)
  boot.initrd.kernelModules = [ "nvidia" ];

  nix.settings.experimental-features = [ "nix-command" "flakes" ];

  networking.hostName = "karashiiro-merotron"; # Define your hostname.
  # networking.wireless.enable = true;  # Enables wireless support via wpa_supplicant.

  # Configure network proxy if necessary
  # networking.proxy.default = "http://user:password@proxy:port/";
  # networking.proxy.noProxy = "127.0.0.1,localhost,internal.domain";

  # Enable networking
  networking.networkmanager.enable = true;

  # Set your time zone.
  time.timeZone = "America/Los_Angeles";

  # Select internationalisation properties.
  i18n.defaultLocale = "en_US.UTF-8";

  i18n.extraLocaleSettings = {
    LC_ADDRESS = "en_US.UTF-8";
    LC_IDENTIFICATION = "en_US.UTF-8";
    LC_MEASUREMENT = "en_US.UTF-8";
    LC_MONETARY = "en_US.UTF-8";
    LC_NAME = "en_US.UTF-8";
    LC_NUMERIC = "en_US.UTF-8";
    LC_PAPER = "en_US.UTF-8";
    LC_TELEPHONE = "en_US.UTF-8";
    LC_TIME = "en_US.UTF-8";
  };

  # Disable suspend
  systemd.sleep.extraConfig = ''
    AllowSuspend=no
    AllowHibernation=no
    AllowHybridSleep=no
    AllowSuspendThenHibernate=no
  '';

  # Enable the X11 windowing system.
  services.xserver.enable = true;

  # Enable the GNOME Desktop Environment.
  services.xserver.displayManager.gdm.enable = true;
  services.xserver.desktopManager.gnome.enable = true;

  # Configure keymap in X11
  services.xserver.xkb = {
    layout = "us";
    variant = "";
  };

  # Enable CUPS to print documents.
  services.printing.enable = true;

  # Enable sound with pipewire.
  services.pulseaudio.enable = false;
  security.rtkit.enable = true;
  services.pipewire = {
    enable = true;
    alsa.enable = true;
    alsa.support32Bit = true;
    pulse.enable = true;
    # If you want to use JACK applications, uncomment this
    #jack.enable = true;

    # use the example session manager (no others are packaged yet so this is enabled by default,
    # no need to redefine it in your config for now)
    #media-session.enable = true;
  };

  # Enable touchpad support (enabled default in most desktopManager).
  # services.xserver.libinput.enable = true;

  services.vscode-server.enable = true;

  # ============================================================================
  # NVIDIA GPU Configuration for ML Workloads
  # ============================================================================

  # Enable graphics/OpenGL support
  hardware.graphics = {
    enable = true;
    enable32Bit = true; # Support for 32-bit applications
  };

  # Load NVIDIA driver for X11 and Wayland
  services.xserver.videoDrivers = [ "nvidia" ];

  # NVIDIA driver configuration
  hardware.nvidia = {
    # Modesetting is required for most Wayland compositors
    modesetting.enable = true;

    # Power management (can be unstable on some hardware)
    # Set to true if you experience issues with sleep/suspend
    powerManagement.enable = false;

    # Fine-grained power management (experimental, Turing+ GPUs)
    powerManagement.finegrained = false;

    # Use the open source kernel module (for RTX 20-series and newer)
    # Set to false if you have older hardware or experience issues
    open = true;

    # Enable the NVIDIA settings menu (accessible via nvidia-settings command)
    nvidiaSettings = true;

    # Select driver version - options: stable, beta, production, latest
    # 'stable' is recommended for most users
    # 'latest' for newest features (use this if open = true doesn't work)
    package = config.boot.kernelPackages.nvidiaPackages.stable;
  };

  # CRITICAL: Enable NVIDIA Container Toolkit for Docker/Podman GPU access
  # This allows containers to use your GPU for ML workloads
  hardware.nvidia-container-toolkit.enable = true;

  # ============================================================================
  # Containerization (Docker)
  # ============================================================================

  virtualisation.docker.enable = true;

  # ============================================================================
  # Binary Cache Configuration (CRITICAL for CUDA packages)
  # ============================================================================
  # This prevents rebuilding CUDA packages from source (saves hours of compilation)

  nix.settings = {
    # Add community caches for pre-built CUDA packages
    substituters = [
      "https://cuda-maintainers.cachix.org"
      "https://nix-community.cachix.org"
    ];

    # Public keys for cache verification
    trusted-public-keys = [
      "cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E="
      "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs="
    ];
  };

  # Define a user account. Don't forget to set a password with 'passwd'.
  users.users.karashiiro = {
    isNormalUser = true;
    description = "Kara Aki";
    createHome = true;
    home = "/home/karashiiro";
    # Add docker group for container access
    extraGroups = [ "networkmanager" "wheel" "docker" ];
    packages = with pkgs; [
    ];
    openssh.authorizedKeys.keys = [
      "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAQEAti2henKxmcE/VSVDrT5m5bNLjX/2IqkARsI3tUbDEQ6IeSi38ZgJ1NBXlgSqjjBCUNXLeLJVYMFpL6l8mkGQA1zQuzDz8waINbmk0twQp4y8XxV3hloMBCtOl8KTm3P1Xage/oNKD7bnOYsdCDyEaT22OGG1CU+W7N8SEZslVqJstc3n6h3iIpS0LpWSuIYhBaOJ5mTeQGRyNS8lGEDCD/Jz5YqcWF5iyNoy8iSsM+wcfUd+flpYLvLKUG6UWDtUBKjqSksqwX1HX46EE7TpiAtMOoUQVum/s/uwIRSd9XabLNKIbx1g6m/lxfSCzo6hPsLSwk18tOjL8FBvpUSqTQ== karashiiro-merotron"
    ];
  };

  users.defaultUserShell = pkgs.zsh;

  # Install firefox.
  programs.firefox.enable = true;

  programs.zsh = {
    enable = true;
    autosuggestions.enable = true;
    syntaxHighlighting.enable = true;

    shellAliases = {
      ll = "ls -l";
      update = "sudo nixos-rebuild switch";
    };

    ohMyZsh = {
      enable = true;
      plugins = [ "git" ];
      theme = "robbyrussell";
    };
  };

  programs.git = {
    enable = true;
    config = {
      user = {
        name = "karashiiro";
        email = "49822414+karashiiro@users.noreply.github.com";
      };
      credential = {
        # NOTE(review): the "store" helper keeps credentials unencrypted on
        # disk; confirm that is acceptable for this host.
        helper = "store";
      };
    };
  };

  environment.shells = with pkgs; [ zsh ];

  # Allow unfree packages (required for NVIDIA drivers)
  nixpkgs.config.allowUnfree = true;

  # List packages installed in system profile. To search, run:
  # $ nix search wget
  environment.systemPackages = with pkgs; [
    # Container tools
    docker-compose

    # GPU monitoring tools
    nvtopPackages.full # GPU monitoring (like htop for GPU)

    # Optional: Uncomment if you want native CUDA tools
    # cudaPackages.cudatoolkit

    # Useful utilities
    vim
    wget
    git

    # Development tools
    devcontainer
    python3Packages.huggingface-hub
    llamaCppCuda
    unstable.claude-code
  ];

  # Some programs need SUID wrappers, can be configured further or are
  # started in user sessions.
  # programs.mtr.enable = true;
  # programs.gnupg.agent = {
  #   enable = true;
  #   enableSSHSupport = true;
  # };

  # List services that you want to enable:

  # Enable the OpenSSH daemon.
  services.openssh = {
    enable = true;
    settings.PasswordAuthentication = false;
  };

  # Tailscale
  services.tailscale.enable = true;
  services.tailscale.extraDaemonFlags = ["--no-logs-no-support"];

  # ============================================================================
  # Large Model Proxy Service
  # ============================================================================
  # Manages llama-server instances automatically, starting/stopping based on demand
  # Management UI available at http://localhost:7071

  systemd.services.large-model-proxy = {
    description = "Large Model Proxy";
    # NOTE(review): nothing in this file enables nvidia-persistenced
    # (hardware.nvidia.nvidiaPersistenced is not set), so this ordering/wants
    # dependency may refer to a unit that does not exist - confirm.
    after = [ "network.target" "nvidia-persistenced.service" ];
    wants = [ "nvidia-persistenced.service" ];
    wantedBy = [ "multi-user.target" ];

    # Make llama-server, curl, and shell available in PATH for LMP to spawn/healthcheck
    path = [ llamaCppCuda pkgs.curl pkgs.bash pkgs.coreutils ];

    serviceConfig = {
      Type = "simple";
      User = "karashiiro";
      Group = "users";

      ExecStart = "${large-model-proxy}/bin/large-model-proxy -c ${lmpConfigFile}";

      Restart = "on-failure";
      RestartSec = "10s";

      # Security hardening
      NoNewPrivileges = true;
      PrivateTmp = true;

      WorkingDirectory = "/home/karashiiro";
    };
  };

  # Configure the bot
  services.discord-llm-demobot = {
    enable = true;
    package = discord-llm-demobot-pkg;

    # Path to your secrets file
    environmentFile = "/var/lib/discord-llm-demobot/secrets.env";

    # Your Discord app's client ID (not secret - safe to put here)
    discord.clientId = "625907060315389954";

    # Chat API settings - use LMP's OpenAI-compatible endpoint
    chat = {
      endpointUrl = "http://127.0.0.1:7070";
      model = "gpt-oss-20b-uwufied-v2";
    };
  };

  # Add extra swap space
  swapDevices = [{
    device = "/var/lib/swapfile";
    size = 16*1024; # 16 GB
  }];

  # Open ports in the firewall.
  networking.firewall.allowedTCPPorts = [
    22   # SSH
    7070 # Large Model Proxy - OpenAI-compatible API
    7071 # Large Model Proxy - Management UI
    8080 # LMP proxied model endpoint (gpt-oss-20b)
    8081 # LMP proxied model endpoint (devstral-2)
    8888 # Jupyter notebook (if you use it)
  ];
  # networking.firewall.allowedUDPPorts = [ ... ];
  # Or disable the firewall altogether.
  # networking.firewall.enable = false;

  # This value determines the NixOS release from which the default
  # settings for stateful data, like file locations and database versions
  # on your system were taken. It's perfectly fine and recommended to leave
  # this value at the release version of the first install of this system.
  # Before changing this value read the documentation for this option
  # (e.g. man configuration.nix or on https://nixos.org/nixos/options.html).
  system.stateVersion = "25.05"; # Did you read the comment?

}

Configure Feed

Configure Feed