From d53db88fd21594f88c4d60f9b1e9599d4e816eb3 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Wed, 23 Jul 2025 19:51:26 +0200 Subject: [PATCH] gpu offload in llama.cpp --- apps/llama/configmap.yaml | 1 + apps/llama/deployment.yaml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/apps/llama/configmap.yaml b/apps/llama/configmap.yaml index c28fcd6..e8cd84b 100644 --- a/apps/llama/configmap.yaml +++ b/apps/llama/configmap.yaml @@ -10,4 +10,5 @@ data: cmd: | /app/llama-server -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M + -ngl 37 --port ${PORT} diff --git a/apps/llama/deployment.yaml b/apps/llama/deployment.yaml index 2dd28ab..8cf69b2 100644 --- a/apps/llama/deployment.yaml +++ b/apps/llama/deployment.yaml @@ -18,6 +18,8 @@ spec: - name: llama-swap image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957 imagePullPolicy: IfNotPresent + command: + - /app/llama-swap args: - --config=/config/config.yaml - --watch-config