GPU offload in llama.cpp

This commit is contained in:
2025-07-23 19:51:26 +02:00
parent 5fb2bcfc7e
commit d53db88fd2
2 changed files with 3 additions and 0 deletions

View File

@@ -10,4 +10,5 @@ data:
cmd: |
/app/llama-server
-hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
-ngl 37
--port ${PORT}

View File

@@ -18,6 +18,8 @@ spec:
- name: llama-swap
image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957
imagePullPolicy: IfNotPresent
command:
- /app/llama-swap
args:
- --config=/config/config.yaml
- --watch-config