GPU offload in llama.cpp
This commit is contained in:
@@ -10,4 +10,5 @@ data:
     cmd: |
       /app/llama-server
       -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
      -ngl 37
       --port ${PORT}
||||
@@ -18,6 +18,8 @@ spec:
       - name: llama-swap
         image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957
         imagePullPolicy: IfNotPresent
         command:
           - /app/llama-swap
         args:
           - --config=/config/config.yaml
           - --watch-config
Reference in New Issue
Block a user