gpu offload in llama.cpp
This commit is contained in:
@@ -10,4 +10,5 @@ data:
 cmd: |
 /app/llama-server
 -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
+-ngl 37
 --port ${PORT}
@@ -18,6 +18,8 @@ spec:
 - name: llama-swap
 image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957
 imagePullPolicy: IfNotPresent
+command:
+- /app/llama-swap
 args:
 - --config=/config/config.yaml
 - --watch-config
Reference in New Issue
Block a user