GPU offload in llama.cpp

This commit is contained in:
2025-07-23 19:51:26 +02:00
parent 5fb2bcfc7e
commit d53db88fd2
2 changed files with 3 additions and 0 deletions

View File

@@ -10,4 +10,5 @@ data:
cmd: |
/app/llama-server
-hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
-ngl 37
--port ${PORT}

View File

@@ -18,6 +18,8 @@ spec:
- name: llama-swap
image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957
imagePullPolicy: IfNotPresent
command:
- /app/llama-swap
args:
- --config=/config/config.yaml
- --watch-config