add qwen3 no thinking

2025-07-23 22:45:26 +02:00
parent b379c181f2
commit 5f3a00b382
5 changed files with 128 additions and 28 deletions
--- a/apps/llama/configs/config.yaml
+++ b/apps/llama/configs/config.yaml
@@ -0,0 +1,20 @@
+models:  # maps each model name to the command line that serves it
+  "DeepSeek-R1-0528-Qwen3-8B-GGUF":  # pulled with -hf from the unsloth repo, Q4_K_M tag
+    cmd: |  # literal block; ${PORT} is presumably substituted by the launcher - TODO confirm
+      /app/llama-server
+      -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
+      -ngl 37 -c 16384
+      --port ${PORT}
+  "Qwen3-8B-GGUF":  # Qwen3-8B (unsloth, Q4_K_M tag) with no chat-template override
+    cmd: |  # literal block; ${PORT} is presumably substituted by the launcher - TODO confirm
+      /app/llama-server
+      -hf unsloth/Qwen3-8B-GGUF:Q4_K_M
+      -ngl 37 -c 16384
+      --port ${PORT}
+  "Qwen3-8B-GGUF-no-thinking":  # same -hf source as "Qwen3-8B-GGUF"; adds a custom chat template
+    cmd: |  # only extra flags: --jinja --chat-template-file; template presumably disables thinking - confirm in the .jinja
+      /app/llama-server
+      -hf unsloth/Qwen3-8B-GGUF:Q4_K_M
+      -ngl 37 -c 16384
+      --jinja --chat-template-file /config/qwen_nothink_chat_template.jinja
+      --port ${PORT}