add qwen3-vl thinking variant

2025-11-15 19:30:52 +01:00
parent 202ebc7b86
commit 9b556e98a9
2 changed files with 21 additions and 1 deletions
@@ -59,7 +59,8 @@ spec:
                  "Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S",
                  "Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L",
                  "Qwen3-VL-4B-Instruct-GGUF",
-                  "Qwen3-VL-4B-Instruct-GGUF-unslothish"
+                  "Qwen3-VL-4B-Instruct-GGUF-unslothish",
+                  "Qwen3-VL-4B-Thinking-GGUF"
                ]
              titleConvo: true
              titleModel: "gemma3-4b-novision"
@@ -286,3 +286,22 @@ models:
        --presence-penalty 0.7
        --no-warmup
        --port ${PORT}
+
+  "Qwen3-VL-4B-Thinking-GGUF":  # same name as the entry added to the model list
+    ttl: 600  # NOTE(review): presumably idle seconds before unload — confirm
+    cmd: |  # literal block: every line below is command text, not YAML
+      /app/llama-server
+        -hf unsloth/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M
+        --n-gpu-layers 99
+        --ctx-size 12288
+        --predict 4096
+        --flash-attn auto
+        --jinja
+        --top-p 0.95
+        --top-k 20
+        --temp 1.0
+        --min-p 0.0
+        --repeat-penalty 1.0
+        --presence-penalty 0.0
+        --no-warmup
+        --port ${PORT}