Add Qwen3-VL-4B-Thinking model variant

This commit is contained in:
2025-11-15 19:30:52 +01:00
parent 202ebc7b86
commit 9b556e98a9
2 changed files with 21 additions and 1 deletions

View File

@@ -59,7 +59,8 @@ spec:
"Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S",
"Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L",
"Qwen3-VL-4B-Instruct-GGUF",
"Qwen3-VL-4B-Instruct-GGUF-unslothish"
"Qwen3-VL-4B-Instruct-GGUF-unslothish",
"Qwen3-VL-4B-Thinking-GGUF"
]
titleConvo: true
titleModel: "gemma3-4b-novision"

View File

@@ -286,3 +286,22 @@ models:
--presence-penalty 0.7
--no-warmup
--port ${PORT}
"Qwen3-VL-4B-Thinking-GGUF":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 12288
--predict 4096
--flash-attn auto
--jinja
--top-p 0.95
--top-k 20
--temp 1.0
--min-p 0.0
--repeat-penalty 1.0
--presence-penalty 0.0
--no-warmup
--port ${PORT}