From 5eb7b7bb0cf380b7afd4d5c39dee4acd9cecbc90 Mon Sep 17 00:00:00 2001
From: Lumpiasty
Date: Sat, 15 Nov 2025 19:30:52 +0100
Subject: [PATCH] add qwen3-vl thinking variant

---
Review notes (free text in this position is not part of the commit message
or of the diff payload):

* apps/llama/configs/config.yaml gains a new entry under `models:` keyed
  "Qwen3-VL-4B-Thinking-GGUF". Its `cmd` starts /app/llama-server with
  `-hf unsloth/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M`, a 12288-token context
  (`--ctx-size`), a 4096-token generation cap (`--predict`) and explicit
  sampling flags (`--temp 1.0 --top-p 0.95 --top-k 20 --min-p 0.0
  --repeat-penalty 1.0 --presence-penalty 0.0`).
* apps/librechat/release.yaml appends the same string to the model name
  list that already contains "Qwen3-VL-4B-Instruct-GGUF"; the previous last
  item only gains a trailing comma.
* NOTE(review): presumably the name in the LibreChat list has to match the
  key in the llama config exactly - confirm before renaming either one.
* NOTE(review): `ttl: 600` is copied as-is; its unit is not visible in this
  patch - confirm against the consumer of config.yaml.
* NOTE(review): `${PORT}` is not defined in this patch; it is presumably
  substituted by whatever launches `cmd` - verify.
* NOTE(review): the leading indentation of the hunk lines below was
  reconstructed; verify it against the target files before running
  `git am` / `git apply`.

 apps/librechat/release.yaml    |  3 ++-
 apps/llama/configs/config.yaml | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/apps/librechat/release.yaml b/apps/librechat/release.yaml
index 1aa0095..a7d5e58 100644
--- a/apps/librechat/release.yaml
+++ b/apps/librechat/release.yaml
@@ -59,7 +59,8 @@ spec:
                   "Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S",
                   "Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L",
                   "Qwen3-VL-4B-Instruct-GGUF",
-                  "Qwen3-VL-4B-Instruct-GGUF-unslothish"
+                  "Qwen3-VL-4B-Instruct-GGUF-unslothish",
+                  "Qwen3-VL-4B-Thinking-GGUF"
                 ]
               titleConvo: true
               titleModel: "gemma3-4b-novision"
diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml
index 76e00b4..074c6d3 100644
--- a/apps/llama/configs/config.yaml
+++ b/apps/llama/configs/config.yaml
@@ -286,3 +286,22 @@ models:
       --presence-penalty 0.7
       --no-warmup
       --port ${PORT}
+
+  "Qwen3-VL-4B-Thinking-GGUF":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+      -hf unsloth/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M
+      --n-gpu-layers 99
+      --ctx-size 12288
+      --predict 4096
+      --flash-attn auto
+      --jinja
+      --top-p 0.95
+      --top-k 20
+      --temp 1.0
+      --min-p 0.0
+      --repeat-penalty 1.0
+      --presence-penalty 0.0
+      --no-warmup
+      --port ${PORT}