From 63f154293d1fd8a1bcbd75ce36268b436792a3d7 Mon Sep 17 00:00:00 2001
From: Lumpiasty <arek.dzski@gmail.com>
Date: Fri, 6 Mar 2026 23:17:48 +0100
Subject: [PATCH] fix thinking versions of Qwen3.5 small

---
 apps/llama/configs/config.yaml | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml
index c7fde83..8b757de 100644
--- a/apps/llama/configs/config.yaml
+++ b/apps/llama/configs/config.yaml
@@ -566,6 +566,20 @@ models:
         --no-warmup
         --port ${PORT}
 
+  "Qwen3.5-0.8B-GGUF:Q4_K_XL":
+    ttl: 0
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
+
   "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL":
     ttl: 0
     cmd: |
@@ -580,19 +594,6 @@ models:
         --port ${PORT}
         --chat-template-kwargs "{\"enable_thinking\": false}"
 
-  "Qwen3.5-0.8B-GGUF:Q4_K_XL":
-    ttl: 0
-    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
-        --ctx-size 16384
-        --temp 0.6
-        --top-p 0.95
-        --top-k 20
-        --min-p 0.00
-        --no-warmup
-        --port ${PORT}
-
   "Qwen3.5-2B-GGUF:Q4_K_M":
     ttl: 600
     cmd: |
@@ -605,6 +606,7 @@ models:
         --min-p 0.00
         --no-warmup
         --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
 
   "Qwen3.5-2B-GGUF-nothink:Q4_K_M":
     ttl: 600
@@ -632,6 +634,7 @@ models:
         --min-p 0.00
         --no-warmup
         --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
 
   "Qwen3.5-4B-GGUF-nothink:Q4_K_M":
     ttl: 600
@@ -659,6 +662,7 @@ models:
         --min-p 0.00
         --no-warmup
         --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
 
   "Qwen3.5-9B-GGUF-nothink:Q4_K_M":
     ttl: 600