Update renovate/renovate Docker tag to v43.59.2

Add Q3_K_M variant of Qwen3.5-9B
fix thinking versions of Qwen3.5 small
2026-03-07 00:00:30 +00:00 · 2026-03-06 23:21:58 +01:00 · 2026-03-06 23:17:48 +01:00 · 2026-03-06 23:08:03 +01:00 · 2026-03-06 23:07:02 +01:00 · 2026-03-06 02:41:34 +01:00
3 changed files with 134 additions and 1 deletions
--- a/apps/llama/configs/config.yaml
+++ b/apps/llama/configs/config.yaml
@@ -461,6 +461,7 @@ models:
      /app/llama-server
        -hf unsloth/Qwen3-Coder-Next-GGUF:Q4_K_M
        --ctx-size 32768
+        --fit-target 2048
        --predict 8192
        --temp 1.0
        --min-p 0.01
@@ -475,6 +476,7 @@ models:
    cmd: |
      /app/llama-server
        -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
+        --fit-target 2048
        --ctx-size 16384
        --temp 1.0
        --min-p 0.00
@@ -488,6 +490,7 @@ models:
    cmd: |
      /app/llama-server
        -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
+        --fit-target 2048
        --ctx-size 16384
        --temp 1.0
        --min-p 0.00
@@ -506,6 +509,7 @@ models:
        -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
        --mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf
        --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf
+        --fit-target 2048
        --ctx-size 16384
        --temp 1.0
        --min-p 0.00
@@ -521,6 +525,7 @@ models:
        -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
        --mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf
        --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf
+        --fit-target 2048
        --ctx-size 16384
        --temp 1.0
        --min-p 0.00
@@ -561,6 +566,20 @@ models:
        --no-warmup
        --port ${PORT}

+  "Qwen3.5-0.8B-GGUF:Q4_K_XL":
+    ttl: 0
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
+
  "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL":
    ttl: 0
    cmd: |
@@ -574,3 +593,115 @@ models:
        --no-warmup
        --port ${PORT}
        --chat-template-kwargs "{\"enable_thinking\": false}"
+
+  "Qwen3.5-2B-GGUF:Q4_K_M":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
+
+  "Qwen3.5-2B-GGUF-nothink:Q4_K_M":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": false}"
+
+  "Qwen3.5-4B-GGUF:Q4_K_M":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
+
+  "Qwen3.5-4B-GGUF-nothink:Q4_K_M":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": false}"
+
+  "Qwen3.5-9B-GGUF:Q4_K_M":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
+
+  "Qwen3.5-9B-GGUF-nothink:Q4_K_M":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": false}"
+
+  "Qwen3.5-9B-GGUF:Q3_K_M":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": true}"
+
+  "Qwen3.5-9B-GGUF-nothink:Q3_K_M":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": false}"
--- a/apps/llama/deployment.yaml
+++ b/apps/llama/deployment.yaml
@@ -6,6 +6,8 @@ metadata:
  namespace: llama
 spec:
  replicas: 1
+  strategy:
+    type: Recreate
  selector:
    matchLabels:
      app: llama-swap
--- a/apps/renovate/cronjob.yaml
+++ b/apps/renovate/cronjob.yaml
@@ -15,7 +15,7 @@ spec:
            - name: renovate
              # Update this to the latest available and then enable Renovate on
              # the manifest
-              image: renovate/renovate:43.56.1-full
+              image: renovate/renovate:43.59.2-full
              envFrom:
                - secretRef:
                    name: renovate-gitea-token
Author	SHA1	Message	Date
Renovate Bot	71e5d70996	Update renovate/renovate Docker tag to v43.59.2	2026-03-07 00:00:30 +00:00
Lumpiasty	a3ebc531fe	Add Q3_K_M variant of Qwen3.5-9B	2026-03-06 23:21:58 +01:00
Lumpiasty	63f154293d	fix thinking versions of Qwen3.5 small	2026-03-06 23:17:48 +01:00
Lumpiasty	42aa0a7263	set strategy to recreate on llama-swap deployment	2026-03-06 23:08:03 +01:00
Lumpiasty	a9b8b45328	add 2B, 4B, 9B versions of Qwen3.5 in thinking + nonthinking variants	2026-03-06 23:07:02 +01:00
Lumpiasty	3dc481bc8b	increase target margin of 2048MB of VRAM	2026-03-06 02:41:34 +01:00