Update renovate/renovate Docker tag to v43.56.1

add Qwen3.5 Small 0.8B model and replace Qwen3-VL-2B as task model
shorten context for qwen3-vl-2b and lower kv cache quant
2026-03-06 00:00:45 +00:00 · 2026-03-05 23:17:30 +01:00 · 2026-03-05 22:42:54 +01:00 · 2026-03-05 19:31:03 +01:00 · 2026-03-05 19:27:45 +01:00
3 changed files with 25 additions and 8 deletions
--- a/apps/llama/configs/config.yaml
+++ b/apps/llama/configs/config.yaml
@@ -5,15 +5,15 @@ logToStdout: "both" # proxy and upstream
 hooks:
  on_startup:
    preload:
-      - "Qwen3-VL-2B-Instruct-GGUF:Q4_K_M"
+      - "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"

 groups:
-  qwen-vl-always:
+  always:
    persistent: true
    exclusive: false
    swap: false
    members:
-      - "Qwen3-VL-2B-Instruct-GGUF:Q4_K_M"
+      - "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"

 models:
  "DeepSeek-R1-0528-Qwen3-8B-GGUF":
@@ -505,6 +505,7 @@ models:
      /app/llama-server
        -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
        --mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf
+        --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf
        --ctx-size 16384
        --temp 1.0
        --min-p 0.00
@@ -519,6 +520,7 @@ models:
      /app/llama-server
        -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
        --mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf
+        --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf
        --ctx-size 16384
        --temp 1.0
        --min-p 0.00
@@ -529,12 +531,13 @@ models:
        --chat-template-kwargs "{\"enable_thinking\": false}"

  "Qwen3-VL-2B-Instruct-GGUF:Q4_K_M":
-    ttl: 0
+    ttl: 600
    cmd: |
      /app/llama-server
        -hf unsloth/Qwen3-VL-2B-Instruct-GGUF:Q4_K_M
-        --ctx-size 16384
-        --predict 4096
+        --ctx-size 6144
+        --cache-type-k q8_0
+        --cache-type-v q8_0
        --temp 0.7
        --top-p 0.8
        --top-k 20
@@ -557,3 +560,17 @@ models:
        --repeat-penalty 1.0
        --no-warmup
        --port ${PORT}
+
+  "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL":
+    ttl: 0
+    cmd: |
+      /app/llama-server
+        -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
+        --ctx-size 16384
+        --temp 0.6
+        --top-p 0.95
+        --top-k 20
+        --min-p 0.00
+        --no-warmup
+        --port ${PORT}
+        --chat-template-kwargs "{\"enable_thinking\": false}"
--- a/apps/llama/deployment.yaml
+++ b/apps/llama/deployment.yaml
@@ -17,7 +17,7 @@ spec:
      containers:
        - name: llama-swap
          # TODO: make renovate update the image tag
-          image: ghcr.io/mostlygeek/llama-swap:v195-vulkan-b8148
+          image: ghcr.io/mostlygeek/llama-swap:v197-vulkan-b8202
          imagePullPolicy: IfNotPresent
          command:
            - /app/llama-swap
--- a/apps/renovate/cronjob.yaml
+++ b/apps/renovate/cronjob.yaml
@@ -15,7 +15,7 @@ spec:
            - name: renovate
              # Update this to the latest available and then enable Renovate on
              # the manifest
-              image: renovate/renovate:43.46.6-full
+              image: renovate/renovate:43.56.1-full
              envFrom:
                - secretRef:
                    name: renovate-gitea-token
Author	SHA1	Message	Date
Renovate Bot	ff18b21349	Update renovate/renovate Docker tag to v43.56.1	2026-03-06 00:00:45 +00:00
Lumpiasty	711c437c0a	add Qwen3.5 Small 0.8B model and replace Qwen3-VL-2B as task model	2026-03-05 23:17:30 +01:00
Lumpiasty	975f1db8f5	shorten context for qwen3-vl-2b and lower kv cache quant	2026-03-05 22:42:54 +01:00
Lumpiasty	ab9ddd0f3b	add path to mmproj in qwen3.5 heretic	2026-03-05 19:31:03 +01:00
Lumpiasty	3e59786c83	manually update llama-swap image tag	2026-03-05 19:27:45 +01:00