add abliterated versions of qwen3-vl

2025-12-06 23:33:56 +01:00
parent 95879f05d7
commit 9032060930
2 changed files with 43 additions and 1 deletions
@@ -66,7 +66,9 @@ spec:
                  "Qwen3-VL-4B-Thinking-GGUF",
                  "Qwen3-VL-8B-Instruct-GGUF",
                  "Qwen3-VL-8B-Instruct-GGUF-unslothish",
-                  "Qwen3-VL-8B-Thinking-GGUF"
+                  "Qwen3-VL-8B-Thinking-GGUF",
+                  "Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF",
+                  "Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF"
                ]
              titleConvo: true
              titleModel: "gemma3-4b-novision"
@@ -426,3 +426,43 @@ models:
        --presence-penalty 0.0
        --no-warmup
        --port ${PORT}
+
+  "Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF":
+    ttl: 600  # NOTE(review): presumably idle seconds before unload; confirm
+    cmd: |  # literal block scalar: the line breaks below are kept in the value
+      /app/llama-server
+        -hf noctrex/Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF:Q6_K
+        --n-gpu-layers 99
+        --ctx-size 12288
+        --predict 4096
+        --flash-attn auto
+        --jinja
+        --temp 0.7
+        --top-p 0.85
+        --top-k 20
+        --min-p 0.05
+        --repeat-penalty 1.15
+        --frequency-penalty 0.5
+        --presence-penalty 0.4
+        --no-warmup
+        --port ${PORT}
+
+  "Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF":
+    ttl: 600  # NOTE(review): presumably idle seconds before unload; confirm
+    cmd: |  # NOTE(review): same sampling flags as the Instruct entry; confirm
+      /app/llama-server
+        -hf noctrex/Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF:Q6_K
+        --n-gpu-layers 99
+        --ctx-size 12288
+        --predict 4096
+        --flash-attn auto
+        --jinja
+        --temp 0.7
+        --top-p 0.85
+        --top-k 20
+        --min-p 0.05
+        --repeat-penalty 1.15
+        --frequency-penalty 0.5
+        --presence-penalty 0.4
+        --no-warmup
+        --port ${PORT}