add abliterated versions of qwen3-vl

This commit is contained in:
2025-12-06 23:33:56 +01:00
parent 95879f05d7
commit 9032060930
2 changed files with 43 additions and 1 deletions

View File

@@ -66,7 +66,9 @@ spec:
"Qwen3-VL-4B-Thinking-GGUF", "Qwen3-VL-4B-Thinking-GGUF",
"Qwen3-VL-8B-Instruct-GGUF", "Qwen3-VL-8B-Instruct-GGUF",
"Qwen3-VL-8B-Instruct-GGUF-unslothish", "Qwen3-VL-8B-Instruct-GGUF-unslothish",
"Qwen3-VL-8B-Thinking-GGUF" "Qwen3-VL-8B-Thinking-GGUF",
"Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF",
"Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF"
] ]
titleConvo: true titleConvo: true
titleModel: "gemma3-4b-novision" titleModel: "gemma3-4b-novision"

View File

@@ -426,3 +426,43 @@ models:
--presence-penalty 0.0 --presence-penalty 0.0
--no-warmup --no-warmup
--port ${PORT} --port ${PORT}
# Abliterated (refusal-removed) Instruct variant of Qwen3-VL 8B, Q6_K quant.
# Sampling is deliberately more conservative than the stock entries above
# (lower top-p, higher repeat/frequency/presence penalties) — abliterated
# models tend to loop without them. NOTE(review): indentation reconstructed
# to sit under the `models:` key shown in the hunk header — confirm against
# the deployed file.
"Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF":
  # Unload the model after 600 s without a request.
  ttl: 600
  # Literal block scalar: llama-swap splits the command on whitespace, so
  # one flag per line is fine. Do not add comments inside this scalar —
  # they would become part of the command string.
  cmd: |
    /app/llama-server
    -hf noctrex/Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF:Q6_K
    --n-gpu-layers 99
    --ctx-size 12288
    --predict 4096
    --flash-attn auto
    --jinja
    --temp 0.7
    --top-p 0.85
    --top-k 20
    --min-p 0.05
    --repeat-penalty 1.15
    --frequency-penalty 0.5
    --presence-penalty 0.4
    --no-warmup
    --port ${PORT}
# Abliterated (refusal-removed) Thinking variant of Qwen3-VL 8B, Q6_K quant.
# Identical launch settings to the Instruct sibling above — only the HF repo
# differs. NOTE(review): indentation reconstructed to sit under the `models:`
# key shown in the hunk header — confirm against the deployed file.
"Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF":
  # Unload the model after 600 s without a request.
  ttl: 600
  # Literal block scalar: llama-swap splits the command on whitespace, so
  # one flag per line is fine. Do not add comments inside this scalar —
  # they would become part of the command string.
  cmd: |
    /app/llama-server
    -hf noctrex/Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF:Q6_K
    --n-gpu-layers 99
    --ctx-size 12288
    --predict 4096
    --flash-attn auto
    --jinja
    --temp 0.7
    --top-p 0.85
    --top-k 20
    --min-p 0.05
    --repeat-penalty 1.15
    --frequency-penalty 0.5
    --presence-penalty 0.4
    --no-warmup
    --port ${PORT}