diff --git a/apps/librechat/release.yaml b/apps/librechat/release.yaml
index 04d5272..3534efd 100644
--- a/apps/librechat/release.yaml
+++ b/apps/librechat/release.yaml
@@ -66,7 +66,9 @@ spec:
             "Qwen3-VL-4B-Thinking-GGUF",
             "Qwen3-VL-8B-Instruct-GGUF",
             "Qwen3-VL-8B-Instruct-GGUF-unslothish",
-            "Qwen3-VL-8B-Thinking-GGUF"
+            "Qwen3-VL-8B-Thinking-GGUF",
+            "Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF",
+            "Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF"
           ]
           titleConvo: true
           titleModel: "gemma3-4b-novision"
diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml
index 50c7d63..4b95728 100644
--- a/apps/llama/configs/config.yaml
+++ b/apps/llama/configs/config.yaml
@@ -426,3 +426,43 @@ models:
       --presence-penalty 0.0
       --no-warmup
       --port ${PORT}
+
+  "Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+      -hf noctrex/Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF:Q6_K
+      --n-gpu-layers 99
+      --ctx-size 12288
+      --predict 4096
+      --flash-attn auto
+      --jinja
+      --temp 0.7
+      --top-p 0.85
+      --top-k 20
+      --min-p 0.05
+      --repeat-penalty 1.15
+      --frequency-penalty 0.5
+      --presence-penalty 0.4
+      --no-warmup
+      --port ${PORT}
+
+  "Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF":
+    ttl: 600
+    cmd: |
+      /app/llama-server
+      -hf noctrex/Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF:Q6_K
+      --n-gpu-layers 99
+      --ctx-size 12288
+      --predict 4096
+      --flash-attn auto
+      --jinja
+      --temp 0.7
+      --top-p 0.85
+      --top-k 20
+      --min-p 0.05
+      --repeat-penalty 1.15
+      --frequency-penalty 0.5
+      --presence-penalty 0.4
+      --no-warmup
+      --port ${PORT}