From 708ffe203cd6def572b47ed1522d4515a52d0d51 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Sat, 13 Sep 2025 00:27:50 +0200 Subject: [PATCH] Add Qwen2.5-VL models --- apps/librechat/release.yaml | 5 ++++- apps/llama/configs/config.yaml | 39 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/apps/librechat/release.yaml b/apps/librechat/release.yaml index 706d2a8..49dcaf7 100644 --- a/apps/librechat/release.yaml +++ b/apps/librechat/release.yaml @@ -54,7 +54,10 @@ spec: "gemma3-4b", "gemma3-4b-novision", "Qwen3-4B-Thinking-2507", - "Qwen3-4B-Thinking-2507-long-ctx" + "Qwen3-4B-Thinking-2507-long-ctx", + "Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S", + "Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L" ] titleConvo: true titleModel: "gemma3-4b-novision" diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml index 7d8c01f..d577735 100644 --- a/apps/llama/configs/config.yaml +++ b/apps/llama/configs/config.yaml @@ -175,3 +175,42 @@ models: --flash-attn --cache-type-k q8_0 --cache-type-v q8_0 --port ${PORT} + "Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S": + ttl: 600 + cmd: | + /app/llama-server + -hf unsloth/Qwen2.5-VL-32B-Instruct-GGUF:IQ1_S + -ngl 99 -c 16384 --predict 8192 + --temp 0.7 + --min-p 0.00 + --top-p 0.8 + --top-k 20 + --repeat-penalty 1.0 + --no-warmup + --port ${PORT} + "Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L": + ttl: 600 + cmd: | + /app/llama-server + -hf unsloth/Qwen2.5-VL-32B-Instruct-GGUF:Q2_K_L + -ngl 99 -c 16384 --predict 8192 + --temp 0.7 + --min-p 0.00 + --top-p 0.8 + --top-k 20 + --repeat-penalty 1.0 + --no-warmup + --port ${PORT} + "Qwen2.5-VL-7B-Instruct-GGUF": + ttl: 600 + cmd: | + /app/llama-server + -hf unsloth/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M + -ngl 37 -c 16384 --predict 8192 + --temp 0.7 + --min-p 0.00 + --top-p 0.8 + --top-k 20 + --repeat-penalty 1.0 + --no-warmup + --port ${PORT}