From eb4ac7acf404f5bf2a1468029adf0859d0153b03 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Sat, 16 Aug 2025 23:26:51 +0200 Subject: [PATCH] add qwen3-4b-2507 model --- apps/librechat/release.yaml | 10 +++++-- apps/llama/configs/config.yaml | 58 +++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/apps/librechat/release.yaml b/apps/librechat/release.yaml index f06f205..172bceb 100644 --- a/apps/librechat/release.yaml +++ b/apps/librechat/release.yaml @@ -72,12 +72,16 @@ spec: "gemma3-12b-q2", "gemma3-12b-novision", "gemma3-4b", - "gemma3-4b-novision" + "gemma3-4b-novision", + "Qwen3-4B-Thinking-2507", + "Qwen3-4B-Thinking-2507-long-ctx", + "Qwen3-4B-Instruct-2507", + "Qwen3-4B-Instruct-2507-long-ctx" ] titleConvo: true - titleModel: "current_model" + titleModel: "gemma3-4b-novision" summarize: false - summaryModel: "current_model" + summaryModel: "gemma3-4b-novision" forcePrompt: false modelDisplayLabel: "Llama.cpp" imageVolume: diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml index 2883f8d..7d8c01f 100644 --- a/apps/llama/configs/config.yaml +++ b/apps/llama/configs/config.yaml @@ -26,7 +26,7 @@ models: --jinja --chat-template-file /config/qwen_nothink_chat_template.jinja --no-warmup --port ${PORT} - "gemma3n-e3b": + "gemma3n-e4b": ttl: 600 cmd: | /app/llama-server @@ -119,3 +119,59 @@ models: --no-mmproj --no-warmup --port ${PORT} + "Qwen3-4B-Thinking-2507": + ttl: 600 + cmd: | + /app/llama-server + -hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M + -ngl 99 -c 16384 --predict 8192 + --temp 0.6 + --min-p 0.00 + --top-p 0.95 + --top-k 20 + --repeat-penalty 1.0 + --no-warmup + --port ${PORT} + "Qwen3-4B-Thinking-2507-long-ctx": + ttl: 600 + cmd: | + /app/llama-server + -hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M + -ngl 99 -c 262144 --predict 81920 + --temp 0.6 + --min-p 0.00 + --top-p 0.95 + --top-k 20 + --repeat-penalty 1.0 + --no-warmup + --flash-attn + --cache-type-k q8_0 --cache-type-v q8_0 + --port ${PORT} + "Qwen3-4B-Instruct-2507": + ttl: 600 + cmd: |
+ /app/llama-server + -hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M + -ngl 99 -c 16384 --predict 8192 + --temp 0.7 + --min-p 0.00 + --top-p 0.8 + --top-k 20 + --repeat-penalty 1.0 + --no-warmup + --port ${PORT} + "Qwen3-4B-Instruct-2507-long-ctx": + ttl: 600 + cmd: | + /app/llama-server + -hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M + -ngl 99 -c 262144 --predict 81920 + --temp 0.7 + --min-p 0.00 + --top-p 0.8 + --top-k 20 + --repeat-penalty 1.0 + --no-warmup + --flash-attn + --cache-type-k q8_0 --cache-type-v q8_0 + --port ${PORT}