diff --git a/apps/librechat/release.yaml b/apps/librechat/release.yaml index 0e89e09..d6fd98b 100644 --- a/apps/librechat/release.yaml +++ b/apps/librechat/release.yaml @@ -67,7 +67,9 @@ spec: "DeepSeek-R1-0528-Qwen3-8B-GGUF", "Qwen3-8B-GGUF", "Qwen3-8B-GGUF-no-thinking", - "gemma3n" + "gemma3n-e4b", + "gemma3-12b", + "gemma3-12b-novision" ] titleConvo: true titleModel: "current_model" diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml index a71f216..b758c12 100644 --- a/apps/llama/configs/config.yaml +++ b/apps/llama/configs/config.yaml @@ -18,7 +18,7 @@ models: -ngl 37 -c 16384 --jinja --chat-template-file /config/qwen_nothink_chat_template.jinja --port ${PORT} - "gemma3n": + "gemma3n-e4b": cmd: | /app/llama-server -hf unsloth/gemma-3n-E4B-it-GGUF:UD-Q4_K_XL @@ -32,3 +32,30 @@ models: --top-k 64 --top-p 0.95 --port ${PORT} + "gemma3-12b": + cmd: | + /app/llama-server + -hf unsloth/gemma-3-12b-it-GGUF:Q3_K_M + --ctx-size 16384 + --n-gpu-layers 99 + --prio 2 + --temp 1.0 + --repeat-penalty 1.0 + --min-p 0.00 + --top-k 64 + --top-p 0.95 + --port ${PORT} + "gemma3-12b-novision": + cmd: | + /app/llama-server + -hf unsloth/gemma-3-12b-it-GGUF:Q3_K_M + --ctx-size 16384 + --n-gpu-layers 99 + --prio 2 + --temp 1.0 + --repeat-penalty 1.0 + --min-p 0.00 + --top-k 64 + --top-p 0.95 + --no-mmproj + --port ${PORT}