add gemma3n

This commit is contained in:
2025-07-23 23:07:46 +02:00
parent de3ef465f0
commit 32eea7c3af
2 changed files with 16 additions and 1 deletions

View File

@@ -66,7 +66,8 @@ spec:
default: [
"DeepSeek-R1-0528-Qwen3-8B-GGUF",
"Qwen3-8B-GGUF",
"Qwen3-8B-GGUF-no-thinking"
"Qwen3-8B-GGUF-no-thinking",
"gemma3n"
]
titleConvo: true
titleModel: "current_model"

View File

@@ -18,3 +18,17 @@ models:
-ngl 37 -c 16384
--jinja --chat-template-file /config/qwen_nothink_chat_template.jinja
--port ${PORT}
# Model entry "gemma3n": starts /app/llama-server with the Hugging Face
# reference unsloth/gemma-3n-E4B-it-GGUF:UD-Q4_K_XL (passed via -hf),
# a context size of 16384 and up to 99 layers offloaded to the GPU.
# Sampling is pinned on the command line: seed 3407, temp 1.0,
# repeat-penalty 1.0, min-p 0.00, top-k 64, top-p 0.95.
# NOTE(review): ${PORT} is presumably substituted by the launcher that
# consumes this file before the command runs; confirm against its docs.
# NOTE(review): the other visible entry uses the short flags -ngl / -c;
# the long forms here appear to be the same options; confirm.
"gemma3n":
cmd: |
/app/llama-server
-hf unsloth/gemma-3n-E4B-it-GGUF:UD-Q4_K_XL
--ctx-size 16384
--n-gpu-layers 99
--seed 3407
--prio 2
--temp 1.0
--repeat-penalty 1.0
--min-p 0.00
--top-k 64
--top-p 0.95
--port ${PORT}