Add gemma3n model (default model list and models config)

2025-07-23 23:07:46 +02:00
parent de3ef465f0
commit 32eea7c3af
2 changed files with 16 additions and 1 deletions
@@ -66,7 +66,8 @@ spec:
                default: [
                  "DeepSeek-R1-0528-Qwen3-8B-GGUF",
                  "Qwen3-8B-GGUF",
-                  "Qwen3-8B-GGUF-no-thinking"
+                  "Qwen3-8B-GGUF-no-thinking",
+                  "gemma3n"
                ]
              titleConvo: true
              titleModel: "current_model"
@@ -18,3 +18,17 @@ models:
      -ngl 37 -c 16384
      --jinja --chat-template-file /config/qwen_nothink_chat_template.jinja
      --port ${PORT}
+  # Model entry "gemma3n": starts /app/llama-server and loads the model
+  # reference unsloth/gemma-3n-E4B-it-GGUF:UD-Q4_K_XL through the -hf flag.
+  # Context size is 16384 and 99 GPU layers are requested; the remaining
+  # flags set the sampling defaults (temp, repeat-penalty, min-p, top-k, top-p).
+  # ${PORT} is presumably substituted by the tool that reads this file
+  # before the command is launched - verify against that tool's docs.
+  # NOTE(review): --seed 3407 pins the sampling seed; presumably identical
+  # requests will then yield identical output - confirm this is intended.
+  # Keep comments out of the `cmd: |` body: it is a literal block scalar, so
+  # every line inside it is part of the command string.
+  "gemma3n":
+    cmd: |
+      /app/llama-server
+      -hf unsloth/gemma-3n-E4B-it-GGUF:UD-Q4_K_XL
+      --ctx-size 16384
+      --n-gpu-layers 99
+      --seed 3407
+      --prio 2
+      --temp 1.0
+      --repeat-penalty 1.0
+      --min-p 0.00
+      --top-k 64
+      --top-p 0.95
+      --port ${PORT}