add qwen3-4b-thinking-2507 model

This commit is contained in:
2025-08-16 23:26:51 +02:00
parent 83e5cada3f
commit 8b06571280
2 changed files with 32 additions and 4 deletions

View File

@@ -72,12 +72,14 @@ spec:
"gemma3-12b-q2",
"gemma3-12b-novision",
"gemma3-4b",
"gemma3-4b-novision"
"gemma3-4b-novision",
"Qwen3-4B-Thinking-2507",
"Qwen3-4B-Thinking-2507-long-ctx"
]
titleConvo: true
titleModel: "current_model"
titleModel: "gemma3-4b-novision"
summarize: false
summaryModel: "current_model"
summaryModel: "gemma3-4b-novision"
forcePrompt: false
modelDisplayLabel: "Llama.cpp"
imageVolume:

View File

@@ -26,7 +26,7 @@ models:
--jinja --chat-template-file /config/qwen_nothink_chat_template.jinja
--no-warmup
--port ${PORT}
"gemma3n-e3b":
"gemma3n-e4b":
ttl: 600
cmd: |
/app/llama-server
@@ -119,3 +119,29 @@ models:
--no-mmproj
--no-warmup
--port ${PORT}
"Qwen3-4B-Thinking-2507":
# Qwen3-4B-Thinking-2507 served by llama-server with a 16384-token context.
# ttl: presumably the idle time in seconds before the model is unloaded --
# TODO confirm against the documentation of the proxy that reads this file.
ttl: 600
# Flags passed to llama-server (meanings per the llama.cpp server docs;
# verify against the deployed build):
#   -hf              Hugging Face repo and quant to fetch (Q4_K_M here)
#   -ngl 99          number of layers to offload to the GPU
#   -c 16384         context size in tokens
#   --predict 8192   cap on tokens generated per request
#   sampling         temp 0.6, min-p 0.00, top-p 0.95, top-k 20,
#                    repeat-penalty 1.0
# ${PORT} is not defined in this entry; presumably it is substituted by the
# launcher before the command runs.
cmd: |
/app/llama-server
-hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M
-ngl 99 -c 16384 --predict 8192
--temp 0.6
--min-p 0.00
--top-p 0.95
--top-k 20
--repeat-penalty 1.0
--no-warmup
--port ${PORT}
"Qwen3-4B-Thinking-2507-long-ctx":
# Long-context variant: same GGUF and sampling flags as the
# "Qwen3-4B-Thinking-2507" entry; only -c (262144) and --predict (81920)
# differ.
# ttl: presumably the idle time in seconds before the model is unloaded --
# TODO confirm against the documentation of the proxy that reads this file.
ttl: 600
# NOTE(review): a 262144-token context should need a far larger KV cache than
# the 16384-token entry; confirm it fits on the target GPU with -ngl 99.
# ${PORT} is not defined in this entry; presumably it is substituted by the
# launcher before the command runs.
cmd: |
/app/llama-server
-hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M
-ngl 99 -c 262144 --predict 81920
--temp 0.6
--min-p 0.00
--top-p 0.95
--top-k 20
--repeat-penalty 1.0
--no-warmup
--port ${PORT}