Add Qwen3-VL models, stop LibreChat from overriding server-side sampling settings, and clean up the llama.cpp config

This commit is contained in:
2025-11-15 19:09:13 +01:00
parent e3325670de
commit 0b677d0faf
2 changed files with 234 additions and 150 deletions

View File

@@ -57,7 +57,9 @@ spec:
"Qwen3-4B-Thinking-2507-long-ctx",
"Qwen2.5-VL-7B-Instruct-GGUF",
"Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S",
"Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L"
"Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L",
"Qwen3-VL-4B-Instruct-GGUF",
"Qwen3-VL-4B-Instruct-GGUF-unslothish"
]
titleConvo: true
titleModel: "gemma3-4b-novision"
@@ -65,6 +67,16 @@ spec:
summaryModel: "gemma3-4b-novision"
forcePrompt: false
modelDisplayLabel: "Llama.cpp"
# IMPORTANT: drop these client-sent sampling params so llama-swap/llama-server defaults take effect
dropParams:
- "temperature"
- "top_p"
- "top_k"
- "presence_penalty"
- "frequency_penalty"
- "stop"
- "max_tokens"
imageVolume:
enabled: true
size: 10G