Add Qwen3-VL, fix LibreChat taking over settings, and clean up llama config
This commit is contained in:
@@ -57,7 +57,9 @@ spec:
|
||||
"Qwen3-4B-Thinking-2507-long-ctx",
|
||||
"Qwen2.5-VL-7B-Instruct-GGUF",
|
||||
"Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S",
|
||||
"Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L"
|
||||
"Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L",
|
||||
"Qwen3-VL-4B-Instruct-GGUF",
|
||||
"Qwen3-VL-4B-Instruct-GGUF-unslothish"
|
||||
]
|
||||
titleConvo: true
|
||||
titleModel: "gemma3-4b-novision"
|
||||
@@ -65,6 +67,16 @@ spec:
|
||||
summaryModel: "gemma3-4b-novision"
|
||||
forcePrompt: false
|
||||
modelDisplayLabel: "Llama.cpp"
|
||||
|
||||
# ✨ IMPORTANT: drop these request parameters so llama-swap/llama-server own them instead of LibreChat
|
||||
dropParams:
|
||||
- "temperature"
|
||||
- "top_p"
|
||||
- "top_k"
|
||||
- "presence_penalty"
|
||||
- "frequency_penalty"
|
||||
- "stop"
|
||||
- "max_tokens"
|
||||
imageVolume:
|
||||
enabled: true
|
||||
size: 10G
|
||||
|
||||
Reference in New Issue
Block a user