add qwen3-vl, fix librechat taking over settings and clean up llama config

2025-11-15 19:09:13 +01:00
parent e3325670de
commit 0b677d0faf
2 changed files with 234 additions and 150 deletions
@@ -57,7 +57,9 @@ spec:
                  "Qwen3-4B-Thinking-2507-long-ctx",
                  "Qwen2.5-VL-7B-Instruct-GGUF",
                  "Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S",
-                  "Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L"
+                  "Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L",
+                  "Qwen3-VL-4B-Instruct-GGUF",
+                  "Qwen3-VL-4B-Instruct-GGUF-unslothish"
                ]
              titleConvo: true
              titleModel: "gemma3-4b-novision"
@@ -65,6 +67,16 @@ spec:
              summaryModel: "gemma3-4b-novision"
              forcePrompt: false
              modelDisplayLabel: "Llama.cpp"
+
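+              # ✨ IMPORTANT: drop the parameters listed below from outgoing requests
+              # so llama-swap/llama-server own them (LibreChat must not override the
+              # server-side settings).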
+              dropParams:
+                - "temperature"
+                - "top_p"
+                - "top_k"
+                - "presence_penalty"
+                - "frequency_penalty"
+                - "stop"
+                - "max_tokens"
      imageVolume:
        enabled: true
        size: 10G