From 3dc481bc8bb27b4aed883552d688fa2871af59f1 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Fri, 6 Mar 2026 02:41:34 +0100 Subject: [PATCH] increase target margin to 2048MB of VRAM --- apps/llama/configs/config.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml index 94a1f36..1ae1384 100644 --- a/apps/llama/configs/config.yaml +++ b/apps/llama/configs/config.yaml @@ -461,6 +461,7 @@ models: /app/llama-server -hf unsloth/Qwen3-Coder-Next-GGUF:Q4_K_M --ctx-size 32768 + --fit-target 2048 --predict 8192 --temp 1.0 --min-p 0.01 @@ -475,6 +476,7 @@ models: cmd: | /app/llama-server -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M + --fit-target 2048 --ctx-size 16384 --temp 1.0 --min-p 0.00 @@ -488,6 +490,7 @@ models: cmd: | /app/llama-server -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M + --fit-target 2048 --ctx-size 16384 --temp 1.0 --min-p 0.00 @@ -506,6 +509,7 @@ models: -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M --mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf + --fit-target 2048 --ctx-size 16384 --temp 1.0 --min-p 0.00 @@ -521,6 +525,7 @@ models: -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M --mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf + --fit-target 2048 --ctx-size 16384 --temp 1.0 --min-p 0.00