From 669beccc3500095e5b374774ee43492359eb414d Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Sat, 15 Nov 2025 19:49:21 +0100 Subject: [PATCH] fix Qwen3-VL-4B-Instruct-GGUF models looping issue --- apps/llama/configs/config.yaml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml index 074c6d3..fd0c7b9 100644 --- a/apps/llama/configs/config.yaml +++ b/apps/llama/configs/config.yaml @@ -254,18 +254,19 @@ models: ttl: 600 cmd: | /app/llama-server - -hf unsloth/Qwen3-VL-4B-Instruct-GGUF:Q4_K_M + -hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0 --n-gpu-layers 99 --ctx-size 12288 --predict 4096 --flash-attn auto --jinja - --top-p 0.95 + --temp 0.7 + --top-p 0.85 --top-k 20 - --temp 1.0 --min-p 0.05 - --repeat-penalty 1.0 - --presence-penalty 0.0 + --repeat-penalty 1.15 + --frequency-penalty 0.5 + --presence-penalty 0.4 --no-warmup --port ${PORT} @@ -273,17 +274,19 @@ models: ttl: 600 cmd: | /app/llama-server - -hf unsloth/Qwen3-VL-4B-Instruct-GGUF:Q4_K_M + -hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0 --n-gpu-layers 99 --ctx-size 12288 --predict 4096 --flash-attn auto --jinja + --temp 0.7 --top-p 0.8 --top-k 20 - --temp 0.7 - --min-p 0.0 - --presence-penalty 0.7 + --min-p 0.05 + --repeat-penalty 1.15 + --frequency-penalty 0.5 + --presence-penalty 0.6 --no-warmup --port ${PORT} @@ -291,7 +294,7 @@ models: ttl: 600 cmd: | /app/llama-server - -hf unsloth/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M + -hf Qwen/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M --n-gpu-layers 99 --ctx-size 12288 --predict 4096