Fix looping issue in Qwen3-VL-4B-Instruct-GGUF models

This commit is contained in:
2025-11-15 19:49:21 +01:00
parent 9b556e98a9
commit 6c7457d095

View File

@@ -254,18 +254,19 @@ models:
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3-VL-4B-Instruct-GGUF:Q4_K_M
-hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0
--n-gpu-layers 99
--ctx-size 12288
--predict 4096
--flash-attn auto
--jinja
--top-p 0.95
--temp 0.7
--top-p 0.85
--top-k 20
--temp 1.0
--min-p 0.05
--repeat-penalty 1.0
--presence-penalty 0.0
--repeat-penalty 1.15
--frequency-penalty 0.5
--presence-penalty 0.4
--no-warmup
--port ${PORT}
@@ -273,17 +274,19 @@ models:
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3-VL-4B-Instruct-GGUF:Q4_K_M
-hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0
--n-gpu-layers 99
--ctx-size 12288
--predict 4096
--flash-attn auto
--jinja
--temp 0.7
--top-p 0.8
--top-k 20
--temp 0.7
--min-p 0.0
--presence-penalty 0.7
--min-p 0.05
--repeat-penalty 1.15
--frequency-penalty 0.5
--presence-penalty 0.6
--no-warmup
--port ${PORT}
@@ -291,7 +294,7 @@ models:
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M
-hf Qwen/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 12288
--predict 4096