Add --mlock and --no-mmap to the llama-server base_args and common_args macros, and invoke the binary as /app/llama-server
ci/woodpecker/push/flux-reconcile-source Pipeline was successful

This commit is contained in:
2026-05-21 23:18:05 +02:00
parent fc2c15d154
commit c161da3657
+4 -4
View File
@@ -3,8 +3,8 @@ healthCheckTimeout: 600
logToStdout: "both" # proxy and upstream logToStdout: "both" # proxy and upstream
macros: macros:
base_args: "--no-warmup --port ${PORT}" base_args: "--no-warmup --port ${PORT} --mlock --no-mmap"
common_args: "--fit-target 1536 --no-warmup --port ${PORT}" common_args: "--fit-target 1536 --no-warmup --port ${PORT} --mlock --no-mmap"
cpu_args: "--no-warmup --port ${PORT} -ngl 0" cpu_args: "--no-warmup --port ${PORT} -ngl 0"
ctx_64k: "--ctx-size 65536" ctx_64k: "--ctx-size 65536"
ctx_128k: "--ctx-size 131072" ctx_128k: "--ctx-size 131072"
@@ -217,7 +217,7 @@ models:
"unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M": "unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M":
cmd: | cmd: |
llama-server /app/llama-server
-hf unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M -hf unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M
${ctx_256k} ${ctx_256k}
${qwen35_think_args} ${qwen35_think_args}
@@ -227,7 +227,7 @@ models:
"unsloth/Qwen3.6-35B-A3B-MTP-GGUF-nothink:Q4_K_M": "unsloth/Qwen3.6-35B-A3B-MTP-GGUF-nothink:Q4_K_M":
cmd: | cmd: |
llama-server /app/llama-server
-hf unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M -hf unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M
${ctx_256k} ${ctx_256k}
${qwen35_nothink_args} ${qwen35_nothink_args}