From c161da36574577cb9946f4d01866524ea3910517 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Thu, 21 May 2026 23:18:05 +0200 Subject: [PATCH] add mlock and disable mmap in llama-server --- apps/llama/configs/config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml index 5b3ecf0..14ca6af 100644 --- a/apps/llama/configs/config.yaml +++ b/apps/llama/configs/config.yaml @@ -3,8 +3,8 @@ healthCheckTimeout: 600 logToStdout: "both" # proxy and upstream macros: - base_args: "--no-warmup --port ${PORT}" - common_args: "--fit-target 1536 --no-warmup --port ${PORT}" + base_args: "--no-warmup --port ${PORT} --mlock --no-mmap" + common_args: "--fit-target 1536 --no-warmup --port ${PORT} --mlock --no-mmap" cpu_args: "--no-warmup --port ${PORT} -ngl 0" ctx_64k: "--ctx-size 65536" ctx_128k: "--ctx-size 131072" @@ -217,7 +217,7 @@ models: "unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M": cmd: | - llama-server + /app/llama-server -hf unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M ${ctx_256k} ${qwen35_think_args} @@ -227,7 +227,7 @@ models: "unsloth/Qwen3.6-35B-A3B-MTP-GGUF-nothink:Q4_K_M": cmd: | - llama-server + /app/llama-server -hf unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M ${ctx_256k} ${qwen35_nothink_args}