5 Commits

2 changed files with 98 additions and 1 deletion

View File

@@ -2,6 +2,19 @@
# Upper bound on how long a freshly started model may take to pass its health
# check; presumably expressed in seconds — confirm against the llama-swap
# version in use.
healthCheckTimeout: 600
# Selects which log streams are echoed to stdout; "both" covers the proxy and
# the upstream processes, as the inline note says.
logToStdout: "both" # proxy and upstream
# Startup hook: every model named under `preload` is requested as soon as the
# proxy comes up (llama-swap `hooks.on_startup.preload` — confirm against the
# deployed version). The name used here is the same string as the
# "Qwen3-VL-2B-Instruct-GGUF:Q4_K_M" key under `models:`.
hooks:
  on_startup:
    preload:
      - "Qwen3-VL-2B-Instruct-GGUF:Q4_K_M"
# Group holding the small vision model that is also preloaded at startup.
# NOTE(review): the per-flag notes below follow llama-swap's documented group
# semantics — confirm against the deployed version.
groups:
  qwen-vl-always:
    # presumably: other groups are not allowed to unload this group's members
    persistent: true
    # presumably: loading a member does not unload models from other groups
    exclusive: false
    # presumably: members are not swapped out in favour of one another
    swap: false
    members:
      - "Qwen3-VL-2B-Instruct-GGUF:Q4_K_M"
models:
"DeepSeek-R1-0528-Qwen3-8B-GGUF":
ttl: 600
@@ -456,3 +469,87 @@ models:
--repeat-penalty 1.0
--no-warmup
--port ${PORT}
# Qwen3.5 35B-A3B (unsloth Q4_K_M quant), default chat template behaviour.
"Qwen3.5-35B-A3B-GGUF:Q4_K_M":
  # Idle time before the model is unloaded; presumably seconds — confirm.
  ttl: 600
  # Literal block scalar: one llama-server argument group per line. The body
  # must stay indented deeper than `cmd`, otherwise the scalar is empty and the
  # flags are parsed as stray YAML nodes.
  cmd: |
    /app/llama-server
    -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
    --ctx-size 16384
    --temp 1.0
    --min-p 0.00
    --top-p 0.95
    --top-k 20
    --no-warmup
    --port ${PORT}
# Same weights as "Qwen3.5-35B-A3B-GGUF:Q4_K_M", but with
# `enable_thinking` set to false through the chat-template kwargs.
# NOTE(review): the sampling flags are identical to the thinking variant —
# confirm that is intended for non-thinking use.
"Qwen3.5-35B-A3B-GGUF-nothink:Q4_K_M":
  # Idle time before the model is unloaded; presumably seconds — confirm.
  ttl: 600
  # Literal block scalar: the body must stay indented deeper than `cmd`.
  # Backslashes are literal inside a block scalar, so the \" sequences reach
  # the command-line parser unchanged.
  cmd: |
    /app/llama-server
    -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
    --ctx-size 16384
    --temp 1.0
    --min-p 0.00
    --top-p 0.95
    --top-k 20
    --no-warmup
    --port ${PORT}
    --chat-template-kwargs "{\"enable_thinking\": false}"
# "heretic" variant of Qwen3.5 35B-A3B, pulled from the mradermacher repo
# (Q4_K_M quant), default chat template behaviour.
"Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M":
  # Idle time before the model is unloaded; presumably seconds — confirm.
  ttl: 600
  # Literal block scalar: one llama-server argument group per line. The body
  # must stay indented deeper than `cmd`.
  cmd: |
    /app/llama-server
    -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
    --ctx-size 16384
    --temp 1.0
    --min-p 0.00
    --top-p 0.95
    --top-k 20
    --no-warmup
    --port ${PORT}
# Same weights as "Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M", but with
# `enable_thinking` set to false through the chat-template kwargs.
# NOTE(review): the sampling flags are identical to the thinking variant —
# confirm that is intended for non-thinking use.
"Qwen3.5-35B-A3B-heretic-GGUF-nothink:Q4_K_M":
  # Idle time before the model is unloaded; presumably seconds — confirm.
  ttl: 600
  # Literal block scalar: the body must stay indented deeper than `cmd`.
  # Backslashes are literal inside a block scalar, so the \" sequences reach
  # the command-line parser unchanged.
  cmd: |
    /app/llama-server
    -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
    --ctx-size 16384
    --temp 1.0
    --min-p 0.00
    --top-p 0.95
    --top-k 20
    --no-warmup
    --port ${PORT}
    --chat-template-kwargs "{\"enable_thinking\": false}"
# Small vision-language model. This is the entry referenced by the startup
# preload hook and by the `qwen-vl-always` group.
"Qwen3-VL-2B-Instruct-GGUF:Q4_K_M":
  # NOTE(review): 0 presumably disables idle unloading in llama-swap, which
  # would match the always-resident intent — confirm.
  ttl: 0
  # Literal block scalar: one llama-server argument group per line. The body
  # must stay indented deeper than `cmd`.
  cmd: |
    /app/llama-server
    -hf unsloth/Qwen3-VL-2B-Instruct-GGUF:Q4_K_M
    --ctx-size 16384
    --predict 4096
    --temp 0.7
    --top-p 0.8
    --top-k 20
    --min-p 0.0
    --presence-penalty 1.5
    --no-warmup
    --port ${PORT}
# Gemma 3 270M instruction-tuned QAT build (unsloth Q4_K_M quant).
"gemma-3-270m-it-qat-GGUF:Q4_K_M":
  # Idle time before the model is unloaded; presumably seconds — confirm.
  ttl: 600
  # Literal block scalar: one llama-server argument group per line. The body
  # must stay indented deeper than `cmd`.
  cmd: |
    /app/llama-server
    -hf unsloth/gemma-3-270m-it-qat-GGUF:Q4_K_M
    --ctx-size 16384
    --predict 4096
    --temp 1.0
    --min-p 0.01
    --top-p 0.95
    --top-k 64
    --repeat-penalty 1.0
    --no-warmup
    --port ${PORT}

View File

@@ -15,7 +15,7 @@ spec:
- name: renovate
  # Update this to the latest available and then enable Renovate on
  # the manifest
  # Only one `image` key may exist per container; a duplicate is silently
  # resolved as last-wins by most parsers, so just the newer tag is kept.
  image: renovate/renovate:43.46.2-full
  envFrom:
    # Environment is populated from the `renovate-gitea-token` Secret.
    - secretRef:
        name: renovate-gitea-token