1 Commit

Author SHA1 Message Date
ff18b21349 Update renovate/renovate Docker tag to v43.56.1 2026-03-06 00:00:45 +00:00
3 changed files with 1 addition and 134 deletions

View File

@@ -461,7 +461,6 @@ models:
/app/llama-server /app/llama-server
-hf unsloth/Qwen3-Coder-Next-GGUF:Q4_K_M -hf unsloth/Qwen3-Coder-Next-GGUF:Q4_K_M
--ctx-size 32768 --ctx-size 32768
--fit-target 2048
--predict 8192 --predict 8192
--temp 1.0 --temp 1.0
--min-p 0.01 --min-p 0.01
@@ -476,7 +475,6 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
--fit-target 2048
--ctx-size 16384 --ctx-size 16384
--temp 1.0 --temp 1.0
--min-p 0.00 --min-p 0.00
@@ -490,7 +488,6 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
--fit-target 2048
--ctx-size 16384 --ctx-size 16384
--temp 1.0 --temp 1.0
--min-p 0.00 --min-p 0.00
@@ -509,7 +506,6 @@ models:
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
--mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf
--mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf
--fit-target 2048
--ctx-size 16384 --ctx-size 16384
--temp 1.0 --temp 1.0
--min-p 0.00 --min-p 0.00
@@ -525,7 +521,6 @@ models:
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
--mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf
--mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf
--fit-target 2048
--ctx-size 16384 --ctx-size 16384
--temp 1.0 --temp 1.0
--min-p 0.00 --min-p 0.00
@@ -566,20 +561,6 @@ models:
--no-warmup --no-warmup
--port ${PORT} --port ${PORT}
"Qwen3.5-0.8B-GGUF:Q4_K_XL":
ttl: 0
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": true}"
"Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL": "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL":
ttl: 0 ttl: 0
cmd: | cmd: |
@@ -593,115 +574,3 @@ models:
--no-warmup --no-warmup
--port ${PORT} --port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": false}" --chat-template-kwargs "{\"enable_thinking\": false}"
"Qwen3.5-2B-GGUF:Q4_K_M":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": true}"
"Qwen3.5-2B-GGUF-nothink:Q4_K_M":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": false}"
"Qwen3.5-4B-GGUF:Q4_K_M":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": true}"
"Qwen3.5-4B-GGUF-nothink:Q4_K_M":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": false}"
"Qwen3.5-9B-GGUF:Q4_K_M":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": true}"
"Qwen3.5-9B-GGUF-nothink:Q4_K_M":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": false}"
"Qwen3.5-9B-GGUF:Q3_K_M":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": true}"
"Qwen3.5-9B-GGUF-nothink:Q3_K_M":
ttl: 600
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
--ctx-size 16384
--temp 0.6
--top-p 0.95
--top-k 20
--min-p 0.00
--no-warmup
--port ${PORT}
--chat-template-kwargs "{\"enable_thinking\": false}"

View File

@@ -6,8 +6,6 @@ metadata:
namespace: llama namespace: llama
spec: spec:
replicas: 1 replicas: 1
strategy:
type: Recreate
selector: selector:
matchLabels: matchLabels:
app: llama-swap app: llama-swap

View File

@@ -15,7 +15,7 @@ spec:
- name: renovate - name: renovate
# Update this to the latest available and then enable Renovate on # Update this to the latest available and then enable Renovate on
# the manifest # the manifest
image: renovate/renovate:43.59.2-full image: renovate/renovate:43.56.1-full
envFrom: envFrom:
- secretRef: - secretRef:
name: renovate-gitea-token name: renovate-gitea-token