Compare commits
4 Commits
7a20ecd526
...
94ea73eb61
| Author | SHA1 | Date | |
|---|---|---|---|
| 94ea73eb61 | |||
| 054df42d8b | |||
| 08db022d0d | |||
| e485a4fc7f |
@@ -6,6 +6,7 @@ macros:
|
|||||||
base_args: "--no-warmup --port ${PORT}"
|
base_args: "--no-warmup --port ${PORT}"
|
||||||
common_args: "--fit-target 1536 --no-warmup --port ${PORT}"
|
common_args: "--fit-target 1536 --no-warmup --port ${PORT}"
|
||||||
gemma3_ctx_128k: "--ctx-size 131072"
|
gemma3_ctx_128k: "--ctx-size 131072"
|
||||||
|
qwen35_ctx_128k: "--ctx-size 131072"
|
||||||
qwen35_ctx_256k: "--ctx-size 262144"
|
qwen35_ctx_256k: "--ctx-size 262144"
|
||||||
gemma_sampling: "--prio 2 --temp 1.0 --repeat-penalty 1.0 --min-p 0.00 --top-k 64 --top-p 0.95"
|
gemma_sampling: "--prio 2 --temp 1.0 --repeat-penalty 1.0 --min-p 0.00 --top-k 64 --top-p 0.95"
|
||||||
qwen35_sampling: "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q8_0 -ctv q8_0"
|
qwen35_sampling: "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q8_0 -ctv q8_0"
|
||||||
@@ -163,7 +164,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_256k}
|
${qwen35_ctx_128k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -172,7 +173,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_256k}
|
${qwen35_ctx_128k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -182,7 +183,7 @@ models:
|
|||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
||||||
${qwen35_4b_heretic_mmproj}
|
${qwen35_4b_heretic_mmproj}
|
||||||
${qwen35_ctx_256k}
|
${qwen35_ctx_128k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -192,7 +193,7 @@ models:
|
|||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
||||||
${qwen35_4b_heretic_mmproj}
|
${qwen35_4b_heretic_mmproj}
|
||||||
${qwen35_ctx_256k}
|
${qwen35_ctx_128k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: llama-swap
|
- name: llama-swap
|
||||||
image: ghcr.io/mostlygeek/llama-swap:v199-vulkan-b8562
|
image: ghcr.io/mostlygeek/llama-swap:v199-vulkan-b8576
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
command:
|
command:
|
||||||
- /app/llama-swap
|
- /app/llama-swap
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ spec:
|
|||||||
- name: renovate
|
- name: renovate
|
||||||
# Update this to the latest available and then enable Renovate on
|
# Update this to the latest available and then enable Renovate on
|
||||||
# the manifest
|
# the manifest
|
||||||
image: renovate/renovate:43.99.1-full
|
image: renovate/renovate:43.101.2-full
|
||||||
envFrom:
|
envFrom:
|
||||||
- secretRef:
|
- secretRef:
|
||||||
name: renovate-gitea-token
|
name: renovate-gitea-token
|
||||||
|
|||||||
Reference in New Issue
Block a user