Remove the ttl setting from all models in llama-swap
This commit is contained in:
@@ -35,7 +35,6 @@ groups:
|
||||
|
||||
models:
|
||||
"gemma3-12b":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
|
||||
@@ -43,7 +42,6 @@ models:
|
||||
${common_args}
|
||||
|
||||
"gemma3-12b-novision":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
|
||||
@@ -52,7 +50,6 @@ models:
|
||||
${common_args}
|
||||
|
||||
"gemma3-4b":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
|
||||
@@ -60,7 +57,6 @@ models:
|
||||
${common_args}
|
||||
|
||||
"gemma3-4b-novision":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
|
||||
@@ -69,7 +65,6 @@ models:
|
||||
${common_args}
|
||||
|
||||
"Qwen3-Coder-Next-GGUF:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3-Coder-Next-GGUF:Q4_K_M
|
||||
@@ -83,7 +78,6 @@ models:
|
||||
${common_args}
|
||||
|
||||
"Qwen3.5-35B-A3B-GGUF:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
|
||||
@@ -91,7 +85,6 @@ models:
|
||||
${common_args}
|
||||
|
||||
"Qwen3.5-35B-A3B-GGUF-nothink:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
|
||||
@@ -102,7 +95,6 @@ models:
|
||||
# The "heretic" version does not provide the mmproj
|
||||
# so we provide the URL of the one from the non-heretic version.
|
||||
"Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
|
||||
@@ -111,7 +103,6 @@ models:
|
||||
${common_args}
|
||||
|
||||
"Qwen3.5-35B-A3B-heretic-GGUF-nothink:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
|
||||
@@ -121,7 +112,6 @@ models:
|
||||
${thinking_off}
|
||||
|
||||
"Qwen3.5-0.8B-GGUF:Q4_K_XL":
|
||||
ttl: 0
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
|
||||
@@ -130,7 +120,6 @@ models:
|
||||
${thinking_on}
|
||||
|
||||
"Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL":
|
||||
ttl: 0
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
|
||||
@@ -140,7 +129,6 @@ models:
|
||||
${thinking_off}
|
||||
|
||||
"Qwen3.5-2B-GGUF:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
|
||||
@@ -149,7 +137,6 @@ models:
|
||||
${thinking_on}
|
||||
|
||||
"Qwen3.5-2B-GGUF-nothink:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
|
||||
@@ -158,7 +145,6 @@ models:
|
||||
${thinking_off}
|
||||
|
||||
"Qwen3.5-4B-GGUF:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
||||
@@ -167,7 +153,6 @@ models:
|
||||
${thinking_on}
|
||||
|
||||
"Qwen3.5-4B-GGUF-nothink:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
||||
@@ -176,7 +161,6 @@ models:
|
||||
${thinking_off}
|
||||
|
||||
"Qwen3.5-4B-heretic-GGUF:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
||||
@@ -186,7 +170,6 @@ models:
|
||||
${thinking_on}
|
||||
|
||||
"Qwen3.5-4B-heretic-GGUF-nothink:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
||||
@@ -196,7 +179,6 @@ models:
|
||||
${thinking_off}
|
||||
|
||||
"Qwen3.5-9B-GGUF:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
|
||||
@@ -205,7 +187,6 @@ models:
|
||||
${thinking_on}
|
||||
|
||||
"Qwen3.5-9B-GGUF-nothink:Q4_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
|
||||
@@ -214,7 +195,6 @@ models:
|
||||
${thinking_off}
|
||||
|
||||
"Qwen3.5-9B-GGUF:Q3_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
|
||||
@@ -223,7 +203,6 @@ models:
|
||||
${thinking_on}
|
||||
|
||||
"Qwen3.5-9B-GGUF-nothink:Q3_K_M":
|
||||
ttl: 600
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
|
||||
|
||||
Reference in New Issue
Block a user