This commit is contained in:
@@ -20,7 +20,6 @@ hooks:
|
||||
preload:
|
||||
- "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
||||
- "whisper-small"
|
||||
- "outetts-tts"
|
||||
|
||||
# matrix replaces groups (they are mutually exclusive).
|
||||
# The small 0.8B model runs alongside any LLM.
|
||||
@@ -29,7 +28,6 @@ matrix:
|
||||
vars:
|
||||
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
||||
stt: "whisper-small"
|
||||
tts: "outetts-tts"
|
||||
flux: "flux2-klein-4b:Q4_K_M"
|
||||
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
|
||||
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
|
||||
@@ -56,9 +54,9 @@ matrix:
|
||||
|
||||
sets:
|
||||
# any LLM can run alongside the small always-on model + STT + TTS (all CPU, no VRAM cost)
|
||||
with_q8: "(coder | q35t | q35nt | q35ht | q35hnt | q4t | q4nt | q4ht | q4hnt | g26xl | g26q2 | ge4xl | ge2xl | q36t | q36nt | haut | haunt | mtpt | mtpnt) & q8 & stt & tts"
|
||||
# FLUX runs alone — evicts everything including q8, but keeps STT+TTS for voice during image gen
|
||||
image_gen: "flux & stt & tts"
|
||||
with_q8: "(coder | q35t | q35nt | q35ht | q35hnt | q4t | q4nt | q4ht | q4hnt | g26xl | g26q2 | ge4xl | ge2xl | q36t | q36nt | haut | haunt | mtpt | mtpnt) & q8 & stt"
|
||||
# FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen
|
||||
image_gen: "flux & stt"
|
||||
|
||||
models:
|
||||
"Qwen3-Coder-Next-GGUF:Q4_K_M":
|
||||
@@ -252,19 +250,6 @@ models:
|
||||
--threads 6
|
||||
--no-gpu
|
||||
|
||||
# TTS via OuteTTS 0.3 1B + WavTokenizer vocoder (CPU-only, always loaded)
|
||||
# Models auto-downloaded from HuggingFace on first start
|
||||
# OuteTTS 0.3 1B: ~1GB RAM, WavTokenizer: ~600MB RAM
|
||||
# Exposes /v1/audio/speech compatible with OpenAI TTS API
|
||||
"outetts-tts":
|
||||
checkEndpoint: none
|
||||
cmd: |
|
||||
llama-server
|
||||
-hf OuteAI/OuteTTS-0.3-1B-GGUF
|
||||
-hff OuteTTS-0.3-1B-Q8_0.gguf
|
||||
-mv /root/.cache/huggingface/hub/models--ggml-org--WavTokenizer/snapshots/0c97fdc098158ec9bf4e703cd5f81a5aa20520e6/WavTokenizer-Large-75-F16.gguf
|
||||
-c 4096
|
||||
${cpu_args}
|
||||
|
||||
# Image generation via stable-diffusion.cpp (sd-server)
|
||||
# Models must be pre-downloaded to /root/.cache/sd/
|
||||
|
||||
Reference in New Issue
Block a user