use parakeet.cpp instead of whisper
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
This commit is contained in:
@@ -19,7 +19,7 @@ hooks:
|
|||||||
on_startup:
|
on_startup:
|
||||||
preload:
|
preload:
|
||||||
- "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
- "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
||||||
- "whisper-small"
|
- "parakeet-tdt_ctc-1.1b"
|
||||||
|
|
||||||
# matrix replaces groups (they are mutually exclusive).
|
# matrix replaces groups (they are mutually exclusive).
|
||||||
# The small 0.8B model runs alongside any LLM.
|
# The small 0.8B model runs alongside any LLM.
|
||||||
@@ -27,7 +27,7 @@ hooks:
|
|||||||
matrix:
|
matrix:
|
||||||
vars:
|
vars:
|
||||||
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
||||||
stt: "whisper-small"
|
stt: "parakeet-tdt_ctc-1.1b"
|
||||||
flux: "flux2-klein-4b:Q4_K_M"
|
flux: "flux2-klein-4b:Q4_K_M"
|
||||||
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
|
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
|
||||||
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
|
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
|
||||||
@@ -58,6 +58,7 @@ matrix:
|
|||||||
# FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen
|
# FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen
|
||||||
image_gen: "flux & stt"
|
image_gen: "flux & stt"
|
||||||
|
|
||||||
|
|
||||||
models:
|
models:
|
||||||
"Qwen3-Coder-Next-GGUF:Q4_K_M":
|
"Qwen3-Coder-Next-GGUF:Q4_K_M":
|
||||||
cmd: |
|
cmd: |
|
||||||
@@ -235,20 +236,16 @@ models:
|
|||||||
--parallel 1
|
--parallel 1
|
||||||
${common_args}
|
${common_args}
|
||||||
|
|
||||||
# STT via whisper.cpp (Vulkan GPU on RX 580, always loaded, ~600MB VRAM)
|
# STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded)
|
||||||
# Model auto-downloaded by init container, see deployment.yaml
|
# Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models
|
||||||
# Note: Vulkan whisper on AMD GPUs has known quality issues on some cards;
|
# Exposes POST /v1/audio/transcriptions (OpenAI-compatible)
|
||||||
# if transcriptions come out as garbage/gibberish, add --no-gpu to fall back.
|
"parakeet-tdt_ctc-1.1b":
|
||||||
"whisper-small":
|
|
||||||
checkEndpoint: none
|
checkEndpoint: none
|
||||||
cmd: |
|
cmd: |
|
||||||
whisper-server
|
parakeet-server
|
||||||
--port ${PORT}
|
--port ${PORT}
|
||||||
-m /root/.cache/whisper/ggml-small.bin
|
--model tdt_ctc-1.1b-q4_k.gguf
|
||||||
--request-path /v1/audio
|
--cache-dir /root/.cache/parakeet.cpp/models
|
||||||
--inference-path /transcriptions
|
|
||||||
--convert
|
|
||||||
--threads 6
|
|
||||||
|
|
||||||
|
|
||||||
# Image generation via stable-diffusion.cpp (sd-server)
|
# Image generation via stable-diffusion.cpp (sd-server)
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
initContainers:
|
initContainers:
|
||||||
- name: download-whisper
|
- name: download-whisper
|
||||||
image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09
|
image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09
|
||||||
command:
|
command:
|
||||||
- sh
|
- sh
|
||||||
- -c
|
- -c
|
||||||
@@ -48,7 +48,7 @@ spec:
|
|||||||
mountPath: /root/.cache
|
mountPath: /root/.cache
|
||||||
containers:
|
containers:
|
||||||
- name: llama-swap
|
- name: llama-swap
|
||||||
image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09
|
image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
command:
|
command:
|
||||||
- llama-swap
|
- llama-swap
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ spec:
|
|||||||
value: "Users"
|
value: "Users"
|
||||||
- name: OAUTH_AUTO_REDIRECT
|
- name: OAUTH_AUTO_REDIRECT
|
||||||
value: "true"
|
value: "true"
|
||||||
# STT via whisper-server (routed through llama-swap)
|
# STT via parakeet-server (routed through llama-swap)
|
||||||
- name: AUDIO_STT_ENGINE
|
- name: AUDIO_STT_ENGINE
|
||||||
value: "openai"
|
value: "openai"
|
||||||
- name: AUDIO_STT_OPENAI_API_BASE_URL
|
- name: AUDIO_STT_OPENAI_API_BASE_URL
|
||||||
@@ -92,7 +92,7 @@ spec:
|
|||||||
- name: AUDIO_STT_OPENAI_API_KEY
|
- name: AUDIO_STT_OPENAI_API_KEY
|
||||||
value: "ignored"
|
value: "ignored"
|
||||||
- name: AUDIO_STT_MODEL
|
- name: AUDIO_STT_MODEL
|
||||||
value: "whisper-small"
|
value: "parakeet-tdt_ctc-1.1b"
|
||||||
- name: AUDIO_STT_SUPPORTED_CONTENT_TYPES
|
- name: AUDIO_STT_SUPPORTED_CONTENT_TYPES
|
||||||
value: "audio/wav,audio/wave"
|
value: "audio/wav,audio/wave"
|
||||||
# TTS via OuteTTS (routed through llama-swap)
|
# TTS via OuteTTS (routed through llama-swap)
|
||||||
|
|||||||
Reference in New Issue
Block a user