use parakeet.cpp instead of whisper
ci/woodpecker/push/flux-reconcile-source Pipeline was successful

This commit is contained in:
2026-06-10 20:08:14 +02:00
parent 979f5796d5
commit f863a0a496
3 changed files with 14 additions and 17 deletions
+10 -13
View File
@@ -19,7 +19,7 @@ hooks:
on_startup: on_startup:
preload: preload:
- "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL" - "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
- "whisper-small" - "parakeet-tdt_ctc-1.1b"
# matrix replaces groups (they are mutually exclusive). # matrix replaces groups (they are mutually exclusive).
# The small 0.8B model runs alongside any LLM. # The small 0.8B model runs alongside any LLM.
@@ -27,7 +27,7 @@ hooks:
matrix: matrix:
vars: vars:
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL" q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
stt: "whisper-small" stt: "parakeet-tdt_ctc-1.1b"
flux: "flux2-klein-4b:Q4_K_M" flux: "flux2-klein-4b:Q4_K_M"
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M" coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M" q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
@@ -58,6 +58,7 @@ matrix:
# FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen # FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen
image_gen: "flux & stt" image_gen: "flux & stt"
models: models:
"Qwen3-Coder-Next-GGUF:Q4_K_M": "Qwen3-Coder-Next-GGUF:Q4_K_M":
cmd: | cmd: |
@@ -235,20 +236,16 @@ models:
--parallel 1 --parallel 1
${common_args} ${common_args}
# STT via whisper.cpp (Vulkan GPU on RX 580, always loaded, ~600MB VRAM) # STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded)
# Model auto-downloaded by init container, see deployment.yaml # Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models
# Note: Vulkan whisper on AMD GPUs has known quality issues on some cards; # Exposes POST /v1/audio/transcriptions (OpenAI-compatible)
# if transcriptions come out as garbage/gibberish, add --no-gpu to fall back. "parakeet-tdt_ctc-1.1b":
"whisper-small":
checkEndpoint: none checkEndpoint: none
cmd: | cmd: |
whisper-server parakeet-server
--port ${PORT} --port ${PORT}
-m /root/.cache/whisper/ggml-small.bin --model tdt_ctc-1.1b-q4_k.gguf
--request-path /v1/audio --cache-dir /root/.cache/parakeet.cpp/models
--inference-path /transcriptions
--convert
--threads 6
# Image generation via stable-diffusion.cpp (sd-server) # Image generation via stable-diffusion.cpp (sd-server)
+2 -2
View File
@@ -18,7 +18,7 @@ spec:
spec: spec:
initContainers: initContainers:
- name: download-whisper - name: download-whisper
image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09 image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09
command: command:
- sh - sh
- -c - -c
@@ -48,7 +48,7 @@ spec:
mountPath: /root/.cache mountPath: /root/.cache
containers: containers:
- name: llama-swap - name: llama-swap
image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09 image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
command: command:
- llama-swap - llama-swap
+2 -2
View File
@@ -84,7 +84,7 @@ spec:
value: "Users" value: "Users"
- name: OAUTH_AUTO_REDIRECT - name: OAUTH_AUTO_REDIRECT
value: "true" value: "true"
# STT via whisper-server (routed through llama-swap) # STT via parakeet-server (routed through llama-swap)
- name: AUDIO_STT_ENGINE - name: AUDIO_STT_ENGINE
value: "openai" value: "openai"
- name: AUDIO_STT_OPENAI_API_BASE_URL - name: AUDIO_STT_OPENAI_API_BASE_URL
@@ -92,7 +92,7 @@ spec:
- name: AUDIO_STT_OPENAI_API_KEY - name: AUDIO_STT_OPENAI_API_KEY
value: "ignored" value: "ignored"
- name: AUDIO_STT_MODEL - name: AUDIO_STT_MODEL
value: "whisper-small" value: "parakeet-tdt_ctc-1.1b"
- name: AUDIO_STT_SUPPORTED_CONTENT_TYPES - name: AUDIO_STT_SUPPORTED_CONTENT_TYPES
value: "audio/wav,audio/wave" value: "audio/wav,audio/wave"
# TTS via OuteTTS (routed through llama-swap) # TTS via OuteTTS (routed through llama-swap)