From f863a0a4965393bc6511bfa6d3d653d4aff679d3 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Wed, 10 Jun 2026 20:08:14 +0200 Subject: [PATCH] use parakeet.cpp instead of whisper --- apps/llama/configs/config.yaml | 23 ++++++++++------------- apps/llama/deployment.yaml | 4 ++-- apps/openwebui/release.yaml | 4 ++-- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml index 3485469..963c0bc 100644 --- a/apps/llama/configs/config.yaml +++ b/apps/llama/configs/config.yaml @@ -19,7 +19,7 @@ hooks: on_startup: preload: - "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL" - - "whisper-small" + - "parakeet-tdt_ctc-1.1b" # matrix replaces groups (they are mutually exclusive). # The small 0.8B model runs alongside any LLM. @@ -27,7 +27,7 @@ hooks: matrix: vars: q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL" - stt: "whisper-small" + stt: "parakeet-tdt_ctc-1.1b" flux: "flux2-klein-4b:Q4_K_M" coder: "Qwen3-Coder-Next-GGUF:Q4_K_M" q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M" @@ -58,6 +58,7 @@ matrix: # FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen image_gen: "flux & stt" + models: "Qwen3-Coder-Next-GGUF:Q4_K_M": cmd: | @@ -235,20 +236,16 @@ models: --parallel 1 ${common_args} - # STT via whisper.cpp (Vulkan GPU on RX 580, always loaded, ~600MB VRAM) - # Model auto-downloaded by init container, see deployment.yaml - # Note: Vulkan whisper on AMD GPUs has known quality issues on some cards; - # if transcriptions come out as garbage/gibberish, add --no-gpu to fall back. - "whisper-small": + # STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded) + # Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models + # Exposes POST /v1/audio/transcriptions (OpenAI-compatible) + "parakeet-tdt_ctc-1.1b": checkEndpoint: none cmd: | - whisper-server + parakeet-server --port ${PORT} - -m /root/.cache/whisper/ggml-small.bin - --request-path /v1/audio - --inference-path /transcriptions - --convert - --threads 6 + --model tdt_ctc-1.1b-q4_k.gguf + --cache-dir /root/.cache/parakeet.cpp/models # Image generation via stable-diffusion.cpp (sd-server) diff --git a/apps/llama/deployment.yaml b/apps/llama/deployment.yaml index a61aa94..11df5a2 100644 --- a/apps/llama/deployment.yaml +++ b/apps/llama/deployment.yaml @@ -18,7 +18,7 @@ spec: spec: initContainers: - name: download-whisper - image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09 + image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09 command: - sh - -c @@ -48,7 +48,7 @@ spec: mountPath: /root/.cache containers: - name: llama-swap - image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09 + image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09 imagePullPolicy: IfNotPresent command: - llama-swap diff --git a/apps/openwebui/release.yaml b/apps/openwebui/release.yaml index ec6afcc..9dcd26d 100644 --- a/apps/openwebui/release.yaml +++ b/apps/openwebui/release.yaml @@ -84,7 +84,7 @@ spec: value: "Users" - name: OAUTH_AUTO_REDIRECT value: "true" - # STT via whisper-server (routed through llama-swap) + # STT via parakeet-server (routed through llama-swap) - name: AUDIO_STT_ENGINE value: "openai" - name: AUDIO_STT_OPENAI_API_BASE_URL @@ -92,7 +92,7 @@ spec: - name: AUDIO_STT_OPENAI_API_KEY value: "ignored" - name: AUDIO_STT_MODEL - value: "whisper-small" + value: "parakeet-tdt_ctc-1.1b" - name: AUDIO_STT_SUPPORTED_CONTENT_TYPES value: "audio/wav,audio/wave" # TTS via OuteTTS (routed through llama-swap)