switch kokoro to remsky/Kokoro-FastAPI

2026-05-21 21:51:40 +02:00
2 changed files with 5 additions and 5 deletions
@@ -235,10 +235,9 @@ models:
      --parallel 1
      ${common_args}

-  # STT via whisper.cpp (Vulkan GPU on RX 580, always loaded, ~600MB VRAM)
-  # Model auto-downloaded by init container, see deployment.yaml
-  # Note: Vulkan whisper on AMD GPUs has known quality issues on some cards;
-  # if transcriptions come out as garbage/gibberish, add --no-gpu to fall back.
+  # STT via whisper.cpp (CPU-only via --no-gpu, always loaded)
+  # Model auto-downloaded from HuggingFace on first start
+  # whisper-small: ~240MB RAM, good accuracy/speed tradeoff on R5 3600
  "whisper-small":
    checkEndpoint: none
    cmd: |
@@ -249,6 +248,7 @@ models:
        --inference-path /transcriptions
        --convert
        --threads 6
+        --no-gpu


  # Image generation via stable-diffusion.cpp (sd-server)
@@ -21,7 +21,7 @@ spec:
          # OpenAI-compatible Kokoro-FastAPI TTS server, CPU PyTorch backend.
          # Models baked into the image (no PVC needed).
          # v0.3.0 includes fix for per-request voice tensor memory leak (#459).
-          image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.3.0
+          image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.3.0@sha256:d7df384acb57929f88a696d892c1a2e0c6b60fa934802ebec9a88626003b8e9a
          ports:
            - containerPort: 8880
              name: http