2 Commits

Author SHA1 Message Date
Lumpiasty fc2c15d154 move whisper to gpu
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
2026-05-21 22:02:34 +02:00
Lumpiasty 02b3ec13b4 switch kokoro to remsky/Kokoro-FastAPI 2026-05-21 21:55:34 +02:00
4 changed files with 8 additions and 70 deletions
+4 -4
View File
@@ -235,9 +235,10 @@ models:
--parallel 1 --parallel 1
${common_args} ${common_args}
# STT via whisper.cpp (CPU-only, always loaded) # STT via whisper.cpp (Vulkan GPU on RX 580, always loaded, ~600MB VRAM)
# Model auto-downloaded from HuggingFace on first start # Model auto-downloaded by init container, see deployment.yaml
# whisper-small: ~240MB RAM, good accuracy/speed tradeoff on R5 3600 # Note: Vulkan whisper on AMD GPUs has known quality issues on some cards;
# if transcriptions come out as garbage/gibberish, add --no-gpu to fall back.
"whisper-small": "whisper-small":
checkEndpoint: none checkEndpoint: none
cmd: | cmd: |
@@ -248,7 +249,6 @@ models:
--inference-path /transcriptions --inference-path /transcriptions
--convert --convert
--threads 6 --threads 6
--no-gpu
# Image generation via stable-diffusion.cpp (sd-server) # Image generation via stable-diffusion.cpp (sd-server)
+4 -19
View File
@@ -16,37 +16,22 @@ spec:
labels: labels:
app: kokoro app: kokoro
spec: spec:
# Prevent Kubernetes from auto-injecting KOKORO_PORT=tcp://... (legacy
# Docker-link-style env vars from same-namespace Services), which Kokoro
# tries to parse as an integer and fails.
enableServiceLinks: false
containers: containers:
- name: kokoro - name: kokoro
# OpenAI-compatible Kokoro TTS server, CPU-only PyTorch backend # OpenAI-compatible Kokoro-FastAPI TTS server, CPU PyTorch backend.
# Exposes POST /v1/audio/speech with multiple voices and streaming # Models baked into the image (no PVC needed).
image: hwdsl2/kokoro-server:latest@sha256:42886b8720e901f7e31aba2050cd03867767eb9f609bbc38fe93852e72f0feeb # v0.3.0 includes fix for per-request voice tensor memory leak (#459).
image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.3.0
ports: ports:
- containerPort: 8880 - containerPort: 8880
name: http name: http
protocol: TCP protocol: TCP
env:
# Default voice (can be overridden per-request)
- name: KOKORO_VOICE
value: "af_heart"
volumeMounts:
# Persistent cache for the ~320MB Kokoro model
- name: cache
mountPath: /var/lib/kokoro
resources: resources:
requests: requests:
memory: "2Gi" memory: "2Gi"
cpu: "500m" cpu: "500m"
limits: limits:
memory: "6Gi" memory: "6Gi"
volumes:
- name: cache
persistentVolumeClaim:
claimName: kokoro-cache-lvmssd
--- ---
apiVersion: v1 apiVersion: v1
kind: Service kind: Service
-1
View File
@@ -6,7 +6,6 @@ resources:
- auth-proxy.yaml - auth-proxy.yaml
- ingress.yaml - ingress.yaml
- pvc-ssd.yaml - pvc-ssd.yaml
- pvc-kokoro-ssd.yaml
- deployment.yaml - deployment.yaml
- kokoro.yaml - kokoro.yaml
configMapGenerator: configMapGenerator:
-46
View File
@@ -1,46 +0,0 @@
---
# OpenEBS LVM-LocalPV volume object named kokoro-cache-lvmssd: a
# 2147483648-byte (2 GiB) volume in the openebs-ssd volume group, owned by
# node anapistula-delrosalae.
apiVersion: local.openebs.io/v1alpha1
kind: LVMVolume
metadata:
  labels:
    kubernetes.io/nodename: anapistula-delrosalae
  name: kokoro-cache-lvmssd
  namespace: openebs
spec:
  # Size in bytes; quoted so it is carried as a string, not an integer.
  capacity: "2147483648"
  ownerNodeID: anapistula-delrosalae
  # Quoted so "yes"/"no" stay strings instead of YAML 1.1 booleans.
  shared: "yes"
  thinProvision: "no"
  # Anchored pattern that matches only the volume group named below.
  vgPattern: ^openebs-ssd$
  volGroup: openebs-ssd
---
# Statically defined 2Gi PersistentVolume served by the OpenEBS local CSI
# driver, formatted as btrfs.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: kokoro-cache-lvmssd
spec:
  capacity:
    storage: 2Gi
  accessModes:
    - ReadWriteOnce
  # Retain: the volume and its data are kept after the claim is released.
  persistentVolumeReclaimPolicy: Retain
  storageClassName: ssd-lvmpv
  volumeMode: Filesystem
  csi:
    driver: local.csi.openebs.io
    fsType: btrfs
    # Same name as the LVMVolume object in the openebs namespace.
    volumeHandle: kokoro-cache-lvmssd
---
# Claim in the llama namespace requesting 2Gi from storage class ssd-lvmpv.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: kokoro-cache-lvmssd
  namespace: llama
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 2Gi
  storageClassName: ssd-lvmpv
  # Bind to the PersistentVolume of this exact name rather than letting the
  # control plane pick any matching volume.
  volumeName: kokoro-cache-lvmssd