Compare commits
2 Commits
89f67e9cd4
...
fc2c15d154
| Author | SHA1 | Date | |
|---|---|---|---|
| | fc2c15d154 | | |
| | 02b3ec13b4 | | |
@@ -235,9 +235,10 @@ models:
|
||||
--parallel 1
|
||||
${common_args}
|
||||
|
||||
# STT via whisper.cpp (CPU-only, always loaded)
|
||||
# Model auto-downloaded from HuggingFace on first start
|
||||
# whisper-small: ~240MB RAM, good accuracy/speed tradeoff on R5 3600
|
||||
# STT via whisper.cpp (Vulkan GPU on RX 580, always loaded, ~600MB VRAM)
|
||||
# Model auto-downloaded by init container, see deployment.yaml
|
||||
# Note: Vulkan whisper on AMD GPUs has known quality issues on some cards;
|
||||
# if transcriptions come out as garbage/gibberish, add --no-gpu to fall back.
|
||||
"whisper-small":
|
||||
checkEndpoint: none
|
||||
cmd: |
|
||||
@@ -248,7 +249,6 @@ models:
|
||||
--inference-path /transcriptions
|
||||
--convert
|
||||
--threads 6
|
||||
--no-gpu
|
||||
|
||||
|
||||
# Image generation via stable-diffusion.cpp (sd-server)
|
||||
|
||||
+4
-19
@@ -16,37 +16,22 @@ spec:
|
||||
labels:
|
||||
app: kokoro
|
||||
spec:
|
||||
# Prevent Kubernetes from auto-injecting KOKORO_PORT=tcp://... (legacy
|
||||
# Docker-link-style env vars from same-namespace Services), which Kokoro
|
||||
# tries to parse as an integer and fails.
|
||||
enableServiceLinks: false
|
||||
containers:
|
||||
- name: kokoro
|
||||
# OpenAI-compatible Kokoro TTS server, CPU-only PyTorch backend
|
||||
# Exposes POST /v1/audio/speech with multiple voices and streaming
|
||||
image: hwdsl2/kokoro-server:latest@sha256:42886b8720e901f7e31aba2050cd03867767eb9f609bbc38fe93852e72f0feeb
|
||||
# OpenAI-compatible Kokoro-FastAPI TTS server, CPU PyTorch backend.
|
||||
# Models baked into the image (no PVC needed).
|
||||
# v0.3.0 includes fix for per-request voice tensor memory leak (#459).
|
||||
image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.3.0
|
||||
ports:
|
||||
- containerPort: 8880
|
||||
name: http
|
||||
protocol: TCP
|
||||
env:
|
||||
# Default voice (can be overridden per-request)
|
||||
- name: KOKORO_VOICE
|
||||
value: "af_heart"
|
||||
volumeMounts:
|
||||
# Persistent cache for the ~320MB Kokoro model
|
||||
- name: cache
|
||||
mountPath: /var/lib/kokoro
|
||||
resources:
|
||||
requests:
|
||||
memory: "2Gi"
|
||||
cpu: "500m"
|
||||
limits:
|
||||
memory: "6Gi"
|
||||
volumes:
|
||||
- name: cache
|
||||
persistentVolumeClaim:
|
||||
claimName: kokoro-cache-lvmssd
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
|
||||
@@ -6,7 +6,6 @@ resources:
|
||||
- auth-proxy.yaml
|
||||
- ingress.yaml
|
||||
- pvc-ssd.yaml
|
||||
- pvc-kokoro-ssd.yaml
|
||||
- deployment.yaml
|
||||
- kokoro.yaml
|
||||
configMapGenerator:
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
apiVersion: local.openebs.io/v1alpha1
|
||||
kind: LVMVolume
|
||||
metadata:
|
||||
labels:
|
||||
kubernetes.io/nodename: anapistula-delrosalae
|
||||
name: kokoro-cache-lvmssd
|
||||
namespace: openebs
|
||||
spec:
|
||||
capacity: "2147483648"
|
||||
ownerNodeID: anapistula-delrosalae
|
||||
shared: "yes"
|
||||
thinProvision: "no"
|
||||
vgPattern: ^openebs-ssd$
|
||||
volGroup: openebs-ssd
|
||||
---
|
||||
kind: PersistentVolume
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: kokoro-cache-lvmssd
|
||||
spec:
|
||||
capacity:
|
||||
storage: 2Gi
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
storageClassName: ssd-lvmpv
|
||||
volumeMode: Filesystem
|
||||
csi:
|
||||
driver: local.csi.openebs.io
|
||||
fsType: btrfs
|
||||
volumeHandle: kokoro-cache-lvmssd
|
||||
---
|
||||
kind: PersistentVolumeClaim
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: kokoro-cache-lvmssd
|
||||
namespace: llama
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 2Gi
|
||||
storageClassName: ssd-lvmpv
|
||||
volumeName: kokoro-cache-lvmssd
|
||||
Reference in New Issue
Block a user