---
# Deployment: runs a single replica of the Kokoro TTS server in the `llama`
# namespace. Pods carry the label `app: kokoro`, which the Service below
# selects on.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kokoro
  namespace: llama
spec:
  replicas: 1
  # Recreate: existing pods are terminated before replacements are created
  # (no rolling overlap between old and new pods).
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: kokoro
  template:
    metadata:
      labels:
        app: kokoro
    spec:
      containers:
        - name: kokoro
          # OpenAI-compatible Kokoro-FastAPI TTS server, CPU PyTorch backend.
          # Models baked into the image (no PVC needed).
          # v0.3.0 includes fix for per-request voice tensor memory leak (#459).
          # NOTE(review): the pinned tag below is v0.4.0 while the note above
          # refers to v0.3.0 - confirm the note still applies to this tag.
          image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.4.0
          ports:
            - containerPort: 8880
              name: http
              protocol: TCP
          # Only memory is capped; no CPU limit is set.
          resources:
            requests:
              memory: "2Gi"
              cpu: "500m"
            limits:
              memory: "6Gi"
---
# Service: cluster-internal (ClusterIP) endpoint on port 8880 that forwards
# to port 8880 of pods labelled `app: kokoro`.
apiVersion: v1
kind: Service
metadata:
  name: kokoro
  namespace: llama
spec:
  type: ClusterIP
  ports:
    - name: http
      port: 8880
      targetPort: 8880
      protocol: TCP
  selector:
    app: kokoro