Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f0921e903a | |||
|
8860f6782e
|
|||
|
f863a0a496
|
@@ -21,7 +21,7 @@ steps:
|
|||||||
- bao kv get -mount secret -field RENOVATE_TOKEN renovate > /woodpecker/renovate_token
|
- bao kv get -mount secret -field RENOVATE_TOKEN renovate > /woodpecker/renovate_token
|
||||||
- bao kv get -mount secret -field GITHUB_COM_TOKEN renovate > /woodpecker/github_com_token
|
- bao kv get -mount secret -field GITHUB_COM_TOKEN renovate > /woodpecker/github_com_token
|
||||||
- name: Run Renovate
|
- name: Run Renovate
|
||||||
image: renovate/renovate:43.217.1
|
image: renovate/renovate:43.220.0
|
||||||
environment:
|
environment:
|
||||||
RENOVATE_AUTODISCOVER: "true"
|
RENOVATE_AUTODISCOVER: "true"
|
||||||
RENOVATE_ENDPOINT: https://gitea.lumpiasty.xyz/api/v1
|
RENOVATE_ENDPOINT: https://gitea.lumpiasty.xyz/api/v1
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ hooks:
|
|||||||
on_startup:
|
on_startup:
|
||||||
preload:
|
preload:
|
||||||
- "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
- "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
||||||
- "whisper-small"
|
- "parakeet-tdt_ctc-1.1b"
|
||||||
|
|
||||||
# matrix replaces groups (they are mutually exclusive).
|
# matrix replaces groups (they are mutually exclusive).
|
||||||
# The small 0.8B model runs alongside any LLM.
|
# The small 0.8B model runs alongside any LLM.
|
||||||
@@ -27,7 +27,7 @@ hooks:
|
|||||||
matrix:
|
matrix:
|
||||||
vars:
|
vars:
|
||||||
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
||||||
stt: "whisper-small"
|
stt: "parakeet-tdt_ctc-1.1b"
|
||||||
flux: "flux2-klein-4b:Q4_K_M"
|
flux: "flux2-klein-4b:Q4_K_M"
|
||||||
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
|
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
|
||||||
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
|
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
|
||||||
@@ -58,6 +58,7 @@ matrix:
|
|||||||
# FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen
|
# FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen
|
||||||
image_gen: "flux & stt"
|
image_gen: "flux & stt"
|
||||||
|
|
||||||
|
|
||||||
models:
|
models:
|
||||||
"Qwen3-Coder-Next-GGUF:Q4_K_M":
|
"Qwen3-Coder-Next-GGUF:Q4_K_M":
|
||||||
cmd: |
|
cmd: |
|
||||||
@@ -235,20 +236,14 @@ models:
|
|||||||
--parallel 1
|
--parallel 1
|
||||||
${common_args}
|
${common_args}
|
||||||
|
|
||||||
# STT via whisper.cpp (Vulkan GPU on RX 580, always loaded, ~600MB VRAM)
|
# STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded)
|
||||||
# Model auto-downloaded by init container, see deployment.yaml
|
# Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models
|
||||||
# Note: Vulkan whisper on AMD GPUs has known quality issues on some cards;
|
# parakeet-proxy.py sits in front to convert any audio format to WAV via ffmpeg,
|
||||||
# if transcriptions come out as garbage/gibberish, add --no-gpu to fall back.
|
# since parakeet-server only accepts real WAV but browsers send Ogg/Opus.
|
||||||
"whisper-small":
|
"parakeet-tdt_ctc-1.1b":
|
||||||
checkEndpoint: none
|
checkEndpoint: none
|
||||||
cmd: |
|
cmd: |
|
||||||
whisper-server
|
env PROXY_PORT=${PORT} FFMPEG_BIN=/root/.cache/ffmpeg/ffmpeg python3 /config/parakeet-proxy.py
|
||||||
--port ${PORT}
|
|
||||||
-m /root/.cache/whisper/ggml-small.bin
|
|
||||||
--request-path /v1/audio
|
|
||||||
--inference-path /transcriptions
|
|
||||||
--convert
|
|
||||||
--threads 6
|
|
||||||
|
|
||||||
|
|
||||||
# Image generation via stable-diffusion.cpp (sd-server)
|
# Image generation via stable-diffusion.cpp (sd-server)
|
||||||
|
|||||||
@@ -0,0 +1,227 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Thin reverse proxy for parakeet-server.
|
||||||
|
|
||||||
|
Accepts POST /v1/audio/transcriptions with any audio format,
|
||||||
|
converts the audio to 16 kHz mono WAV via ffmpeg, then forwards
|
||||||
|
the converted file to the real parakeet-server running on PARAKEET_PORT.
|
||||||
|
|
||||||
|
Also proxies GET /health straight through.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
PROXY_PORT=<port> PARAKEET_PORT=<upstream> python3 parakeet-proxy.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import http.server
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
|
# --- Runtime configuration, read once from the environment at import time ---

# TCP port this proxy listens on.
PROXY_PORT = int(os.environ.get("PROXY_PORT", "8080"))

# Loopback port of the real parakeet-server. The module docstring documents a
# PARAKEET_PORT variable, so honour it when set; otherwise keep the historical
# default of "the port right after the proxy's". An empty value counts as unset.
PARAKEET_PORT = int(os.environ.get("PARAKEET_PORT") or PROXY_PORT + 1)

# ffmpeg executable used for audio conversion (name on PATH or absolute path).
FFMPEG = os.environ.get("FFMPEG_BIN", "ffmpeg")

# Model name passed to parakeet-server via --model.
MODEL = os.environ.get("PARAKEET_MODEL", "tdt_ctc-1.1b-q4_k.gguf")

# Directory passed to parakeet-server via --cache-dir.
CACHE_DIR = os.environ.get("PARAKEET_CACHE_DIR", "/root/.cache/parakeet.cpp/models")
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_wav(data: bytes) -> bytes:
    """Convert any audio bytes to 16 kHz mono PCM WAV via ffmpeg.

    The input is written to a scratch file, ffmpeg is run on it with
    ``-ar 16000 -ac 1 -f wav``, and the resulting file is read back.

    Args:
        data: Raw audio in whatever container/codec the configured ffmpeg
            binary is able to decode.

    Returns:
        The bytes of the WAV file produced by ffmpeg.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
            (typically: the input could not be decoded).
        OSError: the ffmpeg binary could not be started, or the scratch
            files could not be written or read.
    """
    # A private scratch directory guarantees both files are removed on every
    # exit path, including a failed write of the input (e.g. disk full).
    with tempfile.TemporaryDirectory(prefix="parakeet-proxy-") as workdir:
        src_path = os.path.join(workdir, "input")
        dst_path = os.path.join(workdir, "output.wav")

        with open(src_path, "wb") as src:
            src.write(data)

        subprocess.run(
            [
                FFMPEG, "-y",
                "-i", src_path,
                "-ar", "16000",
                "-ac", "1",
                "-f", "wav",
                dst_path,
            ],
            check=True,
            # ffmpeg reads interactive commands from stdin; never let it
            # touch the stdin this server process inherited.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        with open(dst_path, "rb") as dst:
            return dst.read()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_multipart(content_type: str, body: bytes):
    """
    Parse a multipart/form-data body.

    Args:
        content_type: Value of the request's Content-Type header, including
            the ``boundary=`` parameter.
        body: Raw request body.

    Returns:
        A dict of field_name -> (filename_or_None, content_type, data).
        Parts without a ``name`` parameter in Content-Disposition are skipped;
        if a name repeats, the last part wins.

    Raises:
        ValueError: the body could not be parsed as a multipart message.
    """
    import email
    from email import policy as email_policy
    from email import utils as email_utils

    def disposition_param(part, key):
        # get_param understands quoted strings (so a ';' inside a quoted
        # filename is not treated as a separator) and RFC 2231 values,
        # which it returns as a (charset, language, value) tuple.
        value = part.get_param(key, header="content-disposition")
        if isinstance(value, tuple):
            value = email_utils.collapse_rfc2231_value(value)
        return value

    # email.parser needs the full MIME headers to parse multipart
    raw = b"Content-Type: " + content_type.encode() + b"\r\n\r\n" + body
    msg = email.message_from_bytes(raw, policy=email_policy.compat32)

    # For anything that is not a well-formed multipart message the payload is
    # a plain string, not a list of parts; fail with a clear error instead of
    # iterating over its characters.
    if not msg.is_multipart():
        raise ValueError("request body is not multipart/form-data")

    parts = {}
    for part in msg.get_payload():
        name = disposition_param(part, "name")
        if name is None:
            continue
        parts[name] = (
            disposition_param(part, "filename"),
            part.get_content_type(),
            part.get_payload(decode=True),
        )
    return parts
|
||||||
|
|
||||||
|
|
||||||
|
def build_multipart(fields: dict) -> tuple[bytes, str]:
    """
    Build a multipart/form-data body from fields dict:
    field_name -> (filename_or_None, content_type, data_bytes)

    Parts are emitted in the dict's iteration order. A part gets a
    ``filename`` parameter only when its filename is truthy. A ``None``
    payload is sent as an empty part.

    Returns (body_bytes, content_type_header_value).
    """
    boundary = b"----ParakeetProxyBoundary0xDEADBEEF"

    def header_value(value) -> str:
        # Names and filenames come from the client request. Percent-encode
        # the characters that could terminate the quoted string or the
        # header line, so they cannot inject headers into the upstream call.
        return (
            str(value)
            .replace('"', "%22")
            .replace("\r", "%0D")
            .replace("\n", "%0A")
        )

    # Collect chunks and join once; repeated bytes += copies the whole
    # (potentially large) audio payload on every append.
    chunks = []
    for name, (filename, ct, data) in fields.items():
        disposition = f'Content-Disposition: form-data; name="{header_value(name)}"'
        if filename:
            disposition += f'; filename="{header_value(filename)}"'
        chunks.extend((
            b"--", boundary, b"\r\n",
            disposition.encode(), b"\r\n",
            f"Content-Type: {ct}\r\n\r\n".encode(),
            data if data is not None else b"",
            b"\r\n",
        ))
    chunks.extend((b"--", boundary, b"--\r\n"))

    return b"".join(chunks), f"multipart/form-data; boundary={boundary.decode()}"
|
||||||
|
|
||||||
|
|
||||||
|
class ProxyHandler(http.server.BaseHTTPRequestHandler):
    """Request handler that fronts parakeet-server with an audio-converting proxy.

    Routes:
        GET  /health                   forwarded to the upstream server.
        POST /v1/audio/transcriptions  the uploaded ``file`` field is converted
                                       with ``convert_to_wav`` and the form is
                                       re-posted upstream; other form fields
                                       are passed along unchanged.

    Every other path gets an empty 404. The upstream server is reached on
    127.0.0.1 at the module-level ``PARAKEET_PORT``.
    """

    def log_message(self, fmt, *args):
        """Write the access-log line to stdout with a ``[parakeet-proxy]`` prefix."""
        print(f"[parakeet-proxy] {self.address_string()} - {fmt % args}", flush=True)

    def do_GET(self):
        """Serve GET requests: only ``/health`` is proxied."""
        if self.path == "/health":
            self._forward_get("/health")
        else:
            self._reply(404)

    def do_POST(self):
        """Serve POST requests: only the transcription endpoint is handled."""
        if self.path.rstrip("/") == "/v1/audio/transcriptions":
            self._handle_transcription()
        else:
            self._reply(404)

    def _reply(self, code, body=b"", content_type=None):
        """Send a complete response: status line, headers and ``body``.

        Content-Length is always sent so clients can delimit the body without
        waiting for the connection to close.
        """
        self.send_response(code)
        if content_type:
            self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _forward_get(self, path):
        """Relay ``GET path`` to the upstream server and mirror its answer.

        Upstream HTTP error statuses are passed through as-is; failure to
        reach the upstream at all is reported as 502 with the error text.
        """
        url = f"http://127.0.0.1:{PARAKEET_PORT}{path}"
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                status = resp.status
                content_type = resp.headers.get("Content-Type", "application/json")
                body = resp.read()
        except urllib.error.HTTPError as e:
            # The upstream answered, just not with 2xx: report its real status.
            status = e.code
            content_type = e.headers.get("Content-Type", "application/json")
            body = e.read()
        except Exception as e:
            self._reply(502, str(e).encode(), "text/plain; charset=utf-8")
            return
        self._reply(status, body, content_type)

    def _handle_transcription(self):
        """Convert the uploaded audio to WAV and forward the form upstream.

        Responds with 400 for a bad Content-Length, an unparsable body, a
        missing ``file`` field or audio ffmpeg cannot decode; 500 for any
        other conversion failure; 502 when the upstream cannot be reached.
        Otherwise the upstream's status, Content-Type and body are relayed.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            # A negative length would make rfile.read() block until EOF.
            self._error(400, "invalid Content-Length header")
            return

        body = self.rfile.read(length)
        ct = self.headers.get("Content-Type", "")

        try:
            fields = parse_multipart(ct, body)
        except Exception as e:
            self._error(400, f"failed to parse multipart: {e}")
            return

        if "file" not in fields:
            self._error(400, "missing required field 'file'")
            return

        audio_data = fields["file"][2]

        # Convert to WAV regardless of what we received
        try:
            wav_data = convert_to_wav(audio_data)
        except subprocess.CalledProcessError:
            self._error(400, "ffmpeg could not decode audio")
            return
        except Exception as e:
            self._error(500, f"conversion error: {e}")
            return

        # Rebuild multipart with converted WAV, preserve other fields.
        # Re-assigning an existing key keeps its position in the copy.
        new_fields = dict(fields)
        new_fields["file"] = ("recording.wav", "audio/wav", wav_data)
        new_body, new_ct = build_multipart(new_fields)

        # Forward to parakeet-server
        url = f"http://127.0.0.1:{PARAKEET_PORT}/v1/audio/transcriptions"
        req = urllib.request.Request(
            url,
            data=new_body,
            headers={"Content-Type": new_ct},
            method="POST",
        )
        try:
            with urllib.request.urlopen(req, timeout=300) as resp:
                status = resp.status
                resp_ct = resp.headers.get("Content-Type", "application/json")
                resp_body = resp.read()
        except urllib.error.HTTPError as e:
            status = e.code
            resp_ct = e.headers.get("Content-Type", "application/json")
            resp_body = e.read()
        except Exception as e:
            self._error(502, f"upstream error: {e}")
            return
        self._reply(status, resp_body, resp_ct)

    def _error(self, code: int, msg: str):
        """Send a JSON error body ``{"error": {"message": msg, "type": "proxy_error"}}``.

        The body is produced by ``json.dumps`` so that quotes, backslashes or
        control characters in ``msg`` (e.g. from exception text) still yield
        valid JSON.
        """
        import json

        payload = json.dumps(
            {"error": {"message": msg, "type": "proxy_error"}},
            separators=(",", ":"),
        )
        self._reply(code, payload.encode(), "application/json")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Entry point: start parakeet-server as a child process bound to loopback,
    # then serve the proxy in the foreground until interrupted.
    import signal

    def _interrupt(signum, frame):
        raise KeyboardInterrupt

    # The default SIGTERM action ends the interpreter without unwinding, so
    # the `finally` below would never run and the child would be left behind,
    # still holding its port. Route SIGTERM through the same path as Ctrl-C.
    signal.signal(signal.SIGTERM, _interrupt)

    proc = subprocess.Popen([
        "parakeet-server",
        "--host", "127.0.0.1",
        "--port", str(PARAKEET_PORT),
        "--model", MODEL,
        "--cache-dir", CACHE_DIR,
    ])
    print(f"[parakeet-proxy] started parakeet-server pid={proc.pid} on :{PARAKEET_PORT}", flush=True)

    server = None
    try:
        # Created inside the try so a failed bind still stops the child.
        server = http.server.HTTPServer(("0.0.0.0", PROXY_PORT), ProxyHandler)
        print(f"[parakeet-proxy] listening on :{PROXY_PORT}", flush=True)
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        if server is not None:
            server.server_close()
        proc.terminate()
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The child ignored SIGTERM; do not hang shutdown forever.
            proc.kill()
            proc.wait()
|
||||||
@@ -18,7 +18,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
initContainers:
|
initContainers:
|
||||||
- name: download-whisper
|
- name: download-whisper
|
||||||
image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09
|
image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09
|
||||||
command:
|
command:
|
||||||
- sh
|
- sh
|
||||||
- -c
|
- -c
|
||||||
@@ -48,7 +48,7 @@ spec:
|
|||||||
mountPath: /root/.cache
|
mountPath: /root/.cache
|
||||||
containers:
|
containers:
|
||||||
- name: llama-swap
|
- name: llama-swap
|
||||||
image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09
|
image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
command:
|
command:
|
||||||
- llama-swap
|
- llama-swap
|
||||||
|
|||||||
@@ -13,3 +13,4 @@ configMapGenerator:
|
|||||||
namespace: llama
|
namespace: llama
|
||||||
files:
|
files:
|
||||||
- config.yaml=configs/config.yaml
|
- config.yaml=configs/config.yaml
|
||||||
|
- parakeet-proxy.py=configs/parakeet-proxy.py
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ spec:
|
|||||||
value: "Users"
|
value: "Users"
|
||||||
- name: OAUTH_AUTO_REDIRECT
|
- name: OAUTH_AUTO_REDIRECT
|
||||||
value: "true"
|
value: "true"
|
||||||
# STT via whisper-server (routed through llama-swap)
|
# STT via parakeet-server (routed through llama-swap)
|
||||||
- name: AUDIO_STT_ENGINE
|
- name: AUDIO_STT_ENGINE
|
||||||
value: "openai"
|
value: "openai"
|
||||||
- name: AUDIO_STT_OPENAI_API_BASE_URL
|
- name: AUDIO_STT_OPENAI_API_BASE_URL
|
||||||
@@ -92,9 +92,7 @@ spec:
|
|||||||
- name: AUDIO_STT_OPENAI_API_KEY
|
- name: AUDIO_STT_OPENAI_API_KEY
|
||||||
value: "ignored"
|
value: "ignored"
|
||||||
- name: AUDIO_STT_MODEL
|
- name: AUDIO_STT_MODEL
|
||||||
value: "whisper-small"
|
value: "parakeet-tdt_ctc-1.1b"
|
||||||
- name: AUDIO_STT_SUPPORTED_CONTENT_TYPES
|
|
||||||
value: "audio/wav,audio/wave"
|
|
||||||
# TTS via OuteTTS (routed through llama-swap)
|
# TTS via OuteTTS (routed through llama-swap)
|
||||||
# TTS via dedicated Kokoro server (CPU-only, separate pod)
|
# TTS via dedicated Kokoro server (CPU-only, separate pod)
|
||||||
- name: AUDIO_TTS_ENGINE
|
- name: AUDIO_TTS_ENGINE
|
||||||
|
|||||||
Reference in New Issue
Block a user