add converting proxy to parakeet
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
ci/woodpecker/cron/renovate Pipeline was successful

This commit is contained in:
2026-06-10 20:39:08 +02:00
parent f863a0a496
commit 8860f6782e
4 changed files with 231 additions and 7 deletions
+3 -5
View File
@@ -238,14 +238,12 @@ models:
# STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded)
# Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models
# Exposes POST /v1/audio/transcriptions (OpenAI-compatible)
# parakeet-proxy.py sits in front to convert any audio format to WAV via ffmpeg,
# since parakeet-server only accepts real WAV but browsers send Ogg/Opus.
"parakeet-tdt_ctc-1.1b":
checkEndpoint: none
cmd: |
parakeet-server
--port ${PORT}
--model tdt_ctc-1.1b-q4_k.gguf
--cache-dir /root/.cache/parakeet.cpp/models
env PROXY_PORT=${PORT} FFMPEG_BIN=/root/.cache/ffmpeg/ffmpeg python3 /config/parakeet-proxy.py
# Image generation via stable-diffusion.cpp (sd-server)
+227
View File
@@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""
Thin reverse proxy for parakeet-server.
Accepts POST /v1/audio/transcriptions with any audio format,
converts the audio to 16 kHz mono WAV via ffmpeg, then forwards
the converted file to the real parakeet-server running on PARAKEET_PORT.
Also proxies GET /health straight through.
Usage:
PROXY_PORT=<port> PARAKEET_PORT=<upstream> python3 parakeet-proxy.py
"""
import http.server
import io
import os
import subprocess
import sys
import tempfile
import urllib.request
import urllib.error
# Port this proxy listens on.
PROXY_PORT = int(os.environ.get("PROXY_PORT", "8080"))
# Port of the upstream parakeet-server.  The PARAKEET_PORT environment
# variable overrides it; when unset or empty it falls back to the port
# directly after the proxy's own.
PARAKEET_PORT = int(os.environ.get("PARAKEET_PORT") or PROXY_PORT + 1)
# ffmpeg executable used for audio conversion.
FFMPEG = os.environ.get("FFMPEG_BIN", "ffmpeg")
# Model file name and model cache directory handed to parakeet-server.
MODEL = os.environ.get("PARAKEET_MODEL", "tdt_ctc-1.1b-q4_k.gguf")
CACHE_DIR = os.environ.get("PARAKEET_CACHE_DIR", "/root/.cache/parakeet.cpp/models")
def convert_to_wav(data: bytes) -> bytes:
    """Convert any audio bytes to 16 kHz mono PCM WAV via ffmpeg.

    Args:
        data: Raw bytes of the uploaded audio, in any container/codec the
            configured ffmpeg binary can decode.

    Returns:
        The bytes of the WAV file written by ffmpeg.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
        OSError: the ffmpeg binary could not be started or a scratch file
            could not be written or read.
    """
    # A private scratch directory guarantees that both the input and the
    # output file are removed on every exit path, including a failed write
    # of the input itself.
    with tempfile.TemporaryDirectory(prefix="parakeet-proxy-") as workdir:
        src_path = os.path.join(workdir, "audio.input")
        dst_path = os.path.join(workdir, "audio.input.wav")
        with open(src_path, "wb") as src:
            src.write(data)
        subprocess.run(
            [
                FFMPEG, "-y",
                "-i", src_path,
                "-ar", "16000",
                "-ac", "1",
                "-f", "wav",
                dst_path,
            ],
            check=True,
            # Keep ffmpeg away from the proxy's own stdin.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        with open(dst_path, "rb") as dst:
            return dst.read()
def parse_multipart(content_type: str, body: bytes):
    """
    Parse a multipart/form-data body.

    Args:
        content_type: Value of the request's Content-Type header, including
            the ``boundary`` parameter.
        body: Raw request body.

    Returns:
        A dict of field_name -> (filename_or_None, content_type, data).
        Parts whose Content-Disposition carries no ``name`` are skipped.

    Raises:
        ValueError: the body could not be parsed as a multipart message.
    """
    import email
    import email.utils
    from email import policy as email_policy

    # The email parser only finds the multipart boundary when the
    # Content-Type header precedes the body, so synthesise that header.
    raw = b"Content-Type: " + content_type.encode() + b"\r\n\r\n" + body
    msg = email.message_from_bytes(raw, policy=email_policy.compat32)
    if not msg.is_multipart():
        raise ValueError("request body is not multipart/form-data")

    def disposition_param(part, key):
        # get_param understands quoted values (including ';' inside quotes)
        # and RFC 2231 encoded parameters, which naive splitting does not.
        value = part.get_param(key, header="content-disposition")
        if value is None:
            return None
        return email.utils.collapse_rfc2231_value(value)

    parts = {}
    for part in msg.get_payload():
        name = disposition_param(part, "name")
        if name is None:
            continue
        parts[name] = (
            disposition_param(part, "filename"),
            part.get_content_type(),
            part.get_payload(decode=True),
        )
    return parts
def build_multipart(fields: dict) -> tuple[bytes, str]:
    """
    Build a multipart/form-data body from a fields dict.

    Args:
        fields: field_name -> (filename_or_None, content_type, data_bytes).
            An empty filename is treated like ``None``.

    Returns:
        (body_bytes, content_type_header_value).
    """
    boundary = b"----ParakeetProxyBoundary0xDEADBEEF"

    def escape(value) -> str:
        # Names and filenames may originate from the client request.  A raw
        # double quote or line break would terminate the quoted parameter or
        # inject extra part headers, so percent-escape them the way the
        # WHATWG multipart/form-data encoding does.
        return (
            str(value)
            .replace('"', "%22")
            .replace("\r", "%0D")
            .replace("\n", "%0A")
        )

    chunks = []
    for name, (filename, ct, data) in fields.items():
        disposition = f'Content-Disposition: form-data; name="{escape(name)}"'
        if filename:
            disposition += f'; filename="{escape(filename)}"'
        chunks.append(b"--" + boundary + b"\r\n")
        chunks.append(f"{disposition}\r\n".encode())
        chunks.append(f"Content-Type: {ct}\r\n\r\n".encode())
        chunks.append(data)
        chunks.append(b"\r\n")
    chunks.append(b"--" + boundary + b"--\r\n")
    # Join once instead of repeatedly re-copying a growing bytes object.
    return b"".join(chunks), f"multipart/form-data; boundary={boundary.decode()}"
class ProxyHandler(http.server.BaseHTTPRequestHandler):
    """Request handler that converts uploaded audio to WAV before forwarding it.

    Routes:
        GET  /health                    relayed to parakeet-server
        POST /v1/audio/transcriptions   "file" field converted to WAV, then
                                        relayed to parakeet-server
    Every other path is answered with 404.
    """

    def log_message(self, fmt, *args):
        """Print log lines to stdout with a ``[parakeet-proxy]`` prefix."""
        print(f"[parakeet-proxy] {self.address_string()} - {fmt % args}", flush=True)

    def do_GET(self):
        """Relay ``GET /health`` upstream; answer 404 for anything else."""
        if self.path == "/health":
            self._forward_get("/health")
        else:
            self._not_found()

    def do_POST(self):
        """Handle ``POST /v1/audio/transcriptions``; answer 404 for anything else."""
        if self.path.rstrip("/") == "/v1/audio/transcriptions":
            self._handle_transcription()
        else:
            self._not_found()

    def _not_found(self):
        """Send an empty 404 response."""
        self.send_response(404)
        self.send_header("Content-Length", "0")
        self.end_headers()

    def _send_body(self, status: int, content_type: str, body: bytes):
        """Send a complete response with an explicit Content-Length."""
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _relay(self, request, timeout):
        """Send *request* upstream and copy the answer back to the client.

        HTTP error statuses returned by the upstream are passed through
        unchanged; a failure to reach the upstream yields a 502 error.
        Only the upstream exchange is guarded, so a failure while writing
        to the client is never answered with a second response.
        """
        try:
            try:
                with urllib.request.urlopen(request, timeout=timeout) as resp:
                    status = resp.status
                    content_type = resp.headers.get("Content-Type", "application/json")
                    payload = resp.read()
            except urllib.error.HTTPError as e:
                status = e.code
                content_type = e.headers.get("Content-Type", "application/json")
                payload = e.read()
        except Exception as e:
            self._error(502, f"upstream error: {e}")
            return
        self._send_body(status, content_type, payload)

    def _forward_get(self, path):
        """Relay a GET for *path* to parakeet-server (5 second timeout)."""
        self._relay(f"http://127.0.0.1:{PARAKEET_PORT}{path}", timeout=5)

    def _handle_transcription(self):
        """Convert the uploaded ``file`` field to WAV and forward the request."""
        try:
            length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            length = -1
        if length < 0:
            # A negative length would make rfile.read() wait for EOF.
            self._error(400, "invalid Content-Length header")
            return
        body = self.rfile.read(length)
        ct = self.headers.get("Content-Type", "")
        try:
            fields = parse_multipart(ct, body)
        except Exception as e:
            self._error(400, f"failed to parse multipart: {e}")
            return
        if "file" not in fields:
            self._error(400, "missing required field 'file'")
            return
        audio_data = fields["file"][2]
        # Convert to WAV regardless of what we received
        try:
            wav_data = convert_to_wav(audio_data)
        except subprocess.CalledProcessError:
            self._error(400, "ffmpeg could not decode audio")
            return
        except Exception as e:
            self._error(500, f"conversion error: {e}")
            return
        # Rebuild multipart with converted WAV, preserve other fields
        new_fields = {
            name: ("recording.wav", "audio/wav", wav_data) if name == "file" else part
            for name, part in fields.items()
        }
        new_body, new_ct = build_multipart(new_fields)
        # Forward to parakeet-server
        req = urllib.request.Request(
            f"http://127.0.0.1:{PARAKEET_PORT}/v1/audio/transcriptions",
            data=new_body,
            headers={"Content-Type": new_ct},
            method="POST",
        )
        self._relay(req, timeout=300)

    def _error(self, code: int, msg: str):
        """Send a JSON error body of the form ``{"error": {"message", "type"}}``."""
        import json

        # json.dumps escapes quotes, backslashes and control characters that
        # would otherwise make the body invalid JSON.
        body = json.dumps(
            {"error": {"message": msg, "type": "proxy_error"}},
            separators=(",", ":"),
        ).encode()
        self._send_body(code, "application/json", body)
if __name__ == "__main__":
    import signal

    def _exit_on_sigterm(signum, frame):
        # Python's default SIGTERM handling ends the process without running
        # ``finally`` blocks, which would leave parakeet-server running as an
        # orphan.  Raising SystemExit unwinds through the cleanup below.
        raise SystemExit(0)

    signal.signal(signal.SIGTERM, _exit_on_sigterm)

    # Start the real parakeet-server, bound to loopback only, as a child.
    proc = subprocess.Popen([
        "parakeet-server",
        "--host", "127.0.0.1",
        "--port", str(PARAKEET_PORT),
        "--model", MODEL,
        "--cache-dir", CACHE_DIR,
    ])
    print(f"[parakeet-proxy] started parakeet-server pid={proc.pid} on :{PARAKEET_PORT}", flush=True)
    try:
        # Creating the server inside the try ensures a bind failure still
        # stops the child process.
        with http.server.HTTPServer(("0.0.0.0", PROXY_PORT), ProxyHandler) as server:
            print(f"[parakeet-proxy] listening on :{PROXY_PORT}", flush=True)
            server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        proc.terminate()
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The child ignored SIGTERM; do not hang shutdown forever.
            proc.kill()
            proc.wait()
+1
View File
@@ -13,3 +13,4 @@ configMapGenerator:
namespace: llama
files:
- config.yaml=configs/config.yaml
- parakeet-proxy.py=configs/parakeet-proxy.py
-2
View File
@@ -93,8 +93,6 @@ spec:
value: "ignored"
- name: AUDIO_STT_MODEL
value: "parakeet-tdt_ctc-1.1b"
- name: AUDIO_STT_SUPPORTED_CONTENT_TYPES
value: "audio/wav,audio/wave"
# TTS via OuteTTS (routed through llama-swap)
# TTS via dedicated Kokoro server (CPU-only, separate pod)
- name: AUDIO_TTS_ENGINE