add converting proxy to parakeet
This commit is contained in:
@@ -238,14 +238,12 @@ models:
|
|||||||
|
|
||||||
# STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded)
|
# STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded)
|
||||||
# Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models
|
# Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models
|
||||||
# Exposes POST /v1/audio/transcriptions (OpenAI-compatible)
|
# parakeet-proxy.py sits in front to convert any audio format to WAV via ffmpeg,
|
||||||
|
# since parakeet-server only accepts real WAV but browsers send Ogg/Opus.
|
||||||
"parakeet-tdt_ctc-1.1b":
|
"parakeet-tdt_ctc-1.1b":
|
||||||
checkEndpoint: none
|
checkEndpoint: none
|
||||||
cmd: |
|
cmd: |
|
||||||
parakeet-server
|
env PROXY_PORT=${PORT} FFMPEG_BIN=/root/.cache/ffmpeg/ffmpeg python3 /config/parakeet-proxy.py
|
||||||
--port ${PORT}
|
|
||||||
--model tdt_ctc-1.1b-q4_k.gguf
|
|
||||||
--cache-dir /root/.cache/parakeet.cpp/models
|
|
||||||
|
|
||||||
|
|
||||||
# Image generation via stable-diffusion.cpp (sd-server)
|
# Image generation via stable-diffusion.cpp (sd-server)
|
||||||
|
|||||||
@@ -0,0 +1,227 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Thin reverse proxy for parakeet-server, which this script also launches as a child process.
|
||||||
|
|
||||||
|
Accepts POST /v1/audio/transcriptions with any audio format,
|
||||||
|
converts the audio to 16 kHz mono WAV via ffmpeg, then forwards
|
||||||
|
the converted file to the real parakeet-server running on PARAKEET_PORT.
|
||||||
|
|
||||||
|
Also proxies GET /health straight through.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
PROXY_PORT=<port> PARAKEET_PORT=<upstream> python3 parakeet-proxy.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import http.server
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
|
# Port this proxy listens on.
PROXY_PORT = int(os.environ.get("PROXY_PORT", "8080"))
# Port of the upstream parakeet-server. The module usage text documents a
# PARAKEET_PORT environment variable, so honor it; when it is unset the
# upstream port stays at PROXY_PORT + 1.
PARAKEET_PORT = int(os.environ.get("PARAKEET_PORT", str(PROXY_PORT + 1)))
# ffmpeg executable used for the audio conversion.
FFMPEG = os.environ.get("FFMPEG_BIN", "ffmpeg")
# Model file name and model cache directory passed to parakeet-server.
MODEL = os.environ.get("PARAKEET_MODEL", "tdt_ctc-1.1b-q4_k.gguf")
CACHE_DIR = os.environ.get("PARAKEET_CACHE_DIR", "/root/.cache/parakeet.cpp/models")
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_wav(data: bytes) -> bytes:
    """Convert arbitrary audio bytes to a 16 kHz mono WAV file via ffmpeg.

    The input is written to a scratch directory, the executable named by
    ``FFMPEG`` transcodes it, and the resulting file is read back.

    Args:
        data: Raw bytes of the uploaded audio, in any container/codec that
            ffmpeg can probe.

    Returns:
        The bytes of the converted WAV file.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
        OSError: the scratch files could not be written/read or ffmpeg could
            not be started.
    """
    # A private scratch directory is removed as a whole when the block exits,
    # so neither file is left behind even if writing the input fails.
    with tempfile.TemporaryDirectory(prefix="parakeet-proxy-") as workdir:
        src_path = os.path.join(workdir, "audio.input")
        dst_path = os.path.join(workdir, "audio.wav")
        with open(src_path, "wb") as src:
            src.write(data)

        subprocess.run(
            [
                FFMPEG, "-y",
                "-i", src_path,
                "-ar", "16000",  # resample to 16 kHz
                "-ac", "1",      # downmix to mono
                "-f", "wav",
                dst_path,
            ],
            check=True,
            # Keep ffmpeg away from the server's stdin; it would otherwise
            # inherit it.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        with open(dst_path, "rb") as dst:
            return dst.read()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_multipart(content_type: str, body: bytes):
    """Parse a multipart/form-data request body.

    Args:
        content_type: Value of the request's Content-Type header, including
            the ``boundary`` parameter.
        body: Raw request body.

    Returns:
        A dict mapping each form field name to a tuple
        ``(filename_or_None, content_type, data)``. Parts without a ``name``
        parameter in their Content-Disposition header are skipped. If a name
        occurs more than once the last part wins.

    Raises:
        ValueError: the body could not be parsed as a multipart message.
    """
    import email
    import email.utils
    from email import policy as email_policy

    # The email parser needs a MIME header block in front of the body to know
    # the boundary, so synthesize one from the HTTP Content-Type header.
    raw = b"Content-Type: " + content_type.encode() + b"\r\n\r\n" + body
    msg = email.message_from_bytes(raw, policy=email_policy.compat32)
    if not msg.is_multipart():
        # For a non-multipart message get_payload() is a string, not a list
        # of parts; fail with a clear message instead of iterating over it.
        raise ValueError("request body is not multipart/form-data")

    parts = {}
    for part in msg.get_payload():
        # get_param() understands quoted values, so a ';' inside a quoted
        # filename no longer truncates it.
        name = part.get_param("name", header="content-disposition")
        if name is None:
            continue
        name = email.utils.collapse_rfc2231_value(name)
        filename = part.get_param("filename", header="content-disposition")
        if filename is not None:
            filename = email.utils.collapse_rfc2231_value(filename)
        parts[name] = (filename, part.get_content_type(), part.get_payload(decode=True))
    return parts
|
||||||
|
|
||||||
|
|
||||||
|
def build_multipart(fields: dict) -> tuple[bytes, str]:
    """Serialize form fields into a multipart/form-data body.

    Args:
        fields: Mapping of field name to
            ``(filename_or_None, content_type, data_bytes)``.

    Returns:
        ``(body_bytes, content_type_header_value)``; the header value carries
        the boundary used in the body.
    """
    import uuid

    def _escape(value: str) -> str:
        # Names and filenames originate from the client request; percent-
        # encode the characters that would end the quoted string or the
        # header line so they cannot inject extra headers or parts.
        return value.replace("\r", "%0D").replace("\n", "%0A").replace('"', "%22")

    # A fresh random boundary per call, so payload bytes cannot contain it by
    # accident or by design.
    boundary = f"----ParakeetProxyBoundary{uuid.uuid4().hex}"
    delimiter = f"--{boundary}\r\n".encode()

    # Collect the pieces and join once; repeated bytes += would recopy the
    # (potentially large) audio payload for every appended piece.
    chunks = []
    for name, (filename, ct, data) in fields.items():
        disposition = f'Content-Disposition: form-data; name="{_escape(name)}"'
        if filename:
            disposition += f'; filename="{_escape(filename)}"'
        chunks.append(delimiter)
        chunks.append(f"{disposition}\r\n".encode())
        chunks.append(f"Content-Type: {ct}\r\n\r\n".encode())
        chunks.append(data)
        chunks.append(b"\r\n")
    chunks.append(f"--{boundary}--\r\n".encode())
    return b"".join(chunks), f"multipart/form-data; boundary={boundary}"
|
||||||
|
|
||||||
|
|
||||||
|
class ProxyHandler(http.server.BaseHTTPRequestHandler):
    """Request handler that converts uploaded audio and relays to parakeet-server.

    Routes:
        GET  /health                    relayed unchanged to the upstream.
        POST /v1/audio/transcriptions   the ``file`` field is converted to
                                        WAV, then the form is re-posted to
                                        the upstream.
    Every other path is answered with 404. The upstream is reached at
    127.0.0.1 on the module-level ``PARAKEET_PORT``.
    """

    def log_message(self, fmt, *args):
        """Write access-log lines to stdout with a proxy prefix."""
        print(f"[parakeet-proxy] {self.address_string()} - {fmt % args}", flush=True)

    def do_GET(self):
        """Handle GET: only /health is supported."""
        if self.path == "/health":
            self._forward_get("/health")
        else:
            self._send(404)

    def do_POST(self):
        """Handle POST: only the transcription endpoint is supported."""
        if self.path.rstrip("/") == "/v1/audio/transcriptions":
            self._handle_transcription()
        else:
            self._send(404)

    def _send(self, code, content_type=None, body=b""):
        """Send a complete response with status, headers and body.

        Content-Length is always set so clients know where the body ends.
        """
        self.send_response(code)
        if content_type:
            self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    @staticmethod
    def _fetch(request, timeout):
        """Perform an upstream request and return ``(status, content_type, body)``.

        HTTP error statuses from the upstream are returned like any other
        response so they can be relayed as-is; connection-level failures
        still raise.
        """
        try:
            with urllib.request.urlopen(request, timeout=timeout) as resp:
                return (
                    resp.status,
                    resp.headers.get("Content-Type", "application/json"),
                    resp.read(),
                )
        except urllib.error.HTTPError as e:
            return e.code, e.headers.get("Content-Type", "application/json"), e.read()

    def _forward_get(self, path):
        """Relay a GET for ``path`` to the upstream; answer 502 if unreachable."""
        url = f"http://127.0.0.1:{PARAKEET_PORT}{path}"
        try:
            status, content_type, body = self._fetch(url, timeout=5)
        except Exception as e:
            self._send(502, "text/plain; charset=utf-8", str(e).encode())
            return
        self._send(status, content_type, body)

    def _handle_transcription(self):
        """Convert the uploaded ``file`` field to WAV and forward the form upstream.

        Responds 400 for a bad Content-Length, an unparsable body, a missing
        ``file`` field or audio ffmpeg cannot decode; 500 for other
        conversion failures; 502 if the upstream cannot be reached.
        Otherwise the upstream's status and body are relayed.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            length = -1
        if length < 0:
            # A negative size would make rfile.read() block until EOF.
            self._error(400, "invalid Content-Length header")
            return
        body = self.rfile.read(length)
        ct = self.headers.get("Content-Type", "")

        try:
            fields = parse_multipart(ct, body)
        except Exception as e:
            self._error(400, f"failed to parse multipart: {e}")
            return

        if "file" not in fields:
            self._error(400, "missing required field 'file'")
            return

        audio_data = fields["file"][2]

        # Convert to WAV regardless of what we received
        try:
            wav_data = convert_to_wav(audio_data)
        except subprocess.CalledProcessError:
            self._error(400, "ffmpeg could not decode audio")
            return
        except Exception as e:
            self._error(500, f"conversion error: {e}")
            return

        # Swap in the converted WAV; every other field is passed through
        # untouched and in its original position.
        new_fields = dict(fields)
        new_fields["file"] = ("recording.wav", "audio/wav", wav_data)
        new_body, new_ct = build_multipart(new_fields)

        # Forward to parakeet-server
        req = urllib.request.Request(
            f"http://127.0.0.1:{PARAKEET_PORT}/v1/audio/transcriptions",
            data=new_body,
            headers={"Content-Type": new_ct},
            method="POST",
        )
        try:
            status, content_type, resp_body = self._fetch(req, timeout=300)
        except Exception as e:
            self._error(502, f"upstream error: {e}")
            return
        self._send(status, content_type, resp_body)

    def _error(self, code: int, msg: str):
        """Send a JSON error: ``{"error": {"message": msg, "type": "proxy_error"}}``."""
        # Imported here because the module's import block does not provide
        # json. Serializing with json.dumps keeps the body valid JSON even
        # when msg contains quotes, backslashes or newlines.
        import json

        payload = {"error": {"message": msg, "type": "proxy_error"}}
        body = json.dumps(payload, separators=(",", ":")).encode()
        self._send(code, "application/json", body)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import signal

    def _request_shutdown(signum, frame):
        # Route SIGTERM through the same exception path as Ctrl-C so the
        # cleanup below runs and the parakeet-server child is not orphaned.
        raise KeyboardInterrupt

    signal.signal(signal.SIGTERM, _request_shutdown)

    # Launch the real parakeet-server on the loopback interface only; clients
    # reach it exclusively through this proxy.
    proc = subprocess.Popen([
        "parakeet-server",
        "--host", "127.0.0.1",
        "--port", str(PARAKEET_PORT),
        "--model", MODEL,
        "--cache-dir", CACHE_DIR,
    ])
    try:
        print(f"[parakeet-proxy] started parakeet-server pid={proc.pid} on :{PARAKEET_PORT}", flush=True)

        # Created inside the try so that a bind failure still stops the child.
        server = http.server.HTTPServer(("0.0.0.0", PROXY_PORT), ProxyHandler)
        print(f"[parakeet-proxy] listening on :{PROXY_PORT}", flush=True)
        try:
            server.serve_forever()
        finally:
            server.server_close()
    except KeyboardInterrupt:
        pass
    finally:
        proc.terminate()
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The child ignored SIGTERM; do not hang the shutdown on it.
            proc.kill()
            proc.wait()
|
||||||
@@ -13,3 +13,4 @@ configMapGenerator:
|
|||||||
namespace: llama
|
namespace: llama
|
||||||
files:
|
files:
|
||||||
- config.yaml=configs/config.yaml
|
- config.yaml=configs/config.yaml
|
||||||
|
- parakeet-proxy.py=configs/parakeet-proxy.py
|
||||||
|
|||||||
@@ -93,8 +93,6 @@ spec:
|
|||||||
value: "ignored"
|
value: "ignored"
|
||||||
- name: AUDIO_STT_MODEL
|
- name: AUDIO_STT_MODEL
|
||||||
value: "parakeet-tdt_ctc-1.1b"
|
value: "parakeet-tdt_ctc-1.1b"
|
||||||
- name: AUDIO_STT_SUPPORTED_CONTENT_TYPES
|
|
||||||
value: "audio/wav,audio/wave"
|
|
||||||
# TTS via OuteTTS (routed through llama-swap)
|
# TTS via OuteTTS (routed through llama-swap)
|
||||||
# TTS via dedicated Kokoro server (CPU-only, separate pod)
|
# TTS via dedicated Kokoro server (CPU-only, separate pod)
|
||||||
- name: AUDIO_TTS_ENGINE
|
- name: AUDIO_TTS_ENGINE
|
||||||
|
|||||||
Reference in New Issue
Block a user