1 Commits

Author SHA1 Message Date
Renovate a2ae078754 Update renovate/renovate Docker tag to v43.217.1 2026-06-10 02:01:14 +00:00
6 changed files with 21 additions and 242 deletions
+1 -1
View File
@@ -21,7 +21,7 @@ steps:
- bao kv get -mount secret -field RENOVATE_TOKEN renovate > /woodpecker/renovate_token - bao kv get -mount secret -field RENOVATE_TOKEN renovate > /woodpecker/renovate_token
- bao kv get -mount secret -field GITHUB_COM_TOKEN renovate > /woodpecker/github_com_token - bao kv get -mount secret -field GITHUB_COM_TOKEN renovate > /woodpecker/github_com_token
- name: Run Renovate - name: Run Renovate
image: renovate/renovate:43.220.0 image: renovate/renovate:43.217.1
environment: environment:
RENOVATE_AUTODISCOVER: "true" RENOVATE_AUTODISCOVER: "true"
RENOVATE_ENDPOINT: https://gitea.lumpiasty.xyz/api/v1 RENOVATE_ENDPOINT: https://gitea.lumpiasty.xyz/api/v1
+14 -9
View File
@@ -19,7 +19,7 @@ hooks:
on_startup: on_startup:
preload: preload:
- "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL" - "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
- "parakeet-tdt_ctc-1.1b" - "whisper-small"
# matrix replaces groups (they are mutually exclusive). # matrix replaces groups (they are mutually exclusive).
# The small 0.8B model runs alongside any LLM. # The small 0.8B model runs alongside any LLM.
@@ -27,7 +27,7 @@ hooks:
matrix: matrix:
vars: vars:
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL" q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
stt: "parakeet-tdt_ctc-1.1b" stt: "whisper-small"
flux: "flux2-klein-4b:Q4_K_M" flux: "flux2-klein-4b:Q4_K_M"
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M" coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M" q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
@@ -58,7 +58,6 @@ matrix:
# FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen # FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen
image_gen: "flux & stt" image_gen: "flux & stt"
models: models:
"Qwen3-Coder-Next-GGUF:Q4_K_M": "Qwen3-Coder-Next-GGUF:Q4_K_M":
cmd: | cmd: |
@@ -236,14 +235,20 @@ models:
--parallel 1 --parallel 1
${common_args} ${common_args}
# STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded) # STT via whisper.cpp (Vulkan GPU on RX 580, always loaded, ~600MB VRAM)
# Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models # Model auto-downloaded by init container, see deployment.yaml
# parakeet-proxy.py sits in front to convert any audio format to WAV via ffmpeg, # Note: Vulkan whisper on AMD GPUs has known quality issues on some cards;
# since parakeet-server only accepts real WAV but browsers send Ogg/Opus. # if transcriptions come out as garbage/gibberish, add --no-gpu to fall back.
"parakeet-tdt_ctc-1.1b": "whisper-small":
checkEndpoint: none checkEndpoint: none
cmd: | cmd: |
env PROXY_PORT=${PORT} FFMPEG_BIN=/root/.cache/ffmpeg/ffmpeg python3 /config/parakeet-proxy.py whisper-server
--port ${PORT}
-m /root/.cache/whisper/ggml-small.bin
--request-path /v1/audio
--inference-path /transcriptions
--convert
--threads 6
# Image generation via stable-diffusion.cpp (sd-server) # Image generation via stable-diffusion.cpp (sd-server)
-227
View File
@@ -1,227 +0,0 @@
#!/usr/bin/env python3
"""
Thin reverse proxy for parakeet-server.
Accepts POST /v1/audio/transcriptions with any audio format,
converts the audio to 16 kHz mono WAV via ffmpeg, then forwards
the converted file to the real parakeet-server running on PARAKEET_PORT.
Also proxies GET /health straight through.
Usage:
PROXY_PORT=<port> PARAKEET_PORT=<upstream> python3 parakeet-proxy.py
"""
import http.server
import io
import os
import subprocess
import sys
import tempfile
import urllib.request
import urllib.error
# Runtime configuration, read once from the environment at import time.

# TCP port this proxy listens on.
PROXY_PORT = int(os.environ.get("PROXY_PORT", "8080"))
# TCP port of the upstream parakeet-server. The module usage text documents a
# PARAKEET_PORT variable, so honor it; when unset, fall back to the port
# directly above the proxy port (the previous hard-coded behavior).
PARAKEET_PORT = int(os.environ.get("PARAKEET_PORT", str(PROXY_PORT + 1)))
# ffmpeg executable used for audio conversion (name on PATH or absolute path).
FFMPEG = os.environ.get("FFMPEG_BIN", "ffmpeg")
# Model file name handed to parakeet-server via --model.
MODEL = os.environ.get("PARAKEET_MODEL", "tdt_ctc-1.1b-q4_k.gguf")
# Directory handed to parakeet-server via --cache-dir.
CACHE_DIR = os.environ.get("PARAKEET_CACHE_DIR", "/root/.cache/parakeet.cpp/models")
def convert_to_wav(data: bytes, timeout: float = 300.0) -> bytes:
    """Convert any audio bytes to 16 kHz mono PCM WAV via ffmpeg.

    Args:
        data: Raw audio in any container/codec ffmpeg can decode.
        timeout: Maximum number of seconds ffmpeg may run before it is killed.

    Returns:
        The bytes of the converted WAV file.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
            (typically: the input could not be decoded).
        subprocess.TimeoutExpired: ffmpeg did not finish within ``timeout``.
        OSError: the ffmpeg binary could not be started or a temp file could
            not be written/read.
    """
    # A private temporary directory guarantees that both the input and the
    # output file are removed on every exit path, including a failed write of
    # the input (which previously leaked the delete=False temp file).
    with tempfile.TemporaryDirectory(prefix="parakeet-proxy-") as workdir:
        src_path = os.path.join(workdir, "audio.input")
        wav_path = os.path.join(workdir, "audio.input.wav")
        with open(src_path, "wb") as src:
            src.write(data)

        subprocess.run(
            [
                FFMPEG, "-y",
                "-i", src_path,
                "-ar", "16000",  # resample to 16 kHz
                "-ac", "1",      # downmix to mono
                "-f", "wav",
                wav_path,
            ],
            check=True,
            # ffmpeg must never read from (or wait on) the server's stdin.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            # The HTTP server handles one request at a time, so a hung ffmpeg
            # would otherwise block every later request indefinitely.
            timeout=timeout,
        )

        with open(wav_path, "rb") as wav:
            return wav.read()
def parse_multipart(content_type: str, body: bytes):
    """
    Parse a multipart/form-data body.

    Args:
        content_type: Value of the request's Content-Type header, including
            the ``boundary`` parameter.
        body: Raw request body.

    Returns:
        A dict of field_name -> (filename_or_None, content_type, data).
        Parts without a ``name`` parameter in Content-Disposition are skipped.

    Raises:
        ValueError: the body is not a well-formed multipart message.
    """
    import email
    from email import policy as email_policy
    from email import utils as email_utils

    def disposition_param(part, key):
        """Return one Content-Disposition parameter of ``part``, or None."""
        # get_param understands quoted strings, so a ';' inside a quoted
        # filename no longer truncates the value the way a plain split did.
        value = part.get_param(key, header="content-disposition")
        if isinstance(value, tuple):
            # RFC 2231 encoded parameter (e.g. filename*=utf-8''...).
            return email_utils.collapse_rfc2231_value(value)
        return value

    # email.parser needs the full MIME headers to parse multipart
    raw = b"Content-Type: " + content_type.encode() + b"\r\n\r\n" + body
    msg = email.message_from_bytes(raw, policy=email_policy.compat32)

    # For anything that is not a valid multipart message the payload is a
    # plain string; iterating it would fail with an unrelated AttributeError.
    if not msg.is_multipart():
        raise ValueError("request body is not multipart/form-data")

    parts = {}
    for part in msg.get_payload():
        name = disposition_param(part, "name")
        if name is None:
            continue
        parts[name] = (
            disposition_param(part, "filename"),
            part.get_content_type(),
            part.get_payload(decode=True),
        )
    return parts
def build_multipart(fields: dict) -> tuple[bytes, str]:
    """
    Build a multipart/form-data body from fields dict:
    field_name -> (filename_or_None, content_type, data_bytes)

    Returns (body_bytes, content_type_header_value).
    """

    def quote(value) -> str:
        """Escape characters that would break out of a quoted parameter."""
        # Same percent-escapes browsers use for form-data names/filenames;
        # prevents header injection through '"' or line breaks.
        return (
            str(value)
            .replace('"', "%22")
            .replace("\r", "%0D")
            .replace("\n", "%0A")
        )

    # Keep the well-known boundary in the normal case, but never emit one
    # that also occurs inside a payload: that would split the part in two.
    boundary = b"----ParakeetProxyBoundary0xDEADBEEF"
    while any(boundary in data for _, _, data in fields.values()):
        boundary = b"----ParakeetProxyBoundary" + os.urandom(8).hex().encode()

    # Collect chunks and join once; repeated `bytes +=` re-copies the
    # (potentially large) audio payload on every append.
    chunks = []
    for name, (filename, ct, data) in fields.items():
        disposition = f'Content-Disposition: form-data; name="{quote(name)}"'
        if filename:
            disposition += f'; filename="{quote(filename)}"'
        chunks.append(b"--" + boundary + b"\r\n")
        chunks.append(f"{disposition}\r\nContent-Type: {ct}\r\n\r\n".encode())
        chunks.append(data)
        chunks.append(b"\r\n")
    chunks.append(b"--" + boundary + b"--\r\n")

    return b"".join(chunks), f"multipart/form-data; boundary={boundary.decode()}"
class ProxyHandler(http.server.BaseHTTPRequestHandler):
    """HTTP front end that normalizes audio uploads before parakeet-server.

    Routes:
        GET  /health                   forwarded unchanged to the upstream.
        POST /v1/audio/transcriptions  the uploaded ``file`` field is converted
                                       to WAV, then the request is re-sent to
                                       the upstream and its reply relayed.
    Every other path answers 404.
    """

    def log_message(self, fmt, *args):
        """Write access-log lines to stdout with a recognizable prefix."""
        print(f"[parakeet-proxy] {self.address_string()} - {fmt % args}", flush=True)

    def do_GET(self):
        """Serve GET requests; only the health endpoint is supported."""
        if self.path == "/health":
            self._forward_get("/health")
        else:
            self._send_body(404)

    def do_POST(self):
        """Serve POST requests; only the transcription endpoint is supported."""
        if self.path.rstrip("/") == "/v1/audio/transcriptions":
            self._handle_transcription()
        else:
            self._send_body(404)

    def _send_body(self, status, body=b"", content_type=None):
        """Send a complete response with an explicit Content-Length."""
        self.send_response(status)
        if content_type is not None:
            self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _forward_get(self, path):
        """Relay a GET request for ``path`` to the upstream server."""
        url = f"http://127.0.0.1:{PARAKEET_PORT}{path}"
        # Only the upstream call is guarded: a failure while writing to the
        # client must not trigger a second response on the same connection.
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                status = resp.status
                content_type = resp.headers.get("Content-Type", "application/json")
                body = resp.read()
        except urllib.error.HTTPError as e:
            # The upstream answered with an error status: pass it through,
            # the same way the transcription route does.
            status = e.code
            content_type = e.headers.get("Content-Type", "application/json")
            body = e.read()
        except Exception as e:
            # Upstream unreachable or timed out.
            self._send_body(502, str(e).encode(), "text/plain; charset=utf-8")
            return
        self._send_body(status, body, content_type)

    def _handle_transcription(self):
        """Convert the uploaded audio to WAV and forward the request upstream."""
        try:
            length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            length = -1
        if length < 0:
            # A negative length would make rfile.read() wait for EOF.
            self._error(400, "invalid Content-Length header")
            return
        body = self.rfile.read(length)
        ct = self.headers.get("Content-Type", "")

        try:
            fields = parse_multipart(ct, body)
        except Exception as e:
            self._error(400, f"failed to parse multipart: {e}")
            return
        if "file" not in fields:
            self._error(400, "missing required field 'file'")
            return

        filename, file_ct, audio_data = fields["file"]

        # Convert to WAV regardless of what we received
        try:
            wav_data = convert_to_wav(audio_data)
        except subprocess.CalledProcessError:
            self._error(400, "ffmpeg could not decode audio")
            return
        except Exception as e:
            self._error(500, f"conversion error: {e}")
            return

        # Rebuild multipart with converted WAV, preserve other fields
        new_fields = dict(fields)
        new_fields["file"] = ("recording.wav", "audio/wav", wav_data)
        new_body, new_ct = build_multipart(new_fields)

        # Forward to parakeet-server
        url = f"http://127.0.0.1:{PARAKEET_PORT}/v1/audio/transcriptions"
        req = urllib.request.Request(
            url,
            data=new_body,
            headers={"Content-Type": new_ct},
            method="POST",
        )
        try:
            with urllib.request.urlopen(req, timeout=300) as resp:
                status = resp.status
                resp_ct = resp.headers.get("Content-Type", "application/json")
                resp_body = resp.read()
        except urllib.error.HTTPError as e:
            # Relay upstream error responses verbatim.
            status = e.code
            resp_ct = e.headers.get("Content-Type", "application/json")
            resp_body = e.read()
        except Exception as e:
            self._error(502, f"upstream error: {e}")
            return
        self._send_body(status, resp_body, resp_ct)

    def _error(self, code: int, msg: str):
        """Send an OpenAI-style JSON error object with HTTP status ``code``."""
        import json

        # json.dumps escapes quotes, backslashes and control characters in
        # msg; hand-formatting produced invalid JSON for such messages.
        payload = {"error": {"message": msg, "type": "proxy_error"}}
        body = json.dumps(payload, separators=(",", ":")).encode()
        self._send_body(code, body, "application/json")
if __name__ == "__main__":
    # Entry point: start parakeet-server as a child process on PARAKEET_PORT,
    # then serve the proxy on PROXY_PORT until interrupted. The child is
    # always stopped when the proxy exits.
    import signal

    def _interrupt(signum, frame):
        """Turn SIGTERM into KeyboardInterrupt so the cleanup below runs."""
        raise KeyboardInterrupt

    proc = subprocess.Popen([
        "parakeet-server",
        "--host", "127.0.0.1",
        "--port", str(PARAKEET_PORT),
        "--model", MODEL,
        "--cache-dir", CACHE_DIR,
    ])
    server = None
    try:
        # By default SIGTERM ends the interpreter without running `finally`,
        # which would leave parakeet-server running as an orphan.
        signal.signal(signal.SIGTERM, _interrupt)
        print(f"[parakeet-proxy] started parakeet-server pid={proc.pid} on :{PARAKEET_PORT}", flush=True)

        # Bind inside the try block: if the port is unavailable the child
        # started above must still be shut down.
        server = http.server.HTTPServer(("0.0.0.0", PROXY_PORT), ProxyHandler)
        print(f"[parakeet-proxy] listening on :{PROXY_PORT}", flush=True)
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        if server is not None:
            server.server_close()
        proc.terminate()
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The child ignored SIGTERM; do not hang the shutdown on it.
            proc.kill()
            proc.wait()
+2 -2
View File
@@ -18,7 +18,7 @@ spec:
spec: spec:
initContainers: initContainers:
- name: download-whisper - name: download-whisper
image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09 image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09
command: command:
- sh - sh
- -c - -c
@@ -48,7 +48,7 @@ spec:
mountPath: /root/.cache mountPath: /root/.cache
containers: containers:
- name: llama-swap - name: llama-swap
image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-09 image: ghcr.io/mostlygeek/llama-swap:unified-vulkan-2026-06-09
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
command: command:
- llama-swap - llama-swap
-1
View File
@@ -13,4 +13,3 @@ configMapGenerator:
namespace: llama namespace: llama
files: files:
- config.yaml=configs/config.yaml - config.yaml=configs/config.yaml
- parakeet-proxy.py=configs/parakeet-proxy.py
+4 -2
View File
@@ -84,7 +84,7 @@ spec:
value: "Users" value: "Users"
- name: OAUTH_AUTO_REDIRECT - name: OAUTH_AUTO_REDIRECT
value: "true" value: "true"
# STT via parakeet-server (routed through llama-swap) # STT via whisper-server (routed through llama-swap)
- name: AUDIO_STT_ENGINE - name: AUDIO_STT_ENGINE
value: "openai" value: "openai"
- name: AUDIO_STT_OPENAI_API_BASE_URL - name: AUDIO_STT_OPENAI_API_BASE_URL
@@ -92,7 +92,9 @@ spec:
- name: AUDIO_STT_OPENAI_API_KEY - name: AUDIO_STT_OPENAI_API_KEY
value: "ignored" value: "ignored"
- name: AUDIO_STT_MODEL - name: AUDIO_STT_MODEL
value: "parakeet-tdt_ctc-1.1b" value: "whisper-small"
- name: AUDIO_STT_SUPPORTED_CONTENT_TYPES
value: "audio/wav,audio/wave"
# TTS via OuteTTS (routed through llama-swap) # TTS via OuteTTS (routed through llama-swap)
# TTS via dedicated Kokoro server (CPU-only, separate pod) # TTS via dedicated Kokoro server (CPU-only, separate pod)
- name: AUDIO_TTS_ENGINE - name: AUDIO_TTS_ENGINE