llama-swap

This commit is contained in:
2025-07-22 23:07:03 +02:00
parent 53154eeed7
commit 18eb912f03
12 changed files with 272 additions and 1 deletions

View File

@@ -3,7 +3,17 @@ install-router:
# Render the Talos machine configs and the talosconfig client file into
# talos/generated/, reusing the cluster secrets stored in secrets.yaml.
#
# The control-plane config is generated exactly once, with every patch
# (including llama.patch) applied. The earlier single-line invocation wrote
# to the same output file without llama.patch and has been dropped.
gen-talos-config:
	mkdir -p talos/generated
	talosctl gen config \
		--with-secrets secrets.yaml \
		--config-patch @talos/patches/controlplane.patch \
		--config-patch @talos/patches/openebs.patch \
		--config-patch @talos/patches/openbao.patch \
		--config-patch @talos/patches/ollama.patch \
		--config-patch @talos/patches/llama.patch \
		--config-patch @talos/patches/frigate.patch \
		--config-patch @talos/patches/anapistula-delrosalae.patch \
		--output-types controlplane -o talos/generated/anapistula-delrosalae.yaml \
		homelab https://kube-api.homelab.lumpiasty.xyz:6443
	talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --output-types worker -o talos/generated/worker.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443
	talosctl gen config --with-secrets secrets.yaml --output-types talosconfig -o talos/generated/talosconfig homelab https://kube-api.homelab.lumpiasty.xyz:6443
	talosctl config endpoint kube-api.homelab.lumpiasty.xyz

View File

@@ -7,3 +7,4 @@ resources:
- ollama
- librechat
- frigate
- llama

View File

@@ -0,0 +1,68 @@
---
# Caddy reverse proxy that sits in front of the `llama` Service.
# Its configuration directory (/etc/caddy) is populated from the
# `llama-proxy-config` ConfigMap, and the expected API key is injected
# from the `llama-api-key` Secret as the API_KEY environment variable.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-proxy
  namespace: llama
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: llama-proxy
  template:
    metadata:
      labels:
        app.kubernetes.io/name: llama-proxy
    spec:
      containers:
        - name: caddy
          image: caddy:2.10.0-alpine
          imagePullPolicy: IfNotPresent
          # Declare the port the llama-proxy Service targets (80) so the
          # pod spec documents what it serves, mirroring the llama-swap
          # Deployment which also names its HTTP port.
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
          env:
            - name: API_KEY
              valueFrom:
                secretKeyRef:
                  name: llama-api-key
                  key: API_KEY
          volumeMounts:
            - name: proxy-config
              mountPath: /etc/caddy
      volumes:
        - name: proxy-config
          configMap:
            name: llama-proxy-config
---
# Caddyfile consumed by the llama-proxy Deployment.
#
# The site block answers plain HTTP for llama.lumpiasty.xyz. Any request
# whose Authorization header is not exactly "Bearer <API_KEY>" (API_KEY is
# read from the container environment) gets a 401 response; everything
# else is proxied to llama:11434 with `flush_interval -1`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: llama-proxy-config
  namespace: llama
data:
  Caddyfile: |
    http://llama.lumpiasty.xyz {
      @requireAuth {
        not header Authorization "Bearer {env.API_KEY}"
      }
      respond @requireAuth "Unauthorized" 401
      reverse_proxy llama:11434 {
        flush_interval -1
      }
    }
---
# Cluster-internal Service exposing the llama-proxy pods on TCP port 80.
apiVersion: v1
kind: Service
metadata:
  name: llama-proxy
  namespace: llama
spec:
  type: ClusterIP
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 80
  selector:
    app.kubernetes.io/name: llama-proxy

13
apps/llama/configmap.yaml Normal file
View File

@@ -0,0 +1,13 @@
---
# llama-swap configuration, mounted by the llama-swap Deployment at
# /config/config.yaml.
#
# Each entry under `models` maps a model name to the command that starts
# its server; ${PORT} is left as a literal placeholder inside the command.
apiVersion: v1
kind: ConfigMap
metadata:
  name: llama-swap
  namespace: llama
data:
  config.yaml: |
    models:
      "DeepSeek-R1-0528-Qwen3-8B-GGUF":
        cmd: |
          /app/llama-server
          -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
          --port ${PORT}

View File

@@ -0,0 +1,68 @@
---
# llama-swap server.
#
# The container reads its configuration from the `llama-swap` ConfigMap
# (mounted at /config, reloaded via --watch-config), stores its cache on
# the `models` PersistentVolumeClaim (mounted at /root/.cache) and is given
# the host device nodes /dev/kfd and /dev/dri.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-swap
  namespace: llama
spec:
  replicas: 1
  # The pod owns a ReadWriteOnce volume and host device nodes, so the old
  # pod must be stopped before its replacement starts. The default
  # RollingUpdate strategy would briefly run both side by side.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-swap
  template:
    metadata:
      labels:
        app: llama-swap
    spec:
      containers:
        - name: llama-swap
          image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957
          imagePullPolicy: IfNotPresent
          args:
            - --config=/config/config.yaml
            - --watch-config
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          # NOTE(review): privileged mode is presumably needed to open the
          # hostPath device nodes below - confirm whether a narrower
          # securityContext would be sufficient.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /root/.cache
            - name: kfd
              mountPath: /dev/kfd
            - name: dri
              mountPath: /dev/dri
            - name: config
              mountPath: /config
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: models
        - name: kfd
          hostPath:
            path: /dev/kfd
            type: CharDevice
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
        - name: config
          configMap:
            name: llama-swap
---
# Cluster-internal Service for the llama-swap pods: clients connect to
# port 11434, which is forwarded to the container's port 8080.
apiVersion: v1
kind: Service
metadata:
  name: llama
  namespace: llama
spec:
  type: ClusterIP
  selector:
    app: llama-swap
  ports:
    - name: http
      protocol: TCP
      port: 11434
      targetPort: 8080

28
apps/llama/ingress.yaml Normal file
View File

@@ -0,0 +1,28 @@
---
# HTTPS entry point for llama.lumpiasty.xyz. Every path is routed to the
# llama-proxy Service on port 80; the TLS certificate is requested from the
# `letsencrypt` cluster issuer and stored in the `llama-ingress` Secret.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: llama
  name: llama
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt
    acme.cert-manager.io/http01-edit-in-place: "true"
    # Disable response buffering and allow upstream reads of up to 30
    # minutes. The two NGINX-based controllers use different annotation
    # prefixes and value formats, and each ignores the other's prefix, so
    # both spellings are provided:
    #   - kubernetes/ingress-nginx: "on"/"off" and a unitless seconds count
    #   - F5 NGINX Ingress Controller: "true"/"false" and an NGINX duration
    nginx.ingress.kubernetes.io/proxy-buffering: "off"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "1800"
    nginx.org/proxy-buffering: "false"
    nginx.org/proxy-read-timeout: "30m"
spec:
  ingressClassName: nginx
  rules:
    - host: llama.lumpiasty.xyz
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llama-proxy
                port:
                  number: 80
  tls:
    - hosts:
        - llama.lumpiasty.xyz
      secretName: llama-ingress

View File

@@ -0,0 +1,10 @@
---
# Kustomization listing every manifest file in this directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - secret.yaml
  - auth-proxy.yaml
  - ingress.yaml
  - pvc.yaml
  - deployment.yaml
  - configmap.yaml

View File

@@ -0,0 +1,5 @@
---
# Namespace that holds all llama resources.
kind: Namespace
apiVersion: v1
metadata:
  name: llama

13
apps/llama/pvc.yaml Normal file
View File

@@ -0,0 +1,13 @@
---
# 200Gi single-writer volume from the `mayastor-single-hdd` storage class;
# the llama-swap Deployment mounts it at /root/.cache.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: models
  namespace: llama
spec:
  storageClassName: mayastor-single-hdd
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 200Gi

38
apps/llama/secret.yaml Normal file
View File

@@ -0,0 +1,38 @@
---
# Service account referenced by the `llama` VaultAuth resource.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: llama
  name: llama-proxy
---
# Kubernetes-auth login definition: authenticates through the `kubernetes`
# auth mount as role `llama-proxy`, using the `llama-proxy` service account.
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultAuth
metadata:
  namespace: llama
  name: llama
spec:
  mount: kubernetes
  method: kubernetes
  kubernetes:
    serviceAccount: llama-proxy
    role: llama-proxy
---
# Syncs the KV v2 entry `secret/ollama` into the Opaque Kubernetes Secret
# `llama-api-key` (created if missing), authenticating via the `llama`
# VaultAuth resource.
# NOTE(review): the source path is `ollama`, not `llama` - presumably the
# API key is shared with the ollama app on purpose; confirm.
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
  namespace: llama
  name: llama-api-key
spec:
  vaultAuthRef: llama
  type: kv-v2
  mount: secret
  path: ollama
  destination:
    name: llama-api-key
    create: true
    type: Opaque
    transformation:
      excludeRaw: true

11
talos/patches/llama.patch Normal file
View File

@@ -0,0 +1,11 @@
# Exempt the `llama` namespace from Pod Security admission.
# The llama-swap pod in that namespace runs a privileged container and
# mounts hostPath device nodes (/dev/kfd, /dev/dri).
cluster:
  apiServer:
    admissionControl:
      - name: PodSecurity
        configuration:
          apiVersion: pod-security.admission.config.k8s.io/v1beta1
          kind: PodSecurityConfiguration
          exemptions:
            namespaces:
              - llama

View File

@@ -0,0 +1,6 @@
# Role binding for the `llama-proxy` service account in the `llama`
# namespace; tokens issued for it carry the `ollama` policy.
bound_service_account_namespaces:
  - llama
bound_service_account_names:
  - llama-proxy
token_policies:
  - ollama