From af6545444b45f02fad7a5d47f09bed23ac627308 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Tue, 22 Jul 2025 23:07:03 +0200 Subject: [PATCH] llama-swap --- Makefile | 12 ++++- apps/kustomization.yaml | 1 + apps/llama/auth-proxy.yaml | 68 +++++++++++++++++++++++++ apps/llama/configmap.yaml | 13 +++++ apps/llama/deployment.yaml | 68 +++++++++++++++++++++++++ apps/llama/ingress.yaml | 28 ++++++++++ apps/llama/kustomization.yaml | 10 ++++ apps/llama/namespace.yaml | 5 ++ apps/llama/pvc.yaml | 13 +++++ apps/llama/secret.yaml | 38 ++++++++++++++ talos/patches/llama.patch | 11 ++++ vault/kubernetes-roles/llama-proxy.yaml | 6 +++ 12 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 apps/llama/auth-proxy.yaml create mode 100644 apps/llama/configmap.yaml create mode 100644 apps/llama/deployment.yaml create mode 100644 apps/llama/ingress.yaml create mode 100644 apps/llama/kustomization.yaml create mode 100644 apps/llama/namespace.yaml create mode 100644 apps/llama/pvc.yaml create mode 100644 apps/llama/secret.yaml create mode 100644 talos/patches/llama.patch create mode 100644 vault/kubernetes-roles/llama-proxy.yaml diff --git a/Makefile b/Makefile index 5f931e6..8aec416 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,17 @@ install-router: gen-talos-config: mkdir -p talos/generated - talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --config-patch @talos/patches/openebs.patch --config-patch @talos/patches/openbao.patch --config-patch @talos/patches/ollama.patch --config-patch @talos/patches/frigate.patch --config-patch @talos/patches/anapistula-delrosalae.patch --output-types controlplane -o talos/generated/anapistula-delrosalae.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443 + talosctl gen config \ + --with-secrets secrets.yaml \ + --config-patch @talos/patches/controlplane.patch \ + --config-patch @talos/patches/openebs.patch \ + --config-patch @talos/patches/openbao.patch \ + --config-patch 
@talos/patches/ollama.patch \ + --config-patch @talos/patches/llama.patch \ + --config-patch @talos/patches/frigate.patch \ + --config-patch @talos/patches/anapistula-delrosalae.patch \ + --output-types controlplane -o talos/generated/anapistula-delrosalae.yaml \ + homelab https://kube-api.homelab.lumpiasty.xyz:6443 talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --output-types worker -o talos/generated/worker.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443 talosctl gen config --with-secrets secrets.yaml --output-types talosconfig -o talos/generated/talosconfig homelab https://kube-api.homelab.lumpiasty.xyz:6443 talosctl config endpoint kube-api.homelab.lumpiasty.xyz diff --git a/apps/kustomization.yaml b/apps/kustomization.yaml index efa806c..436c03f 100644 --- a/apps/kustomization.yaml +++ b/apps/kustomization.yaml @@ -7,3 +7,4 @@ resources: - ollama - librechat - frigate + - llama diff --git a/apps/llama/auth-proxy.yaml b/apps/llama/auth-proxy.yaml new file mode 100644 index 0000000..9e2a0a9 --- /dev/null +++ b/apps/llama/auth-proxy.yaml @@ -0,0 +1,68 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-proxy + namespace: llama +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llama-proxy + template: + metadata: + labels: + app.kubernetes.io/name: llama-proxy + spec: + containers: + - name: caddy + image: caddy:2.10.0-alpine + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /etc/caddy + name: proxy-config + env: + - name: API_KEY + valueFrom: + secretKeyRef: + name: llama-api-key + key: API_KEY + volumes: + - name: proxy-config + configMap: + name: llama-proxy-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: llama + name: llama-proxy-config +data: + Caddyfile: | + http://llama.lumpiasty.xyz { + + @requireAuth { + not header Authorization "Bearer {env.API_KEY}" + } + + respond @requireAuth "Unauthorized" 401 + + reverse_proxy 
llama:11434 { + flush_interval -1 + } + } +--- +apiVersion: v1 +kind: Service +metadata: + namespace: llama + name: llama-proxy +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: llama-proxy + ports: + - name: http + port: 80 + targetPort: 80 + protocol: TCP diff --git a/apps/llama/configmap.yaml b/apps/llama/configmap.yaml new file mode 100644 index 0000000..c28fcd6 --- /dev/null +++ b/apps/llama/configmap.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: llama + name: llama-swap +data: + config.yaml: | + models: + "DeepSeek-R1-0528-Qwen3-8B-GGUF": + cmd: | + /app/llama-server + -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M + --port ${PORT} diff --git a/apps/llama/deployment.yaml b/apps/llama/deployment.yaml new file mode 100644 index 0000000..2dd28ab --- /dev/null +++ b/apps/llama/deployment.yaml @@ -0,0 +1,68 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-swap + namespace: llama +spec: + replicas: 1 + selector: + matchLabels: + app: llama-swap + template: + metadata: + labels: + app: llama-swap + spec: + containers: + - name: llama-swap + image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957 + imagePullPolicy: IfNotPresent + args: + - --config=/config/config.yaml + - --watch-config + ports: + - containerPort: 8080 + name: http + protocol: TCP + volumeMounts: + - name: models + mountPath: /root/.cache + - mountPath: /dev/kfd + name: kfd + - mountPath: /dev/dri + name: dri + - mountPath: /config + name: config + securityContext: + privileged: true + volumes: + - name: models + persistentVolumeClaim: + claimName: models + - name: kfd + hostPath: + path: /dev/kfd + type: CharDevice + - name: dri + hostPath: + path: /dev/dri + type: Directory + - name: config + configMap: + name: llama-swap +--- +apiVersion: v1 +kind: Service +metadata: + name: llama + namespace: llama +spec: + type: ClusterIP + ports: + - name: http + port: 11434 + targetPort: 8080 + protocol: TCP + selector: + app: llama-swap 
diff --git a/apps/llama/ingress.yaml b/apps/llama/ingress.yaml new file mode 100644 index 0000000..4b40459 --- /dev/null +++ b/apps/llama/ingress.yaml @@ -0,0 +1,28 @@ +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + namespace: llama + name: llama + annotations: + cert-manager.io/cluster-issuer: letsencrypt + acme.cert-manager.io/http01-edit-in-place: "true" + nginx.ingress.kubernetes.io/proxy-buffering: "false" + nginx.ingress.kubernetes.io/proxy-read-timeout: "1800" +spec: + ingressClassName: nginx + rules: + - host: llama.lumpiasty.xyz + http: + paths: + - backend: + service: + name: llama-proxy + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - llama.lumpiasty.xyz + secretName: llama-ingress diff --git a/apps/llama/kustomization.yaml b/apps/llama/kustomization.yaml new file mode 100644 index 0000000..aeebaaf --- /dev/null +++ b/apps/llama/kustomization.yaml @@ -0,0 +1,10 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - secret.yaml + - auth-proxy.yaml + - ingress.yaml + - pvc.yaml + - deployment.yaml + - configmap.yaml diff --git a/apps/llama/namespace.yaml b/apps/llama/namespace.yaml new file mode 100644 index 0000000..87d360f --- /dev/null +++ b/apps/llama/namespace.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: llama diff --git a/apps/llama/pvc.yaml b/apps/llama/pvc.yaml new file mode 100644 index 0000000..5d69435 --- /dev/null +++ b/apps/llama/pvc.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + namespace: llama + name: models +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 200Gi + storageClassName: mayastor-single-hdd diff --git a/apps/llama/secret.yaml b/apps/llama/secret.yaml new file mode 100644 index 0000000..3809b61 --- /dev/null +++ b/apps/llama/secret.yaml @@ -0,0 +1,38 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: llama-proxy + namespace: llama +--- +apiVersion: 
secrets.hashicorp.com/v1beta1 +kind: VaultAuth +metadata: + name: llama + namespace: llama +spec: + method: kubernetes + mount: kubernetes + kubernetes: + role: llama-proxy + serviceAccount: llama-proxy +--- +apiVersion: secrets.hashicorp.com/v1beta1 +kind: VaultStaticSecret +metadata: + name: llama-api-key + namespace: llama +spec: + type: kv-v2 + + mount: secret + path: ollama + + destination: + create: true + name: llama-api-key + type: Opaque + transformation: + excludeRaw: true + + vaultAuthRef: llama diff --git a/talos/patches/llama.patch b/talos/patches/llama.patch new file mode 100644 index 0000000..bfda2d5 --- /dev/null +++ b/talos/patches/llama.patch @@ -0,0 +1,11 @@ +# PodSecurity admission exemption for the llama namespace (privileged GPU pods) +cluster: + apiServer: + admissionControl: + - name: PodSecurity + configuration: + apiVersion: pod-security.admission.config.k8s.io/v1 + kind: PodSecurityConfiguration + exemptions: + namespaces: + - llama diff --git a/vault/kubernetes-roles/llama-proxy.yaml b/vault/kubernetes-roles/llama-proxy.yaml new file mode 100644 index 0000000..8a74b44 --- /dev/null +++ b/vault/kubernetes-roles/llama-proxy.yaml @@ -0,0 +1,6 @@ +bound_service_account_names: + - llama-proxy +bound_service_account_namespaces: + - llama +token_policies: + - ollama