llama-swap

2025-07-22 23:07:03 +02:00
parent 53154eeed7
commit 18eb912f03
12 changed files with 272 additions and 1 deletions
@@ -3,7 +3,17 @@ install-router:

 gen-talos-config:
 	mkdir -p talos/generated
-	talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --config-patch @talos/patches/openebs.patch --config-patch @talos/patches/openbao.patch --config-patch @talos/patches/ollama.patch --config-patch @talos/patches/frigate.patch --config-patch @talos/patches/anapistula-delrosalae.patch --output-types controlplane -o talos/generated/anapistula-delrosalae.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443
+	talosctl gen config \
+		--with-secrets secrets.yaml \
+		--config-patch @talos/patches/controlplane.patch \
+		--config-patch @talos/patches/openebs.patch \
+		--config-patch @talos/patches/openbao.patch \
+		--config-patch @talos/patches/ollama.patch \
+		--config-patch @talos/patches/llama.patch \
+		--config-patch @talos/patches/frigate.patch \
+		--config-patch @talos/patches/anapistula-delrosalae.patch \
+		--output-types controlplane -o talos/generated/anapistula-delrosalae.yaml \
+		homelab https://kube-api.homelab.lumpiasty.xyz:6443
 	talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --output-types worker -o talos/generated/worker.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443
 	talosctl gen config --with-secrets secrets.yaml --output-types talosconfig -o talos/generated/talosconfig homelab https://kube-api.homelab.lumpiasty.xyz:6443
 	talosctl config endpoint kube-api.homelab.lumpiasty.xyz
@@ -7,3 +7,4 @@ resources:
  - ollama
  - librechat
  - frigate
+  - llama
@@ -0,0 +1,68 @@
+---
+apiVersion: apps/v1
+kind: Deployment  # Caddy reverse proxy that checks the bearer token
+metadata:
+  name: llama-proxy
+  namespace: llama
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llama-proxy
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llama-proxy
+    spec:
+      volumes:
+        - name: proxy-config
+          configMap:
+            name: llama-proxy-config  # provides the Caddyfile
+      containers:
+        - name: caddy
+          image: caddy:2.10.0-alpine
+          imagePullPolicy: IfNotPresent
+          env:
+            - name: API_KEY  # referenced in the Caddyfile as {env.API_KEY}
+              valueFrom:
+                secretKeyRef:
+                  name: llama-api-key  # Secret written by the VaultStaticSecret
+                  key: API_KEY
+          volumeMounts:
+            - name: proxy-config
+              mountPath: /etc/caddy
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: llama-proxy-config
+  namespace: llama
+data:  # mounted at /etc/caddy by the llama-proxy Deployment
+  Caddyfile: |
+    http://llama.lumpiasty.xyz {
+
+      @requireAuth {
+        not header Authorization "Bearer {env.API_KEY}"
+      }
+
+      respond @requireAuth "Unauthorized" 401
+
+      reverse_proxy llama:11434 {
+        flush_interval -1
+      }
+    }
+---
+apiVersion: v1
+kind: Service  # backend referenced by the llama Ingress (port 80)
+metadata:
+  name: llama-proxy
+  namespace: llama
+spec:
+  type: ClusterIP
+  ports:
+    - name: http
+      protocol: TCP
+      port: 80
+      targetPort: 80
+  selector:
+    app.kubernetes.io/name: llama-proxy  # matches the llama-proxy pod label
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: llama-swap
+  namespace: llama
+data:  # mounted at /config; llama-swap is started with --config=/config/config.yaml
+  config.yaml: |
+    models:
+      "DeepSeek-R1-0528-Qwen3-8B-GGUF":
+        cmd: |
+          /app/llama-server
+          -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
+          --port ${PORT}
@@ -0,0 +1,68 @@
+---
+apiVersion: apps/v1
+kind: Deployment  # llama-swap server with host GPU device nodes mounted in
+metadata:
+  name: llama-swap
+  namespace: llama
+spec:
+  replicas: 1  # NOTE(review): PVC is ReadWriteOnce; consider strategy Recreate
+  selector:
+    matchLabels:
+      app: llama-swap
+  template:
+    metadata:
+      labels:
+        app: llama-swap
+    spec:
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: models
+        - name: kfd
+          hostPath:
+            type: CharDevice  # pod fails to start if the node lacks this device
+            path: /dev/kfd
+        - name: dri
+          hostPath:
+            type: Directory
+            path: /dev/dri
+        - name: config
+          configMap:
+            name: llama-swap
+      containers:
+        - name: llama-swap
+          image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957
+          imagePullPolicy: IfNotPresent
+          securityContext:
+            privileged: true  # presumably for raw GPU device access; TODO confirm
+          args:
+            - --config=/config/config.yaml
+            - --watch-config
+          ports:
+            - name: http
+              containerPort: 8080  # targetPort of the llama Service
+              protocol: TCP
+          volumeMounts:
+            - name: models
+              mountPath: /root/.cache  # backed by the "models" PVC
+            - name: kfd
+              mountPath: /dev/kfd
+            - name: dri
+              mountPath: /dev/dri
+            - name: config
+              mountPath: /config
+---
+apiVersion: v1
+kind: Service  # in-cluster endpoint the Caddy proxy forwards to (llama:11434)
+metadata:
+  name: llama
+  namespace: llama
+spec:
+  type: ClusterIP
+  selector:
+    app: llama-swap
+  ports:
+    - name: http
+      protocol: TCP
+      port: 11434
+      targetPort: 8080  # containerPort of the llama-swap container
@@ -0,0 +1,28 @@
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # public entry point; all paths go to the llama-proxy Service
+metadata:
+  namespace: llama
+  name: llama
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt
+    acme.cert-manager.io/http01-edit-in-place: "true"
+    # NOTE(review): both tuning annotations now use the nginx.org/ prefix;
+    nginx.org/proxy-buffering: "false"  # unbuffered, for streamed responses
+    nginx.org/proxy-read-timeout: "30m"  # allow long-running generations
+spec:
+  ingressClassName: nginx
+  rules:
+    - host: llama.lumpiasty.xyz
+      http:
+        paths:
+          - backend:
+              service:
+                name: llama-proxy
+                port:
+                  number: 80
+            path: /
+            pathType: Prefix
+  tls:
+    - hosts:
+        - llama.lumpiasty.xyz
+      secretName: llama-ingress
@@ -0,0 +1,10 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:  # manifests that make up the llama app; paths relative to this file
+  - namespace.yaml
+  - secret.yaml
+  - auth-proxy.yaml
+  - ingress.yaml
+  - pvc.yaml
+  - deployment.yaml
+  - configmap.yaml
@@ -0,0 +1,5 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: llama  # namespace set on every namespaced manifest of this app
@@ -0,0 +1,13 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # claimed by the llama-swap pod's "models" volume
+metadata:
+  name: models
+  namespace: llama
+spec:
+  storageClassName: mayastor-single-hdd
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 200Gi
@@ -0,0 +1,38 @@
+---
+apiVersion: v1
+kind: ServiceAccount  # identity named in VaultAuth spec.kubernetes.serviceAccount
+metadata:
+  name: llama-proxy
+  namespace: llama
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultAuth  # referenced by the VaultStaticSecret via vaultAuthRef: llama
+metadata:
+  name: llama
+  namespace: llama
+spec:
+  mount: kubernetes
+  method: kubernetes
+  kubernetes:
+    serviceAccount: llama-proxy  # ServiceAccount declared in this same file
+    role: llama-proxy
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultStaticSecret  # syncs a Vault KV entry into the llama-api-key Secret
+metadata:
+  name: llama-api-key
+  namespace: llama
+spec:
+  type: kv-v2
+
+  mount: secret
+  path: ollama  # NOTE(review): reads the "ollama" path; confirm sharing is intended
+
+  destination:
+    create: true
+    name: llama-api-key  # Secret consumed by the llama-proxy Deployment (key API_KEY)
+    type: Opaque
+    transformation:
+      excludeRaw: true
+
+  vaultAuthRef: llama
@@ -0,0 +1,11 @@
+# Exempt the llama namespace from PodSecurity: llama-swap runs privileged with hostPath mounts
+cluster:
+  apiServer:
+    admissionControl:
+      - name: PodSecurity
+        configuration:
+          apiVersion: pod-security.admission.config.k8s.io/v1beta1
+          kind: PodSecurityConfiguration
+          exemptions:
+            namespaces:
+              - llama
@@ -0,0 +1,6 @@
+bound_service_account_names:  # ServiceAccount names this role is bound to
+  - llama-proxy
+bound_service_account_namespaces:  # namespaces those ServiceAccounts must be in
+  - llama
+token_policies:  # NOTE(review): reuses the "ollama" policy; confirm intended
+  - ollama