From af6545444b45f02fad7a5d47f09bed23ac627308 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Tue, 22 Jul 2025 23:07:03 +0200 Subject: [PATCH] llama-swap --- Makefile | 12 ++++- apps/kustomization.yaml | 1 + apps/llama/auth-proxy.yaml | 68 +++++++++++++++++++++++++ apps/llama/configmap.yaml | 13 +++++ apps/llama/deployment.yaml | 68 +++++++++++++++++++++++++ apps/llama/ingress.yaml | 28 ++++++++++ apps/llama/kustomization.yaml | 10 ++++ apps/llama/namespace.yaml | 5 ++ apps/llama/pvc.yaml | 13 +++++ apps/llama/secret.yaml | 38 ++++++++++++++ talos/patches/llama.patch | 11 ++++ vault/kubernetes-roles/llama-proxy.yaml | 6 +++ 12 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 apps/llama/auth-proxy.yaml create mode 100644 apps/llama/configmap.yaml create mode 100644 apps/llama/deployment.yaml create mode 100644 apps/llama/ingress.yaml create mode 100644 apps/llama/kustomization.yaml create mode 100644 apps/llama/namespace.yaml create mode 100644 apps/llama/pvc.yaml create mode 100644 apps/llama/secret.yaml create mode 100644 talos/patches/llama.patch create mode 100644 vault/kubernetes-roles/llama-proxy.yaml diff --git a/Makefile b/Makefile index 5f931e6..8aec416 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,17 @@ install-router: gen-talos-config: mkdir -p talos/generated - talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --config-patch @talos/patches/openebs.patch --config-patch @talos/patches/openbao.patch --config-patch @talos/patches/ollama.patch --config-patch @talos/patches/frigate.patch --config-patch @talos/patches/anapistula-delrosalae.patch --output-types controlplane -o talos/generated/anapistula-delrosalae.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443 + talosctl gen config \ + --with-secrets secrets.yaml \ + --config-patch @talos/patches/controlplane.patch \ + --config-patch @talos/patches/openebs.patch \ + --config-patch @talos/patches/openbao.patch \ + --config-patch 
@talos/patches/ollama.patch \ + --config-patch @talos/patches/llama.patch \ + --config-patch @talos/patches/frigate.patch \ + --config-patch @talos/patches/anapistula-delrosalae.patch \ + --output-types controlplane -o talos/generated/anapistula-delrosalae.yaml \ + homelab https://kube-api.homelab.lumpiasty.xyz:6443 talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --output-types worker -o talos/generated/worker.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443 talosctl gen config --with-secrets secrets.yaml --output-types talosconfig -o talos/generated/talosconfig homelab https://kube-api.homelab.lumpiasty.xyz:6443 talosctl config endpoint kube-api.homelab.lumpiasty.xyz diff --git a/apps/kustomization.yaml b/apps/kustomization.yaml index efa806c..436c03f 100644 --- a/apps/kustomization.yaml +++ b/apps/kustomization.yaml @@ -7,3 +7,4 @@ resources: - ollama - librechat - frigate + - llama diff --git a/apps/llama/auth-proxy.yaml b/apps/llama/auth-proxy.yaml new file mode 100644 index 0000000..9e2a0a9 --- /dev/null +++ b/apps/llama/auth-proxy.yaml @@ -0,0 +1,68 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-proxy + namespace: llama +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llama-proxy + template: + metadata: + labels: + app.kubernetes.io/name: llama-proxy + spec: + containers: + - name: caddy + image: caddy:2.10.0-alpine + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /etc/caddy + name: proxy-config + env: + - name: API_KEY + valueFrom: + secretKeyRef: + name: llama-api-key + key: API_KEY + volumes: + - name: proxy-config + configMap: + name: llama-proxy-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: llama + name: llama-proxy-config +data: + Caddyfile: | + http://llama.lumpiasty.xyz { + + @requireAuth { + not header Authorization "Bearer {env.API_KEY}" + } + + respond @requireAuth "Unauthorized" 401 + + reverse_proxy 
llama:11434 { + flush_interval -1 + } + } +--- +apiVersion: v1 +kind: Service +metadata: + namespace: llama + name: llama-proxy +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: llama-proxy + ports: + - name: http + port: 80 + targetPort: 80 + protocol: TCP diff --git a/apps/llama/configmap.yaml b/apps/llama/configmap.yaml new file mode 100644 index 0000000..c28fcd6 --- /dev/null +++ b/apps/llama/configmap.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: llama + name: llama-swap +data: + config.yaml: | + models: + "DeepSeek-R1-0528-Qwen3-8B-GGUF": + cmd: | + /app/llama-server + -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M + --port ${PORT} diff --git a/apps/llama/deployment.yaml b/apps/llama/deployment.yaml new file mode 100644 index 0000000..2dd28ab --- /dev/null +++ b/apps/llama/deployment.yaml @@ -0,0 +1,68 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-swap + namespace: llama +spec: + replicas: 1 + selector: + matchLabels: + app: llama-swap + template: + metadata: + labels: + app: llama-swap + spec: + containers: + - name: llama-swap + image: ghcr.io/mostlygeek/llama-swap:v139-vulkan-b5957 + imagePullPolicy: IfNotPresent + args: + - --config=/config/config.yaml + - --watch-config + ports: + - containerPort: 8080 + name: http + protocol: TCP + volumeMounts: + - name: models + mountPath: /root/.cache + - mountPath: /dev/kfd + name: kfd + - mountPath: /dev/dri + name: dri + - mountPath: /config + name: config + securityContext: + privileged: true + volumes: + - name: models + persistentVolumeClaim: + claimName: models + - name: kfd + hostPath: + path: /dev/kfd + type: CharDevice + - name: dri + hostPath: + path: /dev/dri + type: Directory + - name: config + configMap: + name: llama-swap +--- +apiVersion: v1 +kind: Service +metadata: + name: llama + namespace: llama +spec: + type: ClusterIP + ports: + - name: http + port: 11434 + targetPort: 8080 + protocol: TCP + selector: + app: llama-swap 
diff --git a/apps/llama/ingress.yaml b/apps/llama/ingress.yaml new file mode 100644 index 0000000..4b40459 --- /dev/null +++ b/apps/llama/ingress.yaml @@ -0,0 +1,28 @@ +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + namespace: llama + name: llama + annotations: + cert-manager.io/cluster-issuer: letsencrypt + acme.cert-manager.io/http01-edit-in-place: "true" + nginx.ingress.kubernetes.io/proxy-buffering: "false" + nginx.ingress.kubernetes.io/proxy-read-timeout: "1800" +spec: + ingressClassName: nginx + rules: + - host: llama.lumpiasty.xyz + http: + paths: + - backend: + service: + name: llama-proxy + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - llama.lumpiasty.xyz + secretName: llama-ingress diff --git a/apps/llama/kustomization.yaml b/apps/llama/kustomization.yaml new file mode 100644 index 0000000..aeebaaf --- /dev/null +++ b/apps/llama/kustomization.yaml @@ -0,0 +1,10 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - secret.yaml + - auth-proxy.yaml + - ingress.yaml + - pvc.yaml + - deployment.yaml + - configmap.yaml diff --git a/apps/llama/namespace.yaml b/apps/llama/namespace.yaml new file mode 100644 index 0000000..87d360f --- /dev/null +++ b/apps/llama/namespace.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: llama diff --git a/apps/llama/pvc.yaml b/apps/llama/pvc.yaml new file mode 100644 index 0000000..5d69435 --- /dev/null +++ b/apps/llama/pvc.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + namespace: llama + name: models +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 200Gi + storageClassName: mayastor-single-hdd diff --git a/apps/llama/secret.yaml b/apps/llama/secret.yaml new file mode 100644 index 0000000..3809b61 --- /dev/null +++ b/apps/llama/secret.yaml @@ -0,0 +1,38 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: llama-proxy + namespace: llama +--- +apiVersion: 
secrets.hashicorp.com/v1beta1 +kind: VaultAuth +metadata: + name: llama + namespace: llama +spec: + method: kubernetes + mount: kubernetes + kubernetes: + role: llama-proxy + serviceAccount: llama-proxy +--- +apiVersion: secrets.hashicorp.com/v1beta1 +kind: VaultStaticSecret +metadata: + name: llama-api-key + namespace: llama +spec: + type: kv-v2 + + mount: secret + path: ollama + + destination: + create: true + name: llama-api-key + type: Opaque + transformation: + excludeRaw: true + + vaultAuthRef: llama diff --git a/talos/patches/llama.patch b/talos/patches/llama.patch new file mode 100644 index 0000000..bfda2d5 --- /dev/null +++ b/talos/patches/llama.patch @@ -0,0 +1,11 @@ +# PodSecurity admission exemption for the llama namespace (privileged GPU pods) +cluster: + apiServer: + admissionControl: + - name: PodSecurity + configuration: + apiVersion: pod-security.admission.config.k8s.io/v1 + kind: PodSecurityConfiguration + exemptions: + namespaces: + - llama diff --git a/vault/kubernetes-roles/llama-proxy.yaml b/vault/kubernetes-roles/llama-proxy.yaml new file mode 100644 index 0000000..8a74b44 --- /dev/null +++ b/vault/kubernetes-roles/llama-proxy.yaml @@ -0,0 +1,6 @@ +bound_service_account_names: + - llama-proxy +bound_service_account_namespaces: + - llama +token_policies: + - ollama