7 Commits

Author SHA1 Message Date
600f9442d7 chore(deps): update alpine/k8s docker tag to v1.35.3 2026-04-05 00:00:44 +00:00
a0814e76ee increase pvc for llama to 300 Gi
All checks were successful
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
2026-04-04 22:49:26 +02:00
da163398a5 add notes about woodpecker to readme
All checks were successful
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
2026-04-04 03:29:15 +02:00
8160a52176 add gemma 4 models
All checks were successful
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
2026-04-04 02:48:02 +02:00
ad3b2229c2 get rid of openrouter proxying via llama-swap
All checks were successful
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
2026-04-04 02:39:26 +02:00
57c2c7ea8d add woodpecker pipeline to reconcile flux
All checks were successful
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
2026-04-04 02:31:08 +02:00
f2d60e0b15 add kubernetes secret engine and approle auth to openbao 2026-04-04 02:06:18 +02:00
25 changed files with 249 additions and 68 deletions

View File

@@ -0,0 +1,49 @@
# Asks Flux to reconcile the flux-system Git source on every push to the
# fresh-start branch, using short-lived credentials obtained from OpenBao.
when:
  - event: push
    branch: fresh-start

# No checkout: the steps below only use files they create under /woodpecker.
skip_clone: true

steps:
  # Log in with the AppRole credentials and request Kubernetes credentials.
  # Both results are written to /woodpecker so that later steps can read them.
  - name: Get kubernetes access from OpenBao
    image: quay.io/openbao/openbao:2.5.2
    environment:
      VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
      ROLE_ID:
        from_secret: flux_reconcile_role_id
      SECRET_ID:
        from_secret: flux_reconcile_secret_id
    commands:
      - bao write -field token auth/approle/login
        role_id=$ROLE_ID
        secret_id=$SECRET_ID > /woodpecker/.vault_id
      - export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
      - bao write -format json -f /kubernetes/creds/flux-reconcile > /woodpecker/kube_credentials

  # Build a kubeconfig from the service account token returned by OpenBao.
  - name: Construct Kubeconfig
    image: alpine/k8s:1.35.3
    environment:
      KUBECONFIG: /woodpecker/kubeconfig
    commands:
      - kubectl config set-cluster cluster
        --server=https://$KUBERNETES_SERVICE_HOST
        --certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      - kubectl config set-credentials cluster
        --token=$(jq -r .data.service_account_token /woodpecker/kube_credentials)
      - kubectl config set-context cluster
        --cluster cluster
        --user cluster
        --namespace flux-system
      - kubectl config use-context cluster

  - name: Reconcile git source
    image: ghcr.io/fluxcd/flux-cli:v2.8.3
    environment:
      KUBECONFIG: /woodpecker/kubeconfig
    commands:
      - flux reconcile source git flux-system

  - name: Invalidate OpenBao token
    image: quay.io/openbao/openbao:2.5.2
    # Run after failed steps as well, so the token is revoked on failed runs too
    # instead of staying valid until its TTL expires.
    when:
      - status: [success, failure]
    environment:
      VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
    commands:
      # Nothing to revoke when the login step did not produce a token.
      - test -s /woodpecker/.vault_id || exit 0
      - export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
      - bao write -f auth/token/revoke-self

View File

@@ -141,7 +141,7 @@ Currently the k8s cluster consists of single node (hostname anapistula-delrosala
## Software stack ## Software stack
The cluster itself is based on [Talos Linux](https://www.talos.dev/) (which is also a Kubernetes distribution) and uses [Cilium](https://cilium.io/) as CNI, IPAM, kube-proxy replacement, Load Balancer, and BGP control plane. Persistent volumes are managed by [OpenEBS LVM LocalPV](https://openebs.io/docs/user-guides/local-storage-user-guide/local-pv-lvm/lvm-overview). Applications are deployed using GitOps (this repo) and reconciled on cluster using [Flux](https://fluxcd.io/). Git repository is hosted on [Gitea](https://gitea.io/) running on a cluster itself. Secets are kept in [OpenBao](https://openbao.org/) (HashiCorp Vault fork) running on a cluster and synced to cluster objects using [Vault Secrets Operator](https://github.com/hashicorp/vault-secrets-operator). Deployments are kept up to date using self hosted [Renovate](https://www.mend.io/renovate/) bot updating manifests in the Git repository. Incoming HTTP traffic is routed to cluster using [Nginx Ingress Controller](https://kubernetes.github.io/ingress-nginx/) and certificates are issued by [cert-manager](https://cert-manager.io/) with [Let's Encrypt](https://letsencrypt.org/) ACME issuer with [cert-manager-webhook-ovh](https://github.com/aureq/cert-manager-webhook-ovh) resolving DNS-01 challanges. Cluster also runs [CloudNativePG](https://cloudnative-pg.io/) operator for managing PostgreSQL databases. Router is running [Mikrotik RouterOS](https://help.mikrotik.com/docs/spaces/ROS/pages/328059/RouterOS) and its configuration is managed via [Ansible](https://docs.ansible.com/) playbook in this repo. High level core cluster software architecture is shown on the diagram below. The cluster itself is based on [Talos Linux](https://www.talos.dev/) (which is also a Kubernetes distribution) and uses [Cilium](https://cilium.io/) as CNI, IPAM, kube-proxy replacement, Load Balancer, and BGP control plane. 
Persistent volumes are managed by [OpenEBS LVM LocalPV](https://openebs.io/docs/user-guides/local-storage-user-guide/local-pv-lvm/lvm-overview). Applications are deployed using GitOps (this repo) and reconciled on cluster using [Flux](https://fluxcd.io/). Git repository is hosted on [Gitea](https://gitea.io/) running on a cluster itself. Secrets are kept in [OpenBao](https://openbao.org/) (HashiCorp Vault fork) running on a cluster and synced to cluster objects using [Vault Secrets Operator](https://github.com/hashicorp/vault-secrets-operator). Deployments are kept up to date using self hosted [Renovate](https://www.mend.io/renovate/) bot updating manifests in the Git repository. There is a [Woodpecker](https://woodpecker-ci.org/) instance watching repositories on Gitea and scheduling jobs on cluster. Incoming HTTP traffic is routed to cluster using [Nginx Ingress Controller](https://kubernetes.github.io/ingress-nginx/) and certificates are issued by [cert-manager](https://cert-manager.io/) with [Let's Encrypt](https://letsencrypt.org/) ACME issuer with [cert-manager-webhook-ovh](https://github.com/aureq/cert-manager-webhook-ovh) resolving DNS-01 challenges. Cluster also runs [CloudNativePG](https://cloudnative-pg.io/) operator for managing PostgreSQL databases. Router is running [Mikrotik RouterOS](https://help.mikrotik.com/docs/spaces/ROS/pages/328059/RouterOS) and its configuration is managed via [Ansible](https://docs.ansible.com/) playbook in this repo. High level core cluster software architecture is shown on the diagram below.
> Talos Linux is an immutable Linux distribution purpose-built for running Kubernetes. The OS is distributed as an OCI (Docker) image and does not contain any package manager, shell, SSH, or any other tools for managing the system. Instead, all operations are performed using API, which can be accessed using `talosctl` CLI tool. > Talos Linux is an immutable Linux distribution purpose-built for running Kubernetes. The OS is distributed as an OCI (Docker) image and does not contain any package manager, shell, SSH, or any other tools for managing the system. Instead, all operations are performed using API, which can be accessed using `talosctl` CLI tool.
@@ -177,14 +177,23 @@ flowchart TD
vault_operator -- "Retrieves secrets" --> vault[OpenBao] -- "Secret storage" --> lv vault_operator -- "Retrieves secrets" --> vault[OpenBao] -- "Secret storage" --> lv
vault -- "Auth method" --> kubeapi vault -- "Auth method" --> kubeapi
gitea -- "Receives events" --> woodpecker[Woodpecker CI] -- "Schedules jobs" --> kubeapi
gitea -- "Stores repositories" --> lv gitea -- "Stores repositories" --> lv
gitea--> renovate[Renovate Bot] -- "Updates manifests" --> gitea gitea--> renovate[Renovate Bot] -- "Updates manifests" --> gitea
end end
``` ```
### Reconciliation paths of each component
- Kubernetes manifests are reconciled using Flux triggered by Woodpecker CI on push
- RouterOS configs are applied by Ansible <!-- ran by Gitea Action on push -->
- Talos configs are applied using makefile <!-- switch to ansible and trigger on action push -->
- Vault policies are applied by running `synchronize-vault.py` <!-- triggered by Gitea action on push -->
<!-- - Docker images are built and pushed to registry by Gitea Actions on push -->
<!-- TODO: Backups, monitoring, logging, deployment with ansible etc --> <!-- TODO: Backups, monitoring, logging, deployment with ansible etc -->
## Software ## Software
@@ -228,6 +237,7 @@ flowchart TD
|------|------|-------------| |------|------|-------------|
| <img src="docs/assets/devenv.svg" alt="devenv" height="50" width="50"> | devenv | Tool for declarative management of development environment using Nix | | <img src="docs/assets/devenv.svg" alt="devenv" height="50" width="50"> | devenv | Tool for declarative management of development environment using Nix |
| <img src="docs/assets/renovate.svg" alt="Renovate" height="50" width="50"> | Renovate | Bot for keeping dependencies up to date | | <img src="docs/assets/renovate.svg" alt="Renovate" height="50" width="50"> | Renovate | Bot for keeping dependencies up to date |
| <img src="docs/assets/woodpecker.svg" alt="Woodpecker" height="50" width="50"> | Woodpecker CI | Continuous Integration system |
### AI infrastructure ### AI infrastructure

View File

@@ -5,25 +5,18 @@ logToStdout: "both" # proxy and upstream
macros: macros:
base_args: "--no-warmup --port ${PORT}" base_args: "--no-warmup --port ${PORT}"
common_args: "--fit-target 1536 --no-warmup --port ${PORT}" common_args: "--fit-target 1536 --no-warmup --port ${PORT}"
gemma3_ctx_128k: "--ctx-size 131072" ctx_128k: "--ctx-size 131072"
qwen35_ctx_128k: "--ctx-size 131072" ctx_256k: "--ctx-size 262144"
qwen35_ctx_256k: "--ctx-size 262144"
gemma_sampling: "--prio 2 --temp 1.0 --repeat-penalty 1.0 --min-p 0.00 --top-k 64 --top-p 0.95" gemma_sampling: "--prio 2 --temp 1.0 --repeat-penalty 1.0 --min-p 0.00 --top-k 64 --top-p 0.95"
qwen35_sampling: "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q8_0 -ctv q8_0" qwen35_sampling: "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q8_0 -ctv q8_0"
qwen35_35b_args: "--temp 1.0 --min-p 0.00 --top-p 0.95 --top-k 20 -ctk q8_0 -ctv q8_0" qwen35_35b_args: "--temp 1.0 --min-p 0.00 --top-p 0.95 --top-k 20 -ctk q8_0 -ctv q8_0"
qwen35_35b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf" qwen35_35b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf"
qwen35_4b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-4B-GGUF_mmproj-F16.gguf" qwen35_4b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-4B-GGUF_mmproj-F16.gguf"
glm47_flash_args: "--temp 0.7 --top-p 1.0 --min-p 0.01 --repeat-penalty 1.0" glm47_flash_args: "--temp 0.7 --top-p 1.0 --min-p 0.01 --repeat-penalty 1.0"
gemma4_sampling: "--temp 1.0 --top-p 0.95 --top-k 64"
thinking_on: "--chat-template-kwargs '{\"enable_thinking\": true}'" thinking_on: "--chat-template-kwargs '{\"enable_thinking\": true}'"
thinking_off: "--chat-template-kwargs '{\"enable_thinking\": false}'" thinking_off: "--chat-template-kwargs '{\"enable_thinking\": false}'"
peers:
openrouter:
proxy: https://openrouter.ai/api
apiKey: ${env.OPENROUTER_API_KEY}
models:
- z-ai/glm-5
hooks: hooks:
on_startup: on_startup:
preload: preload:
@@ -42,7 +35,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M -hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
${gemma3_ctx_128k} ${ctx_128k}
${gemma_sampling} ${gemma_sampling}
${common_args} ${common_args}
@@ -50,7 +43,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M -hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
${gemma3_ctx_128k} ${ctx_128k}
${gemma_sampling} ${gemma_sampling}
--no-mmproj --no-mmproj
${common_args} ${common_args}
@@ -59,7 +52,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
${gemma3_ctx_128k} ${ctx_128k}
${gemma_sampling} ${gemma_sampling}
${common_args} ${common_args}
@@ -67,7 +60,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
${gemma3_ctx_128k} ${ctx_128k}
${gemma_sampling} ${gemma_sampling}
--no-mmproj --no-mmproj
${common_args} ${common_args}
@@ -90,7 +83,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_35b_args} ${qwen35_35b_args}
${common_args} ${common_args}
@@ -98,7 +91,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_35b_args} ${qwen35_35b_args}
${common_args} ${common_args}
${thinking_off} ${thinking_off}
@@ -110,7 +103,7 @@ models:
/app/llama-server /app/llama-server
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
${qwen35_35b_heretic_mmproj} ${qwen35_35b_heretic_mmproj}
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_35b_args} ${qwen35_35b_args}
${common_args} ${common_args}
@@ -119,7 +112,7 @@ models:
/app/llama-server /app/llama-server
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
${qwen35_35b_heretic_mmproj} ${qwen35_35b_heretic_mmproj}
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_35b_args} ${qwen35_35b_args}
${common_args} ${common_args}
${thinking_off} ${thinking_off}
@@ -128,7 +121,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${base_args} ${base_args}
${thinking_on} ${thinking_on}
@@ -146,7 +139,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_on} ${thinking_on}
@@ -155,7 +148,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_off} ${thinking_off}
@@ -164,7 +157,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
${qwen35_ctx_128k} ${ctx_128k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_on} ${thinking_on}
@@ -173,7 +166,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
${qwen35_ctx_128k} ${ctx_128k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_off} ${thinking_off}
@@ -183,7 +176,7 @@ models:
/app/llama-server /app/llama-server
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M -hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
${qwen35_4b_heretic_mmproj} ${qwen35_4b_heretic_mmproj}
${qwen35_ctx_128k} ${ctx_128k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_on} ${thinking_on}
@@ -193,7 +186,7 @@ models:
/app/llama-server /app/llama-server
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M -hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
${qwen35_4b_heretic_mmproj} ${qwen35_4b_heretic_mmproj}
${qwen35_ctx_128k} ${ctx_128k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_off} ${thinking_off}
@@ -202,7 +195,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_on} ${thinking_on}
@@ -211,7 +204,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_off} ${thinking_off}
@@ -220,7 +213,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M -hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_on} ${thinking_on}
@@ -229,7 +222,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M -hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_off} ${thinking_off}
@@ -238,7 +231,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M -hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_on} ${thinking_on}
@@ -247,7 +240,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M -hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
${qwen35_ctx_256k} ${ctx_256k}
${qwen35_sampling} ${qwen35_sampling}
${common_args} ${common_args}
${thinking_off} ${thinking_off}
@@ -258,3 +251,35 @@ models:
-hf unsloth/GLM-4.7-Flash-GGUF:Q4_K_M -hf unsloth/GLM-4.7-Flash-GGUF:Q4_K_M
${glm47_flash_args} ${glm47_flash_args}
${common_args} ${common_args}
"gemma-4-26B-A4B-it:UD-Q4_K_XL":
cmd: |
/app/llama-server
-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL \
${ctx_256k}
${gemma4_sampling}
${common_args}
"gemma-4-26B-A4B-it:UD-Q2_K_XL":
cmd: |
/app/llama-server
-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q2_K_XL \
${ctx_256k}
${gemma4_sampling}
${common_args}
"unsloth/gemma-4-E4B-it-GGUF:UD-Q4_K_XL":
cmd: |
/app/llama-server
-hf unsloth/gemma-4-E4B-it-GGUF:UD-Q4_K_XL \
${ctx_128k}
${gemma4_sampling}
${common_args}
"unsloth/gemma-4-E2B-it-GGUF:UD-Q4_K_XL":
cmd: |
/app/llama-server
-hf unsloth/gemma-4-E2B-it-GGUF:UD-Q4_K_XL \
${ctx_128k}
${gemma4_sampling}
${common_args}

View File

@@ -29,12 +29,6 @@ spec:
- containerPort: 8080 - containerPort: 8080
name: http name: http
protocol: TCP protocol: TCP
env:
- name: OPENROUTER_API_KEY
valueFrom:
secretKeyRef:
name: llama-openrouter
key: OPENROUTER_API_KEY
volumeMounts: volumeMounts:
- name: models - name: models
mountPath: /root/.cache mountPath: /root/.cache

View File

@@ -7,7 +7,7 @@ metadata:
name: llama-models-lvmssd name: llama-models-lvmssd
namespace: openebs namespace: openebs
spec: spec:
capacity: 200Gi capacity: "322122547200"
ownerNodeID: anapistula-delrosalae ownerNodeID: anapistula-delrosalae
shared: "yes" shared: "yes"
thinProvision: "no" thinProvision: "no"
@@ -20,7 +20,7 @@ metadata:
name: llama-models-lvmssd name: llama-models-lvmssd
spec: spec:
capacity: capacity:
storage: 200Gi storage: 300Gi
accessModes: accessModes:
- ReadWriteOnce - ReadWriteOnce
persistentVolumeReclaimPolicy: Retain persistentVolumeReclaimPolicy: Retain
@@ -41,6 +41,6 @@ spec:
- ReadWriteOnce - ReadWriteOnce
resources: resources:
requests: requests:
storage: 200Gi storage: 300Gi
storageClassName: ssd-lvmpv storageClassName: ssd-lvmpv
volumeName: llama-models-lvmssd volumeName: llama-models-lvmssd

View File

@@ -36,26 +36,3 @@ spec:
excludeRaw: true excludeRaw: true
vaultAuthRef: llama vaultAuthRef: llama
---
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
name: llama-openrouter
namespace: llama
spec:
type: kv-v2
mount: secret
path: openrouter
destination:
create: true
name: llama-openrouter
type: Opaque
transformation:
excludeRaw: true
templates:
OPENROUTER_API_KEY:
text: '{{ get .Secrets "API_KEY" }}'
vaultAuthRef: llama

View File

@@ -0,0 +1,10 @@
<svg xmlns="http://www.w3.org/2000/svg" width="284.538" height="253.96">
<style>
@media (prefers-color-scheme: dark) {
path {
fill: white;
}
}
</style>
<path d="M162.51 33.188c-26.77.411-54.004 6.885-71.494 3.745-1.313-.232-2.124 1.338-1.171 2.265 14.749 14.003 20.335 28.16 36.718 30.065l.476.103c-7.567 7.799-14.028 18.018-18.571 31.171-4.89 14.106-6.268 29.421-7.89 47.105-2.445 26.332-5.173 56.152-20.038 93.54a246.489 246.489 0 0 0-13.27 45.946h22.652a221.202 221.202 0 0 1 11.249-37.786c16.049-40.374 19.073-73.257 21.505-99.693 1.493-16.255 2.806-30.309 6.796-41.853 11.647-33.527 39.408-40.889 61.056-36.693 21.004 4.067 41.673 20.502 40.592 44.016-.772 15.985-7.76 23.166-12.87 28.43-2.793 2.883-5.47 5.611-6.731 9.498-3.037 9.19.101 19.434 8.494 27.568 22.24 20.734 34.338 59.717 33.681 106.513h22.176c.592-52.935-13.951-97.839-40.503-122.626-2.097-2.021-2.69-3.604-3.191-3.347 1.222-1.544 3.217-3.346 4.633-4.813 29.382-21.79 77.813-1.892 107.054 9.653 7.58 2.985 11.274-4.338 4.067-8.623-25.097-14.84-76.54-54.016-105.368-79.718-4.029-3.54-6.796-7.8-11.455-11.738-15.547-27.439-41.84-33.127-68.597-32.728Zm35.238 60.27a15.161 15.161 0 0 0-2.008.232 15.161 15.161 0 0 0-1.506 29.434 15.154 15.154 0 0 0 9.473-28.79 15.161 15.161 0 0 0-5.959-.876zm-44.286 147.17a2.033 2.033 0 0 0-1.133.374c-1.08.772-1.93 3.05-.772 5.701 5.38 12.394 9.1 25.445 12.536 40.413h22.484c-5.676-16.629-16.307-34.055-27.851-43.978-2.008-1.737-3.913-2.574-5.251-2.51z" style="stroke-width:12.8704" transform="translate(-67.27 -33.169)"/>
</svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

@@ -0,0 +1,32 @@
# RBAC that OpenBao's Kubernetes secret engine needs: a ClusterRole with the
# required permissions, bound to the openbao service account.
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: k8s-full-secrets-abilities
rules:
  - apiGroups:
      - ""
    resources:
      - namespaces
    verbs:
      - get
  - apiGroups:
      - ""
    resources:
      - serviceaccounts
      - serviceaccounts/token
    verbs:
      - create
      - update
      - delete
  - apiGroups:
      - rbac.authorization.k8s.io
    resources:
      - rolebindings
      - clusterrolebindings
    verbs:
      - create
      - update
      - delete
  - apiGroups:
      - rbac.authorization.k8s.io
    resources:
      - roles
      - clusterroles
    verbs:
      - bind
      - escalate
      - create
      - update
      - delete
---
# Grants the ClusterRole above to the openbao service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: openbao-token-creator-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: k8s-full-secrets-abilities
subjects:
  - kind: ServiceAccount
    name: openbao
    namespace: openbao

View File

@@ -25,3 +25,4 @@ resources:
- configs/openbao-volume.yaml - configs/openbao-volume.yaml
- controllers/openbao.yaml - controllers/openbao.yaml
- configs/openbao-k8s-se-role.yaml

View File

@@ -2,6 +2,7 @@
import argparse import argparse
import os import os
import pathlib
from typing import Any, cast from typing import Any, cast
import hvac import hvac
@@ -42,7 +43,7 @@ def synchronize_auth_kubernetes_config(client: hvac.Client):
def synchronize_kubernetes_roles(client: hvac.Client): def synchronize_kubernetes_roles(client: hvac.Client):
kubernetes = Kubernetes(client.adapter) kubernetes = Kubernetes(client.adapter)
policy_dir = os.path.join(os.path.dirname(__file__), '../vault/kubernetes-roles/') policy_dir = os.path.join(os.path.dirname(__file__), '../vault/kubernetes-auth-roles/')
roles: dict[str, Any] = {} # pyright:ignore[reportExplicitAny] roles: dict[str, Any] = {} # pyright:ignore[reportExplicitAny]
for filename in os.listdir(policy_dir): for filename in os.listdir(policy_dir):
@@ -67,6 +68,69 @@ def synchronize_kubernetes_roles(client: hvac.Client):
# Using write data instead of kubernetes.create_role, we can pass raw yaml # Using write data instead of kubernetes.create_role, we can pass raw yaml
_ = client.write_data(f'/auth/kubernetes/role/{role_name}', data=role_content) # pyright:ignore[reportAny] _ = client.write_data(f'/auth/kubernetes/role/{role_name}', data=role_content) # pyright:ignore[reportAny]
def synchronize_approle_auth(client: hvac.Client):
    """Make the AppRole roles on the server match the files in ../vault/approles/.

    Enables the AppRole auth method when it is not mounted at ``approle/``,
    deletes every role on the server that has no matching file, and writes
    every role defined by a file. The role name is the file name without its
    extension.

    Args:
        client: hvac client used for all requests.

    Raises:
        AssertionError: if a role file does not parse to a mapping.
    """
    if client.sys.list_auth_methods().get('approle/') is None:
        print('Enabling AppRole auth method')
        client.sys.enable_auth_method('approle', 'AppRole authorization for CI')

    roles_dir = pathlib.Path(__file__).parent.joinpath('../vault/approles/')

    roles: dict[str, Any] = {}
    for filename in roles_dir.iterdir():
        with filename.open('r') as f:
            role = yaml.safe_load(f.read())
        assert type(role) is dict
        roles[filename.stem] = role

    roles_on_vault: list[str] = []
    # AppRole lists its roles under the singular `role` path; listing `roles`
    # yields no result, which left stale roles on the server undetected.
    roles_response = client.list('auth/approle/role')
    if roles_response is not None:
        roles_on_vault = roles_response['data']['keys']

    # Drop roles that no longer have a definition file.
    for role in roles_on_vault:
        if role not in roles:
            print(f'Deleting role: {role}')
            client.delete(f'auth/approle/role/{role}')

    for role_name, role_content in roles.items():
        print(f'Updating role: {role_name}')
        client.write_data(f'auth/approle/role/{role_name}', data=role_content)
# Synchronizes the kubernetes secret engine: enables it when it is not mounted
# at `kubernetes/`, writes its config, then makes the roles under
# `kubernetes/roles/` match the files in ../vault/kubernetes-se-roles/
# (roles without a matching file are deleted, roles with a file are written).
def synchronize_kubernetes_secretengine(client: hvac.Client):
# Ensure kubernetes secret engine is enabled
if client.sys.list_mounted_secrets_engines().get('kubernetes/') is None:
print('Enabling kubernetes secret engine')
client.sys.enable_secrets_engine('kubernetes', 'kubernetes', 'Cluster access')
# Write empty config (all defaults, working on the same cluster)
client.write('kubernetes/config', None)
policy_dir = pathlib.Path(__file__).parent.joinpath('../vault/kubernetes-se-roles/')
# Role definitions loaded from disk, keyed by role name
roles: dict[str, Any] = {}
for filename in policy_dir.iterdir():
with filename.open('r') as f:
role = yaml.safe_load(f.read())
assert type(role) is dict
# generated_role_rules must be json or yaml formatted string, convert it
if 'generated_role_rules' in role and type(role['generated_role_rules']) is not str:
role['generated_role_rules'] = yaml.safe_dump(role['generated_role_rules'])
# The role name is the file name without its extension
roles[filename.stem] = role
# A None list response is treated as "no roles on the server"
roles_on_vault: list[str] = []
roles_response = client.list("kubernetes/roles")
if roles_response is not None:
roles_on_vault = roles_response['data']['keys']
# Delete roles that exist on the server but have no definition file
for role in roles_on_vault:
if role not in roles:
print(f'Deleting role: {role}')
client.delete(f'kubernetes/roles/{role}')
# Write every role that has a definition file
for role_name, role_content in roles.items():
print(f'Updating role: {role_name}')
client.write_data(f'kubernetes/roles/{role_name}', data=role_content)
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="synchronizeVault", prog="synchronizeVault",
@@ -82,5 +146,11 @@ if __name__ == '__main__':
print('Synchronizing kubernetes config') print('Synchronizing kubernetes config')
synchronize_auth_kubernetes_config(client) synchronize_auth_kubernetes_config(client)
print('Synchronizing kubernetes roles') print('Synchronizing kubernetes auth roles')
synchronize_kubernetes_roles(client) synchronize_kubernetes_roles(client)
print('Synchronizing AppRole auth method')
synchronize_approle_auth(client)
print('Synchronizing kubernetes secret engine')
synchronize_kubernetes_secretengine(client)

View File

@@ -0,0 +1,4 @@
# AppRole definition: issued tokens have a 20 minute TTL and maximum TTL and
# carry the flux-reconcile policy.
token_ttl: 20m
token_max_ttl: 20m
policies:
  - flux-reconcile

View File

@@ -0,0 +1,6 @@
# Kubernetes secret engine role limited to the flux-system namespace. The
# generated role may get, patch and watch Flux GitRepository objects.
allowed_kubernetes_namespaces: flux-system
generated_role_rules:
  rules:
    - apiGroups:
        - source.toolkit.fluxcd.io
      resources:
        - gitrepositories
      verbs:
        - get
        - patch
        - watch

View File

@@ -0,0 +1,3 @@
# Policy granting only the "update" capability on the
# kubernetes/creds/flux-reconcile path.
path "kubernetes/creds/flux-reconcile" {
capabilities = ["update"]
}