1 Commits

Author SHA1 Message Date
2e26b24e7b add woodpecker pipeline to reconcile flux 2026-04-04 02:06:18 +02:00
9 changed files with 89 additions and 99 deletions

View File

@@ -7,8 +7,9 @@ skip_clone: true
steps:
- name: Get kubernetes access from OpenBao
image: quay.io/openbao/openbao:2.5.2
volumes:
- secrets:/secrets
environment:
VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
ROLE_ID:
from_secret: flux_reconcile_role_id
SECRET_ID:
@@ -16,34 +17,39 @@ steps:
commands:
- bao write -field token auth/approle/login
role_id=$ROLE_ID
secret_id=$SECRET_ID > /woodpecker/.vault_id
- export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
- bao write -format json -f /kubernetes/creds/flux-reconcile > /woodpecker/kube_credentials
secret_id=$SECRET_ID
\> /secrets/.vault_id
- export VAULT_TOKEN=$(cat /secrets/.vault_id)
- bao write -format json /kubernetes/creds/flux-reconcile
\> /secrets/kube_credentials
- bao read -format
- name: Construct Kubeconfig
image: alpine/k8s:1.32.13
volumes:
- secrets:/secrets
environment:
KUBECONFIG: /woodpecker/kubeconfig
KUBECONFIG: /secrets/kubeconfig
commands:
- kubectl config set-cluster cluster
--server=https://$KUBERNETES_SERVICE_HOST
--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
--client-certificate=/run/secrets/kubernetes.io/serviceaccount/ca.crt
- kubectl config set-credentials cluster
--token=$(jq -r .data.service_account_token /woodpecker/kube_credentials)
--token=$(jq -r .data.service_account_token /secrets/kube_credentials)
- kubectl config set-context cluster
--cluster cluster
--user cluster
--namespace flux-system
- kubectl config use-context cluster
--current=true
- name: Reconcile git source
image: ghcr.io/fluxcd/flux-cli:v2.8.3
volumes:
- secrets:/secrets
environment:
KUBECONFIG: /woodpecker/kubeconfig
KUBECONFIG: /secrets/kubeconfig
commands:
- flux reconcile source git flux-system
- name: Invalidate OpenBao token
image: quay.io/openbao/openbao:2.5.2
environment:
VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
commands:
- export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
- bao write -f auth/token/revoke-self
- export VAULT_TOKEN=$(cat /secrets/.vault_id)
- bao write auth/token/revoke-self

View File

@@ -141,7 +141,7 @@ Currently the k8s cluster consists of single node (hostname anapistula-delrosala
## Software stack
The cluster itself is based on [Talos Linux](https://www.talos.dev/) (which is also a Kubernetes distribution) and uses [Cilium](https://cilium.io/) as CNI, IPAM, kube-proxy replacement, Load Balancer, and BGP control plane. Persistent volumes are managed by [OpenEBS LVM LocalPV](https://openebs.io/docs/user-guides/local-storage-user-guide/local-pv-lvm/lvm-overview). Applications are deployed using GitOps (this repo) and reconciled on cluster using [Flux](https://fluxcd.io/). Git repository is hosted on [Gitea](https://gitea.io/) running on a cluster itself. Secets are kept in [OpenBao](https://openbao.org/) (HashiCorp Vault fork) running on a cluster and synced to cluster objects using [Vault Secrets Operator](https://github.com/hashicorp/vault-secrets-operator). Deployments are kept up to date using self hosted [Renovate](https://www.mend.io/renovate/) bot updating manifests in the Git repository. There is a [Woodpecker](https://woodpecker-ci.org/) instance watching repositories on Gitea and scheduling jobs on cluster. Incoming HTTP traffic is routed to cluster using [Nginx Ingress Controller](https://kubernetes.github.io/ingress-nginx/) and certificates are issued by [cert-manager](https://cert-manager.io/) with [Let's Encrypt](https://letsencrypt.org/) ACME issuer with [cert-manager-webhook-ovh](https://github.com/aureq/cert-manager-webhook-ovh) resolving DNS-01 challanges. Cluster also runs [CloudNativePG](https://cloudnative-pg.io/) operator for managing PostgreSQL databases. Router is running [Mikrotik RouterOS](https://help.mikrotik.com/docs/spaces/ROS/pages/328059/RouterOS) and its configuration is managed via [Ansible](https://docs.ansible.com/) playbook in this repo. High level core cluster software architecture is shown on the diagram below.
The cluster itself is based on [Talos Linux](https://www.talos.dev/) (which is also a Kubernetes distribution) and uses [Cilium](https://cilium.io/) as CNI, IPAM, kube-proxy replacement, Load Balancer, and BGP control plane. Persistent volumes are managed by [OpenEBS LVM LocalPV](https://openebs.io/docs/user-guides/local-storage-user-guide/local-pv-lvm/lvm-overview). Applications are deployed using GitOps (this repo) and reconciled on cluster using [Flux](https://fluxcd.io/). Git repository is hosted on [Gitea](https://gitea.io/) running on a cluster itself. Secets are kept in [OpenBao](https://openbao.org/) (HashiCorp Vault fork) running on a cluster and synced to cluster objects using [Vault Secrets Operator](https://github.com/hashicorp/vault-secrets-operator). Deployments are kept up to date using self hosted [Renovate](https://www.mend.io/renovate/) bot updating manifests in the Git repository. Incoming HTTP traffic is routed to cluster using [Nginx Ingress Controller](https://kubernetes.github.io/ingress-nginx/) and certificates are issued by [cert-manager](https://cert-manager.io/) with [Let's Encrypt](https://letsencrypt.org/) ACME issuer with [cert-manager-webhook-ovh](https://github.com/aureq/cert-manager-webhook-ovh) resolving DNS-01 challanges. Cluster also runs [CloudNativePG](https://cloudnative-pg.io/) operator for managing PostgreSQL databases. Router is running [Mikrotik RouterOS](https://help.mikrotik.com/docs/spaces/ROS/pages/328059/RouterOS) and its configuration is managed via [Ansible](https://docs.ansible.com/) playbook in this repo. High level core cluster software architecture is shown on the diagram below.
> Talos Linux is an immutable Linux distribution purpose-built for running Kubernetes. The OS is distributed as an OCI (Docker) image and does not contain any package manager, shell, SSH, or any other tools for managing the system. Instead, all operations are performed using API, which can be accessed using `talosctl` CLI tool.
@@ -177,23 +177,14 @@ flowchart TD
vault_operator -- "Retrieves secrets" --> vault[OpenBao] -- "Secret storage" --> lv
vault -- "Auth method" --> kubeapi
gitea -- "Receives events" --> woodpecker[Woodpecker CI] -- "Schedules jobs" --> kubeapi
gitea -- "Stores repositories" --> lv
gitea--> renovate[Renovate Bot] -- "Updates manifests" --> gitea
gitea --> renovate[Renovate Bot] -- "Updates manifests" --> gitea
end
```
### Reconcilation paths of each component
- Kubernetes manifests are reconciled using Flux triggerred by Woodpecker CI on push
- RouterOS configs are applied by Ansible <!-- ran by Gitea Action on push -->
- Talos configs are applied using makefile <!-- switch to ansible and trigger on action push -->
- Vault policies are applied by running `synchronize-vault.py` <!-- triggerred by Gitea action on push -->
<!-- - Docker images are built and pushed to registry by Gitea Actions on push -->
<!-- TODO: Backups, monitoring, logging, deployment with ansible etc -->
## Software
@@ -237,7 +228,6 @@ flowchart TD
|------|------|-------------|
| <img src="docs/assets/devenv.svg" alt="devenv" height="50" width="50"> | devenv | Tool for declarative managment of development environment using Nix |
| <img src="docs/assets/renovate.svg" alt="Renovate" height="50" width="50"> | Renovate | Bot for keeping dependencies up to date |
| <img src="docs/assets/woodpecker.svg" alt="Woodpecker" height="50" width="50"> | Woodpecker CI | Continous Integration system |
### AI infrastructure

View File

@@ -5,18 +5,25 @@ logToStdout: "both" # proxy and upstream
macros:
base_args: "--no-warmup --port ${PORT}"
common_args: "--fit-target 1536 --no-warmup --port ${PORT}"
ctx_128k: "--ctx-size 131072"
ctx_256k: "--ctx-size 262144"
gemma3_ctx_128k: "--ctx-size 131072"
qwen35_ctx_128k: "--ctx-size 131072"
qwen35_ctx_256k: "--ctx-size 262144"
gemma_sampling: "--prio 2 --temp 1.0 --repeat-penalty 1.0 --min-p 0.00 --top-k 64 --top-p 0.95"
qwen35_sampling: "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q8_0 -ctv q8_0"
qwen35_35b_args: "--temp 1.0 --min-p 0.00 --top-p 0.95 --top-k 20 -ctk q8_0 -ctv q8_0"
qwen35_35b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf"
qwen35_4b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-4B-GGUF_mmproj-F16.gguf"
glm47_flash_args: "--temp 0.7 --top-p 1.0 --min-p 0.01 --repeat-penalty 1.0"
gemma4_sampling: "--temp 1.0 --top-p 0.95 --top-k 64"
thinking_on: "--chat-template-kwargs '{\"enable_thinking\": true}'"
thinking_off: "--chat-template-kwargs '{\"enable_thinking\": false}'"
peers:
openrouter:
proxy: https://openrouter.ai/api
apiKey: ${env.OPENROUTER_API_KEY}
models:
- z-ai/glm-5
hooks:
on_startup:
preload:
@@ -35,7 +42,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
${ctx_128k}
${gemma3_ctx_128k}
${gemma_sampling}
${common_args}
@@ -43,7 +50,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
${ctx_128k}
${gemma3_ctx_128k}
${gemma_sampling}
--no-mmproj
${common_args}
@@ -52,7 +59,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
${ctx_128k}
${gemma3_ctx_128k}
${gemma_sampling}
${common_args}
@@ -60,7 +67,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
${ctx_128k}
${gemma3_ctx_128k}
${gemma_sampling}
--no-mmproj
${common_args}
@@ -83,7 +90,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_35b_args}
${common_args}
@@ -91,7 +98,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_35b_args}
${common_args}
${thinking_off}
@@ -103,7 +110,7 @@ models:
/app/llama-server
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
${qwen35_35b_heretic_mmproj}
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_35b_args}
${common_args}
@@ -112,7 +119,7 @@ models:
/app/llama-server
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
${qwen35_35b_heretic_mmproj}
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_35b_args}
${common_args}
${thinking_off}
@@ -121,7 +128,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${base_args}
${thinking_on}
@@ -139,7 +146,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${common_args}
${thinking_on}
@@ -148,7 +155,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${common_args}
${thinking_off}
@@ -157,7 +164,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
${ctx_128k}
${qwen35_ctx_128k}
${qwen35_sampling}
${common_args}
${thinking_on}
@@ -166,7 +173,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
${ctx_128k}
${qwen35_ctx_128k}
${qwen35_sampling}
${common_args}
${thinking_off}
@@ -176,7 +183,7 @@ models:
/app/llama-server
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
${qwen35_4b_heretic_mmproj}
${ctx_128k}
${qwen35_ctx_128k}
${qwen35_sampling}
${common_args}
${thinking_on}
@@ -186,7 +193,7 @@ models:
/app/llama-server
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
${qwen35_4b_heretic_mmproj}
${ctx_128k}
${qwen35_ctx_128k}
${qwen35_sampling}
${common_args}
${thinking_off}
@@ -195,7 +202,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${common_args}
${thinking_on}
@@ -204,7 +211,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${common_args}
${thinking_off}
@@ -213,7 +220,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${common_args}
${thinking_on}
@@ -222,7 +229,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${common_args}
${thinking_off}
@@ -231,7 +238,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${common_args}
${thinking_on}
@@ -240,7 +247,7 @@ models:
cmd: |
/app/llama-server
-hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
${ctx_256k}
${qwen35_ctx_256k}
${qwen35_sampling}
${common_args}
${thinking_off}
@@ -251,35 +258,3 @@ models:
-hf unsloth/GLM-4.7-Flash-GGUF:Q4_K_M
${glm47_flash_args}
${common_args}
"gemma-4-26B-A4B-it:UD-Q4_K_XL":
cmd: |
/app/llama-server
-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL \
${ctx_256k}
${gemma4_sampling}
${common_args}
"gemma-4-26B-A4B-it:UD-Q2_K_XL":
cmd: |
/app/llama-server
-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q2_K_XL \
${ctx_256k}
${gemma4_sampling}
${common_args}
"unsloth/gemma-4-E4B-it-GGUF:UD-Q4_K_XL":
cmd: |
/app/llama-server
-hf unsloth/gemma-4-E4B-it-GGUF:UD-Q4_K_XL \
${ctx_128k}
${gemma4_sampling}
${common_args}
"unsloth/gemma-4-E2B-it-GGUF:UD-Q4_K_XL":
cmd: |
/app/llama-server
-hf unsloth/gemma-4-E2B-it-GGUF:UD-Q4_K_XL \
${ctx_128k}
${gemma4_sampling}
${common_args}

View File

@@ -18,7 +18,7 @@ spec:
spec:
containers:
- name: llama-swap
image: ghcr.io/mostlygeek/llama-swap:v199-vulkan-b8643
image: ghcr.io/mostlygeek/llama-swap:v199-vulkan-b8637
imagePullPolicy: IfNotPresent
command:
- /app/llama-swap
@@ -29,6 +29,12 @@ spec:
- containerPort: 8080
name: http
protocol: TCP
env:
- name: OPENROUTER_API_KEY
valueFrom:
secretKeyRef:
name: llama-openrouter
key: OPENROUTER_API_KEY
volumeMounts:
- name: models
mountPath: /root/.cache

View File

@@ -7,7 +7,7 @@ metadata:
name: llama-models-lvmssd
namespace: openebs
spec:
capacity: "322122547200"
capacity: 200Gi
ownerNodeID: anapistula-delrosalae
shared: "yes"
thinProvision: "no"
@@ -20,7 +20,7 @@ metadata:
name: llama-models-lvmssd
spec:
capacity:
storage: 300Gi
storage: 200Gi
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Retain
@@ -41,6 +41,6 @@ spec:
- ReadWriteOnce
resources:
requests:
storage: 300Gi
storage: 200Gi
storageClassName: ssd-lvmpv
volumeName: llama-models-lvmssd

View File

@@ -36,3 +36,26 @@ spec:
excludeRaw: true
vaultAuthRef: llama
---
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
name: llama-openrouter
namespace: llama
spec:
type: kv-v2
mount: secret
path: openrouter
destination:
create: true
name: llama-openrouter
type: Opaque
transformation:
excludeRaw: true
templates:
OPENROUTER_API_KEY:
text: '{{ get .Secrets "API_KEY" }}'
vaultAuthRef: llama

View File

@@ -1,10 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="284.538" height="253.96">
<style>
@media (prefers-color-scheme: dark) {
path {
fill: white;
}
}
</style>
<path d="M162.51 33.188c-26.77.411-54.004 6.885-71.494 3.745-1.313-.232-2.124 1.338-1.171 2.265 14.749 14.003 20.335 28.16 36.718 30.065l.476.103c-7.567 7.799-14.028 18.018-18.571 31.171-4.89 14.106-6.268 29.421-7.89 47.105-2.445 26.332-5.173 56.152-20.038 93.54a246.489 246.489 0 0 0-13.27 45.946h22.652a221.202 221.202 0 0 1 11.249-37.786c16.049-40.374 19.073-73.257 21.505-99.693 1.493-16.255 2.806-30.309 6.796-41.853 11.647-33.527 39.408-40.889 61.056-36.693 21.004 4.067 41.673 20.502 40.592 44.016-.772 15.985-7.76 23.166-12.87 28.43-2.793 2.883-5.47 5.611-6.731 9.498-3.037 9.19.101 19.434 8.494 27.568 22.24 20.734 34.338 59.717 33.681 106.513h22.176c.592-52.935-13.951-97.839-40.503-122.626-2.097-2.021-2.69-3.604-3.191-3.347 1.222-1.544 3.217-3.346 4.633-4.813 29.382-21.79 77.813-1.892 107.054 9.653 7.58 2.985 11.274-4.338 4.067-8.623-25.097-14.84-76.54-54.016-105.368-79.718-4.029-3.54-6.796-7.8-11.455-11.738-15.547-27.439-41.84-33.127-68.597-32.728Zm35.238 60.27a15.161 15.161 0 0 0-2.008.232 15.161 15.161 0 0 0-1.506 29.434 15.154 15.154 0 0 0 9.473-28.79 15.161 15.161 0 0 0-5.959-.876zm-44.286 147.17a2.033 2.033 0 0 0-1.133.374c-1.08.772-1.93 3.05-.772 5.701 5.38 12.394 9.1 25.445 12.536 40.413h22.484c-5.676-16.629-16.307-34.055-27.851-43.978-2.008-1.737-3.913-2.574-5.251-2.51z" style="stroke-width:12.8704" transform="translate(-67.27 -33.169)"/>
</svg>

Before

Width:  |  Height:  |  Size: 1.5 KiB

View File

@@ -18,7 +18,7 @@ spec:
chart:
spec:
chart: cert-manager-webhook-ovh
version: 0.9.6
version: 0.9.5
sourceRef:
kind: HelmRepository
name: cert-manager-webhook-ovh

View File

@@ -1,6 +1,6 @@
allowed_kubernetes_namespaces: flux-system
generated_role_rules:
rules:
- apiGroups: ["source.toolkit.fluxcd.io"]
- apiGroups: ["kustomize.toolkit.fluxcd.io"]
resources: ["gitrepositories"]
verbs: ["get", "patch", "watch"]
verbs: ["update", "watch"]