Compare commits
6 Commits
2e26b24e7b
...
renovate/a
| Author | SHA1 | Date | |
|---|---|---|---|
| 600f9442d7 | |||
| a0814e76ee | |||
| da163398a5 | |||
| 8160a52176 | |||
| ad3b2229c2 | |||
| 57c2c7ea8d |
49
.woodpecker/flux-reconcile-source.yaml
Normal file
49
.woodpecker/flux-reconcile-source.yaml
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
when:
|
||||||
|
- event: push
|
||||||
|
branch: fresh-start
|
||||||
|
|
||||||
|
skip_clone: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Get kubernetes access from OpenBao
|
||||||
|
image: quay.io/openbao/openbao:2.5.2
|
||||||
|
environment:
|
||||||
|
VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
|
||||||
|
ROLE_ID:
|
||||||
|
from_secret: flux_reconcile_role_id
|
||||||
|
SECRET_ID:
|
||||||
|
from_secret: flux_reconcile_secret_id
|
||||||
|
commands:
|
||||||
|
- bao write -field token auth/approle/login
|
||||||
|
role_id=$ROLE_ID
|
||||||
|
secret_id=$SECRET_ID > /woodpecker/.vault_id
|
||||||
|
- export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
|
||||||
|
- bao write -format json -f /kubernetes/creds/flux-reconcile > /woodpecker/kube_credentials
|
||||||
|
- name: Construct Kubeconfig
|
||||||
|
image: alpine/k8s:1.35.3
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: /woodpecker/kubeconfig
|
||||||
|
commands:
|
||||||
|
- kubectl config set-cluster cluster
|
||||||
|
--server=https://$KUBERNETES_SERVICE_HOST
|
||||||
|
--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||||
|
- kubectl config set-credentials cluster
|
||||||
|
--token=$(jq -r .data.service_account_token /woodpecker/kube_credentials)
|
||||||
|
- kubectl config set-context cluster
|
||||||
|
--cluster cluster
|
||||||
|
--user cluster
|
||||||
|
--namespace flux-system
|
||||||
|
- kubectl config use-context cluster
|
||||||
|
- name: Reconcile git source
|
||||||
|
image: ghcr.io/fluxcd/flux-cli:v2.8.3
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: /woodpecker/kubeconfig
|
||||||
|
commands:
|
||||||
|
- flux reconcile source git flux-system
|
||||||
|
- name: Invalidate OpenBao token
|
||||||
|
image: quay.io/openbao/openbao:2.5.2
|
||||||
|
environment:
|
||||||
|
VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
|
||||||
|
commands:
|
||||||
|
- export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
|
||||||
|
- bao write -f auth/token/revoke-self
|
||||||
16
README.md
16
README.md
@@ -141,7 +141,7 @@ Currently the k8s cluster consists of single node (hostname anapistula-delrosala
|
|||||||
|
|
||||||
## Software stack
|
## Software stack
|
||||||
|
|
||||||
The cluster itself is based on [Talos Linux](https://www.talos.dev/) (which is also a Kubernetes distribution) and uses [Cilium](https://cilium.io/) as CNI, IPAM, kube-proxy replacement, Load Balancer, and BGP control plane. Persistent volumes are managed by [OpenEBS LVM LocalPV](https://openebs.io/docs/user-guides/local-storage-user-guide/local-pv-lvm/lvm-overview). Applications are deployed using GitOps (this repo) and reconciled on cluster using [Flux](https://fluxcd.io/). Git repository is hosted on [Gitea](https://gitea.io/) running on a cluster itself. Secets are kept in [OpenBao](https://openbao.org/) (HashiCorp Vault fork) running on a cluster and synced to cluster objects using [Vault Secrets Operator](https://github.com/hashicorp/vault-secrets-operator). Deployments are kept up to date using self hosted [Renovate](https://www.mend.io/renovate/) bot updating manifests in the Git repository. Incoming HTTP traffic is routed to cluster using [Nginx Ingress Controller](https://kubernetes.github.io/ingress-nginx/) and certificates are issued by [cert-manager](https://cert-manager.io/) with [Let's Encrypt](https://letsencrypt.org/) ACME issuer with [cert-manager-webhook-ovh](https://github.com/aureq/cert-manager-webhook-ovh) resolving DNS-01 challanges. Cluster also runs [CloudNativePG](https://cloudnative-pg.io/) operator for managing PostgreSQL databases. Router is running [Mikrotik RouterOS](https://help.mikrotik.com/docs/spaces/ROS/pages/328059/RouterOS) and its configuration is managed via [Ansible](https://docs.ansible.com/) playbook in this repo. High level core cluster software architecture is shown on the diagram below.
|
The cluster itself is based on [Talos Linux](https://www.talos.dev/) (which is also a Kubernetes distribution) and uses [Cilium](https://cilium.io/) as CNI, IPAM, kube-proxy replacement, Load Balancer, and BGP control plane. Persistent volumes are managed by [OpenEBS LVM LocalPV](https://openebs.io/docs/user-guides/local-storage-user-guide/local-pv-lvm/lvm-overview). Applications are deployed using GitOps (this repo) and reconciled on cluster using [Flux](https://fluxcd.io/). Git repository is hosted on [Gitea](https://gitea.io/) running on a cluster itself. Secrets are kept in [OpenBao](https://openbao.org/) (HashiCorp Vault fork) running on a cluster and synced to cluster objects using [Vault Secrets Operator](https://github.com/hashicorp/vault-secrets-operator). Deployments are kept up to date using self hosted [Renovate](https://www.mend.io/renovate/) bot updating manifests in the Git repository. There is a [Woodpecker](https://woodpecker-ci.org/) instance watching repositories on Gitea and scheduling jobs on cluster. Incoming HTTP traffic is routed to cluster using [Nginx Ingress Controller](https://kubernetes.github.io/ingress-nginx/) and certificates are issued by [cert-manager](https://cert-manager.io/) with [Let's Encrypt](https://letsencrypt.org/) ACME issuer with [cert-manager-webhook-ovh](https://github.com/aureq/cert-manager-webhook-ovh) resolving DNS-01 challenges. Cluster also runs [CloudNativePG](https://cloudnative-pg.io/) operator for managing PostgreSQL databases. Router is running [Mikrotik RouterOS](https://help.mikrotik.com/docs/spaces/ROS/pages/328059/RouterOS) and its configuration is managed via [Ansible](https://docs.ansible.com/) playbook in this repo. High level core cluster software architecture is shown on the diagram below.
|
||||||
|
|
||||||
> Talos Linux is an immutable Linux distribution purpose-built for running Kubernetes. The OS is distributed as an OCI (Docker) image and does not contain any package manager, shell, SSH, or any other tools for managing the system. Instead, all operations are performed using API, which can be accessed using `talosctl` CLI tool.
|
> Talos Linux is an immutable Linux distribution purpose-built for running Kubernetes. The OS is distributed as an OCI (Docker) image and does not contain any package manager, shell, SSH, or any other tools for managing the system. Instead, all operations are performed using API, which can be accessed using `talosctl` CLI tool.
|
||||||
|
|
||||||
@@ -177,14 +177,23 @@ flowchart TD
|
|||||||
vault_operator -- "Retrieves secrets" --> vault[OpenBao] -- "Secret storage" --> lv
|
vault_operator -- "Retrieves secrets" --> vault[OpenBao] -- "Secret storage" --> lv
|
||||||
vault -- "Auth method" --> kubeapi
|
vault -- "Auth method" --> kubeapi
|
||||||
|
|
||||||
|
gitea -- "Receives events" --> woodpecker[Woodpecker CI] -- "Schedules jobs" --> kubeapi
|
||||||
|
|
||||||
gitea -- "Stores repositories" --> lv
|
gitea -- "Stores repositories" --> lv
|
||||||
|
|
||||||
gitea --> renovate[Renovate Bot] -- "Updates manifests" --> gitea
|
gitea--> renovate[Renovate Bot] -- "Updates manifests" --> gitea
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Reconciliation paths of each component
|
||||||
|
|
||||||
|
- Kubernetes manifests are reconciled using Flux triggered by Woodpecker CI on push
|
||||||
|
- RouterOS configs are applied by Ansible <!-- ran by Gitea Action on push -->
|
||||||
|
- Talos configs are applied using makefile <!-- switch to ansible and trigger on action push -->
|
||||||
|
- Vault policies are applied by running `synchronize-vault.py` <!-- triggered by Gitea action on push -->
|
||||||
|
<!-- - Docker images are built and pushed to registry by Gitea Actions on push -->
|
||||||
|
|
||||||
<!-- TODO: Backups, monitoring, logging, deployment with ansible etc -->
|
<!-- TODO: Backups, monitoring, logging, deployment with ansible etc -->
|
||||||
|
|
||||||
## Software
|
## Software
|
||||||
@@ -228,6 +237,7 @@ flowchart TD
|
|||||||
|------|------|-------------|
|
|------|------|-------------|
|
||||||
| <img src="docs/assets/devenv.svg" alt="devenv" height="50" width="50"> | devenv | Tool for declarative managment of development environment using Nix |
|
| <img src="docs/assets/devenv.svg" alt="devenv" height="50" width="50"> | devenv | Tool for declarative managment of development environment using Nix |
|
||||||
| <img src="docs/assets/renovate.svg" alt="Renovate" height="50" width="50"> | Renovate | Bot for keeping dependencies up to date |
|
| <img src="docs/assets/renovate.svg" alt="Renovate" height="50" width="50"> | Renovate | Bot for keeping dependencies up to date |
|
||||||
|
| <img src="docs/assets/woodpecker.svg" alt="Woodpecker" height="50" width="50"> | Woodpecker CI | Continuous Integration system |
|
||||||
|
|
||||||
### AI infrastructure
|
### AI infrastructure
|
||||||
|
|
||||||
|
|||||||
@@ -5,25 +5,18 @@ logToStdout: "both" # proxy and upstream
|
|||||||
macros:
|
macros:
|
||||||
base_args: "--no-warmup --port ${PORT}"
|
base_args: "--no-warmup --port ${PORT}"
|
||||||
common_args: "--fit-target 1536 --no-warmup --port ${PORT}"
|
common_args: "--fit-target 1536 --no-warmup --port ${PORT}"
|
||||||
gemma3_ctx_128k: "--ctx-size 131072"
|
ctx_128k: "--ctx-size 131072"
|
||||||
qwen35_ctx_128k: "--ctx-size 131072"
|
ctx_256k: "--ctx-size 262144"
|
||||||
qwen35_ctx_256k: "--ctx-size 262144"
|
|
||||||
gemma_sampling: "--prio 2 --temp 1.0 --repeat-penalty 1.0 --min-p 0.00 --top-k 64 --top-p 0.95"
|
gemma_sampling: "--prio 2 --temp 1.0 --repeat-penalty 1.0 --min-p 0.00 --top-k 64 --top-p 0.95"
|
||||||
qwen35_sampling: "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q8_0 -ctv q8_0"
|
qwen35_sampling: "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q8_0 -ctv q8_0"
|
||||||
qwen35_35b_args: "--temp 1.0 --min-p 0.00 --top-p 0.95 --top-k 20 -ctk q8_0 -ctv q8_0"
|
qwen35_35b_args: "--temp 1.0 --min-p 0.00 --top-p 0.95 --top-k 20 -ctk q8_0 -ctv q8_0"
|
||||||
qwen35_35b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf"
|
qwen35_35b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf"
|
||||||
qwen35_4b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-4B-GGUF_mmproj-F16.gguf"
|
qwen35_4b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-4B-GGUF_mmproj-F16.gguf"
|
||||||
glm47_flash_args: "--temp 0.7 --top-p 1.0 --min-p 0.01 --repeat-penalty 1.0"
|
glm47_flash_args: "--temp 0.7 --top-p 1.0 --min-p 0.01 --repeat-penalty 1.0"
|
||||||
|
gemma4_sampling: "--temp 1.0 --top-p 0.95 --top-k 64"
|
||||||
thinking_on: "--chat-template-kwargs '{\"enable_thinking\": true}'"
|
thinking_on: "--chat-template-kwargs '{\"enable_thinking\": true}'"
|
||||||
thinking_off: "--chat-template-kwargs '{\"enable_thinking\": false}'"
|
thinking_off: "--chat-template-kwargs '{\"enable_thinking\": false}'"
|
||||||
|
|
||||||
peers:
|
|
||||||
openrouter:
|
|
||||||
proxy: https://openrouter.ai/api
|
|
||||||
apiKey: ${env.OPENROUTER_API_KEY}
|
|
||||||
models:
|
|
||||||
- z-ai/glm-5
|
|
||||||
|
|
||||||
hooks:
|
hooks:
|
||||||
on_startup:
|
on_startup:
|
||||||
preload:
|
preload:
|
||||||
@@ -42,7 +35,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
|
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
|
||||||
${gemma3_ctx_128k}
|
${ctx_128k}
|
||||||
${gemma_sampling}
|
${gemma_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
|
|
||||||
@@ -50,7 +43,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
|
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
|
||||||
${gemma3_ctx_128k}
|
${ctx_128k}
|
||||||
${gemma_sampling}
|
${gemma_sampling}
|
||||||
--no-mmproj
|
--no-mmproj
|
||||||
${common_args}
|
${common_args}
|
||||||
@@ -59,7 +52,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
|
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
|
||||||
${gemma3_ctx_128k}
|
${ctx_128k}
|
||||||
${gemma_sampling}
|
${gemma_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
|
|
||||||
@@ -67,7 +60,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
|
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
|
||||||
${gemma3_ctx_128k}
|
${ctx_128k}
|
||||||
${gemma_sampling}
|
${gemma_sampling}
|
||||||
--no-mmproj
|
--no-mmproj
|
||||||
${common_args}
|
${common_args}
|
||||||
@@ -90,7 +83,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_35b_args}
|
${qwen35_35b_args}
|
||||||
${common_args}
|
${common_args}
|
||||||
|
|
||||||
@@ -98,7 +91,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_35b_args}
|
${qwen35_35b_args}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -110,7 +103,7 @@ models:
|
|||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
|
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
|
||||||
${qwen35_35b_heretic_mmproj}
|
${qwen35_35b_heretic_mmproj}
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_35b_args}
|
${qwen35_35b_args}
|
||||||
${common_args}
|
${common_args}
|
||||||
|
|
||||||
@@ -119,7 +112,7 @@ models:
|
|||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
|
-hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
|
||||||
${qwen35_35b_heretic_mmproj}
|
${qwen35_35b_heretic_mmproj}
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_35b_args}
|
${qwen35_35b_args}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -128,7 +121,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
|
-hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${base_args}
|
${base_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -146,7 +139,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -155,7 +148,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -164,7 +157,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_128k}
|
${ctx_128k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -173,7 +166,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_128k}
|
${ctx_128k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -183,7 +176,7 @@ models:
|
|||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
||||||
${qwen35_4b_heretic_mmproj}
|
${qwen35_4b_heretic_mmproj}
|
||||||
${qwen35_ctx_128k}
|
${ctx_128k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -193,7 +186,7 @@ models:
|
|||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
-hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
|
||||||
${qwen35_4b_heretic_mmproj}
|
${qwen35_4b_heretic_mmproj}
|
||||||
${qwen35_ctx_128k}
|
${ctx_128k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -202,7 +195,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -211,7 +204,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
|
-hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -220,7 +213,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
|
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -229,7 +222,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
|
-hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -238,7 +231,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
|
-hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_on}
|
${thinking_on}
|
||||||
@@ -247,7 +240,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
|
-hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
|
||||||
${qwen35_ctx_256k}
|
${ctx_256k}
|
||||||
${qwen35_sampling}
|
${qwen35_sampling}
|
||||||
${common_args}
|
${common_args}
|
||||||
${thinking_off}
|
${thinking_off}
|
||||||
@@ -258,3 +251,35 @@ models:
|
|||||||
-hf unsloth/GLM-4.7-Flash-GGUF:Q4_K_M
|
-hf unsloth/GLM-4.7-Flash-GGUF:Q4_K_M
|
||||||
${glm47_flash_args}
|
${glm47_flash_args}
|
||||||
${common_args}
|
${common_args}
|
||||||
|
|
||||||
|
"gemma-4-26B-A4B-it:UD-Q4_K_XL":
|
||||||
|
cmd: |
|
||||||
|
/app/llama-server
|
||||||
|
-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL \
|
||||||
|
${ctx_256k}
|
||||||
|
${gemma4_sampling}
|
||||||
|
${common_args}
|
||||||
|
|
||||||
|
"gemma-4-26B-A4B-it:UD-Q2_K_XL":
|
||||||
|
cmd: |
|
||||||
|
/app/llama-server
|
||||||
|
-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q2_K_XL \
|
||||||
|
${ctx_256k}
|
||||||
|
${gemma4_sampling}
|
||||||
|
${common_args}
|
||||||
|
|
||||||
|
"unsloth/gemma-4-E4B-it-GGUF:UD-Q4_K_XL":
|
||||||
|
cmd: |
|
||||||
|
/app/llama-server
|
||||||
|
-hf unsloth/gemma-4-E4B-it-GGUF:UD-Q4_K_XL \
|
||||||
|
${ctx_128k}
|
||||||
|
${gemma4_sampling}
|
||||||
|
${common_args}
|
||||||
|
|
||||||
|
"unsloth/gemma-4-E2B-it-GGUF:UD-Q4_K_XL":
|
||||||
|
cmd: |
|
||||||
|
/app/llama-server
|
||||||
|
-hf unsloth/gemma-4-E2B-it-GGUF:UD-Q4_K_XL \
|
||||||
|
${ctx_128k}
|
||||||
|
${gemma4_sampling}
|
||||||
|
${common_args}
|
||||||
|
|||||||
@@ -29,12 +29,6 @@ spec:
|
|||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
name: http
|
name: http
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
env:
|
|
||||||
- name: OPENROUTER_API_KEY
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: llama-openrouter
|
|
||||||
key: OPENROUTER_API_KEY
|
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: models
|
- name: models
|
||||||
mountPath: /root/.cache
|
mountPath: /root/.cache
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ metadata:
|
|||||||
name: llama-models-lvmssd
|
name: llama-models-lvmssd
|
||||||
namespace: openebs
|
namespace: openebs
|
||||||
spec:
|
spec:
|
||||||
capacity: 200Gi
|
capacity: "322122547200"
|
||||||
ownerNodeID: anapistula-delrosalae
|
ownerNodeID: anapistula-delrosalae
|
||||||
shared: "yes"
|
shared: "yes"
|
||||||
thinProvision: "no"
|
thinProvision: "no"
|
||||||
@@ -20,7 +20,7 @@ metadata:
|
|||||||
name: llama-models-lvmssd
|
name: llama-models-lvmssd
|
||||||
spec:
|
spec:
|
||||||
capacity:
|
capacity:
|
||||||
storage: 200Gi
|
storage: 300Gi
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteOnce
|
- ReadWriteOnce
|
||||||
persistentVolumeReclaimPolicy: Retain
|
persistentVolumeReclaimPolicy: Retain
|
||||||
@@ -41,6 +41,6 @@ spec:
|
|||||||
- ReadWriteOnce
|
- ReadWriteOnce
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
storage: 200Gi
|
storage: 300Gi
|
||||||
storageClassName: ssd-lvmpv
|
storageClassName: ssd-lvmpv
|
||||||
volumeName: llama-models-lvmssd
|
volumeName: llama-models-lvmssd
|
||||||
@@ -36,26 +36,3 @@ spec:
|
|||||||
excludeRaw: true
|
excludeRaw: true
|
||||||
|
|
||||||
vaultAuthRef: llama
|
vaultAuthRef: llama
|
||||||
---
|
|
||||||
apiVersion: secrets.hashicorp.com/v1beta1
|
|
||||||
kind: VaultStaticSecret
|
|
||||||
metadata:
|
|
||||||
name: llama-openrouter
|
|
||||||
namespace: llama
|
|
||||||
spec:
|
|
||||||
type: kv-v2
|
|
||||||
|
|
||||||
mount: secret
|
|
||||||
path: openrouter
|
|
||||||
|
|
||||||
destination:
|
|
||||||
create: true
|
|
||||||
name: llama-openrouter
|
|
||||||
type: Opaque
|
|
||||||
transformation:
|
|
||||||
excludeRaw: true
|
|
||||||
templates:
|
|
||||||
OPENROUTER_API_KEY:
|
|
||||||
text: '{{ get .Secrets "API_KEY" }}'
|
|
||||||
|
|
||||||
vaultAuthRef: llama
|
|
||||||
|
|||||||
10
docs/assets/woodpecker.svg
Normal file
10
docs/assets/woodpecker.svg
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="284.538" height="253.96">
|
||||||
|
<style>
|
||||||
|
@media (prefers-color-scheme: dark) {
|
||||||
|
path {
|
||||||
|
fill: white;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<path d="M162.51 33.188c-26.77.411-54.004 6.885-71.494 3.745-1.313-.232-2.124 1.338-1.171 2.265 14.749 14.003 20.335 28.16 36.718 30.065l.476.103c-7.567 7.799-14.028 18.018-18.571 31.171-4.89 14.106-6.268 29.421-7.89 47.105-2.445 26.332-5.173 56.152-20.038 93.54a246.489 246.489 0 0 0-13.27 45.946h22.652a221.202 221.202 0 0 1 11.249-37.786c16.049-40.374 19.073-73.257 21.505-99.693 1.493-16.255 2.806-30.309 6.796-41.853 11.647-33.527 39.408-40.889 61.056-36.693 21.004 4.067 41.673 20.502 40.592 44.016-.772 15.985-7.76 23.166-12.87 28.43-2.793 2.883-5.47 5.611-6.731 9.498-3.037 9.19.101 19.434 8.494 27.568 22.24 20.734 34.338 59.717 33.681 106.513h22.176c.592-52.935-13.951-97.839-40.503-122.626-2.097-2.021-2.69-3.604-3.191-3.347 1.222-1.544 3.217-3.346 4.633-4.813 29.382-21.79 77.813-1.892 107.054 9.653 7.58 2.985 11.274-4.338 4.067-8.623-25.097-14.84-76.54-54.016-105.368-79.718-4.029-3.54-6.796-7.8-11.455-11.738-15.547-27.439-41.84-33.127-68.597-32.728Zm35.238 60.27a15.161 15.161 0 0 0-2.008.232 15.161 15.161 0 0 0-1.506 29.434 15.154 15.154 0 0 0 9.473-28.79 15.161 15.161 0 0 0-5.959-.876zm-44.286 147.17a2.033 2.033 0 0 0-1.133.374c-1.08.772-1.93 3.05-.772 5.701 5.38 12.394 9.1 25.445 12.536 40.413h22.484c-5.676-16.629-16.307-34.055-27.851-43.978-2.008-1.737-3.913-2.574-5.251-2.51z" style="stroke-width:12.8704" transform="translate(-67.27 -33.169)"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 1.5 KiB |
@@ -73,6 +73,28 @@ def synchronize_approle_auth(client: hvac.Client):
|
|||||||
print('Enabling AppRole auth method')
|
print('Enabling AppRole auth method')
|
||||||
client.sys.enable_auth_method('approle', 'AppRole authorization for CI')
|
client.sys.enable_auth_method('approle', 'AppRole authorization for CI')
|
||||||
|
|
||||||
|
roles_dir = pathlib.Path(__file__).parent.joinpath('../vault/approles/')
|
||||||
|
roles: dict[str, Any] = {}
|
||||||
|
|
||||||
|
for filename in roles_dir.iterdir():
|
||||||
|
with filename.open('r') as f:
|
||||||
|
role = yaml.safe_load(f.read())
|
||||||
|
assert type(role) is dict
|
||||||
|
roles[filename.stem] = role
|
||||||
|
|
||||||
|
roles_on_vault: list[str] = []
|
||||||
|
roles_response = client.list("auth/approle/roles")
|
||||||
|
if roles_response is not None:
|
||||||
|
roles_on_vault = roles_response['data']['keys']
|
||||||
|
|
||||||
|
for role in roles_on_vault:
|
||||||
|
if role not in roles:
|
||||||
|
print(f'Deleting role: {role}')
|
||||||
|
client.delete(f'auth/approle/role/{role}')
|
||||||
|
|
||||||
|
for role_name, role_content in roles.items():
|
||||||
|
print(f'Updating role: {role_name}')
|
||||||
|
client.write_data(f'auth/approle/role/{role_name}', data=role_content)
|
||||||
|
|
||||||
def synchronize_kubernetes_secretengine(client: hvac.Client):
|
def synchronize_kubernetes_secretengine(client: hvac.Client):
|
||||||
# Ensure kubernetes secret engine is enabled
|
# Ensure kubernetes secret engine is enabled
|
||||||
|
|||||||
4
vault/approles/ci-flux-reconcile.yaml
Normal file
4
vault/approles/ci-flux-reconcile.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
token_ttl: 20m
|
||||||
|
token_max_ttl: 20m
|
||||||
|
policies:
|
||||||
|
- flux-reconcile
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
allowed_kubernetes_namespaces: flux-system
|
allowed_kubernetes_namespaces: flux-system
|
||||||
generated_role_rules:
|
generated_role_rules:
|
||||||
rules:
|
rules:
|
||||||
- apiGroups: ["kustomize.toolkit.fluxcd.io"]
|
- apiGroups: ["source.toolkit.fluxcd.io"]
|
||||||
resources: ["gitrepositories"]
|
resources: ["gitrepositories"]
|
||||||
verbs: ["update", "watch"]
|
verbs: ["get", "patch", "watch"]
|
||||||
|
|||||||
3
vault/policy/flux-reconcile.hcl
Normal file
3
vault/policy/flux-reconcile.hcl
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
path "kubernetes/creds/flux-reconcile" {
|
||||||
|
capabilities = ["update"]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user