hide IoT WiFi and enable mac filter

Merge pull request 'Update registry.k8s.io/coredns/coredns Docker tag to v1.14.4' (#341 ) from renovate/registry.k8s.io-coredns-coredns-1.x into fresh-start
Update registry.k8s.io/coredns/coredns Docker tag to v1.14.4
2026-06-22 15:19:57 +02:00 · 2026-06-21 02:01:27 +00:00 · 2026-06-21 02:01:26 +00:00 · 2026-06-21 02:38:24 +02:00 · 2026-06-21 02:00:32 +02:00 · 2026-06-19 02:01:13 +00:00
115 changed files with 10490 additions and 13096 deletions
@@ -0,0 +1,46 @@
+# Builds the image described by mikrotik/coredns/Dockerfile and pushes it to
+# gitea.lumpiasty.xyz/lumpiasty/coredns-mikrotik. Triggered only by pushes
+# that touch files under mikrotik/coredns/.
+when:
+  - event: push
+    path:
+      include:
+        - mikrotik/coredns/**
+
+steps:
+  # Logs in to OpenBao with AppRole, keeps the resulting token in
+  # /woodpecker/.vault_id (read again by the last step to revoke it) and
+  # writes the registry credentials to /woodpecker/registry.env.
+  # NOTE(review): this reuses the renovate_role_id/renovate_secret_id
+  # secrets — confirm that sharing the Renovate AppRole is intended.
+  - name: Get registry creds from OpenBao
+    image: quay.io/openbao/openbao:2.5.5
+    environment:
+      VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
+      ROLE_ID:
+        from_secret: renovate_role_id
+      SECRET_ID:
+        from_secret: renovate_secret_id
+    commands:
+      - bao write -field token auth/approle/login
+          role_id=$ROLE_ID
+          secret_id=$SECRET_ID > /woodpecker/.vault_id
+      - export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
+      # First write truncates the env file (>), second one appends (>>).
+      # NOTE(review): a failing `bao kv get` inside $(...) does not make
+      # printf fail, so an empty credential would be written silently.
+      - 'printf "PLUGIN_USERNAME=%s\n" "$(bao kv get -mount secret -field REGISTRY_USERNAME container-registry)" > /woodpecker/registry.env'
+      - 'printf "PLUGIN_PASSWORD=%s\n" "$(bao kv get -mount secret -field REGISTRY_PASSWORD container-registry)" >> /woodpecker/registry.env'
+
+  # Builds for linux/arm64 only and tags the image with both `latest` and the
+  # first 8 characters of the commit SHA.
+  - name: Build and push
+    image: woodpeckerci/plugin-docker-buildx:6.1.0
+    privileged: true
+    settings:
+      registry: gitea.lumpiasty.xyz
+      repo: gitea.lumpiasty.xyz/lumpiasty/coredns-mikrotik
+      platforms: linux/arm64
+      tags:
+        - latest
+        - ${CI_COMMIT_SHA:0:8}
+      dockerfile: mikrotik/coredns/Dockerfile
+      context: mikrotik/coredns/
+      # File produced by the previous step; presumably the plugin picks up
+      # PLUGIN_USERNAME/PLUGIN_PASSWORD from it as registry login — TODO
+      # confirm against the plugin documentation.
+      env_file: /woodpecker/registry.env
+
+  # Revokes the OpenBao token obtained in the first step.
+  - name: Invalidate OpenBao token
+    image: quay.io/openbao/openbao:2.5.5
+    environment:
+      VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
+    commands:
+      - export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
+      - bao write -f auth/token/revoke-self
+    # Run on failure as well, so the token is revoked even if the build broke.
+    when:
+      - status: [success, failure]
@@ -6,10 +6,9 @@ skip_clone: true

 steps:
  - name: Get kubernetes access from OpenBao
-    image: quay.io/openbao/openbao:2.5.2
-    volumes:
-      - secrets:/secrets
+    image: quay.io/openbao/openbao:2.5.5
    environment:
+      VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
      ROLE_ID:
        from_secret: flux_reconcile_role_id
      SECRET_ID:
@@ -17,39 +16,34 @@ steps:
    commands:
      - bao write -field token auth/approle/login
          role_id=$ROLE_ID
-          secret_id=$SECRET_ID
-          \> /secrets/.vault_id
-      - export VAULT_TOKEN=$(cat /secrets/.vault_id)
-      - bao write -format json /kubernetes/creds/flux-reconcile
-          \> /secrets/kube_credentials
-      - bao read -format
+          secret_id=$SECRET_ID > /woodpecker/.vault_id
+      - export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
+      - bao write -format json -f /kubernetes/creds/flux-reconcile > /woodpecker/kube_credentials
  - name: Construct Kubeconfig
-    image: alpine/k8s:1.32.13
-    volumes:
-      - secrets:/secrets
+    image: alpine/k8s:1.36.2
    environment:
-      KUBECONFIG: /secrets/kubeconfig
+      KUBECONFIG: /woodpecker/kubeconfig
    commands:
      - kubectl config set-cluster cluster
          --server=https://$KUBERNETES_SERVICE_HOST
-          --client-certificate=/run/secrets/kubernetes.io/serviceaccount/ca.crt
+          --certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      - kubectl config set-credentials cluster
-          --token=$(jq -r .data.service_account_token /secrets/kube_credentials)
+          --token=$(jq -r .data.service_account_token /woodpecker/kube_credentials)
      - kubectl config set-context cluster
          --cluster cluster
          --user cluster
          --namespace flux-system
-          --current=true
+      - kubectl config use-context cluster
  - name: Reconcile git source
-    image: ghcr.io/fluxcd/flux-cli:v2.8.3
-    volumes:
-      - secrets:/secrets
+    image: ghcr.io/fluxcd/flux-cli:v2.8.8
    environment:
-      KUBECONFIG: /secrets/kubeconfig
+      KUBECONFIG: /woodpecker/kubeconfig
    commands:
      - flux reconcile source git flux-system
  - name: Invalidate OpenBao token
-    image: quay.io/openbao/openbao:2.5.2
+    image: quay.io/openbao/openbao:2.5.5
+    environment:
+      VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
    commands:
-      - export VAULT_TOKEN=$(cat /secrets/.vault_id)
-      - bao write auth/token/revoke-self
+      - export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
+      - bao write -f auth/token/revoke-self
+    # Steps run only on success by default; also run after a failed step so
+    # the OpenBao token never outlives the pipeline.
+    when:
+      - status: [success, failure]
@@ -0,0 +1,42 @@
+# Scheduled Renovate run: fetches the Renovate tokens from OpenBao, runs
+# Renovate against the Gitea instance, then revokes the OpenBao token.
+when:
+  - event: cron
+    cron: renovate # schedule on 0 2 * * *, set in ui
+
+skip_clone: true
+
+steps:
+  # AppRole login. The OpenBao token is kept in /woodpecker/.vault_id so the
+  # last step can revoke it; the two Renovate tokens are written to files
+  # that the "Run Renovate" step reads.
+  - name: Get renovate token from OpenBao
+    image: quay.io/openbao/openbao:2.5.5
+    environment:
+      VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
+      ROLE_ID:
+        from_secret: renovate_role_id
+      SECRET_ID:
+        from_secret: renovate_secret_id
+    commands:
+      - bao write -field token auth/approle/login
+          role_id=$ROLE_ID
+          secret_id=$SECRET_ID > /woodpecker/.vault_id
+      - export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
+      - bao kv get -mount secret -field RENOVATE_TOKEN renovate > /woodpecker/renovate_token
+      - bao kv get -mount secret -field GITHUB_COM_TOKEN renovate > /woodpecker/github_com_token
+  # Tokens are exported from the files written by the previous step rather
+  # than being passed through the step's `environment`.
+  - name: Run Renovate
+    image: renovate/renovate:43.222.1
+    environment:
+      RENOVATE_AUTODISCOVER: "true"
+      RENOVATE_ENDPOINT: https://gitea.lumpiasty.xyz/api/v1
+      RENOVATE_PLATFORM: gitea
+      RENOVATE_GIT_AUTHOR: Renovate Bot <renovate@lumpiasty.xyz>
+      # Required for flux artifact regeneration (gotk-components.yaml); containerbase installs flux on demand
+      RENOVATE_BINARY_SOURCE: install
+    commands:
+      - export RENOVATE_TOKEN=$(cat /woodpecker/renovate_token)
+      - export GITHUB_COM_TOKEN=$(cat /woodpecker/github_com_token)
+      - /usr/local/sbin/renovate-entrypoint.sh renovate
+  - name: Invalidate OpenBao token
+    image: quay.io/openbao/openbao:2.5.5
+    environment:
+      VAULT_ADDR: https://openbao.lumpiasty.xyz:8200
+    commands:
+      - export VAULT_TOKEN=$(cat /woodpecker/.vault_id)
+      - bao write -f auth/token/revoke-self
+    # Steps run only on success by default; also run after a failed Renovate
+    # run so the OpenBao token is always revoked.
+    when:
+      - status: [success, failure]
@@ -15,6 +15,7 @@ gen-talos-config:
 		--config-patch @talos/patches/ollama.patch \
 		--config-patch @talos/patches/llama.patch \
 		--config-patch @talos/patches/frigate.patch \
+		--config-patch @talos/patches/woodpecker.patch \
 		--config-patch @talos/patches/anapistula-delrosalae.patch \
 		--output-types controlplane -o talos/generated/anapistula-delrosalae.yaml \
 		homelab https://kube-api.homelab.lumpiasty.xyz:6443
@@ -2,6 +2,8 @@

 This repo contains configuration and documentation for my homelab setup, which is based on Talos OS for Kubernetes cluster and MikroTik router.

+[<img src="https://woodpecker.lumpiasty.xyz/api/badges/2/status.svg" alt="Pipeline status">](https://woodpecker.lumpiasty.xyz/repos/2)
+
 ## Architecture

 Physical setup consists of MikroTik router which connects to the internet and serves as a gateway for the cluster and other devices in the home network as shown in the diagram below.
@@ -137,11 +139,13 @@ flowchart TD
   cluster -- "Routes exported via BGP" ----- k8s
 ```

+More information on the network is available in the [Network documentation](docs/network.md).
+
 Currently the k8s cluster consists of single node (hostname anapistula-delrosalae), which is a PC with Ryzen 5 3600, 64GB RAM, RX 580 8GB (for accelerating LLMs), 1TB NVMe SSD, 2TB and 3TB HDDs and serves both as control plane and worker node.

 ## Software stack

-The cluster itself is based on [Talos Linux](https://www.talos.dev/) (which is also a Kubernetes distribution) and uses [Cilium](https://cilium.io/) as CNI, IPAM, kube-proxy replacement, Load Balancer, and BGP control plane. Persistent volumes are managed by [OpenEBS LVM LocalPV](https://openebs.io/docs/user-guides/local-storage-user-guide/local-pv-lvm/lvm-overview). Applications are deployed using GitOps (this repo) and reconciled on cluster using [Flux](https://fluxcd.io/). Git repository is hosted on [Gitea](https://gitea.io/) running on a cluster itself. Secets are kept in [OpenBao](https://openbao.org/) (HashiCorp Vault fork) running on a cluster and synced to cluster objects using [Vault Secrets Operator](https://github.com/hashicorp/vault-secrets-operator). Deployments are kept up to date using self hosted [Renovate](https://www.mend.io/renovate/) bot updating manifests in the Git repository. Incoming HTTP traffic is routed to cluster using [Nginx Ingress Controller](https://kubernetes.github.io/ingress-nginx/) and certificates are issued by [cert-manager](https://cert-manager.io/) with [Let's Encrypt](https://letsencrypt.org/) ACME issuer with [cert-manager-webhook-ovh](https://github.com/aureq/cert-manager-webhook-ovh) resolving DNS-01 challanges. Cluster also runs [CloudNativePG](https://cloudnative-pg.io/) operator for managing PostgreSQL databases. Router is running [Mikrotik RouterOS](https://help.mikrotik.com/docs/spaces/ROS/pages/328059/RouterOS) and its configuration is managed via [Ansible](https://docs.ansible.com/) playbook in this repo. High level core cluster software architecture is shown on the diagram below.
+The cluster itself is based on [Talos Linux](https://www.talos.dev/) (which is also a Kubernetes distribution) and uses [Cilium](https://cilium.io/) as CNI, IPAM, kube-proxy replacement, Load Balancer, and BGP control plane. Persistent volumes are managed by [OpenEBS LVM LocalPV](https://openebs.io/docs/user-guides/local-storage-user-guide/local-pv-lvm/lvm-overview). Applications are deployed using GitOps (this repo) and reconciled on cluster using [Flux](https://fluxcd.io/). Git repository is hosted on [Gitea](https://gitea.io/) running on a cluster itself. Secrets are kept in [OpenBao](https://openbao.org/) (HashiCorp Vault fork) running on a cluster and synced to cluster objects using [Vault Secrets Operator](https://github.com/hashicorp/vault-secrets-operator). Deployments are kept up to date using self hosted [Renovate](https://www.mend.io/renovate/) bot updating manifests in the Git repository. There is a [Woodpecker](https://woodpecker-ci.org/) instance watching repositories on Gitea and scheduling jobs on cluster. Incoming HTTP traffic is routed to cluster using [Nginx Ingress Controller](https://kubernetes.github.io/ingress-nginx/) and certificates are issued by [cert-manager](https://cert-manager.io/) with [Let's Encrypt](https://letsencrypt.org/) ACME issuer with [cert-manager-webhook-ovh](https://github.com/aureq/cert-manager-webhook-ovh) resolving DNS-01 challenges. Cluster also runs [CloudNativePG](https://cloudnative-pg.io/) operator for managing PostgreSQL databases. Router is running [Mikrotik RouterOS](https://help.mikrotik.com/docs/spaces/ROS/pages/328059/RouterOS) and its configuration is managed via [Ansible](https://docs.ansible.com/) playbook in this repo. High level core cluster software architecture is shown on the diagram below.

 > Talos Linux is an immutable Linux distribution purpose-built for running Kubernetes. The OS is distributed as an OCI (Docker) image and does not contain any package manager, shell, SSH, or any other tools for managing the system. Instead, all operations are performed using API, which can be accessed using `talosctl` CLI tool.

@@ -177,14 +181,23 @@ flowchart TD
      vault_operator -- "Retrieves secrets" --> vault[OpenBao] -- "Secret storage" --> lv
      vault -- "Auth method" --> kubeapi

+      gitea -- "Receives events" --> woodpecker[Woodpecker CI] -- "Schedules jobs" --> kubeapi
+
      gitea -- "Stores repositories" --> lv

-      gitea --> renovate[Renovate Bot] -- "Updates manifests" --> gitea
-
+      gitea--> renovate[Renovate Bot] -- "Updates manifests" --> gitea

   end
 ```

+### Reconciliation paths of each component
+
+- Kubernetes manifests are reconciled using Flux triggered by Woodpecker CI on push
+- RouterOS configs are applied by Ansible <!-- ran by Gitea Action on push -->
+- Talos configs are applied using makefile <!-- switch to ansible and trigger on action push -->
+- Vault policies are applied by running `synchronize-vault.py` <!-- triggered by Gitea action on push -->
+<!-- - Docker images are built and pushed to registry by Gitea Actions on push -->
+
 <!-- TODO: Backups, monitoring, logging, deployment with ansible etc -->

 ## Software
@@ -228,12 +241,17 @@ flowchart TD
 |------|------|-------------|
 | <img src="docs/assets/devenv.svg" alt="devenv" height="50" width="50"> | devenv | Tool for declarative managment of development environment using Nix |
 | <img src="docs/assets/renovate.svg" alt="Renovate" height="50" width="50"> | Renovate | Bot for keeping dependencies up to date |
+| <img src="docs/assets/woodpecker.svg" alt="Woodpecker" height="50" width="50"> | Woodpecker CI | Continuous Integration system |

 ### AI infrastructure

-| Logo | Name | Address | Description |
-|------|------|---------|-------------|
-| <img src="docs/assets/llama-cpp.svg" alt="LLaMA.cpp" height="50" width="50"> | LLaMA.cpp | https://llama.lumpiasty.xyz/ | LLM inference server running local models with GPU acceleration |
+| Logo | Name | Description |
+|------|------|-------------|
+| <img src="docs/assets/llama-cpp.svg" alt="LLaMA.cpp" height="50" width="50"> | LLaMA.cpp | LLM inference server running local models with GPU acceleration |
+| <img src="docs/assets/llama-swap.svg" alt="llama-swap" height="50" width="50"> | llama-swap | Model swapping for LLaMA.cpp |
+| <img src="docs/assets/meridian.svg" alt="meridian" height="50" width="50"> | Meridian | Proxy that bridges Anthropic's official SDK to third-party tools |
+| | whisper.cpp | High-performance Whisper Automatic Speech Recognition inference server |
+| | Kokoro-FastAPI | Kokoro-82M text-to-speech inference server |

 ### Applications/Services

@@ -244,6 +262,7 @@ flowchart TD
 | <img src="docs/assets/teamspeak.svg" alt="iSpeak3" height="50" width="50"> | iSpeak3.pl | [ts3server://ispeak3.pl](ts3server://ispeak3.pl) | Public TeamSpeak 3 voice communication server |
 | <img src="docs/assets/immich.svg" alt="Immich" height="50" width="50"> | Immich | https://immich.lumpiasty.xyz/ | Self-hosted photo and video backup and streaming service |
 | <img src="docs/assets/frigate.svg" alt="Frigate" height="50" width="50"> | Frigate | https://frigate.lumpiasty.xyz/ | NVR for camera system with AI object detection and classification |
+| <img src="docs/assets/kaneo.svg" alt="Kaneo" height="50" width="50"> | Kaneo | https://kaneo.lumpiasty.xyz | Project management software |


 ## Development
@@ -252,7 +271,7 @@ This repo leverages [devenv](https://devenv.sh/) for easy setup of a development

 ### App deployment

-This repo is being watched by Flux running on cluster. To change config/add new app, simply commit to this repo and wait a while for cluster to reconcile changes. You can speed up this process by "notifying" Flux using `flux reconcile source git flux-system`.
+This repo is being watched by Flux running on cluster. To change config/add new app, simply commit to this repo and wait a while for cluster to reconcile changes. There is a Woodpecker job pushing Flux to reconcile cluster on push to this repository.

 Flux watches 3 kustomizations in this repo:

@@ -1,20 +1,91 @@
-## RouterOS Ansible
+# Ansible

-This directory contains the new Ansible automation for the MikroTik router.
+Idempotent configuration management for the home-lab network devices.

- Transport: RouterOS API (`community.routeros` collection), not SSH CLI scraping.
- Layout: one playbook (`playbooks/routeros.yml`) importing domain task files from `tasks/`.
- Goal: idempotent convergence using `community.routeros.api_modify` for managed paths.
+## Devices

-### Quick start
+| Host | Group | IP | Playbook |
+|---|---|---|---|
+| crs418 (MikroTik CRS418) | `mikrotik` | 192.168.255.10 | `playbooks/routeros.yml` |
+| dlink (OpenWrt AP) | `openwrt` | 192.168.255.11 | `playbooks/openwrt.yml` |

-1. Install dependencies:
-   - `ansible-galaxy collection install -r ansible/requirements.yml`
-   - `python -m pip install librouteros hvac`
-2. Configure secret references in `ansible/vars/routeros-secrets.yml`.
-3. Store required fields in OpenBao under configured KV path.
-4. Export token (`OPENBAO_TOKEN` or `VAULT_TOKEN`).
-5. Run:
-   - `ANSIBLE_CONFIG=ansible/ansible.cfg ansible-playbook ansible/playbooks/routeros.yml`
+Both devices are reachable on the MGMT network (192.168.255.0/24) once fully set up.

-More details and design rationale: `docs/ansible/routeros-design.md`.
+## Dependencies
+
+```bash
+ansible-galaxy collection install -r requirements.yml
+pip install librouteros hvac
+```
+
+Collections used:
+
+- `community.routeros >= 3.16.0` — MikroTik API modules
+- `community.hashi_vault >= 7.1.0` — OpenBao/Vault secret lookup
+- `community.openwrt` (installed from git `main`, see `requirements.yml`) — OpenWrt UCI and shell modules
+
+## MikroTik (routeros)
+
+Secrets are fetched at runtime from OpenBao. No credentials are stored in files.
+
+```bash
+export VAULT_TOKEN=...   # or OPENBAO_TOKEN
+ansible-playbook playbooks/routeros.yml
+```
+
+Secret layout expected in OpenBao (KVv2, mount `secret`):
+
+| Path | Fields |
+|---|---|
+| `routeros_api` | `username`, `password` |
+| `wan_pppoe` | `username`, `password` |
+
+## OpenWrt dlink AP
+
+The dlink needs a one-time initialisation before it can be managed through MikroTik.
+There are two playbooks:
+
+### Step 1 — `dlink-init.yml` (once, PC directly connected)
+
+Run this while your PC is plugged into one of the dlink **LAN ports**, the
+device is still on its factory IP (192.168.1.1), and your SSH key has already
+been added via the web UI. MikroTik must **not** be in the picture yet.
+
+What it does:
+- Reconfigures switch0 so the **WAN port** becomes a VLAN trunk:
+  - untagged → VLAN 1 (MGMT, 192.168.255.0/24)
+  - tagged → VLAN 2 (LAN, 192.168.0.0/24)
+- Adds `mgmt` interface: static 192.168.255.11/24, gateway 192.168.255.10
+- Reconfigures `lan` to a bridge on eth0.2 with no IP (AP mode)
+- Removes routed `wan`/`wan6` interfaces
+- Commits and reloads network in the background
+
+After the reload the device is no longer reachable at 192.168.1.1.
+
+```bash
+ansible-playbook playbooks/dlink-init.yml
+```
+
+### Step 2 — connect dlink WAN port to MikroTik ether3
+
+Plug the **dlink WAN port** into **MikroTik ether3**.
+
+If the MikroTik config hasn't been applied yet, do it now:
+
+```bash
+export VAULT_TOKEN=...
+ansible-playbook playbooks/routeros.yml
+```
+
+MikroTik ether3 is configured to send MGMT traffic untagged and VLAN 2 (LAN)
+tagged, which matches what dlink expects on its WAN port.
+
+### Step 3 — `openwrt.yml` (ongoing, via MikroTik)
+
+All subsequent runs connect to 192.168.255.11 through MikroTik:
+
+```bash
+ansible-playbook playbooks/openwrt.yml
+```
+
+This is the idempotent main playbook. Run it any time to converge configuration.
@@ -1,5 +1,6 @@
 [defaults]
 inventory = inventory/hosts.yml
+roles_path = roles
 host_key_checking = False
 retry_files_enabled = False
 result_format = yaml
@@ -4,3 +4,9 @@ all:
      hosts:
        crs418:
          ansible_host: 192.168.255.10
+    openwrt:
+      hosts:
+        dlink:
+          ansible_host: 192.168.255.11
+          ansible_user: root
+          ansible_ssh_port: 22
@@ -0,0 +1,45 @@
+---
+# One-time initialisation playbook for the dlink OpenWrt AP.
+#
+# Run this while your PC is directly connected to a dlink LAN port
+# (factory IP 192.168.1.1, no MikroTik in the picture yet).
+#
+# Applies the same network and firewall config as the main openwrt role,
+# then reloads network in the background. Skips wireless (requires Vault).
+#
+# After this playbook finishes the device is no longer reachable at 192.168.1.1.
+# Plug the WAN port into MikroTik ether3 and use playbooks/openwrt.yml for all
+# further configuration.
+
+- name: dlink — one-time network initialisation
+  hosts: openwrt
+  gather_facts: false
+  vars:
+    ansible_host: "192.168.1.1"
+    ansible_user: root
+    # Role defaults are not loaded when importing role task files directly.
+    # These must mirror roles/openwrt/defaults/main.yml.
+    openwrt_mgmt_ip: 192.168.255.11
+    openwrt_mgmt_prefix: 24
+    openwrt_mgmt_gateway: 192.168.255.10
+    openwrt_dns_servers:
+      - 192.168.0.1
+
+  tasks:
+    - name: Verify connectivity
+      community.openwrt.ping:
+
+    # import_tasks (static) is used instead of include_tasks (dynamic) so that
+    # handler names referenced via notify in the imported files are silently
+    # ignored rather than causing an error — no handlers are defined in this
+    # play, and the explicit nohup reload below replaces them for the init case.
+    - name: Network configuration
+      ansible.builtin.import_tasks: ../roles/openwrt/tasks/network.yml
+
+    - name: Firewall configuration
+      ansible.builtin.import_tasks: ../roles/openwrt/tasks/firewall.yml
+
+    - name: Reload network in background (device will drop off 192.168.1.1)
+      community.openwrt.nohup:
+        command: /etc/init.d/network reload
+      ignore_unreachable: true
@@ -0,0 +1,10 @@
+---
+# Main OpenWrt playbook. Connects to dlink on its permanent management IP
+# (192.168.255.11 via MikroTik ether3). Run dlink-init.yml first if the
+# device has not been initialised yet.
+- name: Configure OpenWrt
+  hosts: openwrt
+  gather_facts: false
+
+  roles:
+    - role: openwrt
@@ -4,9 +4,6 @@
  gather_facts: false
  connection: local

-  vars_files:
-    - ../vars/routeros-secrets.yml
-
  pre_tasks:
    - name: Load router secrets from OpenBao
      ansible.builtin.set_fact:
@@ -42,15 +39,10 @@
              engine_mount_point=openbao_kv_mount
            ).secret[openbao_fields.wan_pppoe.password_key]
          }}
-        routeros_tailscale_container_password: >-
-          {{
-            lookup(
-              'community.hashi_vault.vault_kv2_get',
-              openbao_fields.routeros_tailscale_container.path,
-              engine_mount_point=openbao_kv_mount
-            ).secret[openbao_fields.routeros_tailscale_container.container_password_key]
-          }}
+
      no_log: true
+      tags:
+        - tailscale-script

  module_defaults:
    group/community.routeros.api:
@@ -63,30 +55,5 @@
      force_no_cert: true
      encoding: UTF-8

-  tasks:
-    - name: Preflight checks
-      ansible.builtin.import_tasks: ../tasks/preflight.yml
-
-    - name: Base network configuration
-      ansible.builtin.import_tasks: ../tasks/base.yml
-
-    - name: WAN and tunnel interfaces
-      ansible.builtin.import_tasks: ../tasks/wan.yml
-
-    - name: Hardware and platform tuning
-      ansible.builtin.import_tasks: ../tasks/hardware.yml
-
-    - name: RouterOS container configuration
-      ansible.builtin.import_tasks: ../tasks/containers.yml
-
-    - name: Addressing configuration
-      ansible.builtin.import_tasks: ../tasks/addressing.yml
-
-    - name: Firewall configuration
-      ansible.builtin.import_tasks: ../tasks/firewall.yml
-
-    - name: Routing configuration
-      ansible.builtin.import_tasks: ../tasks/routing.yml
-
-    - name: System configuration
-      ansible.builtin.import_tasks: ../tasks/system.yml
+  roles:
+    - role: routeros
@@ -3,3 +3,9 @@ collections:
    version: ">=3.16.0"
  - name: community.hashi_vault
    version: ">=7.1.0"
+  # community.openwrt.apk module (required for OpenWrt 25.12+ which dropped opkg)
+  # is only available in 1.4.0 which is not yet released on Galaxy — install from git.
+  - name: community.openwrt
+    source: https://github.com/ansible-collections/community.openwrt.git
+    type: git
+    version: main
@@ -0,0 +1,27 @@
+---
+# Hostname for the AP
+openwrt_hostname: dlink
+
+# Timezone (POSIX TZ string used by OpenWrt)
+openwrt_timezone: CET-1CEST,M3.5.0,M10.5.0/3
+
+# Management interface and IP (statically assigned on VLAN 1 / eth0.1)
+openwrt_mgmt_ip: 192.168.255.11
+openwrt_mgmt_prefix: 24
+openwrt_mgmt_gateway: 192.168.255.10
+
+# SSH authorised keys (list of public key strings)
+openwrt_ssh_authorized_keys: []
+
+# NTP servers
+openwrt_ntp_servers:
+  - 0.pl.pool.ntp.org
+  - 1.pl.pool.ntp.org
+
+# Packages to install
+openwrt_packages:
+  - usb-modeswitch   # switches embedded LTE modem (Qualcomm 05c6:9008) from EDL to QMI mode on boot
+  - luci-proto-qmi   # adds QMI protocol support to LuCI for configuring the embedded LTE modem
+  - bird2            # BGP daemon — peers with CRS for LTE failover route signalling
+  - bird2c           # Control CLI interface for BGP daemon
+
@@ -0,0 +1,19 @@
+---
+# Handlers notified by the OpenWrt tasks (e.g. `notify: Reload bird`).
+
+# Runs the network reload through nohup and ignores the host becoming
+# unreachable — presumably because the reload can interrupt the connection
+# Ansible is using (TODO confirm).
+- name: Reload network
+  community.openwrt.nohup:
+    command: /etc/init.d/network reload
+  ignore_unreachable: true
+
+# Despite the "Reload" name, this restarts the firewall service.
+- name: Reload firewall
+  community.openwrt.service:
+    name: firewall
+    state: restarted
+
+# Runs `wifi reload` on the device.
+- name: Reload wireless
+  community.openwrt.command:
+    cmd: wifi reload
+
+# Despite the "Reload" name, this restarts the bird service.
+- name: Reload bird
+  community.openwrt.service:
+    name: bird
+    state: restarted
@@ -0,0 +1,153 @@
+---
+# Configures BIRD2 on the D-Link as an iBGP peer of the MikroTik CRS418.
+#
+# Route exchange:
+#   D-Link → CRS: announces 0.0.0.0/0 and 2000::/3 when wwan0 is up.
+#                 CRS installs these at BGP distance 200 (below the GPON
+#                 static default at distance 1 — activates only on GPON failure).
+#
+#   CRS → D-Link: announces connected routes (VLAN subnets), static routes
+#                 (Tailscale, GPON default), and reflects k8s BGP routes.
+#                 BIRD2 installs all of these into the kernel at metric 10.
+#
+# D-Link's own routing:
+#   - Kernel metric 10: BGP-learned routes from CRS (preferred)
+#   - Kernel metric 100: wwan QMI-assigned routes (fallback)
+#   No static default gateway on uplink — the default comes from BGP.
+#   When GPON fails, CRS withdraws the BGP default; D-Link falls back to wwan.
+
+- name: Write BIRD2 configuration
+  community.openwrt.copy:
+    dest: /etc/bird.conf
+    mode: '0640'
+    owner: root
+    group: root
+    content: |
+      # BIRD2 — LTE failover BGP peer for MikroTik CRS418
+      # iBGP session, AS 65000, peer: 192.168.6.1 (CRS vlan6)
+
+      router id 192.168.6.2;
+
+      protocol device {
+        # Tracks interface up/down state via netlink.
+        # scan time is a periodic reconciliation fallback; real events are
+        # netlink-driven and processed immediately.
+        scan time 5;
+      }
+
+      # Announce directly connected prefixes into BIRD2's RIB so that
+      # next-hop resolution works for BGP routes received from CRS.
+      # Without this, 192.168.6.1 (CRS uplink) is unresolvable and all
+      # IPv4 BGP routes appear unreachable. Same for IPv6 uplink prefix.
+      protocol direct {
+        ipv4;
+        ipv6;
+        interface "eth0.6";
+      }
+
+      # Install BGP-learned routes from CRS into the kernel at metric 10.
+      # This is lower than the wwan QMI default (metric 100), so D-Link
+      # prefers the CRS path for its own outbound traffic when GPON is up.
+      # import none: BIRD2 does not read the kernel table, preventing
+      # wwan kernel routes from leaking into BGP.
+      protocol kernel k4 {
+        ipv4 {
+          import none;
+          export filter {
+            if proto = "crs" then {
+              krt_metric = 10;
+              accept;
+            }
+            reject;
+          };
+        };
+      }
+
+      protocol kernel k6 {
+        ipv6 {
+          import none;
+          export filter {
+            if proto = "crs" then {
+              krt_metric = 10;
+              accept;
+            }
+            reject;
+          };
+        };
+      }
+
+      # LTE default routes — exist only while wwan0 is up.
+      # BIRD2's device protocol tracks wwan0 via netlink; when the interface
+      # goes down the routes become unreachable and BGP withdraws them.
+      # Uses interface-name routing (no explicit gateway IP) which is correct
+      # for QMI raw-ip POINTOPOINT NOARP interfaces.
+      #
+      # Preference 50 is below BGP's default of 100 — these routes are only
+      # used by BIRD2 internally as a presence signal for BGP export, NOT for
+      # installing into the kernel as our active default route. The kernel
+      # already gets the wwan default at metric 100 via netifd/qmi.sh, and
+      # we want the BGP-learned default via CRS (kernel metric 10) to be
+      # preferred for D-Link's own outbound traffic when GPON is up.
+      protocol static lte_default {
+        ipv4 {
+          preference 50;
+        };
+        route 0.0.0.0/0 via "wwan0";
+      }
+
+      protocol static lte_default6 {
+        ipv6 {
+          preference 50;
+        };
+        route 2000::/3 via "wwan0";
+      }
+
+      protocol bgp crs {
+        description "MikroTik CRS418 — LTE failover signalling";
+        local 192.168.6.2 as 65000;
+        neighbor 192.168.6.1 as 65000;
+        hold time 30;
+        keepalive time 10;
+
+        ipv4 {
+          # Import all prefixes CRS announces (VLAN subnets, static routes,
+          # k8s BGP routes reflected via RR). Installed into kernel via k4.
+          import all;
+          # Export only the wwan-sourced LTE default route.
+          # BGP-learned CRS routes are never re-exported (iBGP split-horizon
+          # applies; BIRD2 also does not import CRS routes into its RIB from
+          # the kernel, so they cannot appear here).
+          export where proto = "lte_default";
+        };
+
+        ipv6 {
+          # CRS uses Extended Next Hop (RFC 5549) for IPv6 routes, advertising
+          # them with the IPv4 next-hop 192.168.6.1. The Linux kernel cannot
+          # install IPv6 routes with IPv4 next-hops. Accept the routes from BGP
+          # (we negotiated ENHE via "extended next hop yes") but rewrite the
+          # next-hop in the import filter to the CRS's native IPv6 address on
+          # vlan6 before they reach the kernel.
+          extended next hop yes;
+          import filter {
+            gw = 2001:470:61a3:600::1;
+            accept;
+          };
+          # Force our own native IPv6 address as the next-hop when advertising
+          # to CRS, otherwise BIRD2 also uses ENHE and CRS receives a route
+          # with ::ffff:192.168.6.2 which it can't resolve as an IPv6 next-hop.
+          export filter {
+            if proto = "lte_default6" then {
+              bgp_next_hop = 2001:470:61a3:600::2;
+              accept;
+            }
+            reject;
+          };
+        };
+      }
+  notify: Reload bird
+
+- name: Enable and start BIRD2 service
+  community.openwrt.service:
+    name: bird
+    enabled: true
+    state: started
@@ -0,0 +1,101 @@
+---
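+# This device is primarily an AP. The only routing/NAT it performs is the LTE
+# failover path: traffic arriving on uplink is forwarded out wwan and masqueraded.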
+#
+# Zones:
+#   mgmt   — management interface (192.168.255.11)
+#            input: ACCEPT  (SSH, ping reachable from MGMT network)
+#            forward: REJECT (nothing routes through mgmt)
+#
+#   lan    — client bridge (eth0.2, LAN ports)
+#            input: REJECT  (clients cannot SSH into the AP itself)
+#            forward: ACCEPT (traffic passes through to MikroTik for firewalling)
+#
+#   iot    — IoT bridge (eth0.5, wifi only)
+#            input: REJECT  (IoT devices cannot reach the AP itself)
+#            forward: ACCEPT (traffic passes through to MikroTik, which allows
+#                             internet only and blocks all internal networks)
+#
+#   uplink — internet uplink via MikroTik vlan6 (192.168.6.2/24)
+#            input: REJECT  (no inbound connections from internet side)
+#            output: ACCEPT (AP itself initiates outbound — opkg, NTP, etc.)
+#            forward: REJECT (AP does not route client traffic through uplink)
+#
+#   wwan   — LTE modem uplink (Orange PL, /dev/cdc-wdm0, always-on)
+#            input: REJECT  (no inbound from LTE)
+#            output: ACCEPT (AP itself uses LTE for outbound when uplink unavailable)
+#            forward: REJECT (default; overridden by explicit uplink→wwan forwarding rule)
+#            masq/masq6: enabled — NAT all traffic exiting via wwan (own + forwarded)
+#
+# The only inter-zone forwarding rule is uplink → wwan (LTE failover); all other
+# inter-zone policy is on MikroTik.
+
+# Imports the firewall package given in `value` with merge disabled
+# (presumably replacing the existing config rather than merging into it —
+# confirm against the uci module docs). Lines starting with '#' inside
+# `value` are part of the imported text, not YAML comments.
+# Notifies the "Reload firewall" handler.
+- name: Configure firewall
+  community.openwrt.uci:
+    command: import
+    merge: false
+    config: firewall
+    value: |
+      package firewall
+
+      config defaults
+        option syn_flood '1'
+        option input 'REJECT'
+        option output 'ACCEPT'
+        option forward 'REJECT'
+
+      config zone
+        option name 'mgmt'
+        list network 'mgmt'
+        option input 'ACCEPT'
+        option output 'ACCEPT'
+        option forward 'REJECT'
+
+      config zone
+        option name 'lan'
+        list network 'lan'
+        option input 'REJECT'
+        option output 'ACCEPT'
+        option forward 'ACCEPT'
+
+      config zone
+        option name 'iot'
+        list network 'iot'
+        option input 'REJECT'
+        option output 'ACCEPT'
+        option forward 'ACCEPT'
+
+      config zone
+        option name 'uplink'
+        list network 'uplink'
+        option input 'REJECT'
+        option output 'ACCEPT'
+        option forward 'REJECT'
+
+      config zone
+        option name 'wwan'
+        list network 'wwan'
+        option input 'REJECT'
+        option output 'ACCEPT'
+        option forward 'REJECT'
+        option masq '1'
+        option masq6 '1'
+
+      # Forward traffic from MikroTik (arriving on uplink/vlan6) out through wwan
+      # during LTE failover. MikroTik routes LAN/SRV/IoT traffic here when GPON
+      # is down and the BGP-learned default via 192.168.6.2 is active.
+      config forwarding
+        option src 'uplink'
+        option dest 'wwan'
+
+      config rule
+        option name 'Allow-ICMPv6-uplink'
+        option src 'uplink'
+        option proto 'icmpv6'
+        option target 'ACCEPT'
+
+      config rule
+        option name 'Allow-ICMPv6-wwan'
+        option src 'wwan'
+        option proto 'icmpv6'
+        option target 'ACCEPT'
+
+  notify: Reload firewall
@@ -0,0 +1,20 @@
+---
+# Point the WLAN LED at the IoT access-point netdev.
+- name: Set WLAN WiFi LED to trigger on IoT net dev
+  community.openwrt.uci:
+    key: system.led_wifi_led.dev
+    value: phy0-ap0  # netdev of the IoT network
+    command: set
+
+# Trigger modes configured for that LED.
+- name: Set WLAN WiFi LED triggers
+  community.openwrt.uci:
+    key: system.led_wifi_led.mode
+    value: [link, tx, rx]
+    command: set
+
+# Commit the staged changes to the system config.
+- name: Commit LED config
+  community.openwrt.uci:
+    key: system
+    command: commit
@@ -0,0 +1,31 @@
+---
+# Role entry point. Each stage lives in its own task file and is statically
+# imported in the order listed; the order matters (see the note on packages).
+- name: Preflight — verify connectivity
+  ansible.builtin.import_tasks: preflight.yml
+
+- name: System configuration
+  ansible.builtin.import_tasks: system.yml
+
+# Packages must be installed before wwan.yml — usb-modeswitch is what triggers
+# the modem out of EDL mode (05c6:9008 → 2020:2033 QMI), and uqmi/luci-proto-qmi
+# provide the tools used downstream.
+- name: Package management
+  ansible.builtin.import_tasks: packages.yml
+  # Skipped entirely when no packages are requested.
+  when: openwrt_packages | length > 0
+
+- name: Network configuration
+  ansible.builtin.import_tasks: network.yml
+
+- name: WWAN modem configuration
+  ansible.builtin.import_tasks: wwan.yml
+
+- name: BIRD2 BGP configuration
+  ansible.builtin.import_tasks: bird.yml
+
+- name: Firewall configuration
+  ansible.builtin.import_tasks: firewall.yml
+
+- name: Wireless configuration
+  ansible.builtin.import_tasks: wireless.yml
+
+- name: LED configuration
+  ansible.builtin.import_tasks: led.yml
@@ -0,0 +1,180 @@
+---
+#   Network layout:
+#   MikroTik ether3 ↔ dlink WAN port (switch0 port4)
+#   MikroTik sends MGMT traffic untagged, vlan2/vlan5/vlan6 tagged.
+#
+#   switch0 VLAN table:
+#     VLAN 1 (MGMT):   CPU(6) tagged, WAN(4) untagged                        → eth0.1 → mgmt
+#     VLAN 2 (LAN):    CPU(6) tagged, WAN(4) tagged, LAN1-4(0-3) untagged    → eth0.2 → br-lan → lan
+#     VLAN 5 (IOT):    CPU(6) tagged, WAN(4) tagged                          → eth0.5 → br-iot → iot
+#     VLAN 6 (UPLINK): CPU(6) tagged, WAN(4) tagged                          → eth0.6 → uplink
+#
+#   Interfaces:
+#     mgmt   — static 192.168.255.11/24 on eth0.1, management
+#     lan    — bridge (br-lan) on eth0.2, LAN clients via LAN ports
+#     iot    — bridge (br-iot) on eth0.5, IoT clients via wifi only
+#     uplink — static 192.168.6.2/24 + 2001:470:61a3:600::2/64 on eth0.6, BGP peer link to CRS (no static gateway — default learned via BIRD2)
+#     wwan   — QMI LTE modem (/dev/cdc-wdm0), Orange PL dual-stack failover (APNs: internet + internetipv6)
+#              Manual ifup only (option auto '0'); modem-specific quirks handled in wwan.yml.
+
+# Imports the network package given in `value` with merge disabled
+# (presumably replacing the existing config rather than merging into it —
+# confirm against the uci module docs). The payload defines the switch VLANs,
+# the br-lan/br-iot bridges, the mgmt policy-routing rules and all interfaces.
+# Lines starting with '#' inside `value` are part of the imported text.
+# Notifies the "Reload network" handler.
+- name: Configure network
+  community.openwrt.uci:
+    command: import
+    merge: false
+    config: network
+    value: |
+      package network
+
+      config interface 'loopback'
+        option device 'lo'
+        option proto 'static'
+        list ipaddr '127.0.0.1/8'
+
+      config globals 'globals'
+        option ula_prefix 'fd4d:508e:899a::/48'
+
+      config switch
+        option name 'switch0'
+        option reset '1'
+        option enable_vlan '1'
+
+      config switch_vlan
+        option device 'switch0'
+        option vlan '1'
+        option vid '1'
+        option description 'mgmt'
+        option ports '4 6t'
+
+      config switch_vlan
+        option device 'switch0'
+        option vlan '2'
+        option vid '2'
+        option description 'lan'
+        option ports '0 1 2 3 4t 6t'
+
+      config switch_vlan
+        option device 'switch0'
+        option vlan '5'
+        option vid '5'
+        option description 'iot'
+        option ports '4t 6t'
+
+      config switch_vlan
+        option device 'switch0'
+        option vlan '6'
+        option vid '6'
+        option description 'uplink'
+        option ports '4t 6t'
+
+      config device
+        option name 'br-lan'
+        option type 'bridge'
+        list ports 'eth0.2'
+
+      config interface 'mgmt'
+        option device 'eth0.1'
+        option proto 'static'
+        option ipaddr '{{ openwrt_mgmt_ip }}/{{ openwrt_mgmt_prefix }}'
+
+      # Policy routing for mgmt interface.
+      #
+      # Without this, replies to traffic destined for 192.168.255.11 (mgmt IP)
+      # would be sent via the default route (eth0.6/uplink, src 192.168.6.2)
+      # instead of back through eth0.1. This is because mgmt clients (e.g. PCs
+      # on 192.168.0.0/24) are not on the directly connected 192.168.255.0/24
+      # subnet — they reach 192.168.255.11 via MikroTik routing, so the kernel
+      # has no connected route matching the reply destination and falls back to
+      # the default route, causing asymmetric routing.
+      #
+      # ip4table cannot be used here — it generates rules matching only the
+      # interface IP (from 192.168.255.11) and destination (to 192.168.255.11/24),
+      # not the source subnet needed for return traffic from arbitrary clients.
+      # Instead we manually add a rule matching any traffic sourced from the mgmt
+      # subnet and a default route in table 100 via the MikroTik mgmt gateway.
+      # Same-subnet traffic (src and dst both in 192.168.255.0/24) must stay in
+      # main table so replies go directly out eth0.1 without being redirected.
+      # Priority 500 ensures this fires before the catch-all rule below (1000).
+      config rule
+        option src '192.168.255.0/24'
+        option dest '192.168.255.0/24'
+        option lookup 'main'
+        option priority '500'
+
+      # All other traffic sourced from 192.168.255.0/24 (i.e. replies to clients
+      # outside this subnet, routed via MikroTik) uses table 100 which has a
+      # default route back via eth0.1 to prevent asymmetric routing.
+      config rule
+        option src '192.168.255.0/24'
+        option lookup '100'
+        option priority '1000'
+
+      config route
+        option table '100'
+        option interface 'mgmt'
+        option target '0.0.0.0/0'
+        option gateway '{{ openwrt_mgmt_gateway }}'
+
+      config interface 'lan'
+        option device 'br-lan'
+        option proto 'none'
+
+      config device
+        option name 'br-iot'
+        option type 'bridge'
+        list ports 'eth0.5'
+
+      config interface 'iot'
+        option device 'br-iot'
+        option proto 'none'
+
+      # LTE failover via embedded BroadMobi BM806C (Qualcomm MDM9225, fw M1.2.0_E1.0.1_A1.1.8).
+      # This modem has a firmware bug: when QMI --start-network is invoked with --apn
+      # (a WDS TLV), the modem establishes a phantom bearer that gets assigned IP
+      # addresses but cannot pass downlink data — TX packets egress, zero replies arrive.
+      # See https://forum.openwrt.org/t/problem-with-bm806u-e1-dwr-921-c3/130094 and
+      # https://github.com/openwrt/openwrt/issues/6295 (FS#1363). Workaround: configure
+      # the APN via NVRAM profile (uqmi --modify-profile, done by qmi.sh) and reference
+      # the profile via --start-network --profile, NOT --apn. qmi.sh already supports
+      # passing --profile when UCI option 'profile' is set — and 'apn' is kept because
+      # qmi.sh's --modify-profile call (line 314) still needs it to write the profile.
+      # qmi.sh only writes profile 1; profile 2 (used for the IPv6 v6apn) is created by
+      # the wwan role task.
+      #
+      # The BM806C also requires raw-ip framing (kernel qmi_wwan driver mode) to
+      # work properly. qmi.sh defaults to 802.3 mode; a patch in the wwan role task
+      # changes this to raw-ip for our setup.
+      config interface 'wwan'
+        option device '/dev/cdc-wdm0'
+        option proto 'qmi'
+        option apn 'internet'
+        option v6apn 'internetipv6'
+        option profile '1'
+        option v6profile '2'
+        option auth 'pap'
+        option username 'internet'
+        option password 'internet'
+        option pdptype 'ipv4v6'
+        option dhcp '0'
+        option dhcpv6 '0'
+        option peerdns '0'
+        option metric '100'
+        # auto '0': netifd does not bring up wwan at boot. The modem takes
+        # 30-90s after boot before its QMI service responds, and netifd's
+        # retry/backoff handles this poorly (failed attempts leave the
+        # interface in 'pending' state). A separate procd service waits
+        # for the modem to be ready and triggers ifup wwan once.
+        option auto '0'
+
+      config interface 'uplink'
+        option device 'eth0.6'
+        option proto 'static'
+        option ipaddr '192.168.6.2/24'
+        option dns '192.168.6.1'
+        option ip6addr '2001:470:61a3:600::2/64'
+
+  notify: Reload network
+
+# Commit any staged changes to the network config.
+- name: Commit network config
+  community.openwrt.uci:
+    key: network
+    command: commit
@@ -0,0 +1,7 @@
+---
+# Installs everything listed in openwrt_packages in one apk call (names joined
+# with commas), with update_cache enabled. Skipped when the list is empty.
+- name: Install packages
+  when: openwrt_packages | length > 0
+  community.openwrt.apk:
+    update_cache: true
+    state: present
+    name: "{{ openwrt_packages | join(',') }}"
@@ -0,0 +1,11 @@
+---
+# Runs the collection's ping module with no arguments against the target.
+- name: Verify connectivity to OpenWrt device
+  community.openwrt.ping:
+
+# Registers the setup module's result as openwrt_facts for the message below.
+- name: Gather OpenWrt facts
+  community.openwrt.setup:
+  register: openwrt_facts
+
+# Prints the inventory hostname and the reported system name, falling back to
+# the literal 'OpenWrt' when ansible_system is not defined.
+- name: Show device info
+  ansible.builtin.debug:
+    msg: "Managing {{ inventory_hostname }} ({{ openwrt_facts.ansible_facts.ansible_system | default('OpenWrt') }})"
@@ -0,0 +1,40 @@
+---
+# Stage hostname, timezone and NTP servers in the system config, then commit
+# them in one go.
+- name: Set hostname
+  community.openwrt.uci:
+    key: system.@system[0].hostname
+    value: "{{ openwrt_hostname }}"
+    command: set
+
+- name: Set timezone
+  community.openwrt.uci:
+    key: system.@system[0].timezone
+    value: "{{ openwrt_timezone }}"
+    command: set
+
+- name: Configure NTP servers
+  community.openwrt.uci:
+    key: system.ntp.server
+    value: "{{ openwrt_ntp_servers }}"
+    command: set
+
+# Commit the three settings staged above.
+- name: Commit system config
+  community.openwrt.uci:
+    key: system
+    command: commit
+
+# Dropbear has no `authorized_keys` UCI option — it reads public keys from
+# /etc/dropbear/authorized_keys — so the previous `uci set` (which was also
+# never committed) did not install any key. Write the key list to that file
+# instead, one key per line.
+# NOTE: the file is replaced wholesale; openwrt_ssh_authorized_keys must
+# include the key Ansible itself connects with. Skipped when the list is
+# empty so an unset variable can never wipe the existing keys.
+- name: Set SSH authorized keys
+  community.openwrt.copy:
+    dest: /etc/dropbear/authorized_keys
+    mode: '0600'
+    owner: root
+    group: root
+    content: "{{ openwrt_ssh_authorized_keys | join('\n') }}\n"
+  when: openwrt_ssh_authorized_keys | length > 0
+
+# The D-Link is a pure AP/relay — no local clients need DNS from it.
+# Disable dnsmasq entirely and point the system resolver directly at the
+# CRS (192.168.6.1), which is always reachable via vlan6 regardless of
+# WAN state and resolves using public upstream servers (1.1.1.1 etc.).
+- name: Disable dnsmasq service
+  community.openwrt.service:
+    name: dnsmasq
+    # Stop it now and keep it from being enabled.
+    state: stopped
+    enabled: false
@@ -0,0 +1,50 @@
+---
+# Looks up the KV v2 secret at openbao_fields.iot_wifi.path (mount taken from
+# openbao_kv_mount) and stores the value under password_key as the fact
+# openwrt_iot_wifi_password. no_log keeps the secret out of task output.
+- name: Load IoT WiFi password from OpenBao
+  ansible.builtin.set_fact:
+    openwrt_iot_wifi_password: >-
+      {{
+        lookup(
+          'community.hashi_vault.vault_kv2_get',
+          openbao_fields.iot_wifi.path,
+          engine_mount_point=openbao_kv_mount
+        ).secret[openbao_fields.iot_wifi.password_key]
+      }}
+  no_log: true
+
+# IoT SSID on radio0: hidden, WPA2-PSK, restricted to an allow-list of MACs.
+# MAC addresses are quoted so YAML always treats them as strings.
+- name: Configure IoT WiFi interface (szafa, WPA2, network iot)
+  notify: Reload wireless
+  community.openwrt.uci:
+    command: section
+    config: wireless
+    type: wifi-iface
+    name: iot_radio0
+    replace: true
+    find:
+      device: radio0
+      ssid: szafa
+    value:
+      device: radio0
+      network: iot
+      mode: ap
+      ssid: szafa
+      # Do not broadcast the SSID.
+      hidden: '1'
+      # Only the addresses in maclist are allowed.
+      macfilter: allow
+      maclist:
+        - '80:64:7c:99:21:20'  # Thermometer
+        - 'C0:F8:53:89:E5:EF'  # Smart plug
+        - 'C0:F8:53:89:E3:42'  # Smart plug
+      encryption: psk2
+      key: "{{ openwrt_iot_wifi_password }}"
+      disabled: '0'
+
+# Clear the disabled flag on radio0.
+- name: Enable radio0
+  notify: Reload wireless
+  community.openwrt.uci:
+    key: wireless.radio0.disabled
+    value: '0'
+    command: set
+
+# Commit the staged wireless changes.
+- name: Commit wireless config
+  community.openwrt.uci:
+    key: wireless
+    command: commit
@@ -0,0 +1,240 @@
+---
+# Configures the embedded BroadMobi BM806C LTE modem for QMI use.
+#
+# Two workarounds are applied here for documented bugs in this modem's firmware
+# (M1.2.0_E1.0.1_A1.1.8, no public updates available):
+#
+#   1. raw-ip framing. The modem advertises 802.3 support but the 802.3 firmware
+#      path is buggy — downlink frames don't reach the host. raw-ip works.
+#      See bmork's kernel commit "net: qmi_wwan: support 'raw IP' mode":
+#      'newer generations of QMI hardware and firmware have moved towards
+#       defaulting to raw IP mode instead, followed by an increasing number of
+#       bugs in the already buggy 802.3 firmware implementation'.
+#      qmi.sh hardcodes 802.3; we patch it in-place to use raw-ip.
+#
+#   2. --profile instead of --apn on --start-network. With --apn, the modem
+#      establishes a phantom bearer that has no working downlink data path.
+#      With --profile referencing a pre-configured NVRAM profile, data flows.
+#      See https://forum.openwrt.org/t/problem-with-bm806u-e1-dwr-921-c3/130094
+#      and https://github.com/openwrt/openwrt/issues/6295 (FS#1363).
+#      qmi.sh already supports the 'profile' UCI option; we set it in the
+#      wwan interface config (profile=1 for IPv4, v6profile=2 for IPv6).
+#      qmi.sh's --modify-profile call writes profile 1; profile 2 is bootstrapped
+#      here for the IPv6-specific APN.
+
+# Patch qmi.sh to request raw-ip framing instead of 802.3. Two distinct uqmi
+# calls in the upstream proto handler request the data format — both must be
+# patched for the readback to return raw-ip and trigger the kernel driver's
+# sysfs raw_ip=Y flip. Idempotent: lineinfile is a no-op once the patterns
+# no longer match.
+# Diff output is suppressed for these tasks: the full qmi.sh file (~13 KB) is
+# included in both before/after diff payloads, and ansible's SSH stdout parser
+# truncates the resulting JSON, causing 'Module result deserialization failed:
+# No end of json char found'. The change is self-evident from the task name.
+# Group 1 captures the uqmi call up to and including --set-data-format, group 2
+# the remainder of the line; only the "802.3" argument in between is replaced.
+- name: Patch qmi.sh — set-data-format to raw-ip
+  community.openwrt.lineinfile:
+    path: /lib/netifd/proto/qmi.sh
+    regex: '^(\s*uqmi .* --set-data-format) 802\.3(.*)$'
+    line: '\1 raw-ip\2'
+    backrefs: true
+  diff: false
+
+# Same substitution for the --wda-set-data-format call.
+- name: Patch qmi.sh — wda-set-data-format to raw-ip
+  community.openwrt.lineinfile:
+    path: /lib/netifd/proto/qmi.sh
+    regex: '^(\s*uqmi .* --wda-set-data-format) 802\.3(.*)$'
+    line: '\1 raw-ip\2'
+    backrefs: true
+  diff: false
+
+# The kernel rejects writes to /sys/class/net/wwan0/qmi/raw_ip while the netdev
+# is up: 'Cannot change a running device' (-EBUSY). qmi.sh tries to flip it
+# while the interface is up — this works in 802.3 mode (no-op when already N),
+# but with our raw-ip patch above, the flip is mandatory and must succeed.
+# We bracket the sysfs write with ip link down/up.
+# Group 1 preserves the original indentation. The regex is anchored on a line
+# that begins with the echo, so the rewritten line no longer matches it.
+- name: Patch qmi.sh — bracket raw_ip sysfs write with ip link down/up
+  community.openwrt.lineinfile:
+    path: /lib/netifd/proto/qmi.sh
+    regex: '^(\s*)echo "Y" > /sys/class/net/\$ifname/qmi/raw_ip$'
+    line: '\1ip link set $ifname down; echo "Y" > /sys/class/net/$ifname/qmi/raw_ip; ip link set $ifname up'
+    backrefs: true
+  diff: false
+
+# Profile 2 in modem NVRAM holds the IPv6 APN. qmi.sh only manages profile 1
+# (the v4 APN via --modify-profile, line 314); profile 2 is our responsibility.
+# These steps are skipped if the modem isn't enumerated yet (fresh boot before
+# usb-modeswitch completes, or modem in a fault state).
+# Records whether /dev/cdc-wdm0 exists; later tasks are gated on this result.
+- name: Check if QMI device is available
+  community.openwrt.stat:
+    path: /dev/cdc-wdm0
+  register: wwan_cdc_wdm
+
+# Read-only query, so it never reports a change. failed_when: false keeps a
+# failing uqmi call from aborting the play; the return code and output are
+# checked by the conditions on the profile-2 block instead.
+- name: Query QMI profile list
+  community.openwrt.command:
+    cmd: uqmi -t 3000 -d /dev/cdc-wdm0 --get-profile-list 3gpp
+  register: wwan_profile_list
+  changed_when: false
+  failed_when: false
+  when: wwan_cdc_wdm.stat.exists | default(false)
+
+# Runs only when the QMI device exists, the profile query exited 0, and its
+# output is non-empty and starts with '{' (i.e. looks like a JSON object).
+- name: Configure IPv6 APN profile 2
+  when:
+    - wwan_cdc_wdm.stat.exists | default(false)
+    - wwan_profile_list.rc | default(1) == 0
+    - wwan_profile_list.stdout | default('') | trim | length > 0
+    - wwan_profile_list.stdout | default('') | trim is match('^\\{')
+  block:
+    # Collects the 'index' attribute of every entry under 'profiles' in the
+    # parsed output; an absent 'profiles' key yields an empty list.
+    - name: Parse profile indexes
+      ansible.builtin.set_fact:
+        wwan_profile_indexes: >-
+          {{ (wwan_profile_list.stdout | from_json).profiles
+             | default([]) | map(attribute='index') | list }}
+
+    # NOTE(review): assumes the newly created profile is assigned index 2 and
+    # that indexes are integers in uqmi's JSON — confirm on the device.
+    - name: Create profile 2 for IPv6 APN if missing
+      community.openwrt.command:
+        cmd: uqmi -t 3000 -d /dev/cdc-wdm0 --create-profile 3gpp --apn internetipv6 --pdp-type ipv6
+      when: 2 not in wwan_profile_indexes
+
+    # --modify-profile is idempotent at the modem level. We can't detect
+    # whether values changed (uqmi doesn't return diff info), so we always
+    # report 'ok' (changed_when: false) to keep play output clean. The cost
+    # of always calling this is one QMI roundtrip.
+    - name: Ensure profile 2 settings are current
+      community.openwrt.command:
+        cmd: uqmi -t 3000 -d /dev/cdc-wdm0 --modify-profile 3gpp,2 --apn internetipv6 --pdp-type ipv6
+      changed_when: false
+
+# On cold boot the BM806C's UIM (SIM) QMI service comes up permanently
+# broken: --uim-get-sim-state returns {}, --get-imsi returns
+# "UIM uninitialized", AT+CPIN? returns +CME ERROR: SIM busy, and the
+# modem never converges (verified at uptime 21 min with no intervention).
+# CTL/NAS/WDS do come up after ~5 min of warmup, but UIM does not.
+#
+# A single USB re-enumeration of the device (authorized=0 / authorized=1)
+# forces the modem to redo its internal QMI service init from scratch.
+# After this, UIM comes up within ~1 s and ifup wwan succeeds normally.
+#
+# We use authorized=0/1 rather than usb/unbind+bind because the former
+# keeps qmi_wwan in the bound-drivers list and the kernel re-runs its
+# bind machinery for us; the latter detaches and re-attaches drivers
+# explicitly. Both work; authorized is cleaner.
+#
+# Full investigation, ruled-out hypotheses, and reproduction steps:
+# /root/wwan-diag/boot-wedge-investigation.md on the router.
+# Installs the one-shot worker that waits for the modem, re-authorizes its USB
+# port, runs `ifup wwan` and then adds a non-source-specific IPv6 default
+# route. The script ends with an `if` rather than `[ ... ] && log` so that it
+# exits 0 on the success path (a trailing `[ -z "$gw6" ] && ...` returns 1
+# whenever the gateway WAS found).
+- name: Install wwan-bringup worker script
+  community.openwrt.copy:
+    dest: /usr/libexec/wwan-bringup
+    mode: '0755'
+    owner: root
+    group: root
+    content: |
+      #!/bin/sh
+      # Force-clean BM806C cold-boot UIM wedge by re-enumerating the USB
+      # device once, then bring up wwan. Called by /etc/init.d/wwan-bringup
+      # as a procd service.
+
+      DEV=/dev/cdc-wdm0
+      IFACE=wwan
+      USB_PORT=1-1
+
+      log() {
+        logger -t wwan-bringup "$1"
+      }
+
+      # Wait for cold-boot enumeration of cdc-wdm0 (<=60s).
+      waited=0
+      while [ ! -e "$DEV" ]; do
+        sleep 1
+        waited=$((waited + 1))
+        [ $waited -ge 60 ] && break
+      done
+      if [ ! -e "$DEV" ]; then
+        log "$DEV never appeared within 60s; giving up"
+        exit 1
+      fi
+
+      # Force-clean re-enumeration. The BM806C's UIM QMI service never
+      # comes up on cold boot without this.
+      log "BM806C cold-boot UIM workaround: re-authorizing $USB_PORT"
+      echo 0 > /sys/bus/usb/devices/$USB_PORT/authorized
+      sleep 3
+      echo 1 > /sys/bus/usb/devices/$USB_PORT/authorized
+
+      # Wait for cdc-wdm0 to return after re-enumeration (<=30s).
+      waited=0
+      while [ ! -e "$DEV" ]; do
+        sleep 1
+        waited=$((waited + 1))
+        [ $waited -ge 30 ] && break
+      done
+      if [ ! -e "$DEV" ]; then
+        log "$DEV did not return after re-auth; giving up"
+        exit 1
+      fi
+
+      # qmi.sh's own SIM-init and network-registration loops handle the
+      # small remaining warmup (~5-30s) gracefully now that UIM is healthy.
+      log "bringing up $IFACE"
+      ifup "$IFACE"
+
+      # qmi.sh installs an IPv6 default route with a source-specific prefix
+      # constraint (`default from 2a00:f44:.../64 ...`). This means only
+      # traffic sourced from the wwan IPv6 prefix uses it — forwarded traffic
+      # from internal subnets fails routing lookup with "net unreachable"
+      # before masquerade can rewrite the source. Add a non-source-specific
+      # default at a higher metric so forwarded traffic has a valid route,
+      # gets routed out wwan0, then masqueraded by fw4.
+      #
+      # Wait up to 90s for qmi.sh to install its source-specific default,
+      # then derive the gateway and add a regular default route.
+      waited=0
+      while [ $waited -lt 90 ]; do
+        gw6=$(ip -6 route show default dev wwan0 2>/dev/null | awk '/^default from/ {print $5; exit}')
+        if [ -n "$gw6" ]; then
+          if ip -6 route show default dev wwan0 | grep -qE "^default via "; then
+            log "non-source-specific IPv6 default already present"
+          else
+            log "adding non-source-specific IPv6 default via $gw6"
+            ip -6 route add default via "$gw6" dev wwan0 metric 1024
+          fi
+          break
+        fi
+        sleep 3
+        waited=$((waited + 3))
+      done
+      # Use if/fi (not `[ ... ] && log`) so the script's exit status is 0
+      # when the gateway was found and the route handled above.
+      if [ -z "$gw6" ]; then
+        log "warning: wwan IPv6 gateway never appeared, skipping default route"
+      fi
+
+
+# Installs the rc.common init script (START=99) whose start_service launches
+# /usr/libexec/wwan-bringup in the background and records its PID in
+# /var/run/wwan-bringup.pid; stop_service kills that PID and removes the file.
+# Lines starting with '#' inside `content` are part of the installed script.
+- name: Install wwan-bringup init script
+  community.openwrt.copy:
+    dest: /etc/init.d/wwan-bringup
+    mode: '0755'
+    owner: root
+    group: root
+    content: |
+      #!/bin/sh /etc/rc.common
+      # Starts the wwan-bringup worker which re-enumerates the BM806C USB
+      # device once to clear the cold-boot UIM wedge, then triggers
+      # `ifup wwan`. See /usr/libexec/wwan-bringup.
+
+      START=99
+      USE_PROCD=1
+
+      # One-shot script: launch the worker directly without procd_open_instance
+      # so procd does not respawn it after successful exit.
+      PIDFILE=/var/run/wwan-bringup.pid
+
+      start_service() {
+        /usr/libexec/wwan-bringup &
+        echo $! > $PIDFILE
+      }
+
+      stop_service() {
+        [ -f $PIDFILE ] && kill "$(cat $PIDFILE)" 2>/dev/null
+        rm -f $PIDFILE
+      }
+
+
+# Make sure the wwan-bringup init service is enabled and in the started state.
+- name: Enable and start wwan-bringup service
+  community.openwrt.service:
+    name: wwan-bringup
+    state: started
+    enabled: true
@@ -0,0 +1,10 @@
+---
+# Secret references only; actual values are loaded from OpenBao/Vault at runtime.
+
+# KV mount passed as engine_mount_point to the vault_kv2_get lookup.
+openbao_kv_mount: secret
+
+# Per-secret references: `path` is the secret's path within the mount,
+# `password_key` the field inside that secret holding the value.
+openbao_fields:
+  iot_wifi:
+    path: openwrt_iot_wifi
+    password_key: password
+
@@ -3,9 +3,9 @@
  community.routeros.api_modify:
    path: ip address
    data:
-      - address: 172.17.0.1/16
-        interface: dockers
-        network: 172.17.0.0
+      - address: 172.20.0.1/24
+        interface: containers
+        network: 172.20.0.0
      - address: 192.168.4.1/24
        interface: lo
        network: 192.168.4.0
@@ -24,9 +24,14 @@
      - address: 192.168.3.1/24
        interface: vlan3
        network: 192.168.3.0
+      - address: 192.168.5.1/24
+        interface: vlan5
+        network: 192.168.5.0
+      - address: 192.168.6.1/24
+        interface: vlan6
+        network: 192.168.6.0
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure IPv6 addresses
  community.routeros.api_modify:
@@ -35,14 +40,21 @@
      - address: 2001:470:70:dd::2/64
        advertise: false
        interface: sit1
-      - address: ::ffff:ffff:ffff:ffff/64
-        from-pool: pool1
+      # Static instead of from-pool: pool allocation is dynamic (first free /64,
+      # e.g. ...:0::/64) which made the RDNSS address advertised in ND config
+      # point at a nonexistent router address. HE prefix is static, so static
+      # per-VLAN addressing is deterministic and matches docs/network.md.
+      - address: 2001:470:61a3:9:ffff:ffff:ffff:ffff/64
        interface: vlan2
      - address: 2001:470:61a3:500:ffff:ffff:ffff:ffff/64
-        interface: dockers
+        interface: containers
      - address: 2001:470:61a3:100::1/64
        advertise: false
        interface: vlan4
+      - address: 2001:470:61a3:a:ffff:ffff:ffff:ffff/64
+        interface: vlan5
+      - address: 2001:470:61a3:600::1/64
+        advertise: false
+        interface: vlan6
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true
@@ -5,10 +5,9 @@
    data:
      - name: bridge1
        vlan-filtering: true
-      - name: dockers
+      - name: containers
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure VLAN interfaces
  community.routeros.api_modify:
@@ -26,9 +25,16 @@
        comment: SERVER LAN
        interface: bridge1
        vlan-id: 4
+      - name: vlan5
+        comment: IOT
+        interface: bridge1
+        vlan-id: 5
+      - name: vlan6
+        comment: OPENWRT UPLINK
+        interface: bridge1
+        vlan-id: 6
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure interface lists
  community.routeros.api_modify:
@@ -38,7 +44,6 @@
        comment: contains interfaces facing internet
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure interface list members
  community.routeros.api_modify:
@@ -46,27 +51,32 @@
    data:
      - interface: pppoe-gpon
        list: wan
-      - interface: lte1
-        list: wan
      - interface: sit1
        list: wan
+      - interface: vlan6
+        list: wan
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure bridge ports
  community.routeros.api_modify:
    path: interface bridge port
    data:
-      - bridge: dockers
-        interface: veth1
+      - bridge: containers
+        interface: veth-tailscale
        comment: Tailscale container interface
+      - bridge: containers
+        interface: veth-coredns
+        comment: CoreDNS container interface
      - bridge: bridge1
        interface: ether1
        pvid: 2
      - bridge: bridge1
        interface: ether2
        pvid: 2
+      - bridge: bridge1
+        interface: ether3
+        comment: OpenWrt AP (dlink)
      - bridge: bridge1
        interface: ether8
        pvid: 4
@@ -82,16 +92,21 @@
        interface: ether11
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure bridge VLAN membership
  community.routeros.api_modify:
    path: interface bridge vlan
    data:
      - bridge: bridge1
-        tagged: sfp-sfpplus2
+        tagged: sfp-sfpplus2,ether3
        untagged: ether1,ether2,ether9
        vlan-ids: 2
+      - bridge: bridge1
+        tagged: bridge1,ether3
+        vlan-ids: 5
+      - bridge: bridge1
+        tagged: bridge1,ether3
+        vlan-ids: 6
      - bridge: bridge1
        tagged: sfp-sfpplus2
        untagged: ether10
@@ -101,7 +116,6 @@
        vlan-ids: 4
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure IPv4 pools
  community.routeros.api_modify:
@@ -113,9 +127,11 @@
      - name: dhcp_pool1
        ranges: 192.168.255.1-192.168.255.9,192.168.255.11-192.168.255.254
        comment: MGMT DHCP pool
+      - name: dhcp_pool2
+        ranges: 192.168.5.50-192.168.5.250
+        comment: IOT DHCP pool
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure DHCP servers
  community.routeros.api_modify:
@@ -131,26 +147,17 @@
        interface: bridge1
        lease-time: 30m
        comment: MGMT
+      - name: dhcp3
+        address-pool: dhcp_pool2
+        interface: vlan5
+        lease-time: 30m
+        comment: IOT
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

- name: Configure DHCP networks
-  community.routeros.api_modify:
-    path: ip dhcp-server network
-    data:
-      - address: 192.168.0.0/24
-        dns-server: 192.168.0.1
-        gateway: 192.168.0.1
-      - address: 192.168.255.0/24
-        dns-none: true
-        gateway: 192.168.255.10
-    handle_absent_entries: remove
-    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true
-
-# TODO: IPv6 pools are useful when we have dynamic prefix, but we don't
-# We can remove it now
+# Pool is no longer referenced — vlan2/vlan5 now use static addresses
+# (addressing.yml) so the RDNSS addresses in ND config are deterministic.
+# Kept defined for one run after migration; safe to delete afterwards.
 - name: Configure IPv6 pools
  community.routeros.api_modify:
    path: ipv6 pool
@@ -160,7 +167,6 @@
        prefix-length: 64
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure DNS
  community.routeros.api_find_and_modify:
@@ -170,7 +176,27 @@
    values:
      allow-remote-requests: true
      cache-size: 20480
-      servers: 1.1.1.1,1.0.0.1,2606:4700:4700::1111,2606:4700:4700::1001
+      # CoreDNS container: plain forwarder with selective AAAA suppression.
+      # Forwards upstream to 1.1.1.1/8.8.8.8.
+      servers: 172.20.0.3
+
+- name: Configure DNS static entries
+  community.routeros.api_modify:
+    path: ip dns static
+    handle_absent_entries: remove
+    handle_entries_content: remove_as_much_as_possible
+    data:
+      # Forward ts.net and all of its subdomains to Tailscale MagicDNS.
+      - comment: Tailscale MagicDNS
+        name: ts.net
+        match-subdomain: true
+        type: FWD
+        forward-to: 100.100.100.100
+      # Do NOT add a lumpiasty.xyz FWD entry here. RouterOS FWD entries return
+      # NOERROR with an empty answer instead of relaying NXDOMAIN, which breaks
+      # getaddrinfo search-domain processing (ENOTFOUND for valid names in k8s
+      # pods). Our own zone is handled in the CoreDNS Corefile (lumpiasty.xyz
+      # server block, AAAA kept) which relays rcodes correctly.
+      # See docs/coredns.md.

 - name: Configure NAT-PMP global settings
  community.routeros.api_find_and_modify:
@@ -184,7 +210,7 @@
  community.routeros.api_modify:
    path: ip nat-pmp interfaces
    data:
-      - interface: dockers
+      - interface: containers
        type: internal
      - interface: pppoe-gpon
        type: external
@@ -192,7 +218,6 @@
        type: internal
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure UPnP global settings
  community.routeros.api_find_and_modify:
@@ -206,7 +231,7 @@
  community.routeros.api_modify:
    path: ip upnp interfaces
    data:
-      - interface: dockers
+      - interface: containers
        type: internal
      - interface: pppoe-gpon
        type: external
@@ -214,7 +239,22 @@
        type: internal
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true
+
+- name: Configure DHCP networks
+  community.routeros.api_modify:
+    path: ip dhcp-server network
+    handle_absent_entries: remove
+    handle_entries_content: remove_as_much_as_possible
+    data:
+      # 192.168.0.0/24: the router is both gateway and DNS server.
+      - address: 192.168.0.0/24
+        gateway: 192.168.0.1
+        dns-server: 192.168.0.1
+      # 192.168.255.0/24: no DNS server is handed out (dns-none).
+      - address: 192.168.255.0/24
+        gateway: 192.168.255.10
+        dns-none: true
+      # 192.168.5.0/24: the router is both gateway and DNS server.
+      - address: 192.168.5.0/24
+        gateway: 192.168.5.1
+        dns-server: 192.168.5.1

 - name: Configure IPv6 ND defaults
  community.routeros.api_find_and_modify:
@@ -224,3 +264,21 @@
      default: true
    values:
      advertise-dns: true
+
+# RDNSS (RFC 8106): advertise an IPv6 DNS server in RAs so dual-stack clients
+# have an IPv6 resolver. Points at the router's per-VLAN IPv6 address; RouterOS
+# DNS forwards to CoreDNS. No pref64 — NAT64 has been removed (see docs/coredns.md);
+# AAAA suppression now happens in CoreDNS, no client-side translation needed.
+- name: Configure IPv6 ND per-interface (RDNSS)
+  community.routeros.api_modify:
+    path: ipv6 nd
+    data:
+      # RouterOS creates new ND entries with advertise-dns=no, and that
+      # suppresses the RDNSS option entirely even when a static dns= list is
+      # configured, so advertise-dns is switched on explicitly per entry.
+      - interface: vlan2
+        dns: 2001:470:61a3:9:ffff:ffff:ffff:ffff
+        advertise-dns: true
+      - interface: vlan5
+        dns: 2001:470:61a3:a:ffff:ffff:ffff:ffff
+        advertise-dns: true
@@ -0,0 +1,47 @@
+---
+- name: Configure container runtime defaults
+  community.routeros.api_find_and_modify:
+    path: container config
+    ignore_dynamic: false
+    # Empty find: no field restrictions on which entries are matched.
+    find: {}
+    values:
+      tmpdir: tmp
+
+- name: Configure container env lists
+  # The desired list is empty and absent entries are removed, so this task
+  # deletes every entry that currently exists under "container envs".
+  community.routeros.api_modify:
+    path: container envs
+    data: []
+    handle_absent_entries: remove
+    handle_entries_content: remove_as_much_as_possible
+
+- name: Configure container mounts
+  community.routeros.api_modify:
+    path: container mounts
+    handle_absent_entries: remove
+    handle_entries_content: remove_as_much_as_possible
+    data:
+      # Mount list "tailscale_state": /tailscale/state on the router is
+      # mounted at /var/lib/tailscale inside the container.
+      - list: tailscale_state
+        src: /tailscale/state
+        dst: /var/lib/tailscale
+
+- name: Configure containers
+  community.routeros.api_modify:
+    path: container
+    handle_absent_entries: remove
+    handle_entries_content: remove_as_much_as_possible
+    data:
+      # Tailscale: attached to veth-tailscale, uses the tailscale_state
+      # mount list.
+      - name: tailscale
+        remote-image: gitea.lumpiasty.xyz/lumpiasty/mikrotik-tailscale:stable
+        interface: veth-tailscale
+        dns: 172.20.0.1
+        root-dir: tailscale/root
+        mountlists: tailscale_state
+        logging: true
+        start-on-boot: true
+      # CoreDNS: attached to veth-coredns, no mount list.
+      - name: coredns
+        remote-image: gitea.lumpiasty.xyz/lumpiasty/coredns-mikrotik:latest
+        interface: veth-coredns
+        dns: 172.20.0.1
+        root-dir: coredns/root
+        logging: true
+        start-on-boot: true
@@ -1,8 +1,56 @@
 ---
+- name: Configure WAN connection marking
+  # Tags every new forwarded connection with a connection mark that records
+  # which uplink it left through (pppoe-gpon -> wan-gpon, vlan6 -> wan-lte).
+  community.routeros.api_modify:
+    path: ip firewall mangle
+    data:
+      # passthrough: true lets the packet continue to later mangle rules
+      # after it has been marked.
+      - action: mark-connection
+        chain: forward
+        connection-state: new
+        new-connection-mark: wan-gpon
+        out-interface: pppoe-gpon
+        passthrough: true
+        comment: Mark connections going out GPON
+      - action: mark-connection
+        chain: forward
+        connection-state: new
+        new-connection-mark: wan-lte
+        out-interface: vlan6
+        passthrough: true
+        comment: Mark connections going out LTE
+    handle_absent_entries: remove
+    handle_entries_content: remove_as_much_as_possible
+    # Firewall rules are evaluated in order, so the on-router order must match
+    # the order of the list above.
+    ensure_order: true
+
 - name: Configure IPv4 firewall filter rules
  community.routeros.api_modify:
    path: ip firewall filter
    data:
+      - action: reject
+        chain: forward
+        connection-mark: wan-gpon
+        out-interface: vlan6
+        protocol: tcp
+        reject-with: tcp-reset
+        comment: Fast-fail TCP connections that shifted from GPON to LTE
+      - action: reject
+        chain: forward
+        connection-mark: wan-gpon
+        out-interface: vlan6
+        reject-with: icmp-network-unreachable
+        comment: Fast-fail non-TCP connections that shifted from GPON to LTE
+      - action: reject
+        chain: forward
+        connection-mark: wan-lte
+        out-interface: pppoe-gpon
+        protocol: tcp
+        reject-with: tcp-reset
+        comment: Fast-fail TCP connections that shifted from LTE to GPON
+      - action: reject
+        chain: forward
+        connection-mark: wan-lte
+        out-interface: pppoe-gpon
+        reject-with: icmp-network-unreachable
+        comment: Fast-fail non-TCP connections that shifted from LTE to GPON
      - action: fasttrack-connection
        chain: forward
        connection-state: established,related
@@ -10,11 +58,6 @@
        chain: forward
        comment: Allow all already established connections
        connection-state: established,related
-      - action: accept
-        chain: forward
-        comment: Allow LTE modem management (next rule forbids it otherwise)
-        dst-address: 192.168.8.1
-        out-interface: lte1
      - action: reject
        chain: forward
        comment: Forbid forwarding 192.168.0.0/16 to WAN
@@ -48,6 +91,11 @@
        comment: Allow from SRV to internet
        in-interface: vlan4
        out-interface-list: wan
+      - action: accept
+        chain: forward
+        comment: Allow from SRV to SRV
+        in-interface: vlan4
+        out-interface: vlan4
      - action: accept
        chain: forward
        comment: Allow from SRV to CAM
@@ -55,8 +103,18 @@
        out-interface: vlan3
      - action: accept
        chain: forward
-        comment: Allow from dockers to everywhere
-        in-interface: dockers
+        comment: Allow from IOT to internet only
+        in-interface: vlan5
+        out-interface-list: wan
+      - action: accept
+        chain: forward
+        comment: Allow from OPENWRT UPLINK to internet only
+        in-interface: vlan6
+        out-interface-list: wan
+      - action: accept
+        chain: forward
+        comment: Allow from containers to everywhere
+        in-interface: containers
      - action: jump
        chain: forward
        comment: Allow port forwards
@@ -127,21 +185,49 @@
        protocol: tcp
      - action: accept
        chain: input
-        comment: Allow DNS from dockers
+        comment: Allow DNS from containers
        dst-port: 53
-        in-interface: dockers
+        in-interface: containers
        protocol: udp
      - action: accept
        chain: input
        dst-port: 53
-        in-interface: dockers
+        in-interface: containers
+        protocol: tcp
+      - action: accept
+        chain: input
+        comment: Allow DNS from IOT
+        dst-port: 53
+        in-interface: vlan5
+        protocol: udp
+      - action: accept
+        chain: input
+        dst-port: 53
+        in-interface: vlan5
+        protocol: tcp
+      - action: accept
+        chain: input
+        comment: Allow DNS from OPENWRT UPLINK
+        dst-port: 53
+        in-interface: vlan6
+        protocol: udp
+      - action: accept
+        chain: input
+        dst-port: 53
+        in-interface: vlan6
        protocol: tcp
      - action: accept
        chain: input
        comment: Allow BGP from SRV
        dst-port: 179
        in-interface: vlan4
-        protocol: udp
+        protocol: tcp
+      - action: accept
+        chain: input
+        comment: Allow BGP from OPENWRT UPLINK
+        dst-port: 179
+        in-interface: vlan6
+        protocol: tcp
      - action: accept
        chain: input
        comment: NAT-PMP from LAN
@@ -150,9 +236,9 @@
        protocol: udp
      - action: accept
        chain: input
-        comment: NAT-PMP from dockers (for tailscale)
+        comment: NAT-PMP from containers (for tailscale)
        dst-port: 5351
-        in-interface: dockers
+        in-interface: containers
        protocol: udp
      - action: reject
        chain: input
@@ -191,8 +277,8 @@
      - action: accept
        chain: allow-ports
        comment: Allow anything udp to Tailscale
-        dst-address: 172.17.0.2
-        out-interface: dockers
+        dst-address: 172.20.0.2
+        out-interface: containers
        protocol: udp
      - action: accept
        chain: allow-ports
@@ -211,15 +297,11 @@
      - action: masquerade
        chain: srcnat
        comment: Masquerade to internet
-        out-interface-list: wan
+        out-interface: pppoe-gpon
      - action: masquerade
        chain: srcnat
        comment: GPON ONT management
        dst-address: 192.168.100.1
-      - action: masquerade
-        chain: srcnat
-        comment: LTE Modem management
-        dst-address: 192.168.8.1
      - action: dst-nat
        chain: dstnat
        comment: TS3
@@ -248,6 +330,11 @@
        in-interface: '!pppoe-gpon'
        protocol: tcp
        to-addresses: 128.0.70.5
+      - action: masquerade
+        chain: srcnat
+        comment: hairpin to LoadBalancer pool (vlan4 -> vlan4)
+        dst-address: 10.44.0.0/16
+        in-interface: vlan4
      - action: dst-nat
        chain: dstnat
        comment: HTTPS
@@ -370,14 +457,24 @@
        out-interface: vlan3
      - action: accept
        chain: forward
-        comment: Allow from dockers to everywhere
-        in-interface: dockers
+        comment: Allow from IOT to internet only
+        in-interface: vlan5
+        out-interface-list: wan
      - action: accept
        chain: forward
-        comment: Allow from internet to dockers
+        comment: Allow from OPENWRT UPLINK to internet only
+        in-interface: vlan6
+        out-interface-list: wan
+      - action: accept
+        chain: forward
+        comment: Allow from containers to everywhere
+        in-interface: containers
+      - action: accept
+        chain: forward
+        comment: Allow from internet to containers
        dst-address: 2001:470:61a3:500::/64
        in-interface-list: wan
-        out-interface: dockers
+        out-interface: containers
      - action: accept
        chain: forward
        comment: Allow tcp transmission port to LAN
@@ -436,14 +533,36 @@
        protocol: tcp
      - action: accept
        chain: input
-        comment: Allow DNS from dockers
+        comment: Allow DNS from containers
        dst-port: 53
-        in-interface: dockers
+        in-interface: containers
        protocol: udp
      - action: accept
        chain: input
        dst-port: 53
-        in-interface: dockers
+        in-interface: containers
+        protocol: tcp
+      - action: accept
+        chain: input
+        comment: Allow DNS from IOT
+        dst-port: 53
+        in-interface: vlan5
+        protocol: udp
+      - action: accept
+        chain: input
+        dst-port: 53
+        in-interface: vlan5
+        protocol: tcp
+      - action: accept
+        chain: input
+        comment: Allow DNS from OPENWRT UPLINK
+        dst-port: 53
+        in-interface: vlan6
+        protocol: udp
+      - action: accept
+        chain: input
+        dst-port: 53
+        in-interface: vlan6
        protocol: tcp
      - action: accept
        chain: input
@@ -452,6 +571,13 @@
        in-interface: vlan4
        protocol: tcp
        src-address: 2001:470:61a3:100::/64
+      - action: accept
+        chain: input
+        comment: Allow BGP from OPENWRT UPLINK
+        dst-port: 179
+        in-interface: vlan6
+        protocol: tcp
+        src-address: 2001:470:61a3:600::/64
      - action: reject
        chain: input
        comment: Reject all remaining
@@ -13,6 +13,9 @@
    - default_name: ether2
      config:
        comment: Wifi środek
+    - default_name: ether3
+      config:
+        comment: OpenWrt AP (dlink)
    - default_name: ether8
      config:
        comment: Serwer
@@ -36,52 +39,43 @@
  loop_control:
    label: "{{ item.default_name }}"

- name: Configure LTE interface defaults
-  community.routeros.api_find_and_modify:
-    ignore_dynamic: false
-    path: interface lte
-    find:
-      default-name: lte1
-    values:
-      apn-profiles: default-nodns
-      comment: Backup LTE WAN
+# community.routeros.api_modify cannot remove hardware disks, yet with
+# handle_absent_entries: remove it still tries to. Work around this by
+# deleting the stale non-hardware disk entries manually instead.

- name: Configure LTE APN profiles
-  community.routeros.api_modify:
-    path: interface lte apn
-    data:
-      - add-default-route: false
-        apn: internet
-        comment: default but without dns and default route
-        ipv6-interface: lte1
-        name: default-nodns
-        use-network-apn: true
-        use-peer-dns: false
-      # Default APN we can't really remove yet I don't want to reconfigure it
-      - add-default-route: true
-        apn: internet
-        authentication: none
-        default-route-distance: 2
-        ip-type: auto
-        name: default
-        use-network-apn: true
-        use-peer-dns: true
-    handle_absent_entries: remove
-    handle_entries_content: remove_as_much_as_possible
+- name: Read current disk entries
+  # Read-only query, so it also runs in check mode; the result is stored
+  # in routeros_disks.
+  check_mode: false
+  register: routeros_disks
+  community.routeros.api_info:
+    path: disk
+
+- name: Remove stale software-defined disk entries
+  # Deletes every disk entry except hardware disks, partitions and the "tmp"
+  # slot, using the disk list registered in routeros_disks.
+  community.routeros.api:
+    path: disk
+    remove: "{{ item['.id'] }}"
+  # rejectattr returns a lazy generator; "| list" materialises it so the loop
+  # always receives a real list, the same way the "| list" in the tmp-disk
+  # existence check does.
+  loop: >-
+    {{
+      routeros_disks.result
+      | rejectattr('type', 'in', ['hardware', 'partition'])
+      | rejectattr('slot', 'equalto', 'tmp')
+      | list
+    }}
+  loop_control:
+    label: "{{ item.slot }}"
+
+- name: Create temporary disk for containers if absent
+  # Adds a tmpfs disk in slot "tmp", but only when the disk list read into
+  # routeros_disks contains no entry with that slot.
+  community.routeros.api:
+    path: disk
+    add: "slot=tmp type=tmpfs"
+  when: routeros_disks.result | selectattr('slot', 'equalto', 'tmp') | list | length == 0

 - name: Configure temporary disk for containers
-  community.routeros.api_modify:
+  community.routeros.api_find_and_modify:
+    ignore_dynamic: false
    path: disk
-    data:
-      - slot: tmp1
-        type: tmpfs
-      # This is not ideal, there's no unique identifier for usb disk,
-      # after reinstall it might be assigned to another slot
-      # Just adding disk with slot usb1 and not specifying anything else
-      # so ansible doesn't touch it
-      - slot: usb1
-    handle_absent_entries: remove
-    handle_entries_content: remove_as_much_as_possible
+    find:
+      slot: tmp
+    values:
+      type: tmpfs

 - name: Configure switch settings
  community.routeros.api_find_and_modify:
@@ -0,0 +1,27 @@
+---
+# Entry point of the task list: statically imports each task file below.
+# The files run in the order listed here, starting with the preflight checks.
+- name: Preflight checks
+  ansible.builtin.import_tasks: preflight.yml
+
+- name: WAN and tunnel interfaces
+  ansible.builtin.import_tasks: wan.yml
+
+- name: Base network configuration
+  ansible.builtin.import_tasks: base.yml
+
+- name: Hardware and platform tuning
+  ansible.builtin.import_tasks: hardware.yml
+
+- name: RouterOS container configuration
+  ansible.builtin.import_tasks: containers.yml
+
+- name: Addressing configuration
+  ansible.builtin.import_tasks: addressing.yml
+
+- name: Firewall configuration
+  ansible.builtin.import_tasks: firewall.yml
+
+- name: Routing configuration
+  ansible.builtin.import_tasks: routing.yml
+
+- name: System configuration
+  ansible.builtin.import_tasks: system.yml
@@ -32,15 +32,4 @@
    fail_msg: "RouterOS device-mode does not report container as enabled. Payload: {{ routeros_device_mode | to_nice_json }}"
    success_msg: "RouterOS device-mode confirms container=yes"

- name: Read configured disks
-  community.routeros.api_info:
-    path: disk
-  register: routeros_disks
-  check_mode: false

- name: Assert usb1 disk is present
-  ansible.builtin.assert:
-    that:
-      - (routeros_disks.result | selectattr('slot', 'equalto', 'usb1') | list | length) > 0
-    fail_msg: "Required disk slot usb1 is not present on router."
-    success_msg: "Required disk usb1 is present"
@@ -7,29 +7,49 @@
        disabled: false
        distance: 1
        dst-address: 100.64.0.0/10
-        gateway: 172.17.0.2
+        gateway: 172.20.0.2
        routing-table: main
        scope: 30
        suppress-hw-offload: false
        target-scope: 10
-      - disabled: false
+      - comment: GPON Monitor 1
+        disabled: false
        distance: 1
-        dst-address: 0.0.0.0/0
+        dst-address: 1.0.0.1/32
        gateway: pppoe-gpon
        routing-table: main
-        scope: 30
+        scope: 10
        suppress-hw-offload: false
        target-scope: 10
-        vrf-interface: pppoe-gpon
-      - disabled: false
-        distance: 2
+      - comment: GPON Monitor 2
+        disabled: false
+        distance: 1
+        dst-address: 8.8.4.4/32
+        gateway: pppoe-gpon
+        routing-table: main
+        scope: 10
+        suppress-hw-offload: false
+        target-scope: 10
+      - comment: GPON Default 1
+        disabled: false
+        distance: 1
        dst-address: 0.0.0.0/0
-        gateway: 192.168.8.1
+        gateway: 1.0.0.1
+        check-gateway: ping
        routing-table: main
        scope: 30
        suppress-hw-offload: false
-        target-scope: 10
-        vrf-interface: lte1
+        target-scope: 11
+      - comment: GPON Default 2
+        disabled: false
+        distance: 2
+        dst-address: 0.0.0.0/0
+        gateway: 8.8.4.4
+        check-gateway: ping
+        routing-table: main
+        scope: 30
+        suppress-hw-offload: false
+        target-scope: 11
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible

@@ -41,6 +61,7 @@
        distance: 1
        dst-address: 2000::/3
        gateway: 2001:470:70:dd::1
+        check-gateway: ping
        scope: 30
        target-scope: 10
      - comment: Tailnet
@@ -64,7 +85,6 @@
        routing-table: main
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure BGP templates
  community.routeros.api_modify:
@@ -94,6 +114,27 @@
        remote.address: 2001:470:61a3:100::3/128
        routing-table: main
        templates: klaster
+      - name: dlink-lte
+        afi: ip,ipv6
+        as: 65000
+        connect: true
+        disabled: false
+        instance: bgp-homelab
+        listen: true
+        # ibgp-rr: CRS acts as route reflector for D-Link (the RR client).
+        # This allows k8s routes learned from bgp1 to be reflected to D-Link
+        # without violating iBGP split-horizon.
+        local.role: ibgp-rr
+        remote.address: 192.168.6.2/32
+        routing-table: main
+        templates: klaster
+        hold-time: 30s
+        keepalive-time: 10s
+        # Redistribute connected (VLAN addresses) and static routes (Tailscale,
+        # GPON default) so D-Link has explicit routes to all internal subnets
+        # and a default route when GPON is up.
+        output.redistribute: connected,static
+        output.default-originate: if-installed
+        nexthop-choice: force-self
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true
@@ -0,0 +1,138 @@
+---
+- name: Configure system clock
+  community.routeros.api_find_and_modify:
+    path: system clock
+    ignore_dynamic: false
+    # Empty find: no field restrictions on which entries are matched.
+    find: {}
+    values:
+      time-zone-name: Europe/Warsaw
+
+- name: Configure dedicated Ansible management user
+  # Ensures the API user (credentials come from the routeros_api_* variables)
+  # exists, is enabled and belongs to the "full" group.
+  community.routeros.api_modify:
+    path: user
+    data:
+      - name: "{{ routeros_api_username }}"
+        group: full
+        password: "{{ routeros_api_password }}"
+        disabled: false
+        comment: "Ansible management user"
+    # ignore: users that are not listed above are left untouched.
+    handle_absent_entries: ignore
+    handle_entries_content: remove_as_much_as_possible
+
+# The RouterOS API can neither store multi-line script source (newlines
+# collapse into one line) nor evaluate the [/file/get ...] expression itself.
+# So we fetch the update logic as a .rsc file onto the router's flash, then run
+# a single-line bootstrap script (which the API CAN store) whose body RouterOS
+# evaluates natively: it builds the real, browsable, multi-line named script
+# from the file via [/file get ... contents]. The scheduler then runs that
+# named script by name (the upstream-intended design). The update logic stays
+# out of this repo entirely.
+- name: Download tailscale auto-update script to router
+  # Runs "/tool fetch" on the router to save the script as
+  # update-tailscale.rsc.
+  community.routeros.api:
+    path: tool
+    # Folded scalar: the lines below are joined with spaces into one command.
+    cmd: >-
+      fetch
+      url=https://gitea.lumpiasty.xyz/Lumpiasty/mikrotik-tailscale/raw/branch/main/routeros/update-tailscale.rsc
+      dst-path=update-tailscale.rsc
+      mode=https
+  # Always reported as changed; the task does not compare against a previous
+  # download.
+  changed_when: true
+  tags:
+    - tailscale-script
+
+- name: Build the named auto-update script from the fetched file
+  # Stores a one-line bootstrap script named update-tailscale-bootstrap. Its
+  # source removes any existing update-tailscale script (errors ignored) and
+  # re-adds it with the contents of update-tailscale.rsc.
+  # NOTE(review): "add" presumably fails if update-tailscale-bootstrap is
+  # left over from an aborted earlier run — confirm, and clean it up first
+  # if so.
+  community.routeros.api:
+    path: system script
+    # Folded scalar: the lines below are joined with spaces into one command.
+    cmd: >-
+      add name=update-tailscale-bootstrap
+      source=":do { /system script remove update-tailscale } on-error={};
+      /system script add name=update-tailscale
+      comment=\"Check for mikrotik-tailscale image updates\"
+      source=[/file get update-tailscale.rsc contents]"
+  changed_when: true
+  tags:
+    - tailscale-script
+
+- name: Find bootstrap script id
+  # Read-only lookup of the bootstrap script; the result is registered as
+  # routeros_bootstrap and never reported as a change.
+  tags:
+    - tailscale-script
+  changed_when: false
+  register: routeros_bootstrap
+  community.routeros.api:
+    path: system script
+    extended_query:
+      attributes:
+        - .id
+        - name
+      where:
+        - attribute: name
+          is: "=="
+          value: update-tailscale-bootstrap
+
+- name: Run bootstrap to create the named auto-update script
+  # Runs the bootstrap script using the .id of the first entry in the
+  # registered routeros_bootstrap result.
+  community.routeros.api:
+    path: system script
+    cmd: "run .id={{ routeros_bootstrap.msg[0]['.id'] }}"
+  register: routeros_bootstrap_run
+  # The list items are ANDed: the task fails only when the API call failed
+  # AND its message does not contain "interrupted".
+  failed_when:
+    - routeros_bootstrap_run is failed
+    - "'interrupted' not in (routeros_bootstrap_run.msg | string)"
+  changed_when: true
+  tags:
+    - tailscale-script
+
+- name: Verify named auto-update script exists
+  # Read-only check: fails when the query for a script named
+  # update-tailscale returns no entries.
+  tags:
+    - tailscale-script
+  changed_when: false
+  failed_when: (routeros_named_script.msg | length) == 0
+  register: routeros_named_script
+  community.routeros.api:
+    path: system script
+    extended_query:
+      attributes:
+        - .id
+        - name
+      where:
+        - attribute: name
+          is: "=="
+          value: update-tailscale
+
+- name: Remove bootstrap script
+  # Deletes the bootstrap script by the .id of the first entry in the
+  # registered routeros_bootstrap result.
+  community.routeros.api:
+    path: system script
+    remove: "{{ routeros_bootstrap.msg[0]['.id'] }}"
+  changed_when: true
+  tags:
+    - tailscale-script
+
+- name: Configure tailscale auto-update scheduler
+  tags:
+    - tailscale-script
+  community.routeros.api_modify:
+    path: system scheduler
+    # Scheduler entries that are not listed under data are removed.
+    handle_absent_entries: remove
+    handle_entries_content: remove_as_much_as_possible
+    data:
+      # Runs the named update-tailscale script once a day.
+      - name: update-tailscale
+        comment: Check for mikrotik-tailscale image updates
+        on-event: /system script run update-tailscale
+        interval: 1d
+
+- name: Configure service ports and service enablement
+  community.routeros.api_find_and_modify:
+    path: ip service
+    ignore_dynamic: false
+    find:
+      name: "{{ item.name }}"
+    # The whole loop item (name included) is passed as the desired values.
+    values: "{{ item }}"
+  loop_control:
+    label: "{{ item.name }}"
+  loop:
+    - {name: ftp, disabled: true}
+    - {name: telnet, disabled: true}
+    - {name: www, disabled: true}
+    - {name: ssh, port: 2137}
+    - {name: api, disabled: true}
+    - {name: api-ssl, disabled: false}
@@ -8,11 +8,12 @@
        keepalive-timeout: 2
        name: pppoe-gpon
        password: "{{ routeros_pppoe_password }}"
-        use-peer-dns: true
+        # DNS goes through the CoreDNS container, so ISP-provided DNS is not used
+        use-peer-dns: false
+        add-default-route: false
        user: "{{ routeros_pppoe_username }}"
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure 6to4 tunnel interface
  community.routeros.api_modify:
@@ -25,20 +26,23 @@
        remote-address: 216.66.80.162
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true

 - name: Configure veth interface for containers
  community.routeros.api_modify:
    path: interface veth
    data:
-      - address: 172.17.0.2/16,2001:470:61a3:500::1/64
+      - address: 172.20.0.2/24,2001:470:61a3:500::1/64
        container-mac-address: 7E:7E:A1:B1:2A:7C
        dhcp: false
-        gateway: 172.17.0.1
+        gateway: 172.20.0.1
        gateway6: 2001:470:61a3:500:ffff:ffff:ffff:ffff
        mac-address: 7E:7E:A1:B1:2A:7B
-        name: veth1
+        name: veth-tailscale
        comment: Tailscale container
+      - address: 172.20.0.3/24
+        dhcp: false
+        gateway: 172.20.0.1
+        name: veth-coredns
+        comment: CoreDNS container
    handle_absent_entries: remove
    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true
@@ -14,6 +14,4 @@ openbao_fields:
    path: wan_pppoe
    username_key: username
    password_key: password
-  routeros_tailscale_container:
-    path: router_tailscale
-    container_password_key: container_password
+
@@ -1,66 +0,0 @@
---
- name: Configure container runtime defaults
-  community.routeros.api_find_and_modify:
-    ignore_dynamic: false
-    path: container config
-    find: {}
-    values:
-      registry-url: https://ghcr.io
-      tmpdir: /tmp1/pull
-
- name: Configure container env lists
-  community.routeros.api_modify:
-    path: container envs
-    data:
-      - key: ADVERTISE_ROUTES
-        list: tailscale
-        value: 192.168.0.0/24,192.168.1.0/24,192.168.4.1/32,192.168.100.1/32,192.168.255.0/24,10.42.0.0/16,10.43.0.0/16,10.44.0.0/16,2001:470:61a3::/48
-      - key: CONTAINER_GATEWAY
-        list: tailscale
-        value: 172.17.0.1
-      - key: PASSWORD
-        list: tailscale
-        value: "{{ routeros_tailscale_container_password }}"
-      - key: TAILSCALE_ARGS
-        list: tailscale
-        value: --accept-routes --advertise-exit-node --snat-subnet-routes=false
-      - key: UPDATE_TAILSCALE
-        list: tailscale
-        value: y
-    handle_absent_entries: remove
-    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true
-
- name: Configure container mounts
-  community.routeros.api_modify:
-    path: container mounts
-    data:
-      - dst: /var/lib/tailscale
-        list: tailscale
-        src: /usb1/tailscale
-      - dst: /root
-        list: tailscale-root
-        src: /tmp1/tailscale-root
-    handle_absent_entries: remove
-    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true
-
- name: Configure tailscale container
-  community.routeros.api_modify:
-    path: container
-    data:
-      - dns: 172.17.0.1
-        envlists: tailscale
-        hostname: mikrotik
-        interface: veth1
-        layer-dir: ""
-        mountlists: tailscale
-        name: tailscale-mikrotik:latest
-        remote-image: fluent-networks/tailscale-mikrotik:latest
-        root-dir: /usb1/containers/tailscale
-        start-on-boot: true
-        tmpfs: /tmp:67108864:01777
-        workdir: /
-    handle_absent_entries: remove
-    handle_entries_content: remove_as_much_as_possible
-    ensure_order: true
@@ -1,43 +0,0 @@
---
- name: Configure system clock
-  community.routeros.api_find_and_modify:
-    ignore_dynamic: false
-    path: system clock
-    find: {}
-    values:
-      time-zone-name: Europe/Warsaw
-
- name: Configure dedicated Ansible management user
-  community.routeros.api_modify:
-    path: user
-    data:
-      - name: "{{ routeros_api_username }}"
-        group: full
-        password: "{{ routeros_api_password }}"
-        disabled: false
-        comment: "Ansible management user"
-    handle_absent_entries: ignore
-    handle_entries_content: remove_as_much_as_possible
-
- name: Configure service ports and service enablement
-  community.routeros.api_find_and_modify:
-    ignore_dynamic: false
-    path: ip service
-    find:
-      name: "{{ item.name }}"
-    values: "{{ item }}"
-  loop:
-    - name: ftp
-      disabled: true
-    - name: telnet
-      disabled: true
-    - name: www
-      disabled: true
-    - name: ssh
-      port: 2137
-    - name: api
-      disabled: true
-    - name: api-ssl
-      disabled: false
-  loop_control:
-    label: "{{ item.name }}"
@@ -18,7 +18,7 @@ spec:
  chart:
    spec:
      chart: authentik
-      version: 2026.2.1
+      version: 2026.5.3
      sourceRef:
        kind: HelmRepository
        name: authentik
@@ -7,7 +7,7 @@ metadata:
  name: gitea-shared-storage-lvmhdd
  namespace: openebs
 spec:
-  capacity: 10Gi
+  capacity: "21474836480"
  ownerNodeID: anapistula-delrosalae
  shared: "yes"
  thinProvision: "no"
@@ -20,7 +20,7 @@ metadata:
  name: gitea-shared-storage-lvmhdd
 spec:
  capacity:
-    storage: 10Gi
+    storage: 20Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
@@ -41,6 +41,6 @@ spec:
    - ReadWriteOnce
  resources:
    requests:
-      storage: 10Gi
+      storage: 20Gi
  storageClassName: hdd-lvmpv
  volumeName: gitea-shared-storage-lvmhdd
@@ -17,7 +17,7 @@ spec:
  chart:
    spec:
      chart: gitea
-      version: 12.5.0
+      version: 12.6.0
      sourceRef:
        kind: HelmRepository
        name: gitea-charts
@@ -18,7 +18,7 @@ spec:
  chart:
    spec:
      chart: valkey
-      version: 0.9.3
+      version: 0.9.4
      sourceRef:
        kind: HelmRepository
        name: valkey
@@ -18,7 +18,7 @@ spec:
  chart:
    spec:
      chart: immich
-      version: 1.2.2
+      version: 1.2.6
      sourceRef:
        kind: HelmRepository
        name: secustor
@@ -16,7 +16,7 @@ spec:
    spec:
      containers:
      - name: teamspeak3
-        image: teamspeak:3.13.7
+        image: teamspeak:3.13.8
        ports:
        - containerPort: 9987
          name: voice
@@ -2,6 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
  - namespace.yaml
-  - configmap.yaml
-  - secret.yaml
-  - cronjob.yaml
+  - oauth-secret.yaml
+  - postgres-volume.yaml
+  - postgres-cluster.yaml
+  - release.yaml
@@ -1,5 +1,4 @@
---
 apiVersion: v1
 kind: Namespace
 metadata:
-  name: searxng
+  name: kaneo
@@ -0,0 +1,43 @@
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: kaneo-secret  # referenced by spec.kubernetes.serviceAccount of the VaultAuth below
+  namespace: kaneo
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultAuth
+metadata:
+  name: kaneo  # referenced by vaultAuthRef of the VaultStaticSecret below
+  namespace: kaneo
+spec:
+  method: kubernetes
+  mount: kubernetes
+  kubernetes:
+    role: kaneo
+    serviceAccount: kaneo-secret
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultStaticSecret
+metadata:
+  name: kaneo-authentik
+  namespace: kaneo
+spec:
+  type: kv-v2
+
+  mount: secret
+  path: authentik/kaneo
+
+  destination:
+    create: true
+    name: kaneo-authentik  # Secret read via secretKeyRef in the kaneo HelmRelease
+    type: Opaque
+    transformation:
+      excludeRaw: true
+      templates:  # key names match the secretKeyRef keys used by the kaneo HelmRelease
+        client_id:
+          text: '{{ get .Secrets "client_id" }}'
+        client_secret:
+          text: '{{ get .Secrets "client_secret" }}'
+
+  vaultAuthRef: kaneo
@@ -0,0 +1,16 @@
+---
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: kaneo-db
+  namespace: kaneo
+spec:
+  instances: 1  # single instance; only one PersistentVolume (kaneo-db-1) is pre-provisioned
+
+  storage:
+    pvcTemplate:
+      storageClassName: ssd-lvmpv  # same class as the kaneo-db-1 PersistentVolume
+      resources:
+        requests:
+          storage: 10Gi  # matches the capacity declared on the kaneo-db-1 PersistentVolume
+      volumeName: kaneo-db-1  # bind the claim to the statically defined PersistentVolume
@@ -0,0 +1,33 @@
+apiVersion: local.openebs.io/v1alpha1
+kind: LVMVolume
+metadata:
+  labels:
+    kubernetes.io/nodename: anapistula-delrosalae
+  name: kaneo-db-1  # used as volumeHandle by the PersistentVolume of the same name
+  namespace: openebs
+spec:
+  capacity: "10737418240"  # 10 GiB in bytes (10 * 1024^3), same form as gitea-shared-storage-lvmhdd
+  ownerNodeID: anapistula-delrosalae
+  shared: "yes"
+  thinProvision: "no"
+  vgPattern: ^openebs-ssd$
+  volGroup: openebs-ssd
+---
+kind: PersistentVolume
+apiVersion: v1
+metadata:
+  name: kaneo-db-1  # referenced by volumeName in the kaneo-db Cluster's pvcTemplate
+spec:
+  capacity:
+    storage: 10Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  storageClassName: ssd-lvmpv
+  volumeMode: Filesystem
+  csi:
+    driver: local.csi.openebs.io
+    fsType: btrfs
+    volumeHandle: kaneo-db-1  # name of the LVMVolume declared earlier in this file
+---
+# PVCs are dynamically created by the Postgres operator
@@ -0,0 +1,82 @@
+---
+apiVersion: source.toolkit.fluxcd.io/v1
+kind: GitRepository
+metadata:
+  name: kaneo
+  namespace: kaneo
+spec:
+  interval: 24h
+  url: https://github.com/usekaneo/kaneo.git
+  ref:
+    tag: v2.7.7  # chart source revision; the container image tag is set separately in the HelmRelease values
+  ignore: |
+    # exclude all
+    /*
+    # include charts directory
+    !/charts/
+---
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: kaneo
+  namespace: kaneo
+spec:
+  interval: 30m
+  chart:
+    spec:
+      chart: ./charts/kaneo  # path inside the kaneo GitRepository source
+      sourceRef:
+        kind: GitRepository
+        name: kaneo
+  values:
+    ingress:
+      enabled: true
+      className: nginx-ingress
+      annotations:
+        # nginx.ingress.kubernetes.io/rewrite-target: /$1
+        cert-manager.io/cluster-issuer: letsencrypt
+      hosts:
+        - host: kaneo.lumpiasty.xyz
+          paths:
+            # Docs at https://github.com/usekaneo/kaneo/blob/main/charts/kaneo/README.md
+            # they talk nonsense
+            - path: /
+              pathType: Prefix
+              service: kaneo
+              port: 5173
+      tls:
+        - secretName: kaneo-ingress
+          hosts:
+            - kaneo.lumpiasty.xyz
+
+    postgresql:
+      enabled: false  # bundled database off; an external one is configured under kaneo.env.database
+
+    kaneo:
+      image:
+        tag: "2.7.3"  # renovate: depName=ghcr.io/usekaneo/kaneo registryUrl=https://ghcr.io
+      env:
+        clientUrl: "https://kaneo.lumpiasty.xyz"
+        disablePasswordRegistration: true
+        database:
+          external:
+            enabled: true
+            existingSecret:
+              enabled: true
+              name: kaneo-db-app  # NOTE(review): presumably the app Secret of the kaneo-db Cluster — confirm
+              passwordKey: uri
+      extraEnv:
+        - name: CUSTOM_OAUTH_DISCOVERY_URL
+          value: https://authentik.lumpiasty.xyz/application/o/kaneo/.well-known/openid-configuration
+        - name: CUSTOM_OAUTH_CLIENT_ID
+          valueFrom:
+            secretKeyRef:
+              name: kaneo-authentik  # Secret created by the kaneo-authentik VaultStaticSecret
+              key: client_id
+        - name: CUSTOM_OAUTH_CLIENT_SECRET
+          valueFrom:
+            secretKeyRef:
+              name: kaneo-authentik
+              key: client_secret
+        - name: DISABLE_GUEST_ACCESS
+          value: "true"
@@ -5,13 +5,12 @@ resources:
  - crawl4ai-proxy
  - authentik
  - gitea
-  - renovate
-  - librechat
  - frigate
  - llama
  - immich
  - nas
-  - searxng
  - ispeak3
  - openwebui
  - woodpecker
+  - meridian
+  - kaneo
@@ -1,5 +0,0 @@
---
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: librechat
@@ -1,120 +0,0 @@
---
-apiVersion: source.toolkit.fluxcd.io/v1
-kind: HelmRepository
-metadata:
-  name: dynomite567-charts
-  namespace: librechat
-spec:
-  interval: 24h
-  url: https://dynomite567.github.io/helm-charts/
---
-# apiVersion: helm.toolkit.fluxcd.io/v2
-# kind: HelmRelease
-# metadata:
-#   name: librechat
-#   namespace: librechat
-# spec:
-#   interval: 30m
-#   chart:
-#     spec:
-#       chart: librechat
-#       version: 1.9.1
-#       sourceRef:
-#         kind: HelmRepository
-#         name: dynomite567-charts
-#   values:
-#     global:
-#       librechat:
-#         existingSecretName: librechat
-#     librechat:
-#       configEnv:
-#         PLUGIN_MODELS: null
-#         ALLOW_REGISTRATION: "false"
-#         TRUST_PROXY: "1"
-#         DOMAIN_CLIENT: https://librechat.lumpiasty.xyz
-#         SEARCH: "true"
-#       existingSecretName: librechat
-#       configYamlContent: |
-#         version: 1.0.3
-
-#         endpoints:
-#           custom:
-#             - name: "Llama.cpp"
-#               apiKey: "llama"
-#               baseURL: "http://llama.llama.svc.cluster.local:11434/v1"
-#               models:
-#                 default: [
-#                   "DeepSeek-R1-0528-Qwen3-8B-GGUF",
-#                   "Qwen3-8B-GGUF",
-#                   "Qwen3-8B-GGUF-no-thinking",
-#                   "gemma3n-e4b",
-#                   "gemma3-12b",
-#                   "gemma3-12b-q2",
-#                   "gemma3-12b-novision",
-#                   "gemma3-4b",
-#                   "gemma3-4b-novision",
-#                   "Qwen3-4B-Thinking-2507",
-#                   "Qwen3-4B-Thinking-2507-long-ctx",
-#                   "Qwen2.5-VL-7B-Instruct-GGUF",
-#                   "Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S",
-#                   "Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L",
-#                   "Qwen3-VL-2B-Instruct-GGUF",
-#                   "Qwen3-VL-2B-Instruct-GGUF-unslothish",
-#                   "Qwen3-VL-2B-Thinking-GGUF",
-#                   "Qwen3-VL-4B-Instruct-GGUF",
-#                   "Qwen3-VL-4B-Instruct-GGUF-unslothish",
-#                   "Qwen3-VL-4B-Thinking-GGUF",
-#                   "Qwen3-VL-8B-Instruct-GGUF",
-#                   "Qwen3-VL-8B-Instruct-GGUF-unslothish",
-#                   "Qwen3-VL-8B-Thinking-GGUF",
-#                   "Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF",
-#                   "Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF"
-#                 ]
-#               titleConvo: true
-#               titleModel: "gemma3-4b-novision"
-#               summarize: false
-#               summaryModel: "gemma3-4b-novision"
-#               forcePrompt: false
-#               modelDisplayLabel: "Llama.cpp"
-
-#               # ✨ IMPORTANT: let llama-swap/llama-server own all these
-#               dropParams:
-#                 - "temperature"
-#                 - "top_p"
-#                 - "top_k"
-#                 - "presence_penalty"
-#                 - "frequency_penalty"
-#                 - "stop"
-#                 - "max_tokens"
-#       imageVolume:
-#         enabled: true
-#         size: 10G
-#         accessModes: ReadWriteOnce
-#         storageClassName: mayastor-single-hdd
-#     ingress:
-#       enabled: true
-#       className: nginx-ingress
-#       annotations:
-#         cert-manager.io/cluster-issuer: letsencrypt
-#         nginx.ingress.kubernetes.io/proxy-body-size: "0"
-#         nginx.ingress.kubernetes.io/proxy-buffering: "false"
-#         nginx.ingress.kubernetes.io/proxy-read-timeout: 30m
-#       hosts:
-#         - host: librechat.lumpiasty.xyz
-#           paths:
-#             - path: /
-#               pathType: ImplementationSpecific
-#       tls:
-#         - hosts:
-#             - librechat.lumpiasty.xyz
-#           secretName: librechat-ingress
-
-#     mongodb:
-#       persistence:
-#         storageClass: mayastor-single-hdd
-
-#     meilisearch:
-#       persistence:
-#         storageClass: mayastor-single-hdd
-#       auth:
-#         existingMasterKeySecret: librechat
@@ -16,7 +16,7 @@ spec:
    spec:
      containers:
        - name: caddy
-          image: caddy:2.11.2-alpine
+          image: caddy:2.11.4-alpine
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /etc/caddy
@@ -3,78 +3,81 @@ healthCheckTimeout: 600
 logToStdout: "both" # proxy and upstream

 macros:
-  base_args: "--no-warmup --port ${PORT}"
-  common_args: "--fit-target 1536 --no-warmup --port ${PORT}"
-  gemma3_ctx_128k: "--ctx-size 131072"
-  qwen35_ctx_128k: "--ctx-size 131072"
-  qwen35_ctx_256k: "--ctx-size 262144"
-  gemma_sampling: "--prio 2 --temp 1.0 --repeat-penalty 1.0 --min-p 0.00 --top-k 64 --top-p 0.95"
-  qwen35_sampling: "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q8_0 -ctv q8_0"
-  qwen35_35b_args: "--temp 1.0 --min-p 0.00 --top-p 0.95 --top-k 20 -ctk q8_0 -ctv q8_0"
+  base_args: "--no-warmup --port ${PORT} --mlock --no-mmap"
+  common_args: "--fit-target 256 --no-warmup --port ${PORT} --no-mmap -tb 12 -t 6"
+  cpu_args: "--no-warmup --port ${PORT} -ngl 0"
+  ctx_64k: "--ctx-size 65536"    # 64 * 1024
+  ctx_128k: "--ctx-size 131072"  # 128 * 1024
+  ctx_256k: "--ctx-size 262144"  # 256 * 1024 (was 131072, identical to ctx_128k)
+  qwen35_think_args:   "--temp 1.0 --top-p 0.95 --top-k 20 --min-p 0.00 -ctk q4_0 -ctv q4_0 --presence_penalty 1.5 --reasoning on"
+  qwen35_nothink_args: "--temp 0.7 --top-p 0.80 --top-k 20 --min-p 0.00 -ctk q4_0 -ctv q4_0 --presence_penalty 1.5 --reasoning off"
  qwen35_35b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf"
  qwen35_4b_heretic_mmproj: "--mmproj-url https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/mmproj-F16.gguf --mmproj /root/.cache/llama.cpp/unsloth_Qwen3.5-4B-GGUF_mmproj-F16.gguf"
-  glm47_flash_args: "--temp 0.7 --top-p 1.0 --min-p 0.01 --repeat-penalty 1.0"
-  thinking_on: "--chat-template-kwargs '{\"enable_thinking\": true}'"
-  thinking_off: "--chat-template-kwargs '{\"enable_thinking\": false}'"
-
-peers:
-  openrouter:
-    proxy: https://openrouter.ai/api
-    apiKey: ${env.OPENROUTER_API_KEY}
-    models:
-      - z-ai/glm-5
+  gemma4_sampling: "--temp 1.0 --top-p 0.95 --top-k 64 -ctk q4_0 -ctv q4_0"
+  gemma4_nothink_sampling: "--temp 1.0 --top-p 0.95 --top-k 64 -ctk q4_0 -ctv q4_0 --reasoning off"

 hooks:
  on_startup:
    preload:
      - "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
+      - "parakeet-tdt_ctc-1.1b"
+
+# matrix replaces groups (they are mutually exclusive).
+# The small 0.8B model runs alongside any LLM.
+# FLUX runs without any LLM — it needs all available VRAM, so the 0.8B model is evicted too; only STT stays loaded (see the image_gen set).
+matrix:
+  vars:
+    q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
+    stt: "parakeet-tdt_ctc-1.1b"
+    flux: "flux2-klein-4b:Q4_K_M"
+    coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
+    q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
+    q35nt: "Qwen3.5-35B-A3B-GGUF-nothink:Q4_K_M"
+    q35ht: "Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M"
+    q35hnt: "Qwen3.5-35B-A3B-heretic-GGUF-nothink:Q4_K_M"
+    q4t: "Qwen3.5-4B-GGUF:Q4_K_M"
+    q4nt: "Qwen3.5-4B-GGUF-nothink:Q4_K_M"
+    q4ht: "Qwen3.5-4B-heretic-GGUF:Q4_K_M"
+    q4hnt: "Qwen3.5-4B-heretic-GGUF-nothink:Q4_K_M"
+    g26xl: "gemma-4-26B-A4B-it-qat:UD-Q4_K_XL"
+    g26xlnt: "gemma-4-26B-A4B-it-qat-nothink:UD-Q4_K_XL"
+    g26mtp: "gemma-4-26B-A4B-it-qat-mtp:UD-Q4_K_XL"
+    g26mtpnt: "gemma-4-26B-A4B-it-qat-mtp-nothink:UD-Q4_K_XL"
+    g26ht: "SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF:UD-Q4_K_XL"
+    g26hnt: "SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF-nothink:UD-Q4_K_XL"
+    g26hmtp: "SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF-mtp:UD-Q4_K_XL"
+    g26hmnt: "SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF-mtp-nothink:UD-Q4_K_XL"
+    ge4qat: "unsloth/gemma-4-E4B-it-qat-GGUF:UD-Q4_K_XL"
+    ge4qatnt: "unsloth/gemma-4-E4B-it-qat-GGUF-nothink:UD-Q4_K_XL"
+    ge2qat: "unsloth/gemma-4-E2B-it-qat-GGUF:UD-Q4_K_XL"
+    ge2qatnt: "unsloth/gemma-4-E2B-it-qat-GGUF-nothink:UD-Q4_K_XL"
+    ge4mtp: "unsloth/gemma-4-E4B-it-qat-GGUF-mtp:UD-Q4_K_XL"
+    ge4mtpnt: "unsloth/gemma-4-E4B-it-qat-GGUF-mtp-nothink:UD-Q4_K_XL"
+    ge4ht: "llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF:Q4_K_M"
+    ge4hnt: "llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF-nothink:Q4_K_M"
+    ge4hmtp: "llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF-mtp:Q4_K_M"
+    ge4hmnt: "llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF-mtp-nothink:Q4_K_M"
+    q36t: "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL"
+    q36nt: "unsloth/Qwen3.6-35B-A3B-GGUF-nothink:UD-Q4_K_XL"
+    haut: "HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive:Q4_K_M"
+    haunt: "HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive-nothink:Q4_K_M"
+    mtpt: "unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M"
+    mtpnt: "unsloth/Qwen3.6-35B-A3B-MTP-GGUF-nothink:Q4_K_M"
+
+  evict_costs:
+    flux: 10  # large files, slow to reload
+
+  sets:
+    # any LLM can run alongside the small always-on model + STT + TTS (all CPU, no VRAM cost)
+    with_q8: "(coder | q35t | q35nt | q35ht | q35hnt | q4t | q4nt | q4ht | q4hnt | g26xl | g26xlnt | g26mtp | g26mtpnt | g26ht | g26hnt | g26hmtp | g26hmnt | ge4qat | ge4qatnt | ge2qat | ge2qatnt | ge4mtp | ge4mtpnt | ge4ht | ge4hnt | ge4hmtp | ge4hmnt | q36t | q36nt | haut | haunt | mtpt | mtpnt) & q8 & stt"
+    # FLUX runs alone — evicts everything including q8, but keeps STT for voice during image gen
+    image_gen: "flux & stt"

-groups:
-  always:
-    persistent: true
-    exclusive: false
-    swap: false
-    members:
-      - "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"

 models:
-  "gemma3-12b":
-    cmd: |
-      /app/llama-server
-        -hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
-        ${gemma3_ctx_128k}
-        ${gemma_sampling}
-        ${common_args}
-
-  "gemma3-12b-novision":
-    cmd: |
-      /app/llama-server
-        -hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
-        ${gemma3_ctx_128k}
-        ${gemma_sampling}
-        --no-mmproj
-        ${common_args}
-
-  "gemma3-4b":
-    cmd: |
-      /app/llama-server
-        -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
-        ${gemma3_ctx_128k}
-        ${gemma_sampling}
-        ${common_args}
-
-  "gemma3-4b-novision":
-    cmd: |
-      /app/llama-server
-        -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
-        ${gemma3_ctx_128k}
-        ${gemma_sampling}
-        --no-mmproj
-        ${common_args}
-
  "Qwen3-Coder-Next-GGUF:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf unsloth/Qwen3-Coder-Next-GGUF:Q4_K_M
        --ctx-size 65536
        --predict 8192
@@ -83,178 +86,359 @@ models:
        --top-p 0.95
        --top-k 40
        --repeat-penalty 1.0
-        -ctk q8_0 -ctv q8_0
+        -ctk q4_0 -ctv q4_0
        ${common_args}

  "Qwen3.5-35B-A3B-GGUF:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_35b_args}
+        ${ctx_256k}
+        ${qwen35_think_args}
        ${common_args}

  "Qwen3.5-35B-A3B-GGUF-nothink:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_35b_args}
+        ${ctx_256k}
+        ${qwen35_nothink_args}
        ${common_args}
-        ${thinking_off}

  # The "heretic" version does not provide the mmproj
  # so providing url to the one from the non-heretic version.
  "Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
        ${qwen35_35b_heretic_mmproj}
-        ${qwen35_ctx_256k}
-        ${qwen35_35b_args}
+        ${ctx_256k}
+        ${qwen35_think_args}
        ${common_args}

  "Qwen3.5-35B-A3B-heretic-GGUF-nothink:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf mradermacher/Qwen3.5-35B-A3B-heretic-GGUF:Q4_K_M
        ${qwen35_35b_heretic_mmproj}
-        ${qwen35_ctx_256k}
-        ${qwen35_35b_args}
+        ${ctx_256k}
+        ${qwen35_nothink_args}
        ${common_args}
-        ${thinking_off}
-
-  "Qwen3.5-0.8B-GGUF:Q4_K_XL":
-    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
-        ${base_args}
-        ${thinking_on}

  "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_XL
        --ctx-size 4096
-        ${qwen35_sampling}
+        ${qwen35_nothink_args}
        ${base_args}
-        ${thinking_off}
-
-  "Qwen3.5-2B-GGUF:Q4_K_M":
-    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
-        ${common_args}
-        ${thinking_on}
-
-  "Qwen3.5-2B-GGUF-nothink:Q4_K_M":
-    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-2B-GGUF:Q4_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
-        ${common_args}
-        ${thinking_off}

  "Qwen3.5-4B-GGUF:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
-        ${qwen35_ctx_128k}
-        ${qwen35_sampling}
+        ${ctx_128k}
+        ${qwen35_think_args}
        ${common_args}
-        ${thinking_on}

  "Qwen3.5-4B-GGUF-nothink:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M
-        ${qwen35_ctx_128k}
-        ${qwen35_sampling}
+        ${ctx_128k}
+        ${qwen35_nothink_args}
        ${common_args}
-        ${thinking_off}

  "Qwen3.5-4B-heretic-GGUF:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
        ${qwen35_4b_heretic_mmproj}
-        ${qwen35_ctx_128k}
-        ${qwen35_sampling}
+        ${ctx_128k}
+        ${qwen35_think_args}
        ${common_args}
-        ${thinking_on}

  "Qwen3.5-4B-heretic-GGUF-nothink:Q4_K_M":
    cmd: |
-      /app/llama-server
+      llama-server
        -hf mradermacher/Qwen3.5-4B-heretic-GGUF:Q4_K_M
        ${qwen35_4b_heretic_mmproj}
-        ${qwen35_ctx_128k}
-        ${qwen35_sampling}
+        ${ctx_128k}
+        ${qwen35_nothink_args}
        ${common_args}
-        ${thinking_off}

-  "Qwen3.5-9B-GGUF:Q4_K_M":
+  "gemma-4-26B-A4B-it-qat:UD-Q4_K_XL":
    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
+      llama-server
+        -hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:UD-Q4_K_XL \
+        ${ctx_256k}
+        ${gemma4_sampling}
        ${common_args}
-        ${thinking_on}

-  "Qwen3.5-9B-GGUF-nothink:Q4_K_M":
+  "gemma-4-26B-A4B-it-qat-nothink:UD-Q4_K_XL":
    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
+      llama-server
+        -hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:UD-Q4_K_XL \
+        ${ctx_256k}
+        ${gemma4_nothink_sampling}
        ${common_args}
-        ${thinking_off}

-  "Qwen3.5-9B-GGUF:Q3_K_M":
+  "gemma-4-26B-A4B-it-qat-mtp:UD-Q4_K_XL":
    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
+      llama-server
+        -hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:UD-Q4_K_XL \
+        --spec-draft-hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:Q8_0-MTP \
+        --spec-type draft-mtp
+        --spec-draft-n-max 1
+        --swa-full
+        --kv-unified
+        --parallel 1
+        ${ctx_256k}
+        ${gemma4_sampling}
        ${common_args}
-        ${thinking_on}

-  "Qwen3.5-9B-GGUF-nothink:Q3_K_M":
+  "gemma-4-26B-A4B-it-qat-mtp-nothink:UD-Q4_K_XL":
    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-9B-GGUF:Q3_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
+      llama-server
+        -hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:UD-Q4_K_XL \
+        --spec-draft-hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:Q8_0-MTP \
+        --spec-type draft-mtp
+        --spec-draft-n-max 1
+        --swa-full
+        --kv-unified
+        --parallel 1
+        ${ctx_256k}
+        ${gemma4_nothink_sampling}
        ${common_args}
-        ${thinking_off}

-  "Qwen3.5-27B-GGUF:Q3_K_M":
+  "SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF:UD-Q4_K_XL":
    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
+      llama-server
+        -hf SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF:UD-Q4_K_XL \
+        ${ctx_256k}
+        ${gemma4_sampling}
        ${common_args}
-        ${thinking_on}

-  "Qwen3.5-27B-GGUF-nothink:Q3_K_M":
+  "SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF-nothink:UD-Q4_K_XL":
    cmd: |
-      /app/llama-server
-        -hf unsloth/Qwen3.5-27B-GGUF:Q3_K_M
-        ${qwen35_ctx_256k}
-        ${qwen35_sampling}
+      llama-server
+        -hf SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF:UD-Q4_K_XL \
+        ${ctx_256k}
+        ${gemma4_nothink_sampling}
        ${common_args}
-        ${thinking_off}

-  "GLM-4.7-Flash-GGUF:Q4_K_M":
+  # The heretic QAT repo does not ship an MTP drafter,
+  # so borrow the one from the non-heretic unsloth QAT repo.
+  "SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF-mtp:UD-Q4_K_XL":
    cmd: |
-      /app/llama-server
-        -hf unsloth/GLM-4.7-Flash-GGUF:Q4_K_M
-        ${glm47_flash_args}
+      llama-server
+        -hf SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF:UD-Q4_K_XL \
+        --spec-draft-hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:Q8_0-MTP \
+        --spec-type draft-mtp
+        --spec-draft-n-max 1
+        --swa-full
+        --kv-unified
+        --parallel 1
+        ${ctx_256k}
+        ${gemma4_sampling}
        ${common_args}
+
+  "SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF-mtp-nothink:UD-Q4_K_XL":
+    cmd: |
+      llama-server
+        -hf SC117/gemma-4-26B-A4B-it-qat-heretic-GGUF:UD-Q4_K_XL \
+        --spec-draft-hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:Q8_0-MTP \
+        --spec-type draft-mtp
+        --spec-draft-n-max 1
+        --swa-full
+        --kv-unified
+        --parallel 1
+        ${ctx_256k}
+        ${gemma4_nothink_sampling}
+        ${common_args}
+
+  "unsloth/gemma-4-E4B-it-qat-GGUF:UD-Q4_K_XL":
+    cmd: |
+      llama-server
+        -hf unsloth/gemma-4-E4B-it-qat-GGUF:UD-Q4_K_XL \
+        ${ctx_128k}
+        ${gemma4_sampling}
+        ${common_args}
+
+  "unsloth/gemma-4-E4B-it-qat-GGUF-nothink:UD-Q4_K_XL":
+    cmd: |
+      llama-server
+        -hf unsloth/gemma-4-E4B-it-qat-GGUF:UD-Q4_K_XL \
+        ${ctx_128k}
+        ${gemma4_nothink_sampling}
+        ${common_args}
+
+  "unsloth/gemma-4-E2B-it-qat-GGUF:UD-Q4_K_XL":
+    cmd: |
+      llama-server
+        -hf unsloth/gemma-4-E2B-it-qat-GGUF:UD-Q4_K_XL \
+        ${ctx_128k}
+        ${gemma4_sampling}
+        ${common_args}
+
+  "unsloth/gemma-4-E2B-it-qat-GGUF-nothink:UD-Q4_K_XL":
+    cmd: |
+      llama-server
+        -hf unsloth/gemma-4-E2B-it-qat-GGUF:UD-Q4_K_XL \
+        ${ctx_128k}
+        ${gemma4_nothink_sampling}
+        ${common_args}
+
+  "unsloth/gemma-4-E4B-it-qat-GGUF-mtp:UD-Q4_K_XL":
+    cmd: |
+      llama-server
+        -hf unsloth/gemma-4-E4B-it-qat-GGUF:UD-Q4_K_XL \
+        --spec-draft-hf unsloth/gemma-4-E4B-it-qat-GGUF:Q8_0-MTP \
+        --spec-type draft-mtp
+        --spec-draft-n-max 1
+        --swa-full
+        --kv-unified
+        --parallel 1
+        ${ctx_128k}
+        ${gemma4_sampling}
+        ${common_args}
+
+  "unsloth/gemma-4-E4B-it-qat-GGUF-mtp-nothink:UD-Q4_K_XL":
+    cmd: |
+      llama-server
+        -hf unsloth/gemma-4-E4B-it-qat-GGUF:UD-Q4_K_XL \
+        --spec-draft-hf unsloth/gemma-4-E4B-it-qat-GGUF:Q8_0-MTP \
+        --spec-type draft-mtp
+        --spec-draft-n-max 1
+        --swa-full
+        --kv-unified
+        --parallel 1
+        ${ctx_128k}
+        ${gemma4_nothink_sampling}
+        ${common_args}
+
+  "llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF:Q4_K_M":
+    cmd: |
+      llama-server
+        -hf llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF:Q4_K_M \
+        ${ctx_128k}
+        ${gemma4_sampling}
+        ${common_args}
+
+  "llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF-nothink:Q4_K_M":
+    cmd: |
+      llama-server
+        -hf llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF:Q4_K_M \
+        ${ctx_128k}
+        ${gemma4_nothink_sampling}
+        ${common_args}
+
+  "llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF-mtp:Q4_K_M":
+    cmd: |
+      llama-server
+        -hf llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF:Q4_K_M \
+        --spec-draft-hf unsloth/gemma-4-E4B-it-qat-GGUF:Q8_0-MTP \
+        --spec-type draft-mtp
+        --spec-draft-n-max 1
+        --swa-full
+        --kv-unified
+        --parallel 1
+        ${ctx_128k}
+        ${gemma4_sampling}
+        ${common_args}
+
+  "llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF-mtp-nothink:Q4_K_M":
+    cmd: |
+      llama-server
+        -hf llmfan46/gemma-4-E4B-it-ultra-uncensored-heretic-GGUF:Q4_K_M \
+        --spec-draft-hf unsloth/gemma-4-E4B-it-qat-GGUF:Q8_0-MTP \
+        --spec-type draft-mtp
+        --spec-draft-n-max 1
+        --swa-full
+        --kv-unified
+        --parallel 1
+        ${ctx_128k}
+        ${gemma4_nothink_sampling}
+        ${common_args}
+
+  "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL":  # uses qwen35_think_args
+    cmd: |
+      llama-server
+        -hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL
+        ${ctx_256k}
+        ${qwen35_think_args}
+        ${common_args}
+
+  "unsloth/Qwen3.6-35B-A3B-GGUF-nothink:UD-Q4_K_XL":  # same weights, qwen35_nothink_args
+    cmd: |
+      llama-server
+        -hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL
+        ${ctx_256k}
+        ${qwen35_nothink_args}
+        ${common_args}
+
+  "HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive:Q4_K_M":  # uses qwen35_think_args
+    cmd: |
+      llama-server
+        -hf HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive:Q4_K_M
+        ${ctx_256k}
+        ${qwen35_think_args}
+        ${common_args}
+
+  "HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive-nothink:Q4_K_M":  # same weights, qwen35_nothink_args
+    cmd: |
+      llama-server
+        -hf HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive:Q4_K_M
+        ${ctx_256k}
+        ${qwen35_nothink_args}
+        ${common_args}
+
+  "unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M":  # uses qwen35_think_args plus MTP drafting flags
+    cmd: |
+      llama-server
+        -hf unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M
+        ${ctx_256k}
+        ${qwen35_think_args}
+        --spec-type draft-mtp --spec-draft-n-max 1
+        --parallel 1
+        ${common_args}
+
+  "unsloth/Qwen3.6-35B-A3B-MTP-GGUF-nothink:Q4_K_M":  # same weights, qwen35_nothink_args
+    cmd: |
+      llama-server
+        -hf unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M
+        ${ctx_256k}
+        ${qwen35_nothink_args}
+        --spec-type draft-mtp --spec-draft-n-max 1
+        --parallel 1
+        ${common_args}
+
+  # STT via parakeet-server (parakeet.cpp OpenAI-compatible server, CPU, always loaded)
+  # Model downloaded on first start and cached under /root/.cache/parakeet.cpp/models
+  # parakeet-proxy.py sits in front to convert any audio format to WAV via ffmpeg,
+  # since parakeet-server only accepts real WAV but browsers send Ogg/Opus.
+  "parakeet-tdt_ctc-1.1b":
+    checkEndpoint: none
+    cmd: |
+      env PROXY_PORT=${PORT} FFMPEG_BIN=/root/.cache/ffmpeg/ffmpeg python3 /config/parakeet-proxy.py
+
+
+  # Image generation via stable-diffusion.cpp (sd-server)
+  # Models must be pre-downloaded to /root/.cache/sd/
+  # FLUX.2-klein-4B: fast unified text-to-image and image editing model (Apache 2.0)
+  # Download: uv run --with huggingface_hub hf download unsloth/FLUX.2-klein-4B-GGUF flux-2-klein-4b-Q4_K_M.gguf --local-dir /root/.cache/sd
+  # Download VAE: uv run --with huggingface_hub hf download Comfy-Org/flux2-klein-4B split_files/vae/flux2-vae.safetensors --local-dir /root/.cache/sd/flux2-klein && cp /root/.cache/sd/flux2-klein/split_files/vae/flux2-vae.safetensors /root/.cache/sd/
+  # Download LLM: uv run --with huggingface_hub hf download ponpoke/flux2-klein-4b-uncensored-text-encoder flux2-klein-4b-uncensored-q4_k_m.gguf --local-dir /root/.cache/sd
+  "flux2-klein-4b:Q4_K_M":
+    checkEndpoint: "/"
+    cmd: |
+      sd-server
+        --listen-port ${PORT}
+        --diffusion-model /root/.cache/sd/flux-2-klein-4b-Q4_K_M.gguf
+        --vae /root/.cache/sd/flux2-vae.safetensors
+        --llm /root/.cache/sd/flux2-klein-4b-uncensored-q4_k_m.gguf
+        --cfg-scale 1.0
+        --sampling-method euler
+        --steps 4
+        --diffusion-fa
+        --offload-to-cpu
@@ -0,0 +1,227 @@
+#!/usr/bin/env python3
+"""
+Thin reverse proxy for parakeet-server.
+
+Accepts POST /v1/audio/transcriptions with any audio format,
+converts the audio to 16 kHz mono WAV via ffmpeg, then forwards
+the converted file to the real parakeet-server running on PARAKEET_PORT.
+
+Also proxies GET /health straight through.
+
+Usage:
+    PROXY_PORT=<port> python3 parakeet-proxy.py
+
+parakeet-server itself is started by this script and listens on PROXY_PORT + 1.
+"""
+
+import http.server
+import io
+import os
+import subprocess
+import sys
+import tempfile
+import urllib.request
+import urllib.error
+
+# Port this proxy listens on.
+PROXY_PORT    = int(os.environ.get("PROXY_PORT", "8080"))
+# Port of the upstream parakeet-server; derived from PROXY_PORT, not read from the environment.
+PARAKEET_PORT = PROXY_PORT + 1
+# ffmpeg executable used by convert_to_wav().
+FFMPEG        = os.environ.get("FFMPEG_BIN", "ffmpeg")
+# Values passed to parakeet-server as --model and --cache-dir when it is launched.
+MODEL         = os.environ.get("PARAKEET_MODEL", "tdt_ctc-1.1b-q4_k.gguf")
+CACHE_DIR     = os.environ.get("PARAKEET_CACHE_DIR", "/root/.cache/parakeet.cpp/models")
+
+
+def convert_to_wav(data: bytes) -> bytes:
+    """Convert any audio bytes to 16 kHz mono PCM WAV via ffmpeg."""
+    with tempfile.NamedTemporaryFile(suffix=".input", delete=False) as inf:
+        inf.write(data)
+        inf_path = inf.name
+    out_path = inf_path + ".wav"
+    try:
+        subprocess.run(
+            [
+                FFMPEG, "-y",
+                "-i", inf_path,
+                "-ar", "16000",
+                "-ac", "1",
+                "-f", "wav",
+                out_path,
+            ],
+            check=True,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+        with open(out_path, "rb") as f:
+            return f.read()
+    finally:
+        os.unlink(inf_path)
+        if os.path.exists(out_path):
+            os.unlink(out_path)
+
+
+def parse_multipart(content_type: str, body: bytes):
+    """
+    Parse a multipart/form-data body.
+    Returns a dict of field_name -> (filename_or_None, content_type, data).
+    """
+    import email
+    from email import policy as email_policy
+
+    # email.parser needs the full MIME headers to parse multipart
+    raw = b"Content-Type: " + content_type.encode() + b"\r\n\r\n" + body
+    msg = email.message_from_bytes(raw, policy=email_policy.compat32)
+    parts = {}
+    for part in msg.get_payload():
+        cd = part.get("Content-Disposition", "")
+        name = None
+        filename = None
+        for item in cd.split(";"):
+            item = item.strip()
+            if item.startswith('name='):
+                name = item[5:].strip('"')
+            elif item.startswith('filename='):
+                filename = item[9:].strip('"')
+        if name is not None:
+            parts[name] = (filename, part.get_content_type(), part.get_payload(decode=True))
+    return parts
+
+
+def build_multipart(fields: dict) -> tuple[bytes, str]:
+    """
+    Build a multipart/form-data body from fields dict:
+      field_name -> (filename_or_None, content_type, data_bytes)
+    Returns (body_bytes, content_type_header_value).
+    """
+    boundary = b"----ParakeetProxyBoundary0xDEADBEEF"
+    body = b""
+    for name, (filename, ct, data) in fields.items():
+        body += b"--" + boundary + b"\r\n"
+        if filename:
+            body += (
+                f'Content-Disposition: form-data; name="{name}"; filename="{filename}"\r\n'
+            ).encode()
+        else:
+            body += f'Content-Disposition: form-data; name="{name}"\r\n'.encode()
+        body += f"Content-Type: {ct}\r\n\r\n".encode()
+        body += data + b"\r\n"
+    body += b"--" + boundary + b"--\r\n"
+    return body, f"multipart/form-data; boundary={boundary.decode()}"
+
+
+class ProxyHandler(http.server.BaseHTTPRequestHandler):
+    def log_message(self, fmt, *args):
+        print(f"[parakeet-proxy] {self.address_string()} - {fmt % args}", flush=True)
+
+    def do_GET(self):
+        if self.path == "/health":
+            self._forward_get("/health")
+        else:
+            self.send_response(404)
+            self.end_headers()
+
+    def do_POST(self):
+        if self.path.rstrip("/") == "/v1/audio/transcriptions":
+            self._handle_transcription()
+        else:
+            self.send_response(404)
+            self.end_headers()
+
+    def _forward_get(self, path):
+        try:
+            url = f"http://127.0.0.1:{PARAKEET_PORT}{path}"
+            with urllib.request.urlopen(url, timeout=5) as resp:
+                body = resp.read()
+                self.send_response(resp.status)
+                self.send_header("Content-Type", resp.headers.get("Content-Type", "application/json"))
+                self.end_headers()
+                self.wfile.write(body)
+        except Exception as e:
+            self.send_response(502)
+            self.end_headers()
+            self.wfile.write(str(e).encode())
+
+    def _handle_transcription(self):
+        length = int(self.headers.get("Content-Length", 0))
+        body = self.rfile.read(length)
+        ct = self.headers.get("Content-Type", "")
+
+        try:
+            fields = parse_multipart(ct, body)
+        except Exception as e:
+            self._error(400, f"failed to parse multipart: {e}")
+            return
+
+        if "file" not in fields:
+            self._error(400, "missing required field 'file'")
+            return
+
+        filename, file_ct, audio_data = fields["file"]
+
+        # Convert to WAV regardless of what we received
+        try:
+            wav_data = convert_to_wav(audio_data)
+        except subprocess.CalledProcessError:
+            self._error(400, "ffmpeg could not decode audio")
+            return
+        except Exception as e:
+            self._error(500, f"conversion error: {e}")
+            return
+
+        # Rebuild multipart with converted WAV, preserve other fields
+        new_fields = {}
+        for name, (fn, fct, data) in fields.items():
+            if name == "file":
+                new_fields[name] = ("recording.wav", "audio/wav", wav_data)
+            else:
+                new_fields[name] = (fn, fct, data)
+
+        new_body, new_ct = build_multipart(new_fields)
+
+        # Forward to parakeet-server
+        try:
+            url = f"http://127.0.0.1:{PARAKEET_PORT}/v1/audio/transcriptions"
+            req = urllib.request.Request(
+                url,
+                data=new_body,
+                headers={"Content-Type": new_ct},
+                method="POST",
+            )
+            with urllib.request.urlopen(req, timeout=300) as resp:
+                resp_body = resp.read()
+                self.send_response(resp.status)
+                self.send_header("Content-Type", resp.headers.get("Content-Type", "application/json"))
+                self.end_headers()
+                self.wfile.write(resp_body)
+        except urllib.error.HTTPError as e:
+            resp_body = e.read()
+            self.send_response(e.code)
+            self.send_header("Content-Type", e.headers.get("Content-Type", "application/json"))
+            self.end_headers()
+            self.wfile.write(resp_body)
+        except Exception as e:
+            self._error(502, f"upstream error: {e}")
+
+    def _error(self, code: int, msg: str):
+        body = f'{{"error":{{"message":"{msg}","type":"proxy_error"}}}}'.encode()
+        self.send_response(code)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(body)
+
+
+if __name__ == "__main__":
+    proc = subprocess.Popen([
+        "parakeet-server",
+        "--host", "127.0.0.1",
+        "--port", str(PARAKEET_PORT),
+        "--model", MODEL,
+        "--cache-dir", CACHE_DIR,
+    ])
+    print(f"[parakeet-proxy] started parakeet-server pid={proc.pid} on :{PARAKEET_PORT}", flush=True)
+
+    server = http.server.HTTPServer(("0.0.0.0", PROXY_PORT), ProxyHandler)
+    print(f"[parakeet-proxy] listening on :{PROXY_PORT}", flush=True)
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        pass
+    finally:
+        proc.terminate()
+        proc.wait()
@@ -1,101 +0,0 @@
-{%- if not add_generation_prompt is defined %}
-    {%- set add_generation_prompt = false %}
-{%- endif %}
-{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}
-{%- for message in messages %}
-    {%- if message['role'] == 'system' %}
-        {%- if ns.is_first_sp %}
-            {%- set ns.system_prompt = ns.system_prompt + message['content'] %}
-            {%- set ns.is_first_sp = false %}
-        {%- else %}
-            {%- set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
-        {%- endif %}
-    {%- endif %}
-{%- endfor %}
-
-{#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #}
-{%- if tools is defined and tools is not none %}
-    {%- set tool_ns = namespace(text='You are a helpful assistant with tool calling capabilities. ' + 'When a tool call is needed, you MUST use the following format to issue the call:\n' + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>FUNCTION_NAME\n' + '```json\n{"param1": "value1", "param2": "value2"}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>\n\n' + 'Make sure the JSON is valid.' + '## Tools\n\n### Function\n\nYou have the following functions available:\n\n') %}
-    {%- for tool in tools %}
-        {%- set tool_ns.text = tool_ns.text + '\n```json\n' + (tool | tojson) + '\n```\n' %}
-    {%- endfor %}
-    {%- if ns.system_prompt|length != 0 %}
-        {%- set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
-    {%- else %}
-        {%- set ns.system_prompt = tool_ns.text %}
-    {%- endif %}
-{%- endif %}
-{{- bos_token }}
-{{- '/no_think' + ns.system_prompt }}
-{%- set last_index = (messages|length - 1) %}
-{%- for message in messages %}
-    {%- set content = message['content'] %}
-    {%- if message['role'] == 'user' %}
-        {%- set ns.is_tool = false -%}
-        {%- set ns.is_first = false -%}
-        {%- set ns.is_last_user = true -%}
-        {%- if loop.index0 == last_index %}
-            {{- '<｜User｜>' + content }}
-        {%- else %}
-            {{- '<｜User｜>' + content + '<｜Assistant｜>'}}
-        {%- endif %}
-    {%- endif %}
-    {%- if message['role'] == 'assistant' %}
-        {%- if '</think>' in content %}
-            {%- set content = (content.split('</think>')|last) %}
-        {%- endif %}
-    {%- endif %}
-    {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
-        {%- set ns.is_last_user = false -%}
-        {%- if ns.is_tool %}
-            {{- '<｜tool▁outputs▁end｜>'}}
-        {%- endif %}
-        {%- set ns.is_first = false %}
-        {%- set ns.is_tool = false -%}
-        {%- set ns.is_output_first = true %}
-        {%- for tool in message['tool_calls'] %}
-            {%- set arguments = tool['function']['arguments'] %}
-            {%- if arguments is not string %}
-                {%- set arguments = arguments|tojson %}
-            {%- endif %}
-            {%- if not ns.is_first %}
-                {%- if content is none %}
-                    {{- '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<｜tool▁call▁end｜>'}}
-                }
-                {%- else %}
-                    {{- content + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<｜tool▁call▁end｜>'}}
-                {%- endif %}
-                {%- set ns.is_first = true -%}
-            {%- else %}
-                {{- '\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<｜tool▁call▁end｜>'}}
-            {%- endif %}
-        {%- endfor %}
-        {{- '<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
-    {%- endif %}
-    {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
-        {%- set ns.is_last_user = false -%}
-        {%- if ns.is_tool %}
-            {{- '<｜tool▁outputs▁end｜>' + content + '<｜end▁of▁sentence｜>'}}
-            {%- set ns.is_tool = false -%}
-        {%- else %}
-            {{- content + '<｜end▁of▁sentence｜>'}}
-        {%- endif %}
-    {%- endif %}
-    {%- if message['role'] == 'tool' %}
-        {%- set ns.is_last_user = false -%}
-        {%- set ns.is_tool = true -%}
-        {%- if ns.is_output_first %}
-            {{- '<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}
-            {%- set ns.is_output_first = false %}
-        {%- else %}
-            {{- '\n<｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}
-        {%- endif %}
-    {%- endif %}
-{%- endfor -%}
-{%- if ns.is_tool %}
-    {{- '<｜tool▁outputs▁end｜>'}}
-{%- endif %}
-{#- if add_generation_prompt and not ns.is_last_user and not ns.is_tool #}
-{%- if add_generation_prompt and not ns.is_tool %}
-    {{- '<｜Assistant｜>'}}
-{%- endif %}
@@ -16,28 +16,58 @@ spec:
      labels:
        app: llama-swap
    spec:
+      initContainers:
+        # Seeds the shared models volume with the whisper-small model and a
+        # static ffmpeg binary before the main container starts.
+        - name: download-whisper
+          image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-12
+          command:
+            - sh
+            - -c
+            - |
+              # Stop at the first failing command so a broken download fails the
+              # init container instead of being silently ignored.
+              set -e
+              mkdir -p /root/.cache/whisper
+              if [ ! -f /root/.cache/whisper/ggml-small.bin ]; then
+                echo "Downloading whisper-small model..."
+                # -f makes curl fail on HTTP errors; the file is renamed into
+                # place only once complete, so a partial download is never taken
+                # for "already present" on the next start.
+                curl -fL -o /root/.cache/whisper/ggml-small.bin.part \
+                  https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin
+                mv /root/.cache/whisper/ggml-small.bin.part /root/.cache/whisper/ggml-small.bin
+              else
+                echo "whisper-small model already present, skipping download"
+              fi
+              if [ ! -f /root/.cache/ffmpeg/ffmpeg ]; then
+                echo "Downloading static ffmpeg..."
+                rm -rf /root/.cache/ffmpeg/unpack
+                mkdir -p /root/.cache/ffmpeg/unpack
+                apt-get update -qq
+                apt-get install -y --no-install-recommends xz-utils
+                curl -fL -o /root/.cache/ffmpeg/ffmpeg.tar.xz \
+                  https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz
+                # Unpack into a scratch directory and move the finished binary
+                # into place last, for the same reason as above.
+                tar -xJf /root/.cache/ffmpeg/ffmpeg.tar.xz -C /root/.cache/ffmpeg/unpack --wildcards '*/ffmpeg' --strip-components=2
+                rm /root/.cache/ffmpeg/ffmpeg.tar.xz
+                chmod +x /root/.cache/ffmpeg/unpack/ffmpeg
+                mv /root/.cache/ffmpeg/unpack/ffmpeg /root/.cache/ffmpeg/ffmpeg
+                rm -rf /root/.cache/ffmpeg/unpack
+              else
+                echo "ffmpeg already present, skipping download"
+              fi
+          volumeMounts:
+            - name: models
+              mountPath: /root/.cache
      containers:
        - name: llama-swap
-          image: ghcr.io/mostlygeek/llama-swap:v199-vulkan-b8637
+          image: gitea.lumpiasty.xyz/lumpiasty/llama-swap:unified-vulkan-parakeet-2026-06-12
          imagePullPolicy: IfNotPresent
          command:
-            - /app/llama-swap
+            - llama-swap
          args:
            - --config=/config/config.yaml
            - --watch-config
+          env:
+            - name: RADV_EXPERIMENTAL
+              value: transfer_queue
          ports:
            - containerPort: 8080
              name: http
              protocol: TCP
-          env:
-            - name: OPENROUTER_API_KEY
-              valueFrom:
-                secretKeyRef:
-                  name: llama-openrouter
-                  key: OPENROUTER_API_KEY
          volumeMounts:
            - name: models
              mountPath: /root/.cache
+            - name: models
+              mountPath: /usr/local/bin/ffmpeg
+              subPath: ffmpeg/ffmpeg
            - mountPath: /dev/kfd
              name: kfd
            - mountPath: /dev/dri
@@ -9,6 +9,7 @@ metadata:
    acme.cert-manager.io/http01-edit-in-place: "true"
    nginx.ingress.kubernetes.io/proxy-buffering: "false"
    nginx.ingress.kubernetes.io/proxy-read-timeout: 30m
+    nginx.ingress.kubernetes.io/proxy-body-size: 8m
 spec:
  ingressClassName: nginx-ingress
  rules:
@@ -0,0 +1,50 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: kokoro
+  namespace: llama
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: kokoro
+  template:
+    metadata:
+      labels:
+        app: kokoro
+    spec:
+      containers:
+        - name: kokoro
+          # OpenAI-compatible Kokoro-FastAPI TTS server, CPU PyTorch backend.
+          # Models baked into the image (no PVC needed).
+          # v0.3.0 includes fix for per-request voice tensor memory leak (#459).
+          image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.5.0
+          ports:
+            - containerPort: 8880
+              name: http
+              protocol: TCP
+          resources:
+            requests:
+              memory: "2Gi"
+              cpu: "500m"
+            limits:
+              memory: "6Gi"
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: kokoro
+  namespace: llama
+spec:
+  type: ClusterIP
+  ports:
+    - name: http
+      port: 8880
+      targetPort: 8880
+      protocol: TCP
+  selector:
+    app: kokoro
+
@@ -7,9 +7,10 @@ resources:
  - ingress.yaml
  - pvc-ssd.yaml
  - deployment.yaml
+  - kokoro.yaml
 configMapGenerator:
  - name: llama-swap
    namespace: llama
    files:
      - config.yaml=configs/config.yaml
-      - qwen_nothink_chat_template.jinja=configs/qwen_nothink_chat_template.jinja
+      - parakeet-proxy.py=configs/parakeet-proxy.py
@@ -7,7 +7,7 @@ metadata:
  name: llama-models-lvmssd
  namespace: openebs
 spec:
-  capacity: 200Gi
+  capacity: "429496729600"
  ownerNodeID: anapistula-delrosalae
  shared: "yes"
  thinProvision: "no"
@@ -20,7 +20,7 @@ metadata:
  name: llama-models-lvmssd
 spec:
  capacity:
-    storage: 200Gi
+    storage: 400Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
@@ -41,6 +41,6 @@ spec:
    - ReadWriteOnce
  resources:
    requests:
-      storage: 200Gi
+      storage: 400Gi
  storageClassName: ssd-lvmpv
  volumeName: llama-models-lvmssd
@@ -36,26 +36,3 @@ spec:
      excludeRaw: true

  vaultAuthRef: llama
---
-apiVersion: secrets.hashicorp.com/v1beta1
-kind: VaultStaticSecret
-metadata:
-  name: llama-openrouter
-  namespace: llama
-spec:
-  type: kv-v2
-
-  mount: secret
-  path: openrouter
-
-  destination:
-    create: true
-    name: llama-openrouter
-    type: Opaque
-    transformation:
-      excludeRaw: true
-      templates:
-        OPENROUTER_API_KEY:
-          text: '{{ get .Secrets "API_KEY" }}'
-
-  vaultAuthRef: llama
@@ -0,0 +1,52 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: meridian
+  namespace: meridian
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: meridian
+  template:
+    metadata:
+      labels:
+        app: meridian
+    spec:
+      containers:
+        - name: meridian
+          image: gitea.lumpiasty.xyz/lumpiasty/meridian:1.41.1
+          imagePullPolicy: Always
+          ports:
+            - containerPort: 3456
+              name: http
+              protocol: TCP
+          volumeMounts:
+            - name: data
+              mountPath: /home/claude/
+          env:
+              # Default port, it has some issue if not set
+            - name: MERIDIAN_PORT
+              value: "3456"
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: meridian-data-lvmssd
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: meridian
+  namespace: meridian
+spec:
+  type: ClusterIP
+  ports:
+    - name: http
+      port: 3456
+      targetPort: 3456
+      protocol: TCP
+  selector:
+    app: meridian
@@ -1,5 +1,7 @@
+---
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
  - namespace.yaml
-  - release.yaml
+  - pvc.yaml
+  - deployment.yaml
@@ -2,4 +2,4 @@
 apiVersion: v1
 kind: Namespace
 metadata:
-  name: renovate
+  name: meridian
@@ -4,43 +4,43 @@ kind: LVMVolume
 metadata:
  labels:
    kubernetes.io/nodename: anapistula-delrosalae
-  name: searxng-persistent-data-lvmhdd
+  name: meridian-data-lvmssd
  namespace: openebs
 spec:
-  capacity: 1Gi
+  capacity: "1048576000"
  ownerNodeID: anapistula-delrosalae
  shared: "yes"
  thinProvision: "no"
-  vgPattern: ^openebs-hdd$
-  volGroup: openebs-hdd
+  vgPattern: ^openebs-ssd$
+  volGroup: openebs-ssd
 ---
 kind: PersistentVolume
 apiVersion: v1
 metadata:
-  name: searxng-persistent-data-lvmhdd
+  name: meridian-data-lvmssd
 spec:
  capacity:
    storage: 1Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
-  storageClassName: hdd-lvmpv
+  storageClassName: ssd-lvmpv
  volumeMode: Filesystem
  csi:
    driver: local.csi.openebs.io
    fsType: btrfs
-    volumeHandle: searxng-persistent-data-lvmhdd
+    volumeHandle: meridian-data-lvmssd
 ---
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
-  name: searxng-persistent-data-lvmhdd
-  namespace: searxng
+  name: meridian-data-lvmssd
+  namespace: meridian
 spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
-  storageClassName: hdd-lvmpv
-  volumeName: searxng-persistent-data-lvmhdd
+  storageClassName: ssd-lvmpv
+  volumeName: meridian-data-lvmssd
@@ -15,7 +15,7 @@ spec:
    spec:
      initContainers:
        - name: prepare-home
-          image: alpine:3.23.3
+          image: alpine:3.24.1
          imagePullPolicy: IfNotPresent
          command:
            - /bin/sh
@@ -18,7 +18,7 @@ spec:
  chart:
    spec:
      chart: open-webui
-      version: 12.13.0
+      version: 14.8.0
      sourceRef:
        kind: HelmRepository
        name: open-webui
@@ -45,6 +45,9 @@ spec:
        enabled: true
        existingClaim: openwebui-pipelines-lvmhdd

+    terminals:
+      enabled: true
+
    # SSO with Authentik
    extraEnvVars:
      - name: WEBUI_URL
@@ -71,3 +74,60 @@ spec:
        value: "false"
      - name: OAUTH_MERGE_ACCOUNTS_BY_EMAIL
        value: "true"
+      - name: ENABLE_OAUTH_ROLE_MANAGEMENT
+        value: "true"
+      - name: OAUTH_ROLES_CLAIM
+        value: "groups"
+      - name: OAUTH_ADMIN_ROLES
+        value: "Admins"
+      - name: OAUTH_ALLOWED_ROLES
+        value: "Users"
+      - name: OAUTH_AUTO_REDIRECT
+        value: "true"
+      # STT via parakeet-server (routed through llama-swap)
+      - name: AUDIO_STT_ENGINE
+        value: "openai"
+      - name: AUDIO_STT_OPENAI_API_BASE_URL
+        value: "http://llama.llama.svc.cluster.local:11434/v1"
+      - name: AUDIO_STT_OPENAI_API_KEY
+        value: "ignored"
+      - name: AUDIO_STT_MODEL
+        value: "parakeet-tdt_ctc-1.1b"
+      # TTS via dedicated Kokoro server (CPU-only, separate pod)
+      - name: AUDIO_TTS_ENGINE
+        value: "openai"
+      - name: AUDIO_TTS_OPENAI_API_BASE_URL
+        value: "http://kokoro.llama.svc.cluster.local:8880/v1"
+      - name: AUDIO_TTS_OPENAI_API_KEY
+        value: "ignored"
+      - name: AUDIO_TTS_MODEL
+        value: "kokoro"
+      - name: AUDIO_TTS_VOICE
+        value: "af_heart"
+      # Image generation via llama-swap sd-server
+      - name: ENABLE_IMAGE_GENERATION
+        value: "true"
+      - name: IMAGE_GENERATION_ENGINE
+        value: "openai"
+      - name: IMAGES_OPENAI_API_BASE_URL
+        value: "http://llama.llama.svc.cluster.local:11434/v1"
+      - name: IMAGES_OPENAI_API_KEY
+        value: "ignored"
+      - name: IMAGE_GENERATION_MODEL
+        value: "flux2-klein-4b:Q4_K_M"
+      - name: IMAGE_SIZE
+        value: "512x512"
+      # Image editing via llama-swap sd-server (/v1/images/edits)
+      - name: ENABLE_IMAGE_EDIT
+        value: "true"
+      - name: IMAGE_EDIT_ENGINE
+        value: "openai"
+      - name: IMAGES_EDIT_OPENAI_API_BASE_URL
+        value: "http://llama.llama.svc.cluster.local:11434/v1"
+      - name: IMAGES_EDIT_OPENAI_API_KEY
+        value: "ignored"
+      - name: IMAGE_EDIT_MODEL
+        value: "flux2-klein-4b:Q4_K_M"
+      - name: IMAGE_EDIT_SIZE
+        value: "512x512"
@@ -1,11 +0,0 @@
---
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  namespace: renovate
-  name: renovate-config
-data:
-  RENOVATE_AUTODISCOVER: "true"
-  RENOVATE_ENDPOINT: https://gitea.lumpiasty.xyz/api/v1
-  RENOVATE_PLATFORM: gitea
-  RENOVATE_GIT_AUTHOR: Renovate Bot <renovate@lumpiasty.xyz>
@@ -1,24 +0,0 @@
---
-apiVersion: batch/v1
-kind: CronJob
-metadata:
-  name: renovate
-  namespace: renovate
-spec:
-  schedule: "@daily"
-  concurrencyPolicy: Forbid
-  jobTemplate:
-    spec:
-      template:
-        spec:
-          containers:
-            - name: renovate
-              # Update this to the latest available and then enable Renovate on
-              # the manifest
-              image: renovate/renovate:43.95.0-full
-              envFrom:
-                - secretRef:
-                    name: renovate-gitea-token
-                - configMapRef:
-                    name: renovate-config
-          restartPolicy: Never
@@ -1,38 +0,0 @@
---
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: renovate
-  namespace: renovate
---
-apiVersion: secrets.hashicorp.com/v1beta1
-kind: VaultAuth
-metadata:
-  name: renovate
-  namespace: renovate
-spec:
-  method: kubernetes
-  mount: kubernetes
-  kubernetes:
-    role: renovate
-    serviceAccount: renovate
---
-apiVersion: secrets.hashicorp.com/v1beta1
-kind: VaultStaticSecret
-metadata:
-  name: renovate-gitea-token
-  namespace: renovate
-spec:
-  type: kv-v2
-
-  mount: secret
-  path: renovate
-
-  destination:
-    create: true
-    name: renovate-gitea-token
-    type: Opaque
-    transformation:
-      excludeRaw: true
-
-  vaultAuthRef: renovate
@@ -1 +0,0 @@
-use_default_settings: true
@@ -1,42 +0,0 @@
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: searxng
-  namespace: searxng
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: searxng
-  template:
-    metadata:
-      labels:
-        app: searxng
-    spec:
-      containers:
-        - name: searxng
-          image: searxng/searxng:2025.8.12-6b1516d
-          ports:
-            - containerPort: 8080
-          env:
-            - name: SEARXNG_SECRET
-              valueFrom:
-                secretKeyRef:
-                  name: searxng-secret
-                  key: SEARXNG_SECRET
-                  optional: false
-          volumeMounts:
-            - name: config-volume
-              mountPath: /etc/searxng/settings.yml
-              subPath: settings.yml
-              readOnly: true
-            - name: searxng-persistent-data
-              mountPath: /var/cache/searxng
-      volumes:
-        - name: config-volume
-          configMap:
-            name: searxng-config
-        - name: searxng-persistent-data
-          persistentVolumeClaim:
-            claimName: searxng-persistent-data-lvmhdd
@@ -1,25 +0,0 @@
---
-apiVersion: networking.k8s.io/v1
-kind: Ingress
-metadata:
-  namespace: searxng
-  name: searxng
-  annotations:
-    cert-manager.io/cluster-issuer: letsencrypt
-spec:
-  ingressClassName: nginx-ingress
-  rules:
-    - host: searxng.lumpiasty.xyz
-      http:
-        paths:
-          - backend:
-              service:
-                name: searxng
-                port:
-                  number: 8080
-            path: /
-            pathType: Prefix
-  tls:
-    - hosts:
-        - searxng.lumpiasty.xyz
-      secretName: searxng-ingress
@@ -1,13 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-resources:
-  - namespace.yaml
-  - pvc.yaml
-  - deployment.yaml
-  - service.yaml
-  - ingress.yaml
-configMapGenerator:
-  - name: searxng-config
-    namespace: searxng
-    files:
-      - settings.yml=configs/settings.yml
@@ -1,14 +0,0 @@
---
-apiVersion: v1
-kind: Service
-metadata:
-  name: searxng
-  namespace: searxng
-spec:
-  selector:
-    app: searxng
-  ports:
-    - protocol: TCP
-      port: 8080
-      targetPort: 8080
-  type: ClusterIP
@@ -18,7 +18,7 @@ spec:
  chart:
    spec:
      chart: woodpecker
-      version: 3.5.1
+      version: 3.6.4
      sourceRef:
        kind: HelmRepository
        name: woodpecker
@@ -50,6 +50,7 @@ spec:
        WOODPECKER_OPEN: "true"
        # Make lumpiasty admin
        WOODPECKER_ADMIN: GiteaAdmin
+        WOODPECKER_PLUGINS_PRIVILEGED: woodpeckerci/plugin-docker-buildx

      createAgentSecret: true

@@ -4,7 +4,7 @@ metadata:
  name: apps
  namespace: flux-system
 spec:
-  interval: 10m0s
+  interval: 24h
  sourceRef:
    kind: GitRepository
    name: flux-system
@@ -6,7 +6,7 @@ metadata:
  name: flux-system
  namespace: flux-system
 spec:
-  interval: 1m0s
+  interval: 24h
  ref:
    branch: fresh-start
  secretRef:
@@ -19,7 +19,7 @@ metadata:
  name: flux-system
  namespace: flux-system
 spec:
-  interval: 10m0s
+  interval: 24h
  path: ./cluster
  prune: true
  sourceRef:
@@ -4,7 +4,7 @@ metadata:
  name: infra
  namespace: flux-system
 spec:
-  interval: 10m0s
+  interval: 24h
  sourceRef:
    kind: GitRepository
    name: flux-system
@@ -41,16 +41,6 @@ in
    openbao
    pv-migrate
    mermaid-cli
-    (
-      # Wrapping opencode to set the OPENCODE_ENABLE_EXA environment variable
-      runCommand "opencode" {
-        buildInputs = [ makeWrapper ];
-      } ''
-        mkdir -p $out/bin
-        makeWrapper ${pkgs.opencode}/bin/opencode $out/bin/opencode \
-          --set OPENCODE_ENABLE_EXA "1"
-        ''
-    )
    tea
    woodpecker-cli
  ];
@@ -0,0 +1,492 @@
+# App deployment guidelines
+
+This document summarizes current guidelines, requirements, common patterns, and standards that newly deployed apps should meet.
+
+## Structure
+
+Each app on cluster should be contained in its own kustomization living in subdirectory under [apps](/apps) and imported from main [apps kustomization](/apps/kustomization.yaml). Apps that provide infrastructural services belong to [infra](/infra). A few examples:
+
+- **Open WebUI**: Web app, belongs in [apps/openwebui](/apps/openwebui/) together with its direct and unique dependencies eg. database
+- **llama-swap** (llama.cpp + whisper + stablediffusion): Inference server, service used by other deployments on cluster but does not manage the cluster, belongs in [apps/llama](/apps/llama/)
+- **kokoro**: Text to speech inference server, also service used by other deployments, I consider it closely related to llama-swap, so due to arbitrary decision, keeping it together with llama-swap under [apps/llama](/apps/llama/)
+- **crawl4ai**: Web scraper, another service used only by other apps, belongs in [apps/crawl4ai](/apps/crawl4ai/)
+- **Gitea**: Code forge, despite being essential for overall architecture (holding cluster's code) is not a core cluster software, belongs in [apps/gitea](/apps/gitea/)
+- **Woodpecker**: Continuous Integration system, belongs in [apps/woodpecker](/apps/woodpecker/)
+- **Cilium**: Kubernetes CNI, core cluster functionality, belongs in [infra/controllers/cilium.yaml](/infra/controllers/cilium.yaml)
+- **Nginx Ingress Controller**: Provides ingress kubernetes functionality, belongs in [infra/controllers/nginx-ingress.yaml](/infra/controllers/nginx-ingress.yaml)
+- **CloudNativePG**: Kubernetes PostgreSQL operator, belongs in [infra/controllers/cloudnative-pg.yaml](/infra/controllers/cloudnative-pg.yaml)
+- **OpenBao**: Secret storage and Kubernetes operator, belongs in [infra/controllers/openbao.yaml](/infra/controllers/openbao.yaml)
+
+Kustomizations are reconciled on `git push` by flux running on cluster, triggered by [Woodpecker job](/.woodpecker/flux-reconcile-source.yaml). App Kustomization should import all resources related to app in `kustomization.yaml`:
+
+```yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - namespace.yaml
+  - pvc.yaml
+  - release.yaml
+```
+
+## Namespace
+
+Each app kustomization should have its own kubernetes namespace to contain all resources related to app in `namespace.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: immich
+```
+
+## Helm charts
+
+If app is distributed via Helm chart, you can deploy it using flux HelmRepository and HelmRelease resources like in following example:
+
+```yaml
+---
+apiVersion: source.toolkit.fluxcd.io/v1
+kind: HelmRepository
+metadata:
+  name: secustor
+  namespace: immich
+spec:
+  interval: 24h
+  url: https://secustor.dev/helm-charts
+---
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: immich
+  namespace: immich
+spec:
+  interval: 30m
+  chart:
+    spec:
+      chart: immich
+      version: 1.2.6
+      sourceRef:
+        kind: HelmRepository
+        name: secustor
+  values:
+    <values>
+```
+
+If the app does not have a Helm repository, but its Helm chart is available directly in a git repository, you can use it via a GitRepository flux source:
+
+```yaml
+---
+apiVersion: source.toolkit.fluxcd.io/v1
+kind: GitRepository
+metadata:
+  name: kaneo
+  namespace: kaneo
+spec:
+  interval: 24h
+  url: https://github.com/usekaneo/kaneo.git
+  ref:
+    tag: v2.7.5
+  ignore: |
+    # exclude all
+    /*
+    # include charts directory
+    !/charts/
+```
+
+You can use third-party helm charts to deploy applications, consider this possibility if:
+
+- There is no official helm chart for the application
+- The official helm chart is unmaintained
+- The official helm chart is using glaring bad practices
+- The official helm chart is missing configuration options for what we need
+
+When deciding which helm chart to use, watch out for following things in particular:
+
+- Development activity, stability, maturity
+- Whether the app deployed by chart is up to date - automated updates are large bonus
+- Unresolved / breaking issues
+- Configurability, can we configure things we need, disable undesired features
+
+When configuring Helm chart, keep in mind:
+- Do not use bundled PVCs, bring our own one or at least configure chart to bind it to manually created `PersistentVolume` according to [Data / PVCs pattern](#data--pvcs-pattern)
+- Do not use bundled Postgres database unless the chart is using CloudNativePG's Cluster resource, bring our own one using [Postgres operator](#postgres-operator)
+
+## Bare Kubernetes deployments
+
+If:
+
+- the app is not packaged as a helm chart or
+- it would be simpler to deploy it without package (for example custom privileged pod with access to gpu) or
+- the app is so simple it doesn't make sense to package it with helm (for example, simple http proxy that alters headers or stateless single-binary app) or
+- for any other reason it would make more sense to skip helm
+
+You can deploy app skipping helm chart and just create raw Kubernetes manifests like Deployment, StatefulSet and other supporting resources like ConfigMap, Service, Ingress directly.
+
+## Data / PVCs pattern
+
+Data are stored on local disk of node using OpenEBS LVM LocalPV. To create a persistent volume, use following example:
+
+```yaml
+---
+apiVersion: local.openebs.io/v1alpha1
+kind: LVMVolume
+metadata:
+  labels:
+    kubernetes.io/nodename: anapistula-delrosalae
+  name: immich-library-lvmhdd
+  namespace: openebs
+spec:
+  capacity: 150Gi
+  ownerNodeID: anapistula-delrosalae
+  shared: "yes"
+  thinProvision: "no"
+  vgPattern: ^openebs-hdd$
+  volGroup: openebs-hdd
+---
+kind: PersistentVolume
+apiVersion: v1
+metadata:
+  name: immich-library-lvmhdd
+spec:
+  capacity:
+    storage: 150Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  storageClassName: hdd-lvmpv
+  volumeMode: Filesystem
+  csi:
+    driver: local.csi.openebs.io
+    fsType: btrfs
+    volumeHandle: immich-library-lvmhdd
+---
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: library-lvmhdd
+  namespace: immich
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 150Gi
+  storageClassName: hdd-lvmpv
+  volumeName: immich-library-lvmhdd
+```
+
+Create LVMVolume and PersistentVolume resources manually and **do not** rely on automatic scheduling of PVCs because we want created LVM LVs on disk to have deterministic names and be reused if they already exist on disk, which scheduler does not give us. There are two LVM storage classes:
+
+- **hdd-lvmpv**, volume group: openebs-hdd, use for bulk data, like media library
+- **ssd-lvmpv**, volume group: openebs-ssd, use for small datasets that benefit from quick storage access like databases, state data etc.
+
+When deciding the size of the volume, make minimal prediction, starting with 1GiB if you do not predict app to use much disk space.
+
+## Vault secrets
+
+There is OpenBao installed on cluster that manages access to secrets. The KV2 secret engine is mounted at `secret`, use it to store static secrets like API keys to external services, passwords and other entries you do not want to keep in plaintext in git repository.
+
+To access the KV secrets on cluster, use Vault Secrets Operator installed on cluster, which provides `VaultStaticSecret` custom resource that syncs a path from OpenBao to Kubernetes `Secret` object.
+
+```yaml
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: llama-proxy
+  namespace: llama
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultAuth
+metadata:
+  name: llama
+  namespace: llama
+spec:
+  method: kubernetes
+  mount: kubernetes
+  kubernetes:
+    role: llama-proxy
+    serviceAccount: llama-proxy
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultStaticSecret
+metadata:
+  name: llama-api-key
+  namespace: llama
+spec:
+  type: kv-v2
+
+  mount: secret
+  path: ollama
+
+  destination:
+    create: true
+    name: llama-api-key
+    type: Opaque
+    transformation:
+      excludeRaw: true
+
+  vaultAuthRef: llama
+```
+
+To give access to specified secret for given k8s ServiceAccount, you need to create kubernetes auth role and policy. Create a kubernetes auth role named `llama-proxy`, by creating file `vault/kubernetes-auth-roles/llama-proxy.yaml`:
+
+```yaml
+bound_service_account_names:
+  - llama-proxy
+bound_service_account_namespaces:
+  - llama
+token_policies:
+  - ollama
+```
+
+Create policy named `ollama` by creating file `vault/policy/ollama.hcl`:
+
+```hcl
+path "secret/data/ollama" {
+    capabilities = ["read"]
+}
+```
+
+Once these files are created, ask operator to reconcile OpenBao configuration and create required secret.
+
+## Postgres operator
+
+There is CloudNativePG operator installed on cluster that manages databases of applications running on cluster. You can create Postgres database by creating `Cluster` resource:
+
+```yaml
+---
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: kaneo-db
+  namespace: kaneo
+spec:
+  instances: 1
+
+  storage:
+    pvcTemplate:
+      storageClassName: ssd-lvmpv
+      resources:
+        requests:
+          storage: 10Gi
+      volumeName: kaneo-db-1
+
+```
+
+Create the `PersistentVolume` and `LVMVolume` resources manually but **do not** create `PersistentVolumeClaim`, CloudNativePG will create one on its own referencing `PersistentVolume` specified in `volumeName`. Do not replicate the database, there is only one node in the cluster currently. The `Cluster` resource will automatically create a secret, use it to configure app:
+
+```
+Name:         kaneo-db-app
+Namespace:    kaneo
+Labels:       app.kubernetes.io/managed-by=cloudnative-pg
+              cnpg.io/cluster=kaneo-db
+              cnpg.io/reload=true
+              cnpg.io/userType=app
+Annotations:  cnpg.io/operatorVersion: 1.29.1
+
+Type:  kubernetes.io/basic-auth
+
+Data
+====
+dbname:         3 bytes
+fqdn-jdbc-uri:  145 bytes
+fqdn-uri:       126 bytes
+host:           11 bytes
+jdbc-uri:       127 bytes
+password:       64 bytes
+pgpass:         90 bytes
+port:           4 bytes
+uri:            108 bytes
+user:           3 bytes
+username:       3 bytes
+```
+
+## LoadBalancers
+
+You can expose installed app to the Internet using Cilium's LoadBalancer configured on cluster:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: teamspeak3
+  namespace: ispeak3
+spec:
+  selector:
+    app: teamspeak3
+  ports:
+  - name: voice
+    protocol: UDP
+    port: 9987
+    targetPort: 9987
+  - name: filetransfer
+    protocol: TCP
+    port: 30033
+    targetPort: 30033
+  type: LoadBalancer
+  externalTrafficPolicy: Local
+  ipFamilyPolicy: PreferDualStack
+```
+
+IPv6 will be directly reachable from the internet by its assigned address, for IPv4 currently you need to configure port forward on router in `ansible/roles/routeros/firewall.yml`, that step is not yet automated. The assigned internal IP will be known after manifests are applied on cluster. For this reason, there is no ExternalDNS configured yet, if you need a DNS name, ask the operator to configure DNS name for LoadBalancer. Assign names from lumpiasty.xyz subdomains (eg. kaneo.lumpiasty.xyz) unless explicitly requested. Do not use LoadBalancer for exposing HTTP applications, use Ingress instead.
+
+## Ingress
+
+You can expose HTTP applications using NGINX Ingress Controller:
+
+```yaml
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  namespace: llama
+  name: llama
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt
+    acme.cert-manager.io/http01-edit-in-place: "true"
+    nginx.ingress.kubernetes.io/proxy-buffering: "false"
+    nginx.ingress.kubernetes.io/proxy-read-timeout: 30m
+    nginx.ingress.kubernetes.io/proxy-body-size: 8m
+spec:
+  ingressClassName: nginx-ingress
+  rules:
+    - host: llama.lumpiasty.xyz
+      http:
+        paths:
+          - backend:
+              service:
+                name: llama-proxy
+                port:
+                  number: 80
+            path: /
+            pathType: Prefix
+  tls:
+    - hosts:
+        - llama.lumpiasty.xyz
+      secretName: llama-ingress
+```
+
+TLS certificates are automatically issued for subdomains of lumpiasty.xyz using cert-manager. DNS name assignment is not automatic yet, ask operator to create DNS name for ingress resources.
+
+## Keeping app up to date
+
+There is a Renovate job configured for this repository as [Woodpecker job](/.woodpecker/renovate.yaml) to keep applications up to date. Renovate automatically keeps track of:
+
+- Docker images specified in Kubernetes manifests like Deployment, StatefulSet etc
+- HelmRelease versions
+- GitRepository tags
+
+To make Renovate automatically update applications, always specify full versions of docker images or helm chart release. If you use ambiguous tags, Renovate will not have a chance to update and the cluster will never download new image because this tag already existed on node. **Do not** use:
+
+- latest (or its variants like stable, current, main, master)
+- "Sliding" versions, like 1 or 1.2 that point at 1.2.1 currently and will change image it points at when version 1.2.2 is released
+
+As a last resort if the application does not publish stable image tags, pin digest of image.
+
+Renovate may require custom configuration if:
+
+- App is using non-standard versioning schema
+
+  Example app versioned by date (unified-vulkan-2026-01-01), renovate.json:
+  
+  ```json
+    {
+        "matchDatasources": ["docker"],
+        "matchPackageNames": ["ghcr.io/mostlygeek/llama-swap"],
+        "versioning": "regex:^unified-vulkan-(?<major>\\d{4})-(?<minor>\\d{2})-(?<patch>\\d{2})$",
+        "automerge": true,
+        "automergeType": "pr",
+        "platformAutomerge": true
+    }
+  ```
+
+- Docker image tag is specified in non-standard field that Renovate may not recognise automatically such as Helm values
+  
+  Example app with non-standard image selected in helm values instead of image's default (which is latest in this chart):
+  ```yaml
+    values:
+      kaneo:
+        image:
+          tag: "2.7.3"  # renovate: depName=ghcr.io/usekaneo/kaneo registryUrl=https://ghcr.io
+  ```
+
+Renovate is configured so it automatically merges patch versions, other updates are created as pull requests to be manually reviewed and merged unless explicitly desired on per case basis.
+
+## SSO / OIDC / Authentik
+
+There is an Authentik running on cluster providing SSO for applications. Configure user-facing apps to utilize it correctly.
+
+Authentik supports following protocols:
+
+- OAuth2 / OpenID Connect
+- SAML
+- Radius
+- LDAP
+- SCIM
+
+Currently, there is no Authentik configuration in code, ask operator to create application in the UI and save OAuth id and secret in OpenBao under `secret/authentik/<app>`. Authentik provides discovery URL for OAuth applications: `https://authentik.lumpiasty.xyz/application/o/<app slug>/.well-known/openid-configuration`.
+
+Configure the app to disable guest access, built-in registration and automatically register unprivileged users with `user` role and privileged users with `admin` role as the app allows.
+
+## Privileged apps
+
+Some apps require direct access to devices, like GPU. There are no specific operators yet, apps that require access to GPU are simply launched as privileged pods, example:
+
+```yaml
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-swap
+  namespace: llama
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llama-swap
+  template:
+    metadata:
+      labels:
+        app: llama-swap
+    spec:
+      containers:
+        - name: llama-swap
+          volumeMounts:
+            - mountPath: /dev/kfd
+              name: kfd
+            - mountPath: /dev/dri
+              name: dri
+          securityContext:
+            privileged: true
+      volumes:
+        - name: kfd
+          hostPath:
+            path: /dev/kfd
+            type: CharDevice
+        - name: dri
+          hostPath:
+            path: /dev/dri
+            type: Directory
+```
+
+Creation of such pods is forbidden unless explicitly allowed in Talos config:
+
+```yaml
+# CSI driver requirement
+cluster:
+  apiServer:
+    admissionControl:
+      - name: PodSecurity
+        configuration:
+          apiVersion: pod-security.admission.config.k8s.io/v1beta1
+          kind: PodSecurityConfiguration
+          exemptions:
+            namespaces:
+              - llama
+```
+
+Create the patch like this under `talos/patches/<app>.patch`, add it to `gen-talos-config` target in Makefile and ask operator to reconcile Talos config to allow privileged pods in specified namespace.
@@ -0,0 +1,13 @@
+<svg width="136" height="136" viewBox="0 0 136 136" fill="none" xmlns="http://www.w3.org/2000/svg">
+<g clip-path="url(#clip0_137_2)">
+<rect width="136" height="136" fill="#141414"/>
+<path fill-rule="evenodd" clip-rule="evenodd" d="M62.6855 103.724C58.5716 104.595 56.0001 103.265 56 101.66L56 70.0264C56 69.8606 56.0032 69.686 56.0088 69.5069C56.0039 69.3848 56.001 69.249 56.001 69.0977L56.001 37.9444C56.015 36.6524 56.2588 35.7449 59.2588 35.1094L73.3145 32.2764C77.4285 31.405 80 32.7365 80 34.3408L80 65.9746C80 66.1409 79.9978 66.3155 79.9922 66.4951C79.997 66.6169 79.999 66.7526 79.999 66.9033L79.999 98.0567C79.9849 99.3483 79.7408 100.256 76.7412 100.892L62.6855 103.724Z" fill="#F5F5F5"/>
+<path fill-rule="evenodd" clip-rule="evenodd" d="M30.6855 111.723C26.5716 112.594 24.0001 111.264 24 109.659L24 78.0244C24 77.8588 24.0032 77.6848 24.0088 77.5059C24.0039 77.3838 24.001 77.248 24.001 77.0967L24.001 45.9434C24.015 44.6514 24.2588 43.7439 27.2588 43.1084L41.3145 40.2754C45.4285 39.404 48 40.7355 48 42.3399L48 73.9737C48 74.1399 47.9978 74.3146 47.9922 74.4942C47.997 74.6159 47.999 74.7517 47.999 74.9024L47.999 106.056C47.9849 107.347 47.7408 108.255 44.7412 108.891L30.6855 111.723Z" fill="#F5F5F5"/>
+<path fill-rule="evenodd" clip-rule="evenodd" d="M105.314 24.2754C109.428 23.404 112 24.7355 112 26.3398V37.1845L94.0576 60.5019L111.999 82.7802V90.0576C111.985 91.3492 111.741 92.2571 108.741 92.8925L94.6855 95.7246C90.5717 96.596 88.0002 95.2654 88 93.6611V62.0254C88 61.8598 88.0032 61.6856 88.0088 61.5068C88.0039 61.3848 88.001 61.2488 88.001 61.0976V29.9433C88.0151 28.6516 88.2591 27.7438 91.2588 27.1084L105.314 24.2754Z" fill="#F5F5F5"/>
+</g>
+<defs>
+<clipPath id="clip0_137_2">
+<rect width="136" height="136" fill="white"/>
+</clipPath>
+</defs>
+</svg>
@@ -0,0 +1,16 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" fill="none">
+  <rect width="64" height="64" rx="14" fill="#1C1830"/>
+  <!-- Meridian axis -->
+  <line x1="32" y1="10" x2="32" y2="54" stroke="#8B7CF6" stroke-width="2.5" stroke-linecap="round"/>
+  <!-- Latitude arcs — outer -->
+  <path d="M16 20 A18 18 0 0 1 48 20" fill="none" stroke="#C4B5FD" stroke-width="1.2" opacity="0.4"/>
+  <path d="M16 44 A18 18 0 0 0 48 44" fill="none" stroke="#C4B5FD" stroke-width="1.2" opacity="0.4"/>
+  <!-- Latitude arcs — inner -->
+  <path d="M20 30 A14 14 0 0 1 44 30" fill="none" stroke="#C4B5FD" stroke-width="0.8" opacity="0.2"/>
+  <path d="M20 34 A14 14 0 0 0 44 34" fill="none" stroke="#C4B5FD" stroke-width="0.8" opacity="0.2"/>
+  <!-- Poles -->
+  <circle cx="32" cy="10" r="3.5" fill="#C4B5FD"/>
+  <circle cx="32" cy="54" r="3.5" fill="#C4B5FD"/>
+  <!-- Center node -->
+  <circle cx="32" cy="32" r="3" fill="#8B7CF6"/>
+</svg>
@@ -0,0 +1,10 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="284.538" height="253.96">
+  <style>
+    @media (prefers-color-scheme: dark) {
+      path {
+        fill: white;
+      }
+    }
+  </style>
+  <path d="M162.51 33.188c-26.77.411-54.004 6.885-71.494 3.745-1.313-.232-2.124 1.338-1.171 2.265 14.749 14.003 20.335 28.16 36.718 30.065l.476.103c-7.567 7.799-14.028 18.018-18.571 31.171-4.89 14.106-6.268 29.421-7.89 47.105-2.445 26.332-5.173 56.152-20.038 93.54a246.489 246.489 0 0 0-13.27 45.946h22.652a221.202 221.202 0 0 1 11.249-37.786c16.049-40.374 19.073-73.257 21.505-99.693 1.493-16.255 2.806-30.309 6.796-41.853 11.647-33.527 39.408-40.889 61.056-36.693 21.004 4.067 41.673 20.502 40.592 44.016-.772 15.985-7.76 23.166-12.87 28.43-2.793 2.883-5.47 5.611-6.731 9.498-3.037 9.19.101 19.434 8.494 27.568 22.24 20.734 34.338 59.717 33.681 106.513h22.176c.592-52.935-13.951-97.839-40.503-122.626-2.097-2.021-2.69-3.604-3.191-3.347 1.222-1.544 3.217-3.346 4.633-4.813 29.382-21.79 77.813-1.892 107.054 9.653 7.58 2.985 11.274-4.338 4.067-8.623-25.097-14.84-76.54-54.016-105.368-79.718-4.029-3.54-6.796-7.8-11.455-11.738-15.547-27.439-41.84-33.127-68.597-32.728Zm35.238 60.27a15.161 15.161 0 0 0-2.008.232 15.161 15.161 0 0 0-1.506 29.434 15.154 15.154 0 0 0 9.473-28.79 15.161 15.161 0 0 0-5.959-.876zm-44.286 147.17a2.033 2.033 0 0 0-1.133.374c-1.08.772-1.93 3.05-.772 5.701 5.38 12.394 9.1 25.445 12.536 40.413h22.484c-5.676-16.629-16.307-34.055-27.851-43.978-2.008-1.737-3.913-2.574-5.251-2.51z" style="stroke-width:12.8704" transform="translate(-67.27 -33.169)"/>
+</svg>
@@ -0,0 +1,110 @@
+# CoreDNS resolver
+
+## Goal
+
+Replace the RouterOS built-in DNS forwarder with a CoreDNS container for
+configurability, and suppress IPv6 (AAAA) resolution by default to keep traffic
+on IPv4.
+
+## Background
+
+The ISP provides no native IPv6 — only a Hurricane Electric (HE) tunnel
+(`2001:470:61a3::/48`). HE addresses fall in ranges some sites flag as
+datacenter/bot traffic, producing endless CAPTCHAs. The goal is to prefer IPv4
+egress while keeping IPv6 available for our own services and any domain
+explicitly trusted over IPv6.
+
+## What this is NOT (and why)
+
+An earlier iteration used **DNS64 + NAT64 (Tayga)** to force traffic through
+IPv4. It was removed:
+
+- **Performance**: Tayga is a userspace translator with no hardware offload.
+  Every translated packet crossed RouterOS twice (v6 in, v4 out) plus a
+  userspace hop, capping throughput at ~250 Mbps against a 1 Gbps line.
+- **SPOF**: two containers (CoreDNS + Tayga) in the datapath of nearly all
+  traffic on a router whose native forwarder had been rock-solid.
+- **Architectural inversion**: NAT64 exists to let IPv6-only clients reach IPv4.
+  We don't want IPv6 egress at all — using NAT64 to avoid IPv6 was solving the
+  problem backwards.
+
+Plain AAAA suppression in CoreDNS achieves the same IPv4-preferred outcome with
+zero datapath overhead — DNS is the only thing touched, packet forwarding stays
+on the RouterOS fastpath at line rate.
+
+The full account of the NAT64/IPv6-mostly attempt and why it was abandoned is in
+[nat64-dns64-postmortem.md](./nat64-dns64-postmortem.md).
+
+## How it works
+
+CoreDNS runs as a single container (`172.20.0.3`), reachable from RouterOS DNS
+which forwards client queries to it. The [Corefile](../mikrotik/coredns/Corefile)
+has two server blocks:
+
+1. **`lumpiasty.xyz`** — our own zone. Forwards normally, keeps AAAA, so internal
+   services reachable over the HE prefix resolve to their real IPv6 addresses.
+2. **`.` (default)** — forwards everything else, but a `template IN AAAA` block
+   returns empty NOERROR for all AAAA queries, so clients fall back to IPv4 and
+   avoid the HE tunnel's flagged egress. A records and all other types pass
+   through untouched.
+
+The whitelist is implemented as a reusable `(aaaa_allowed)` snippet imported by
+zones that should keep AAAA. To trust another domain over IPv6, add a server
+block for it that imports `aaaa_allowed`.
+
+### Why suppression, not NXDOMAIN
+
+The AAAA template returns NOERROR with an empty answer (NODATA), not NXDOMAIN.
+This is correct: the name exists, it just has no (advertised) AAAA. Clients
+treat it as "no IPv6 address" and use the A record. Returning NXDOMAIN would
+wrongly imply the name doesn't exist and break the A lookup.
+
+## Future improvement
+
+The current global-suppress-plus-whitelist is coarse: a domain that is genuinely
+IPv6-only (no A record) and not whitelisted becomes unreachable. The intended
+end state is a plugin that suppresses AAAA only when the domain also has an A
+record, so IPv6-only destinations keep working without manual whitelisting. No
+in-tree CoreDNS plugin does this today.
+
+## Custom image
+
+Built from source with a minimal plugin set (`errors`, `log`, `health`,
+`template`, `cache`, `forward`, `reload`) instead of the default ~40, producing
+a ~6-8 MB image. The `dns64` plugin is no longer compiled in.
+
+Source: [`mikrotik/coredns/`](../mikrotik/coredns/). Built by Woodpecker
+([`.woodpecker/coredns-build.yaml`](../.woodpecker/coredns-build.yaml)) on pushes
+touching `mikrotik/coredns/**`, pushed to `gitea.lumpiasty.xyz/lumpiasty/coredns-mikrotik`.
+
+## RouterOS integration
+
+- `/ip/dns servers=172.20.0.3` — RouterOS forwards client queries to CoreDNS
+- RDNSS in RA (`/ipv6/nd dns=...` on vlan2/vlan5) advertises an IPv6 resolver
+  (the router's per-VLAN address) to dual-stack clients; RouterOS DNS relays to
+  CoreDNS
+- No DHCP option 108, no PREF64 — those belonged to the removed IPv6-mostly setup
+
+## Pitfalls learned (kept for reference)
+
+These were hit during the NAT64 era and the migration; some still apply:
+
+1. **RouterOS static FWD entries corrupt NXDOMAIN.** A `type=FWD match-subdomain=yes`
+   entry returns NOERROR/empty instead of relaying NXDOMAIN. Combined with
+   `ndots:5` and kubernetes pod search domains, `getaddrinfo` stops at the first
+   search-suffixed NODATA candidate and never tries the absolute name — apps fail
+   with `ENOTFOUND` for valid hostnames while `nslookup` (absolute query) works.
+   Our own zone is therefore handled in the Corefile, not via a RouterOS FWD
+   entry. RouterOS DNS does plain forwarding only (plus the Tailscale `ts.net`
+   FWD, which is acceptable as its subdomains genuinely don't exist publicly).
+2. **`advertise-dns=no` on new ND entries.** RouterOS creates per-interface
+   `ipv6 nd` entries with `advertise-dns=no`, suppressing the RDNSS option even
+   when a static `dns=` list is set. Must be enabled explicitly.
+3. **Per-interface ND entries must be created, not modified.** Only the
+   `interface=all` default ships out of the box; `api_find_and_modify` matching a
+   specific interface silently matches nothing. Use `api_modify`.
+
+Verification: `rdisc6` (NixOS package `ndisc6`) dumps RA contents. The CoreDNS
+`log` plugin output is visible via `/log print` on the router (container
+`logging=yes`) and shows the rcode CoreDNS returned — comparing it to what the
+client received isolates which hop corrupts a response.
@@ -0,0 +1,267 @@
+# LTE Failover Design
+
+Reference documentation of the as-built LTE failover design. For day-to-day
+network overview see [network.md](./network.md); for BM806C modem firmware
+workarounds see [wwan-bm806c-qmi-workaround.md](./wwan-bm806c-qmi-workaround.md).
+
+## Summary
+
+| Property | Value |
+|---|---|
+| Failover signalling | Symmetric iBGP between D-Link (BIRD2) and CRS (RouterOS) |
+| BGP AS | 65000 (iBGP; CRS acts as route reflector for D-Link) |
+| LTE transit path | D-Link wwan ← VLAN 6 (192.168.6.0/24) ← CRS |
+| D-Link default route source | Learned from CRS via BGP (no static default gateway) |
+| CRS LTE route source | Learned from D-Link via BGP at distance 200 |
+| Announcement trigger | wwan interface up/down tracked by BIRD2 device protocol |
+| Scope | All internet-capable VLANs (vlan2, vlan4, vlan5, vlan6) |
+| IPv4 NAT | CRS masquerades on `pppoe-gpon` only; D-Link masquerades on `wwan` |
+| IPv6 NAT | D-Link masquerades IPv6 on `wwan` (no inbound on LTE; outbound only) |
+| wwan bringup | Triggered by `/etc/init.d/wwan-bringup` after USB re-auth (BM806C wedge fix) |
+
+## Route exchange
+
+### CRS announces to D-Link
+
+| Prefix | Source | Withdrawn when |
+|---|---|---|
+| `0.0.0.0/0` | `output.default-originate: if-installed` (active default in main table) | GPON drops or `pppoe-gpon` route inactive |
+| `2000::/3` | `output.redistribute: static` (HE tunnel default) | `sit1` interface down / HE route inactive |
+| VLAN subnets (`192.168.0.0/24`, `192.168.1.0/24`, etc.) | `output.redistribute: connected` | never (CRS always reachable on vlan6) |
+| `100.64.0.0/10` (Tailscale) | `output.redistribute: static` | never |
+| `172.17.0.0/16` (dockers bridge) | `output.redistribute: connected` | never |
+| `10.42.0.0/16`, `10.43.0.0/16`, `10.44.0.0/16` (k8s) | reflected via iBGP RR | when k8s BGP session drops |
+| pod/service/LB IPv6 ranges | reflected via iBGP RR | when k8s BGP session drops |
+
+Internal prefixes are announced regardless of GPON state. They remain
+reachable via `192.168.6.1` (directly connected on vlan6) even when GPON
+fails, so D-Link-originated traffic to internal subnets always routes to
+CRS rather than incorrectly exiting via wwan.
+
+The CRS route reflector role (`local.role: ibgp-rr` on the `dlink-lte`
+connection) allows it to reflect routes learned from the k8s peer (`bgp1`)
+to D-Link without violating iBGP split-horizon. RFC 4456 `ORIGINATOR_ID`
+loop prevention is handled automatically by RouterOS — no output filter
+needed.
+
+`nexthop-choice: force-self` ensures CRS advertises `192.168.6.1` as the
+next-hop for all prefixes, rather than the original route's next-hop
+(which may be unreachable from D-Link, e.g. k8s peer `2001:470:61a3:100::3`).
+
+### D-Link announces to CRS
+
+| Prefix | Source | Withdrawn when |
+|---|---|---|
+| `0.0.0.0/0` | BIRD2 static `lte_default` via `wwan0` | wwan0 down (device protocol detects) |
+| `2000::/3` | BIRD2 static `lte_default6` via `wwan0` | wwan0 down |
+
+BIRD2's `protocol device` tracks wwan0 via netlink in real time; when the
+interface goes down the static routes become unreachable and BGP withdraws
+the announcements immediately.
+
+The BIRD2 static routes use `preference 50` (below the BGP default of 100)
+so the BGP-learned routes from CRS are preferred for kernel installation
+on D-Link itself — D-Link's own outbound traffic uses the CRS path when
+GPON is up. The static routes only exist as triggers for BGP export.
+
+### D-Link kernel routing table
+
+| Destination | Source | Kernel metric | Active when |
+|---|---|---|---|
+| Internal prefixes (VLANs, k8s, Tailscale) | BGP from CRS, via `192.168.6.1` | 10 (IPv4) / 32 (IPv6) | always (CRS reachable) |
+| `0.0.0.0/0` | BGP from CRS | 10 | GPON up |
+| `0.0.0.0/0` | wwan QMI-assigned (qmi.sh) | 100 | wwan up |
+| `default via wwan IPv6 GW` (non-source-specific) | wwan-bringup script | 1024 | wwan up |
+| `default from <wwan prefix>/64 via wwan IPv6 GW` (source-specific) | qmi.sh | 100 | wwan up |
+
+D-Link's own outbound traffic prefers the BGP route (metric 10) over wwan
+(metric 100). The non-source-specific IPv6 default at metric 1024 exists
+because qmi.sh only installs a source-specific IPv6 default (constrained
+to the wwan-assigned `/64` prefix); forwarded traffic from internal
+subnets would fail routing lookup with "net unreachable" without it.
+
+### CRS routing table
+
+| Destination | Source | Distance | Active when |
+|---|---|---|---|
+| `1.0.0.1/32`, `8.8.4.4/32` | static via `pppoe-gpon` | 1 | always |
+| `0.0.0.0/0` | static via `1.0.0.1`, `8.8.4.4` (recursive) | 1, 2 | GPON ping check succeeds |
+| `0.0.0.0/0` | BGP from D-Link via `192.168.6.2` | 200 | wwan up on D-Link |
+| `2000::/3` | static via `2001:470:70:dd::1` (HE tunnel) | 1 | HE tunnel ping check succeeds |
+| `2000::/3` | BGP from D-Link via `2001:470:61a3:600::2` | 200 | wwan up on D-Link |
+
+RouterOS distance comparison is straightforward: distance 1 always wins
+over distance 200. BGP-learned routes activate automatically when the
+static route becomes inactive (e.g. GPON down → `pppoe-gpon` route
+inactive → BGP route at distance 200 becomes active).
+
+## Traffic paths
+
+### Normal (GPON up)
+
+```
+LAN/SRV/IoT  →  CRS  →  pppoe-gpon  →  ISP
+D-Link own   →  uplink  →  CRS  →  pppoe-gpon  →  ISP
+                (via BGP-learned default at kernel metric 10)
+```
+
+wwan is connected and D-Link announces the LTE default to CRS, but CRS
+ignores it (distance 200 loses to distance 1). D-Link uses the
+CRS-announced default (metric 10) for its own traffic, not wwan
+(metric 100).
+
+### Failover (GPON down)
+
+```
+LAN/SRV/IoT  →  CRS  →  vlan6 (→192.168.6.2)  →  D-Link  →  wwan  →  Orange LTE
+D-Link own   →  wwan  →  Orange LTE
+```
+
+CRS distance-1 routes go inactive → distance-200 BGP routes from D-Link
+activate. D-Link receives forwarded traffic on uplink, routes it via the
+non-source-specific wwan default (metric 1024), fw4 masquerades the
+source, packet exits via wwan. Return traffic reverses through masquerade
+state and forwards back to CRS via the established connection-tracking
+entry.
+
+When CRS withdraws its BGP-announced default to D-Link (because GPON is
+down and CRS has no default to announce), D-Link's kernel default at
+metric 10 is removed, leaving the wwan default at metric 100 as the
+preferred route for D-Link's own traffic.
+
+### Failure detection
+
+- **D-Link crashes / power loss** → BGP session drops after `hold-time: 30s`
+  → CRS withdraws all D-Link-learned routes → internet unavailable if
+  GPON also down (acceptable single-point-of-failure for home network)
+- **wwan modem goes down** → BIRD2 device protocol detects wwan0 down →
+  static `lte_default` / `lte_default6` become unreachable → BGP withdraws
+  announcements → CRS removes BGP-learned default
+- **GPON drops or blackholes** → recursive ping checks (1.0.0.1, 8.8.4.4) over `pppoe-gpon`
+  fail (takes ~20s: 10s ping interval + 10s timeout) → CRS distance-1/2 default routes inactive → distance-200 BGP route
+  activates → CRS withdraws its default-originate announcement to D-Link (loop
+  prevention prevents reflecting D-Link's own route) → D-Link's kernel
+  default-via-CRS is removed → D-Link uses wwan kernel default → traffic flows
+  from CRS via vlan6 → D-Link → wwan
+
+All transitions are automatic, driven by interface state, BGP session state,
+and the recursive-route ping checks on the CRS. No separate probing daemons
+(Netwatch / mwan3), no scripts toggling routes.
+
+## NAT rules
+
+NAT rules are always active, matched by output interface. No
+failover-triggered toggling needed.
+
+### CRS (RouterOS)
+
+- IPv4 `masquerade` on `srcnat` chain with `out-interface: pppoe-gpon`.
+  Only the GPON public interface gets masqueraded — `vlan6` is internal
+  and never natted, `sit1` (IPv6) has its own dedicated src-nat for the
+  Tailscale prefix.
+- IPv6 `src-nat tailnet to internet` on `srcnat` chain for Tailscale
+  prefix (`fd7a:115c:a1e0::/48`) to `2001:470:61a3:600::/64`, applied
+  on `out-interface-list: wan`. Fires regardless of whether the
+  egress is `sit1` or `vlan6`.
+
+### D-Link (OpenWrt fw4)
+
+- `wwan` zone has `option masq '1'` and `option masq6 '1'`. All traffic
+  exiting via wwan (own outbound + forwarded from `uplink`) is
+  source-NAT'd, IPv4 to the wwan-assigned CG-NAT IP, IPv6 to the
+  wwan-assigned `/128` from the Orange-assigned `/64` prefix.
+- Forwarding rule `uplink → wwan` allows MikroTik-routed traffic to
+  egress via wwan during failover. Default forward policy on the wwan
+  zone stays REJECT.
+
+## BGP / route reflection details
+
+### CRS connection config
+
+```
+/routing/bgp/connection set dlink-lte \
+  remote.address=192.168.6.2/32 \
+  local.role=ibgp-rr \
+  nexthop-choice=force-self \
+  output.redistribute=connected,static \
+  output.default-originate=if-installed \
+  hold-time=30s keepalive-time=10s
+```
+
+`output.default-originate=if-installed` is required for the `0.0.0.0/0`
+advertisement because RouterOS does not advertise interface-gateway
+static routes (gateway=`pppoe-gpon`) via plain `output.redistribute=static`.
+`default-originate` advertises a synthetic default whenever any active
+default exists in the routing table, regardless of how it was installed.
+
+### IPv6 Extended Next Hop workaround
+
+RouterOS uses BGP Extended Next Hop Encoding (RFC 5549 / RFC 8950) for
+IPv6 routes on this iBGP session, advertising them with an IPv4-mapped
+next-hop (`::ffff:192.168.6.1`). The Linux kernel does not support
+installing IPv6 routes with IPv4 next-hops, so BIRD2 cannot push them
+directly to the kernel.
+
+There is no way to disable ENHE on RouterOS — `local.address`,
+`nexthop-choice: force-self`, and output `set gw` filters all fail to
+override it. The workaround is on the BIRD2 side: an import filter on
+the BGP IPv6 channel rewrites `gw` to CRS's native IPv6 address
+(`2001:470:61a3:600::1`) before the route is exported to the kernel.
+
+```
+ipv6 {
+  extended next hop yes;
+  import filter {
+    gw = 2001:470:61a3:600::1;
+    accept;
+  };
+  ...
+};
+```
+
+The reverse direction (D-Link → CRS) was solved cleanly via BIRD2 export
+filter setting `bgp_next_hop = 2001:470:61a3:600::2`, since BGP-level
+attribute manipulation isn't constrained by kernel limitations.
+
+### Direct protocol on D-Link
+
+BIRD2 needs to know about the directly connected `192.168.6.0/24` and
+`2001:470:61a3:600::/64` subnets on `eth0.6` to resolve BGP next-hops.
+The `protocol direct { interface "eth0.6"; }` declaration provides this;
+without it BIRD2 marks all CRS-learned routes as unreachable.
+
+## BM806C modem cold-boot wedge
+
+The BM806C firmware enters a permanently broken state on cold boot:
+`/dev/cdc-wdm0` exists, kernel driver attaches, but uqmi commands return
+`"Failed to connect to service"` indefinitely. UIM (SIM) QMI service
+specifically never comes up.
+
+Recovery requires a USB device re-enumeration. The `/etc/init.d/wwan-bringup`
+service writes `0` then `1` to `/sys/bus/usb/devices/1-1/authorized` on
+boot, then triggers `ifup wwan`. After re-auth the modem completes its
+QMI initialization within ~1 second.
+
+Full investigation: see [wwan-bm806c-qmi-workaround.md](./wwan-bm806c-qmi-workaround.md).
+
+## Multi-WAN Stale Connection Tracking
+
+When the routing table fails over from GPON to LTE (or vice versa), RouterOS does not automatically clear existing connection tracking entries. If an established TCP/UDP connection is routed out the new WAN interface, it retains the NAT translation state (source IP) of the old WAN interface. The packet is sent to the ISP with the wrong source IP and is silently dropped, causing clients (like Tailscale) to hang for minutes until their internal sockets time out.
+
+To solve this purely declaratively without scripts or blanket connection flushes, the `forward` chain is configured to "fast-fail" these shifted connections:
+
+1. Connections are marked with their egress WAN upon establishment (`wan-gpon` or `wan-lte`) via the `mangle` table.
+2. If an established connection with a `wan-gpon` mark attempts to route out `vlan6` (LTE), or a `wan-lte` mark routes out `pppoe-gpon`, it is explicitly rejected (`tcp-reset` for TCP, `icmp-network-unreachable` for UDP) before reaching the NAT table.
+3. This rejection immediately signals the client OS that the route is dead, forcing the application (Tailscale, SIP clients, etc.) to instantly close the socket and establish a new one, which successfully binds to the new WAN interface and NAT state.
+
+## Implementation files
+
+| File | Role |
+|---|---|
+| `ansible/roles/routeros/tasks/base.yml` | `vlan6` in `wan` interface list |
+| `ansible/roles/routeros/tasks/routing.yml` | BGP instance, template, `dlink-lte` connection |
+| `ansible/roles/routeros/tasks/firewall.yml` | IPv4 masquerade narrowed to `pppoe-gpon`; BGP input rules for `vlan6` |
+| `ansible/roles/openwrt/tasks/network.yml` | `wwan` interface (no auto bringup); `uplink` with no static gateway |
+| `ansible/roles/openwrt/tasks/firewall.yml` | `wwan` zone with `masq '1'` / `masq6 '1'`; `uplink → wwan` forwarding |
+| `ansible/roles/openwrt/tasks/bird.yml` | BIRD2 install + config |
+| `ansible/roles/openwrt/tasks/wwan.yml` | qmi.sh patches, BM806C profiles, wwan-bringup init script |
+| `ansible/roles/openwrt/defaults/main.yml` | `bird2` in `openwrt_packages` |
@@ -0,0 +1,136 @@
+# Postmortem: NAT64 / IPv6-mostly attempt
+
+A record of an architecture that was built, run for ~2 days, and removed. Kept
+so the reasoning isn't re-discovered the hard way. For the current DNS setup see
+[coredns.md](./coredns.md); for network overview see [network.md](./network.md).
+
+## The original problem
+
+The ISP provides no native IPv6 — only a Hurricane Electric (HE) 6in4 tunnel
+(`2001:470:61a3::/48`). HE address ranges are widely classified as
+datacenter/hosting space, so some sites (Google, Cloudflare-fronted services,
+various login flows) treat IPv6 traffic from them as bot/VPN traffic: endless
+CAPTCHAs, "unusual traffic" interstitials, or outright blocks. IPv4 egress
+(the ISP's residential PPPoE address) is unaffected.
+
+The goal: keep using the network normally without IPv6 triggering these flags,
+while still wanting some IPv6 (e.g. inbound to self-hosted services).
+
+## What was built
+
+An **IPv6-mostly** network (RFC 8925) with **DNS64 + NAT64**, intended to push
+egress onto IPv4 while presenting IPv6 to clients:
+
+- **CoreDNS container** with the `dns64` plugin (`translate_all`): synthesized
+  `64:ff9b::/96` AAAA records from A records for *all* names, so even dual-stack
+  destinations resolved to a NAT64 address.
+- **Tayga container** (`ghcr.io/apalrd/tayga-nat64`): stateless NAT64 translator.
+  IPv6 traffic to `64:ff9b::/96` was routed to it, translated to IPv4, and
+  masqueraded out the GPON PPPoE interface. So all "IPv6" egress actually left
+  as IPv4 on the residential address — bypassing the HE tunnel and its flagging.
+- **RouterOS RA + DHCP**: DHCP option 108 (IPv6-only preferred) to make capable
+  clients drop IPv4, PREF64 (RFC 8781) to advertise the NAT64 prefix for CLAT,
+  RDNSS (RFC 8106) to hand IPv6-only clients a resolver.
+- Dedicated `nat64` bridge, `fc64::/126` link, `192.168.240.0/20` Tayga pool,
+  static routes, and firewall rules (including NAT64-mapped RFC1918 blocks to
+  prevent the translator being used as a policy bypass).
+
+## Why it was removed
+
+### 1. Performance — the dealbreaker
+
+Throughput collapsed from line rate (~1 Gbps) to **~200-300 Mbps**, saturating
+the router CPU. Causes, all structural:
+
+- Tayga is a **userspace** translator. Every translated packet leaves the kernel
+  fastpath, is copied to userspace, translated, and re-injected.
+- Translated traffic crosses RouterOS **twice** — once as IPv6 (LAN → Tayga),
+  once as IPv4 (Tayga → WAN, with masquerade) — doubling firewall/conntrack work.
+- No hardware offload or fasttrack applies to either leg.
+
+With `translate_all`, *nearly all* internet traffic went through this path, so
+the penalty hit everything, not just IPv4-only destinations.
+
+### 2. Single point of failure
+
+DNS (CoreDNS) and most of the datapath (Tayga) became two containers in the
+critical path on a router whose built-in forwarder had been completely reliable.
+Container restarts, image pulls, or a crash now took down connectivity.
+
+### 3. Architectural inversion
+
+NAT64 exists to let **IPv6-only** clients reach the **IPv4** internet. The actual
+goal here was the opposite — *avoid* IPv6 egress entirely. Building an IPv6-only
+client environment (option 108, CLAT, PREF64) and then translating all of it back
+to IPv4 was solving the problem backwards. The complexity existed only to route
+around a property of the HE tunnel.
+
+### 4. Firewall complexity and a translation bypass hole
+
+NAT64 punched a hole in the firewall model. RouterOS filters IPv4 and IPv6
+independently, but NAT64 traffic enters as IPv6 and *leaves* as IPv4 after
+translation — so the carefully-built IPv4 forward policy (inter-VLAN isolation,
+RFC1918-to-WAN blocks) was simply bypassed for anything arriving via the
+translator. A client could reach a private IPv4 range by encoding it in the
+NAT64 prefix (`64:ff9b::c0a8:xxyy` = `192.168.x.y`), and the IPv4 rules would
+never see it because the packet was IPv6 until Tayga rewrote it.
+
+Plugging this required mirroring the IPv4 policy in the IPv6 chain: explicit
+`reject` rules for every NAT64-mapped RFC1918 block (`64:ff9b::a00:0/104`,
+`64:ff9b::ac10:0/108`, `64:ff9b::c0a8:0/112`), per-VLAN accept rules toward the
+`nat64` interface, plus a separate masquerade and LB hairpin-accept for the
+Tayga pool. That is a parallel, easy-to-get-wrong copy of the existing ruleset,
+whose correctness depended on getting CIDR-to-prefix arithmetic right. Removing
+NAT64 deleted all of it.
+
+### 5. Operational fragility (see coredns.md for detail)
+
+The setup had a long tail of subtle failure modes, each presenting identically
+as "client can't connect":
+
+- RouterOS static `FWD` entries return `NOERROR`/empty instead of relaying
+  `NXDOMAIN`, which broke `getaddrinfo` search-domain handling in Kubernetes
+  pods (`ENOTFOUND` for valid names).
+- `translate_all` discarded real AAAA for IPv6-only internal services, and
+  returned empty answers for names with no A record.
+- Per-interface RouterOS `ipv6 nd` entries default to `advertise-dns=no` and must
+  be *created* (not modified), so RDNSS/PREF64 silently never advertised.
+- Dynamic `from-pool` VLAN addressing made advertised RDNSS addresses point at
+  nonexistent router addresses.
+- Option 108 honoured by clients before the NAT64 path was verified working left
+  them stuck "obtaining IP address".
+
+Each was individually fixable, but the aggregate was a brittle system whose
+benefit didn't justify the surface area.
+
+## What replaced it
+
+Plain CoreDNS forwarder with **AAAA suppression by default** plus a whitelist for
+domains that should keep IPv6 (our own zone over the HE prefix, and any explicitly
+trusted domain). Clients prefer IPv4 because they simply don't receive AAAA for
+most names — no translation, no extra datapath hop, packet forwarding stays on the
+RouterOS fastpath at line rate. DNS is the only thing in the path. See
+[coredns.md](./coredns.md).
+
+Tradeoff accepted: a non-whitelisted IPv6-only destination (no A record) is
+unreachable. In practice essentially everything on the public internet still has
+an A record. The intended future refinement is a CoreDNS plugin that suppresses
+AAAA only when an A record also exists, removing the need for the whitelist; no
+in-tree plugin does this today.
+
+## Lessons
+
+- **Measure throughput before committing to an in-path translator on SOHO-class
+  hardware.** Userspace NAT64 (Tayga/Jool-in-container) on a MikroTik CPU is
+  fine for a few hundred Mbps, not for saturating a gigabit line.
+- **Match the mechanism to the actual goal.** The goal was "prefer IPv4 egress",
+  which is a one-line DNS policy, not a transition technology.
+- **Prefer solutions that stay on the fastpath.** Anything that pulls bulk
+  traffic into userspace or doubles the forwarding work will dominate the CPU.
+- **Fewer moving parts in the critical path.** Two containers carrying all DNS
+  and most traffic is a worse availability story than the stock forwarder, for a
+  cosmetic benefit (avoiding CAPTCHAs on some sites).
+- **Protocol translation breaks the firewall model.** When traffic changes L3
+  protocol mid-path, the two firewall policies must be kept in sync by hand, and
+  any gap is a silent bypass. A solution that doesn't translate keeps a single
+  coherent policy.
@@ -0,0 +1,141 @@
+# Network topology
+
+Network consists of 2 MikroTik routers, 1 OpenWRT router, UniFi AP, Netgear switch. Internet is connected via GPON ONU connected to MikroTik router with fallback LTE network in D-Link router. They are connected as shown in the diagram below:
+
+```mermaid
+flowchart TD
+    crs[Router\nMikroTik CRS418-8P-8G-2S+]
+    hex[Router\nMikroTik hEX S]
+    dlink[Router\nD-Link DWR-921 C3\nOpenWRT 25.12]
+    unifi[Access Point\nUniFi U7 Pro]
+    netgear[Ethernet switch\nNetgear GS108E]
+    gpon[SFP+ GPON ONU\nLEOX LXT-010S-H]
+    isp[ISP]@{ shape: cloud }
+    lte[LTE Network]@{ shape: cloud }
+
+    isp --- gpon --- crs
+    lte ----- dlink
+    crs --- dlink
+    crs --- hex
+    crs --- unifi
+    crs --- netgear
+```
+
+Above diagram lists only active network devices, does not show passive/unmanaged network elements or nodes.
+
+## Internal structure
+
+Network is divided to multiple VLANs to enforce strict access control rules using stateful firewall. There are 6 VLANs:
+
+- 1: Management network<br>
+  No internet access, no outbound access to other networks<br>
+  IP: 192.168.255.0/24<br>
+  Static IP configuration
+- 2: General purpose LAN<br>
+  Access to every other network<br>
+  IP: 192.168.0.0/24 /  2001:470:61a3:9::/64<br>
+  Gateway: 192.168.0.1 / 2001:470:61a3:9:ffff:ffff:ffff:ffff<br>
+  DHCP / SLAAC
+- 3: Cameras<br>
+  No internet access, no outbound access to other networks<br>
+  IP: 192.168.3.0/24<br>
+  Gateway: 192.168.3.1<br>
+  Static IP configuration
+- 4: Server LAN (k8s cluster)<br>
+  Access to internet, cameras<br>
+  IP: 192.168.1.0/24 / 2001:470:61a3:100::/64<br>
+  Gateway: 192.168.1.1 / 2001:470:61a3:100::1<br>
+  Static IP configuration
+- 5: IoT Network<br>
+  Internet access only<br>
+  IP: 192.168.5.0/24 / 2001:470:61a3:a::/64<br>
+  Gateway: 192.168.5.1 / 2001:470:61a3:a:ffff:ffff:ffff:ffff<br>
+  DHCP / SLAAC, accessible via a separate, hidden WiFi network "szafa" from the D-Link with strict MAC filtering, for completely untrusted devices such as Tuya and similar
+- 6: Internet access for OpenWRT<br>
+  Internet access only<br>
+  IP: 192.168.6.0/24 / 2001:470:61a3:600::/64<br>
+  Gateway: 192.168.6.1 / 2001:470:61a3:600::1<br>
+  Static IP configuration
+
+VLANs are connected between devices like on following diagram:
+
+```mermaid
+flowchart TD
+    crs[Router\nMikroTik CRS418-8P-8G-2S+]
+    hex[Router\nMikroTik hEX S]
+    dlink[Router\nD-Link DWR-921 C3\nOpenWRT 25.12]
+    unifi[Access Point\nUniFi U7 Pro]
+    netgear[Ethernet switch\nNetgear GS108E]
+
+    crs -- Untagged 1\nTagged 5,6 --- dlink
+    crs -- Untagged 1\nTagged 2,3 --- hex
+    crs -- Untagged 2 --- unifi
+    crs -- Untagged 1\nTagged 2 --- netgear
+
+```
+
+There are also networks, which are not VLANs, but are routed:
+
+- Tailscale, container on CRS<br>
+  Access to every other network, including internet (exit node)<br>
+  IP: 100.64.0.0/10 / fd7a:115c:a1e0::/48<br>
+  Allocations managed completely by Tailscale
+- Kubernetes cluster, routes exposed to CRS via BGP using Cilium<br>
+  Access to internet, cameras<br>
+  Pods: 10.42.0.0/16 (/24 subnet per node), 2001:470:61a3:200::/104 (/120 subnet per node)<br>
+  Service: 10.43.0.0/16, 2001:470:61a3:300::/112<br>
+  LoadBalancer: 10.44.0.0/16, 2001:470:61a3:400::/112<br>
+  Assigned by Cilium MultiPool IPAM (pods), kube-apiserver (services), Cilium LB (LoadBalancer)<br>
+  Native IP routing, no overlay, VXLAN etc.<br>
+  LoadBalancer is reachable from the internet using IPv6 directly or IPv4 port forwards, leveraging ECMP.
+- GPON ONU management<br>
+  IP: 192.168.100.0/24<br>
+  Static assignment on CRS, access to factory IP of ONU
+- Containers on CRS<br>
+  Access to every other network<br>
+  IP: 172.20.0.1/24, 2001:470:61a3:500::/64<br>
+  Static IP management, hosts Tailscale and CoreDNS containers
+
+The whole network is designed to eliminate VLANs and overlays where unnecessary, to keep things simple. The only NAT rules are:
+
+- Masquerade outbound IPv4 via GPON PPPoE
+- Masquerade to GPON ONT management<br>
+  It doesn't have a gateway configured, we want to access it from other networks so we need to talk to it as if we were in the same subnet
+- src-nat tailscale IPv6 to internet<br>
+  Tailscale assigns IPv6 from private subnet with no way to configure it, so the assigned IPs are not routable
+- IPv4 port forwards from GPON PPPoE to respective services
+
+## DNS and IPv6 preference
+
+DNS is served by a CoreDNS container (`172.20.0.3`); RouterOS forwards client queries to it. CoreDNS suppresses AAAA records by default so clients prefer IPv4, avoiding the HE tunnel's datacenter-flagged egress (which triggers CAPTCHAs on some sites). Our own zone (`lumpiasty.xyz`) and any explicitly whitelisted domains keep AAAA for native IPv6. See [CoreDNS resolver](./coredns.md). An earlier NAT64/IPv6-mostly approach to the same problem was built and abandoned; see the [postmortem](./nat64-dns64-postmortem.md).
+
+UPnP and NAT-PMP are also enabled to automatically configure port forwards from the LAN.
+
+## Uplink
+
+The main internet connection is a fibre-optic (GPON) service from my ISP, which includes a static, publicly reachable IPv4 address. I'm using my own GPON ONU, an SFP+ module inserted into the CRS, which I configured to clone the ISP-provided Huawei box. The router authenticates using PPPoE credentials and is handed the public IP address directly.
+
+One quirk of the ISP is that it doesn't allow incoming port 53/DNS connections, which prevents me from hosting a DNS server. I wanted to do that in order to configure reverse DNS for pod IPv6 addresses. The configuration for the public DNS server still remains in the cluster.
+
+The ISP does not provide any IPv6 connectivity at all. For that purpose I'm using [tunnel broker from Hurricane Electric](https://tunnelbroker.net/), which gives /48 routed prefix that I divided to /64 networks.
+
+The backup internet link is an LTE connection via the embedded BroadMobi BM806C modem in the D-Link router (Orange Poland, dual-stack). The SIM was previously in a USB modem attached directly to the CRS; it was moved to the D-Link to reduce rack clutter and gain access to a proper modem interface. The modem requires firmware-level workarounds — QMI data-plane bugs, a cold-boot UIM wedge that needs USB re-enumeration — documented in [LTE failover (BroadMobi BM806C / D-Link DWR-921 C1) — QMI data-plane workaround](./wwan-bm806c-qmi-workaround.md).
+
+Failover is implemented using iBGP between the D-Link (BIRD2, AS 65000) and the CRS (`local.role: ibgp-rr` so CRS acts as route reflector for D-Link). The D-Link announces `0.0.0.0/0` and `2000::/3` to the CRS whenever its `wwan` interface is up. The CRS installs these at BGP distance 200 — less preferred than the GPON static default at distance 1 — so they only become active when GPON fails. The CRS in turn announces all its connected and static routes (VLAN subnets, Tailscale, k8s pod/service/LB prefixes via RR reflection) to the D-Link so it always has explicit routes to internal subnets regardless of WAN state. The D-Link's own default route also comes from this BGP session (no static gateway on the uplink interface); when the CRS withdraws the default on GPON failure, the D-Link falls back to its wwan kernel route at metric 100.
+
+For full design rationale, route exchange tables, and implementation notes including the BGP Extended Next Hop workarounds, see [LTE failover design](./lte-failover-design.md).
+
+During LTE failover, all VLANs route through `vlan6` to the D-Link, which forwards traffic out `wwan` and masquerades it (IPv4 and IPv6 via fw4 `masq`/`masq6`). IPv6 is outbound-only — the carrier enforces an inbound firewall, and there is no routed prefix large enough to cover all internal subnets without NAT.
+
+## Configuration management
+
+Currently, only CRS and D-Link are managed in this repository. Other devices currently have been configured manually using dedicated web interface/tools. The end goal is to have full configuration as code.
+
+Network devices are configured using Ansible with playbooks under [ansible/playbooks](../ansible/playbooks/) subdirectory:
+
+- [openwrt.yml](../ansible/playbooks/openwrt.yml) - Configuration of D-Link router
+- [routeros.yml](../ansible/playbooks/routeros.yml) - configures CRS router
+
+There is also a one-time initialisation playbook called [dlink-init.yml](../ansible/playbooks/dlink-init.yml) that is used to configure basic D-Link settings from scratch after a configuration reset so it can be accessed from the management network.
+
+To reconcile configuration from this repository to device, execute `ansible-playbook playbooks/<playbook>` from `ansible` directory. It will automatically load necessary secrets from vault and start applying configuration. Playbooks without `-init` in their name should be idempotent.
@@ -0,0 +1,677 @@
+# LTE failover (BroadMobi BM806C / D-Link DWR-921 C1) — QMI data-plane workaround
+
+Last verified: 2026-05-27, OpenWrt 25.12.2 r32802-f505120278, netifd 2026.02.26~cbb83a18-r1.
+
+## TL;DR
+
+The embedded BroadMobi BM806C modem in the D-Link DWR-921 has **three
+independent bugs** in its firmware (`M1.2.0_E1.0.1_A1.1.8`, the only
+build that has ever shipped), all of which must be worked around for a
+usable LTE uplink:
+
+1. **Cold-boot UIM wedge.** On every cold boot, the modem's UIM (SIM)
+   QMI service comes up permanently broken: `--uim-get-sim-state`
+   returns `{}`, `--get-imsi` returns `"UIM uninitialized"`, and
+   `AT+CPIN?` returns `+CME ERROR: SIM busy`. The modem **never
+   recovers on its own** (verified at uptime 21 min). A single USB
+   re-enumeration (`echo 0 > /sys/.../1-1/authorized; sleep 3; echo 1
+   > ...`) forces the modem to redo its internal QMI init from
+   scratch, after which UIM comes up within ~1 s. The
+   `wwan-bringup` service installed by this role does the
+   re-enumeration unconditionally on boot, then calls `ifup wwan`.
+   Full investigation: `/root/wwan-diag/boot-wedge-investigation.md`
+   on the router.
+
+2. **`qmi.sh` requests `802.3` framing** from the modem.
+   The BM806C's `802.3` firmware path is buggy on this generation of
+   Qualcomm silicon; raw-ip framing works correctly. The same kernel
+   maintainer who added raw-ip support to `qmi_wwan` documents
+   "buggy 802.3 firmware implementation" as a known issue for the
+   MDM9x25 family this modem is built on.
+
+3. **`qmi.sh` calls `uqmi --start-network --apn <foo>`** to bring up
+   the bearer. On BM806C this triggers a known firmware bug
+   ([OpenWrt FS#1363](https://github.com/openwrt/openwrt/issues/6295))
+   that establishes a *phantom* bearer: kernel and modem agree there is
+   a session, IP addresses are assigned, `--get-data-status` returns
+   `"connected"` — but the bearer is not bound to a real PDN at the
+   GGSN, so packets are blackholed. Invoking `--start-network --profile
+   <N>` against a pre-configured NVRAM profile **with the same APN**
+   works perfectly.
+
+Bug 1 is the boot-time wedge; without the workaround `wwan` simply
+never comes up after a reboot. Bugs 2 and 3 are about the data plane
+itself; without their workarounds, `wwan` comes up but no traffic
+flows. Our role addresses all three: it installs `wwan-bringup`
+(re-enumerates the USB device once on boot, then `ifup wwan`), patches
+`qmi.sh` in two places (raw-ip + a kernel `-EBUSY` fix), creates a
+second NVRAM profile in the modem for the IPv6 APN, and adds
+`option profile`/`option v6profile` to the UCI `wwan` interface so
+`qmi.sh` uses the working code path. After all three workarounds,
+cold boot to working dual-stack IPv4+IPv6 LTE uplink completes in
+~2:30–3:30 — verified end-to-end at HTTPS layer to multiple
+upstreams.
+
+## Symptoms
+
+### Boot-wedge symptoms (bug 1)
+
+When the modem boots into the UIM-wedged state, all of the following
+hold simultaneously:
+
+- `/dev/cdc-wdm0` exists, `wwan0` netdev exists, `qmi_wwan` driver is
+  bound to `1-1:1.4` — kernel side looks fine
+- `ifup wwan` runs forever in the SIM-init loop:
+  `wwan: SIM in illegal state - Power-cycling SIM` repeating every ~8 s
+- `uqmi -d /dev/cdc-wdm0 --uim-get-sim-state` returns `{}` (empty
+  body — no `card_application_state` field at all)
+- `uqmi -d /dev/cdc-wdm0 --get-imsi` returns the QMI string
+  `"UIM uninitialized"`
+- `uqmi -d /dev/cdc-wdm0 --get-pin-status` returns
+  `"Invalid arguments given"` (uqmi cannot allocate a UIM client
+  because the modem-side service has not registered)
+- AT side: `AT+CFUN?` returns `+CFUN: 1` (modem firmware is alive),
+  `AT+CPIN?` returns `+CME ERROR: SIM busy`, and `AT+CREG?` /
+  `AT+CEREG?` / `AT+COPS?` all return bare `ERROR`
+- This persists indefinitely; we measured no recovery at uptime
+  21 min
+
+### Data-plane symptoms (bugs 2 and 3)
+
+When the modem comes up cleanly but the qmi.sh patches are missing or
+the wrong `--start-network` invocation is used, all of the following
+are true at the same time:
+
+- `ifup wwan` succeeds, `ifstatus wwan` reports `"up": true`
+- `wwan0` has a valid CG-NAT IPv4 (`10.x.x.x/30`) and IPv6
+  (`2a00:f41:.../128` for Orange Poland)
+- `uqmi --get-data-status` returns `"connected"`
+- `ip route` shows default routes via `wwan0`
+- `tcpdump -i wwan0` shows outbound TCP SYNs leaving normally with
+  the wwan source IP
+- **No reply ever comes back**: `RX bytes` on `wwan0` stays near zero
+  while `TX bytes` climbs with each connection attempt
+- `ping -I wwan0` to any destination shows 100% loss
+- `curl --interface wwan0` times out on every TCP connect
+- After a while, `+CEER` on an AT port shows
+  `Regular deactivation` or `EMM detached` (the network gives up on
+  the broken session and drops it)
+
+If your symptoms include `Network registration failed, registration
+timeout reached` instead of the silent "everything looks fine but no
+data flows", you are probably hitting
+[OpenWrt forum issue: BM806U-E1/DWR-921 C3](https://forum.openwrt.org/t/problem-with-bm806u-e1-dwr-921-c3/130094)
+which is the same root cause manifesting on a slightly different
+firmware revision. The fix is the same.
+
+## What the issue is NOT
+
+We ruled all of these out during diagnosis. If you're tempted by any of
+them, read the corresponding "how we confirmed" section before going
+down that path.
+
+### Not a signal/RF problem
+
+We initially had RSRP around `-113 dBm` and SNR around 0 dB and
+suspected weak coverage. Adding external antennas brought RSRP to
+`-94 dBm` and SNR to `+15..+17 dB` — well into the "good LTE" range —
+and the data-plane bug remained unchanged. Both poor-signal and
+good-signal sessions showed identical TX-only behaviour.
+
+### Not a SIM / subscription / APN-name problem
+
+The same SIM card was confirmed to work in a different LTE router
+on the same Orange Poland subscription. The modem also registered
+correctly (`+CEREG: 0,1`, `+COPS: 0,0,"Orange",7`), and `AT+CGCONTRDP`
+showed valid IPs being assigned by the GGSN. APN strings `internet`
+(IPv4v6) and `internetipv6` (IPv6) are Orange Poland's documented
+APNs.
+
+### Not a SIM-PIN / SIM-power / EMM detach problem
+
+PIN is disabled and verified disabled (`+CPIN: READY`). EMM detaches
+we observed in `+CEER` were *consequences* of the broken bearer,
+not the cause: a session with no return traffic eventually gets
+torn down by the network.
+
+### Not a firewall / NAT / masquerade problem
+
+We tested with the wwan firewall zone in every combination (REJECT/
+ACCEPT, with and without masquerade, with and without explicit
+forwarding rules) and the symptom was identical. Inspection of the
+nftables byte counters showed packets *leaving* `wwan0` reaching the
+forward chain on egress; the problem is that no packets ever arrive
+in the other direction. The firewall could not be the cause —
+nothing was inbound to be filtered.
+
+### Not an ARP/NDP / asymmetric-routing problem
+
+Initial captures showed unanswered ARP requests for the cellular
+gateway on `wwan0`, which is a known issue with `qmi_wwan` in 802.3
+mode (the kernel does ARP on what is really a point-to-point
+cellular link; the gateway never answers because there is no L2).
+We installed permanent neighbour entries to bypass ARP entirely —
+traffic still failed. Switching to raw-ip mode (where the kernel
+sets `NOARP` on the interface and ARP is never attempted) fixed the
+ARP weirdness but did NOT fix the data-plane problem. Both fixes
+(raw-ip framing and `--start-network --profile`) are needed and they
+are independent.
+
+### Not an MBIM-vs-QMI problem
+
+The BM806C does not expose an MBIM USB composition. Switching
+protocols isn't an option without re-flashing the modem firmware,
+which has no public images.
+
+### Not a modem-firmware-update problem
+
+`M1.2.0_E1.0.1_A1.1.8` is the only BM806C firmware that has ever
+shipped. BroadMobi (Shanghai Mobile) only releases firmware to
+OEM partners; D-Link's last DWR-921 router firmware
+(`1.01.3.006 Generic`, no date) bundles the same modem image.
+Extracting and re-flashing it would change nothing.
+
+### Not "QMI is fundamentally broken on this modem"
+
+This was our working hypothesis for a long time. The decisive
+counter-test was running PPP over `/dev/ttyUSB2` with
+`ATD*99***1#` while QMI was idle: data flowed instantly,
+HTTPS in 0.7 s, ping 25 ms, 0% loss. Same SIM, same cell,
+same antennas, same APN — just a different host-side dial-up
+mechanism. That proved the modem, the RAN, and the operator
+were all fine. Whatever was breaking QMI had to live in the
+QMI control path itself (uqmi / qmi.sh / `qmi_wwan`) and/or in
+how the modem handles specific QMI message shapes.
+
+The forum thread and FS#1363 then nailed it down to
+`--start-network --apn`.
+
+## How we confirmed it IS the QMI control-path bug
+
+The minimal repro is just a handful of `uqmi` invocations:
+
+```sh
+DEV=/dev/cdc-wdm0
+
+# Configure profile 1 in the modem's NVRAM with the v4 APN.
+uqmi -d $DEV --modify-profile "3gpp,1" --apn internet --pdp-type ipv4v6
+
+# Switch to raw-ip framing (the other fix).
+uqmi -d $DEV --wda-set-data-format raw-ip
+ip link set wwan0 down
+echo Y > /sys/class/net/wwan0/qmi/raw_ip
+ip link set wwan0 up
+
+# Start the bearer.  --profile 1 instead of --apn internet.
+cid=$(uqmi -d $DEV --get-client-id wds)
+uqmi -d $DEV --set-client-id wds,$cid --set-ip-family ipv4 > /dev/null
+uqmi -d $DEV --set-client-id wds,$cid --start-network --profile 1
+```
+
+Followed by manual addressing/routing of `wwan0` from
+`--get-current-settings`, this **just works** — `curl -4 --interface
+wwan0 https://1.1.1.1/` returns `301` in under a second, RX bytes climb.
+
+If you replace `--start-network --profile 1` with `--start-network
+--apn internet` (everything else identical), the bearer comes up,
+addresses are assigned, `--get-data-status` says `"connected"`, and
+no downlink traffic ever arrives. This is the smoking-gun isolation
+of the firmware bug.
+
+## Are you affected?
+
+You are affected if all of these hold:
+
+1. Your modem reports `Manufacturer: BroadMobi`, `Model: BM806C` (or
+   `BM806U`), `Revision: M1.2.0_E1.0.1_A1.1.8`. Check via any AT port:
+   `printf 'ATI\r' | picocom -qrx 3000 /dev/ttyUSB2`.
+2. Your USB IDs are `2020:2033`. Check
+   `/sys/bus/usb/devices/<port>/idVendor` / `idProduct`. On the C1
+   hardware revision the modem cold-boots directly into `2020:2033`
+   QMI composite mode — no `usb-modeswitch` involved (there is no
+   `2020:2033` entry in `/etc/usb-mode.json` on our build). Other
+   hardware revisions may go through an EDL `05c6:9008` →
+   `2020:2033` modeswitch first.
+3. `qmi.sh` (`/lib/netifd/proto/qmi.sh`) is the unmodified upstream
+   netifd handler. Grep for `--wda-set-data-format 802.3` —
+   if present, you have the unpatched script.
+
+The quick functional test is the minimal repro above: if you can get
+data flowing with `--start-network --profile 1` but not with
+`--start-network --apn internet`, you have this bug.
+
+## Involved components & versions
+
+| Component                | Version                                    |
+| ------------------------ | ------------------------------------------ |
+| Router                   | D-Link DWR-921 C1 (`dlink,dwr-921-c1`)     |
+| SoC                      | MediaTek MT7620N ver:2 eco:6               |
+| OpenWrt                  | 25.12.2 (r32802-f505120278)                |
+| Kernel                   | Linux 6.12.74                              |
+| netifd                   | 2026.02.26~cbb83a18-r1                     |
+| uqmi                     | 2025.07.30~7914da43-r2                     |
+| libqmi / qmi-utils       | 1.36.0-r1                                  |
+| luci-proto-qmi           | 26.133.20346~e9ebca7                       |
+| qmi_wwan kernel driver   | backports from Linux v6.18.7 (per dmesg)   |
+| LTE modem                | BroadMobi BM806C (Qualcomm MDM9225)        |
+| Modem firmware           | `M1.2.0_E1.0.1_A1.1.8`                     |
+| Modem USB id (data mode) | `2020:2033` (cold-boots directly into this) |
+| Modem USB id (EDL mode)  | `05c6:9008` (not observed on C1; may apply to other revs) |
+| Mobile network           | Orange Poland (MCC 260 / MNC 03)           |
+| APN (IPv4 / dual-stack)  | `internet` (auth: PAP, user/pass `internet`/`internet`) |
+| APN (IPv6)               | `internetipv6` (same auth)                 |
+
+## References
+
+- OpenWrt forum thread (same model, same symptoms):
+  <https://forum.openwrt.org/t/problem-with-bm806u-e1-dwr-921-c3/130094>
+- OpenWrt issue #6295 / FS#1363 — "QMI does not use correct APN":
+  <https://github.com/openwrt/openwrt/issues/6295>
+- Kernel commit "net: qmi_wwan: support 'raw IP' mode" (Bjørn Mork):
+  documents the 802.3-firmware-is-buggy reality across this generation.
+  Search the mainline kernel for `QMI_WWAN_FLAG_RAWIP`.
+- Kernel commit "net: qmi_wwan: add BroadMobi BM806U 2020:2033"
+  (Pawel Dembicki, 2018, `6cb2669cb97f`): adds the `qmi_wwan` entry
+  for our exact USB id `2020:2033` as `QMI_FIXED_INTF(0x2020, 0x2033, 4)`
+  with no quirks. The BM806C and BM806U share the device id and
+  qmi_wwan driver path. The entry has not been touched in mainline
+  through v6.18.7 (what OpenWrt 25.12.2 ships via backports).
+- libqmi maintainer Aleksander Morgado on cdc-wdm port readiness
+  timing (libqmi-devel, Sep 2021):
+  <https://lists.freedesktop.org/archives/libqmi-devel/2021-September/003695.html>
+  — explains that cdc-wdm appearing in `/dev` is not a guarantee that
+  the modem-side QMI service is operational. ModemManager uses up to
+  45 s of warmup tolerance; we measured this modem firmware needs
+  ~5 min before CTL is even responsive, and UIM never converges
+  without a USB re-enumeration.
+- `CastixGitHub/re_wwan` (<https://github.com/CastixGitHub/re_wwan>):
+  another BM806C user, identical firmware build, identical recovery
+  pattern (`rmmod qmi_wwan; insmod qmi_wwan` to recover from a hung
+  modem; AT-side `AT+CFUN=` resets reported as not working). Useful
+  independent confirmation that the right primitive is module
+  reload / USB re-enumeration, not a soft reset.
+- D-Link DWR-921 support page (firmware images, region-specific):
+  hardware revision C3 on the Polish site lists firmware
+  `1.01.3.006 Generic`, `1.00B07 T-Mobile`, `1.00B06 Plus/Cyfrowy Polsat
+  Rev C3` — all of which bundle the same modem firmware build.
+
+## Limitations
+
+### Can this be configured via LuCI / UCI alone?
+
+**Partly.** The UCI side of the workaround is fully achievable through
+LuCI or `uci set`:
+
+```sh
+uci set network.wwan.profile='1'
+uci set network.wwan.v6profile='2'
+uci commit network
+```
+
+`luci-proto-qmi` already exposes `profile` and `v6profile` as fields in
+the LTE wizard. The wwan interface config alone, however, is **not
+sufficient** — `qmi.sh` and the modem NVRAM both need attention before
+`ifup wwan` will work end-to-end. Specifically:
+
+- The `qmi.sh` patches (raw-ip + `ip link down/up` around the sysfs
+  write) are filesystem edits that survive package upgrades only if
+  re-applied. They cannot be expressed as UCI.
+- Creating modem-NVRAM profile 2 with `internetipv6` is a one-shot QMI
+  call (`uqmi --create-profile 3gpp ...`). It is not part of OpenWrt's
+  configuration model; the profile lives in the modem itself.
+
+So in practice: configurable via UCI/LuCI as far as the *router* is
+concerned, but that router config only takes effect once the manual
+modem profile creation and the `qmi.sh` patches are in place.
+
+### `auto '0'` on the wwan interface
+
+We intentionally keep `option auto '0'`: the wwan interface does not
+auto-start at boot. This is a deliberate failover-only setup —
+`uplink` (the wired VLAN to the MikroTik) is the primary path, and a
+human (or future failover script, e.g. `mwan3`) decides when to
+bring up wwan.
+
+This also sidesteps a fragile boot ordering question: on cold boot the
+modem's **UIM (SIM) QMI service comes up permanently broken** and never
+recovers without an explicit USB re-enumeration (`echo 0/1 >
+/sys/bus/usb/devices/1-1/authorized`). Other QMI services (CTL, NAS,
+WDS) do come up after ~5 min of warmup, but UIM does not — verified at
+uptime 21 min with no intervention. The `wwan-bringup` service handles
+the re-enumeration on boot and then calls `ifup wwan` itself; netifd
+never has to deal with the wedge directly. See
+`/root/wwan-diag/boot-wedge-investigation.md` on the router for the
+full root-cause analysis (2026-05-27).
+
+### IPv6 is via a second NVRAM profile, not a single dual-stack PDP
+
+Orange Poland uses two distinct APN strings (`internet` for v4,
+`internetipv6` for v6). The BM806C firmware lets us configure profile 1
+as `IPV4V6` with `internet`, but the IPv6 leg of that profile cannot be
+made to use the dedicated `internetipv6` APN. Our config uses two
+independent profiles (profile 1 = IPv4 from `internet`, profile 2 =
+IPv6 from `internetipv6`) and `qmi.sh` happily fires both
+`--start-network --profile 1` and `--start-network --profile 2`
+in sequence (one per address family).
+
+### qmi.sh patches survive package upgrades only if re-applied
+
+`/lib/netifd/proto/qmi.sh` is a netifd protocol handler, but the file
+itself is shipped by the `uqmi` package. When that package is
+upgraded, the file is replaced. Our patches are *not*
+listed in `/etc/sysupgrade.conf` and would not normally be preserved
+across a sysupgrade-style image flash either. The Ansible role
+re-applies them idempotently on every play; outside Ansible, you
+would need a wrapper (e.g. a postinst hook or a manual re-patch
+step in your upgrade runbook).
+
+### No automatic failover yet
+
+Apart from the boot-time `wwan-bringup` service, bringing wwan up
+requires an explicit `ifup wwan`. There is no monitor that detects
+loss of `uplink` and switches over. `mwan3` is the obvious candidate.
+
+## Implementation (manual, no Ansible)
+
+Everything below assumes you have already SSH'd into the OpenWrt
+router as root, the modem is enumerated as `/dev/cdc-wdm0` /
+`wwan0`, and `uqmi` / `picocom` are installed.
+
+### Step 1 — patch `qmi.sh`
+
+Three single-line edits to `/lib/netifd/proto/qmi.sh`. Around line 233:
+
+```sh
+# Before
+uqmi -s -d "$device" -t 1000 --set-data-format 802.3 > /dev/null 2>&1
+uqmi -s -d "$device" -t 1000 --wda-set-data-format 802.3 > /dev/null 2>&1
+...
+echo "Y" > /sys/class/net/$ifname/qmi/raw_ip
+
+# After
+uqmi -s -d "$device" -t 1000 --set-data-format raw-ip > /dev/null 2>&1
+uqmi -s -d "$device" -t 1000 --wda-set-data-format raw-ip > /dev/null 2>&1
+...
+ip link set $ifname down; echo "Y" > /sys/class/net/$ifname/qmi/raw_ip; ip link set $ifname up
+```
+
+The third edit is essential: writing `Y` to the `raw_ip` sysfs node
+fails with `EBUSY` ("Cannot change a running device") if `wwan0` is
+up at the moment of the write. The kernel only lets you change the
+link-layer protocol while the interface is down. Without this bracket
+the patched script logs `sh: write error: Resource busy`, the kernel
+driver stays in Ethernet mode, and we are back to broken ARP/NDP.
+
+In-place via `sed`:
+
+```sh
+sed -i 's|--set-data-format 802\.3|--set-data-format raw-ip|;
+        s|--wda-set-data-format 802\.3|--wda-set-data-format raw-ip|;
+        s|^\(\s*\)echo "Y" > /sys/class/net/$ifname/qmi/raw_ip$|\1ip link set $ifname down; echo "Y" > /sys/class/net/$ifname/qmi/raw_ip; ip link set $ifname up|' \
+       /lib/netifd/proto/qmi.sh
+```
+
+### Step 2 — create modem-NVRAM profile 2 for the IPv6 APN
+
+Profile 1 is managed by `qmi.sh` itself (it calls `--modify-profile
+"3gpp,1"` with the UCI `apn` value on every ifup). Profile 2 has to be
+bootstrapped once, then it persists in modem NVRAM:
+
+```sh
+uqmi -d /dev/cdc-wdm0 --create-profile 3gpp --apn internetipv6 --pdp-type ipv6
+# returns {"created-profile": 2}
+
+# Verify
+uqmi -d /dev/cdc-wdm0 --get-profile-settings 3gpp,2
+# {"apn":"internetipv6","pdp-type":"ipv6", ...}
+```
+
+If profile 2 already exists with wrong settings, use `--modify-profile`
+instead:
+
+```sh
+uqmi -d /dev/cdc-wdm0 --modify-profile 3gpp,2 --apn internetipv6 --pdp-type ipv6
+```
+
+### Step 3 — UCI config for the wwan interface
+
+```sh
+uci batch <<'EOF'
+set network.wwan=interface
+set network.wwan.device='/dev/cdc-wdm0'
+set network.wwan.proto='qmi'
+set network.wwan.apn='internet'
+set network.wwan.v6apn='internetipv6'
+set network.wwan.profile='1'
+set network.wwan.v6profile='2'
+set network.wwan.auth='pap'
+set network.wwan.username='internet'
+set network.wwan.password='internet'
+set network.wwan.pdptype='ipv4v6'
+set network.wwan.dhcp='0'
+set network.wwan.dhcpv6='0'
+set network.wwan.metric='100'
+set network.wwan.auto='0'
+EOF
+uci commit network
+```
+
+`apn` and `v6apn` are still set even though `profile` / `v6profile`
+take precedence on the `--start-network` call: `qmi.sh` uses `apn`
+when it runs `--modify-profile 3gpp,1 --apn $apn --pdp-type
+$profile_pdptype` near the top of `proto_qmi_setup`, before
+`--start-network`. Without it, `qmi.sh` would re-write profile 1 with
+an empty APN on every ifup. `v6apn` is not strictly used by `qmi.sh`
+in the current code path (the `--start-network --profile 2` invocation
+ignores `--apn $v6apn`), but is kept for clarity and so an operator
+reading the config sees what APN profile 2 is supposed to point at.
+
+`dhcp '0'` / `dhcpv6 '0'` tell `qmi.sh` to apply the IP addresses
+itself (via `proto_add_ipv4_address` / `proto_add_ipv6_address` from
+`uqmi --get-current-settings`) instead of spawning `udhcpc` /
+`odhcp6c` on `wwan0`. The modem hands out the addresses through QMI;
+running DHCP on a point-to-point cellular link would fail anyway.
+
+`metric '100'` keeps `uplink` (metric 0) preferred as the default
+route when both are up.
+
+### Step 4 — test
+
+```sh
+ifup wwan
+sleep 10
+ifstatus wwan | head -20
+uqmi -d /dev/cdc-wdm0 --get-data-status         # "connected"
+cat /sys/class/net/wwan0/qmi/raw_ip             # Y
+ip -d link show wwan0 | head -2                 # POINTOPOINT,NOARP, link/none
+ip addr show wwan0
+```
+
+Then, with `uplink` taken down or the wwan route preferred, verify
+real traffic:
+
+```sh
+curl -4 --interface wwan0 -sS -o /dev/null -w "%{http_code}\n" https://1.1.1.1/
+curl -6 --interface wwan0 -sS -o /dev/null -w "%{http_code}\n" https://[2606:4700:4700::1111]/
+```
+
+Both should return `301` within ~1 second. `ip -s link show wwan0`
+should show RX bytes climbing.
+
+### Step 5 — teardown / cleanup
+
+```sh
+ifdown wwan
+```
+
+That's it. The modem-NVRAM profiles persist across reboots and even
+across `usb-modeswitch` cycles, so step 2 only ever needs to be run
+once per physical modem (the profiles live in the modem, not on the SIM).
+
+## Related changes in our config
+
+These accompany the wwan fix in the same time frame; they aren't part
+of the wwan workaround per se but were made in the same series of work
+and are worth pointing at if you're trying to retrace this end-to-end.
+
+- **VLAN 6 ("uplink")** on the MikroTik CRS418 and on the OpenWrt AP:
+  a tagged-only VLAN over ether3/WAN that carries the AP's wired
+  uplink to the MikroTik. IPv4 `192.168.6.0/24`, IPv6
+  `2001:470:61a3:600::/64` (point-to-point, no SLAAC, static `::1` and
+  `::2`). The AP's "uplink" netifd interface is dual-stack on
+  `eth0.6`. wwan failover is *to* this uplink, not the LAN.
+- **Management policy-routing** on the AP. The management interface
+  `mgmt` (192.168.255.11/24 on `eth0.1`) is reached through the
+  MikroTik from subnets that are not directly connected, so replies
+  to clients in those subnets would otherwise have followed the
+  default route out `eth0.6` and been blackholed by the MikroTik. We
+  have two policy-routing rules
+  (`priority 500` for same-subnet → main table, `priority 1000` for
+  any other → table 100) and a `config route` in table 100 sending
+  `0.0.0.0/0` back via the MikroTik. None of this interacts with wwan
+  directly but it's mentioned so anyone reading `network.yml` does not
+  trip over the rules wondering whose problem they are.
+- **`community.openwrt.apk` module migration**. OpenWrt 25.12+ uses
+  `apk` instead of `opkg`, and the upstream collection's `apk` module
+  is only in `community.openwrt` git `main` at the time of writing.
+  We pin to `git+main` in `ansible/requirements.yml` until a release
+  ships it.
+- **Manually-installed packages folded back into `openwrt_packages`**:
+  `usb-modeswitch` (drives the modem out of EDL `05c6:9008` into QMI
+  `2020:2033` on hardware revisions that need it; the C1 was observed
+  to cold-boot directly into `2020:2033`) and `luci-proto-qmi`.
+
+## Future work
+
+In rough priority order:
+
+1. **Upstream a fix to `qmi.sh`** that does the `ip link down/up`
+   bracket around the `raw_ip` sysfs write. This is a strict bug in
+   the upstream script: as written, the write fails with `EBUSY`
+   whenever the modem actually wants raw-ip, which is precisely the
+   case `qmi.sh` claims to handle. Likely a 3-line patch. This is the
+   easiest, least controversial upstream contribution.
+2. **Upstream a fix or knob for the BM806C-style firmware quirk**.
+   The cleanest path is probably an OpenWrt-level UCI option
+   `prefer_raw_ip` (default off) on the `qmi` proto, similar to how
+   `mbim.sh` is constructed. We don't want to change the default
+   framing for all qmi devices — newer Qualcomm modems advertise 802.3
+   correctly and `qmi.sh`'s readback logic does the right thing for
+   them. A per-device opt-in keeps the existing autodetect intact.
+3. **Document/upstream the `--profile` workaround for FS#1363**. The
+   bug is 7+ years old and still hits real users. The right cleanup is
+   probably to make `qmi.sh` prefer `--profile $N` whenever profile
+   modification has just succeeded, falling back to `--apn $apn` only
+   if no profile was written. This is a behavioural change and would
+   need a discussion thread / PR description that walks the reviewer
+   through the modem-firmware history.
+4. **Replace the `qmi.sh` patches in our Ansible role with a wrapper**
+   that does not edit `qmi.sh` directly. Options:
+   - A custom proto `qmi-bm806c` that sources the original `qmi.sh`,
+     overrides only `proto_qmi_setup`, and registers under a separate
+     name. UCI would switch `option proto 'qmi'` → `'qmi-bm806c'`.
+     Clean but harder to debug because there is now an extra layer of
+     indirection.
+   - A hotplug script in `/etc/hotplug.d/iface/` that intercepts
+     pre-ifup events on wwan, sets WDA + sysfs raw-ip beforehand, and
+     trusts the modem's `802.3` readback to fail naturally so `qmi.sh`
+     never writes the sysfs node. Untested. Likely flaky.
+   - The current "patch the file, reapply via Ansible" approach is the
+     simplest and most direct. It is fine as long as the role is the
+     source of truth.
+5. **Periodic session keepalive / reconnect on detach.** Now that
+   boot bring-up is fast and reliable (~2:30–3:30 from cold boot to
+   wwan up), the next likely failure mode is the modem getting
+   deactivated by the network (`+CEER: Regular deactivation`) after
+   long idle periods. A simple `procd` service that polls
+   `uqmi --get-data-status` and triggers `ifup wwan` on transition
+   `connected → disconnected` would close this gap. Don't pre-emptively
+   add it; wait until you have evidence the problem occurs in practice
+   with the workaround in place. If the disconnect comes with UIM
+   going bad (same wedge signature as cold boot), the keepalive needs
+   to call `wwan-bringup` (which re-authorizes the USB device) rather
+   than `ifup wwan` directly.
+6. **Implement actual failover.** `mwan3` is the conventional choice.
+   Alternatively a tiny shell loop that pings a target via `uplink`
+   and triggers `ifup wwan` / `ifdown wwan` on transitions. Either way
+   the wwan side of the work is done; the failover orchestration is a
+   separate problem.
+7. **Investigate `mbim` mode**. The BM806C does not currently expose
+   MBIM, but the modem chipset (MDM9225) supports it at the silicon
+   level. Whether there exists a magic AT command, vendor QMI message,
+   or firmware composition switch to enable MBIM is unknown — the AT
+   command set we explored (`AT^USBMODE`, `AT^SETPORT`, `AT+QCFG`,
+   `AT+BMSWITCH`, `AT$QCPDPP`, etc.) all returned `ERROR`. If MBIM
+   could be enabled, `qmi.sh` becomes irrelevant and the upstream
+   `mbim.sh` proto might just work. Significant payoff if it pans out;
+   research-heavy if it doesn't.
+8. **Periodic re-test on OpenWrt upgrades**. When the OpenWrt package
+   that ships `qmi.sh` (`uqmi`) gets a new release, re-check that the
+   `qmi.sh` patches still apply
+   cleanly. Our role uses regex-based `lineinfile`, so it tolerates
+   the surrounding code drifting somewhat, but if upstream restructures
+   the data-format block significantly we'd need to revisit.
+
+## Things worth noting if anyone picks this up again
+
+- `qmi.sh`'s upstream "set IP format" block runs `--set-data-format`
+  first (against the kernel/`qmi_wwan`) and `--wda-set-data-format`
+  second (against the modem). Both must agree. We patch both.
+- The readback `--wda-get-data-format` call is what `qmi.sh` uses to
+  decide whether to write `Y` to sysfs. Our patches make this return
+  `"raw-ip"`, which makes the existing branch fire — we don't add a
+  branch, we just nudge the existing logic into the path that already
+  exists for "device only supports raw-ip" modems.
+- The kernel `qmi_wwan` sysfs node `/sys/class/net/wwan0/qmi/raw_ip`
+  toggles the *kernel-side* framing. The QMI WDA call toggles the
+  *modem-side* framing. They are independent. Both must agree, or
+  the kernel will parse bytes that came in as raw-IP as if they were
+  Ethernet frames (or vice versa). The result, depending on which
+  side is wrong, ranges from "all packets dropped silently in
+  `qmi_wwan_rx_fixup`" to "kernel ARPs at a phantom MAC".
+- `uqmi --modify-profile 3gpp,1` does work on this modem — both
+  the JSON `--get-profile-settings 3gpp,1` and the AT-side
+  `AT+CGDCONT?` reflect the new value immediately. The bug is
+  specifically with the `--start-network --apn` TLV, not with
+  profile management.
+- `uqmi --create-profile 3gpp` returns the new profile index in
+  `{"created-profile": N}`. It auto-allocates the next free slot, so
+  in a fresh modem you'll get `2`, but on an already-configured modem
+  you might get `3` or higher. Always read the return value rather
+  than assuming `2`. (Our Ansible task hardcodes 2 but checks
+  `--get-profile-list` first to skip creation if 2 already exists.)
+- `+CEER: Regular deactivation` and `+CEER: EMM detached` are *last
+  error* codes; they persist until the modem clears them. Reading
+  them tells you the last failure, not necessarily the current state.
+  Always cross-reference with `+CEREG?` and `+CGACT?` to know if you
+  are presently attached.
+- `uqmi -t 5000 -d /dev/cdc-wdm0 --get-serving-system` returns
+  `"Failed to connect to service"` (or `"Unknown error"`) for the
+  first ~5 minutes after cold boot. CTL/NAS/WDS *do* eventually come
+  up (we measured `--get-versions` first OK at uptime 320 s,
+  serving-system at 376 s), but they flap in and out for several more
+  minutes. **UIM never comes up on cold boot without a USB
+  re-enumeration** — `--uim-get-sim-state` keeps returning `{}` and
+  `--get-imsi` keeps returning `"UIM uninitialized"` even at uptime
+  21 minutes. This is why the `wwan-bringup` worker now does an
+  unconditional `authorized=0/1` re-enumeration immediately after the
+  modem enumerates; it is not waiting for warmup, it is forcing the
+  modem to redo its init from scratch.
+- A reliable AT-side way to tell a wedged modem from one that is still
+  warming up after cold boot:
+  `AT+CPIN?` returning `+CME ERROR: SIM busy` while `AT+CFUN?` returns
+  `+CFUN: 1` means the modem firmware is alive but UIM is stuck. If
+  this persists past uptime 5 minutes the modem will not recover on
+  its own; re-authorize the USB port.
+- The diagnostic scripts we accumulated live on the router at
+  `/root/wwan-diag/` (created during debugging; not part of the
+  Ansible role). The most useful ones are `at.sh` (run AT commands
+  through `picocom`), `ppp-test.sh` (PPP-via-AT as a control test
+  that bypasses QMI), `qmi-dual-profile.sh` (manual reproduction of
+  the working `--profile`-based dual-stack flow), and
+  `boot-capture.sh` (instrumented per-service probe that maps the
+  cold-boot wedge timeline; every probe wrapped in `/usr/bin/timeout`
+  so it cannot hang). The full root-cause writeup for the boot wedge
+  is at `/root/wwan-diag/boot-wedge-investigation.md`. Feel free to
+  delete the older scripts once this is stable; they are not
+  load-bearing.
+
+## Acknowledgements
+
+`gotgot04` on the OpenWrt forum did the original triage of FS#1363
+against this exact device (DWR-921 C3 / BM806U-E1), and the comment
+trail on that thread saved us probably another day of guessing.
@@ -3,6 +3,8 @@ apiVersion: storage.k8s.io/v1
 kind: StorageClass
 metadata:
  name: ssd-lvmpv
+  annotations:
+    storageclass.kubernetes.io/is-default-class: "true"
 parameters:
  storage: "lvm"
  volgroup: "openebs-ssd"
@@ -18,7 +18,7 @@ spec:
  chart:
    spec:
      chart: cert-manager-webhook-ovh
-      version: 0.9.5
+      version: 0.9.13
      sourceRef:
        kind: HelmRepository
        name: cert-manager-webhook-ovh
@@ -23,7 +23,7 @@ spec:
  chart:
    spec:
      chart: cert-manager
-      version: v1.20.1
+      version: v1.20.2
      sourceRef:
        kind: HelmRepository
        name: cert-manager
@@ -23,7 +23,7 @@ spec:
  chart:
    spec:
      chart: cilium
-      version: 1.19.2
+      version: 1.19.5
      sourceRef:
        kind: HelmRepository
        name: cilium
@@ -23,7 +23,7 @@ spec:
  chart:
    spec:
      chart: cloudnative-pg
-      version: 0.27.1
+      version: 0.28.3
      sourceRef:
        kind: HelmRepository
        name: cnpg
@@ -110,7 +110,7 @@ spec:
        kubernetes.io/os: linux
      containers:
        - name: coredns
-          image: registry.k8s.io/coredns/coredns:v1.14.2
+          image: registry.k8s.io/coredns/coredns:v1.14.4
          imagePullPolicy: IfNotPresent
          args: ["-conf", "/etc/coredns/Corefile"]
          ports:
@@ -97,7 +97,7 @@ spec:
          env:
            - name: GOMEMLIMIT
              value: 161MiB
-          image: registry.k8s.io/coredns/coredns:v1.14.2
+          image: registry.k8s.io/coredns/coredns:v1.14.4
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 5
--- a/Show More
+++ b/Show More