commit b7a47745ead0647afda4af87658d205d0d5f2435 Author: Lumpiasty Date: Sun Feb 2 22:35:43 2025 +0100 Init basic cluster diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2803b81 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +secrets.yaml +talos/generated \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..b8513ce --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "ansible/roles/ansible-openwrt"] + path = ansible/roles/ansible-openwrt + url = https://github.com/gekmihesg/ansible-openwrt.git diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..7c6bf15 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,3 @@ +{ + "recommendations": ["arrterian.nix-env-selector", "jnoortheen.nix-ide"] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..7469ead --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,12 @@ +{ + "nixEnvSelector.nixFile": "${workspaceFolder}/shell.nix", + "terminal.integrated.profiles.linux": { + "Nix Shell": { + "path": "nix", + "args": ["develop"], + "icon": "terminal-linux" + } + }, + "terminal.integrated.defaultProfile.linux": "Nix Shell", + "ansible.python.interpreterPath": "/bin/python" +} diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..478f08e --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +install-router: + ansible-playbook ansible/playbook.yml -i ansible/hosts + +gen-talos-config: + mkdir -p talos/generated + talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --config-patch @talos/patches/zoma-dibaiyin.patch --output-types controlplane -o talos/generated/zoma-dibaiyin.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443 + talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --output-types worker -o talos/generated/worker.yaml homelab 
https://kube-api.homelab.lumpiasty.xyz:6443 + talosctl gen config --with-secrets secrets.yaml --output-types talosconfig -o talos/generated/talosconfig homelab https://kube-api.homelab.lumpiasty.xyz:6443 + talosctl config endpoint kube-api.homelab.lumpiasty.xyz + +apply-talos-config: + talosctl -n zoma-dibaiyin apply-config -f talos/generated/zoma-dibaiyin.yaml diff --git a/README.md b/README.md new file mode 100644 index 0000000..fa047e8 --- /dev/null +++ b/README.md @@ -0,0 +1,106 @@ +# Homelab + +## Goals + +Wanting to set up homelab kubernetes cluster. + +### Software + +1. Running applications + 1. NAS, backups, security recorder + 2. Online presence, website, email, communicators (ts3, matrix?) + 3. Git server, container registry + 4. Environment to deploy my own apps + 5. Some LLM server, apps for my own use + 6. Public services like Tor, mirrors of linux distros etc. + 7. [Some frontends](https://libredirect.github.io/) + 8. [Awesome-Selfhosted](https://github.com/awesome-selfhosted/awesome-selfhosted), [Awesome Sysadmin](https://github.com/awesome-foss/awesome-sysadmin) +2. Managing them hopefully using GitOps + 1. FluxCD, Argo etc. + 2. State of cluster in git, all apps version pinned + 3. Some bot to inform about updates? +3. It's a home**lab** + 1. Should be open to experimenting + 2. Avoiding vendor lock-in, changing my mind shouldn't block me for too long + 3. Backups of important data in easy to access format + 4. Expecting downtime, no critical workloads + 5. Trying to keep it reasonably up anyways + +### Infrastructure + +1. Using commodity hardware +2. Reasonably scalable +3. Preferably mobile workloads, software should be a bit more flexible than me moving disks and data +4. Replication is overkill for most data +5. Preferably dynamically configured network + 1. BGP with OpenWRT router + 2. Dynamically allocated host subnets + 3. Load-balancing (MetalLB?), ECMP on router + 4. Static IP configurations on nodes +6. IPv6 native, IPv4 accessible + 1. 
With IPv6 we have a whole block routed to us, which gives us control over address routing and usage + 2. Which allows us to expose services directly to the internet without complex router config + 3. Which allows us to use e.g. ExternalDNS to autoconfigure domain names for LB + 4. But the majority of the world still runs IPv4, which should be supported for public services + 5. Exposing an IPv4 service may require additional reconfiguration of the router, port forwarding, manual domain setup, or a controller doing this some day in the future + 6. One public IPv4 address probably means extensive use of rule-based ingress controllers + 7. IPv6 internet from pods should not be NATed + 8. IPv4 internet from pods should be NATed by router + +### Current implementation idea + +1. Cluster server nodes running Talos +2. OpenWRT router + 1. VLAN / virtual interface, for cluster + 2. Configuring using Ansible + 3. Peering with cluster using BGP + 4. Load-balancing using ECMP +3. Cluster networking + 1. Cilium CNI + 2. Native routing, no encapsulation or overlay + 3. Using Cilium's network policies for firewall needs + 4. IPv6 address pool + 1. Nodes: 2001:470:61a3:100::/64 + 2. Pods: 2001:470:61a3:200::/64 + 3. Services: 2001:470:61a3:300::/112 + 4. Load balancer: 2001:470:61a3:400::/112 + 5. IPv4 address pool + 1. Nodes: 192.168.1.32/27 + 2. Pods: 10.42.0.0/16 + 3. Services: 10.43.0.0/16 + 4. Load balancer: 10.44.0.0/16 +4. Storage + 1. OS is installed on dedicated disk + 2. Mayastor managing all data disks + 1. DiskPool for each data disk in cluster, labelled by type SSD or HDD + 2. Creating StorageClass for each topology need (type, whether to replicate, on which node etc.) + +## Working with repo + +The repo is preconfigured for use with Nix and VS Code. + +Install Nix; VS Code should then pick up the settings and launch terminals in `nix develop` with all needed utils. + +## Bootstrapping cluster + +1. Configure OpenWRT, create dedicated interface for connecting server + 1. Set up node subnet, routing + 2. 
Create static host entry `kube-api.homelab.lumpiasty.xyz` pointing at the IPv6 address of the first node +2. Connect server +3. Grab Talos ISO, dd it to a USB stick +4. Boot it and, using the keyboard, set up a static IPv6 address from the node subnet; the node should become reachable from your PC +5. `talosctl gen config homelab https://kube-api.homelab.lumpiasty.xyz:6443` +6. Generate secrets `talosctl gen secrets`, **backup, keep `secrets.yaml` safe** +7. Generate config files `make gen-talos-config` +8. Apply config to first node `talosctl apply-config --insecure -n 2001:470:61a3:100::2 -f talos/generated/zoma-dibaiyin.yaml` +9. Wait for reboot then `talosctl bootstrap --talosconfig=talos/generated/talosconfig -n 2001:470:61a3:100::2` +10. Set up router and CNI + +## Updating Talos config + +Update patches and re-generate and apply configs. + +``` +make gen-talos-config +make apply-talos-config +``` diff --git a/ansible/hosts b/ansible/hosts new file mode 100644 index 0000000..3a5e754 --- /dev/null +++ b/ansible/hosts @@ -0,0 +1,2 @@ +[openwrt] +2001:470:61a3:100:ffff:ffff:ffff:ffff ansible_scp_extra_args="-O" \ No newline at end of file diff --git a/ansible/playbook.yml b/ansible/playbook.yml new file mode 100644 index 0000000..1e42071 --- /dev/null +++ b/ansible/playbook.yml @@ -0,0 +1,6 @@ +- name: Configure router + hosts: openwrt + remote_user: root + roles: + - ansible-openwrt + - router diff --git a/ansible/roles/router/files/bird.conf b/ansible/roles/router/files/bird.conf new file mode 100644 index 0000000..7dedb10 --- /dev/null +++ b/ansible/roles/router/files/bird.conf @@ -0,0 +1,54 @@ +# Would never work without this awesome blogpost +# https://farcaller.net/2024/making-cilium-bgp-work-with-ipv6/ + +log "/tmp/bird.log" all; +log syslog all; + +#Router ID +router id 192.168.1.1; + +protocol kernel kernel4 { + learn; + scan time 10; + merge paths yes; + ipv4 { + # Importing only default route, we're their default gateway so they dont need rest + import filter { if net = 0.0.0.0/0 then { igp_metric = 100; accept; } reject; }; + export all; + }; 
+} + +protocol kernel kernel6 { + learn; + scan time 10; + merge paths yes; + ipv6 { + import filter { if net = ::/0 then { igp_metric = 100; accept; } reject; }; + export all; + }; +} + +protocol device { + scan time 10; +} + +protocol direct { + interface "*"; +} + +protocol bgp homelab { + debug { events }; + passive; + direct; + local 2001:470:61a3:100:ffff:ffff:ffff:ffff as 65000; + neighbor range 2001:470:61a3:100::/64 as 65000; + ipv4 { + extended next hop yes; + import all; + export all; + }; + ipv6 { + import all; + export all; + }; +} diff --git a/ansible/roles/router/handlers/main.yml b/ansible/roles/router/handlers/main.yml new file mode 100644 index 0000000..7607c3a --- /dev/null +++ b/ansible/roles/router/handlers/main.yml @@ -0,0 +1,5 @@ +- name: Reload bird + service: + name: bird + state: restarted + enabled: true diff --git a/ansible/roles/router/tasks/main.yml b/ansible/roles/router/tasks/main.yml new file mode 100644 index 0000000..0c9ba2a --- /dev/null +++ b/ansible/roles/router/tasks/main.yml @@ -0,0 +1,16 @@ +--- +- name: Install bird2 + opkg: + name: "{{ item }}" + state: present + # Workaround for opkg module not handling multiple names at once well + loop: + - bird2 + - bird2c + +- name: Set up bird.conf + ansible.builtin.copy: + src: bird.conf + dest: /etc/bird.conf + mode: "644" + notify: Reload bird diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..5e0ca32 --- /dev/null +++ b/flake.lock @@ -0,0 +1,42 @@ +{ + "nodes": { + "flake-compat": { + "locked": { + "lastModified": 1733328505, + "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=", + "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec", + "revCount": 69, + "type": "tarball", + "url": "https://api.flakehub.com/f/pinned/edolstra/flake-compat/1.1.0/01948eb7-9cba-704f-bbf3-3fa956735b52/source.tar.gz" + }, + "original": { + "type": "tarball", + "url": "https://flakehub.com/f/edolstra/flake-compat/1.1.0.tar.gz" + } + }, + "nixpkgs": { + "locked": { + 
"lastModified": 1738142207, + "narHash": "sha256-NGqpVVxNAHwIicXpgaVqJEJWeyqzoQJ9oc8lnK9+WC4=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "9d3ae807ebd2981d593cddd0080856873139aa40", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-compat": "flake-compat", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..3878868 --- /dev/null +++ b/flake.nix @@ -0,0 +1,35 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + + # Not consumed by the flake outputs; pinned here only so flake.lock tracks flake-compat, which shell.nix reads + flake-compat.url = "https://flakehub.com/f/edolstra/flake-compat/1.1.0.tar.gz"; + }; + + outputs = { self, nixpkgs, ... }: let + system = "x86_64-linux"; + in { + devShells."${system}".default = + let + pkgs = import nixpkgs { + inherit system; + }; + in + pkgs.mkShell { + packages = with pkgs; [ + vim gnumake + talosctl cilium-cli + kubectl kubectx k9s kubernetes-helm + ansible + ]; + + shellHook = '' + # Get completions working + source ${pkgs.bash-completion}/share/bash-completion/bash_completion + + export TALOSCONFIG=$(pwd)/talos/generated/talosconfig + export EDITOR=vim + ''; + }; + }; +} \ No newline at end of file diff --git a/infra/cilium/Makefile b/infra/cilium/Makefile new file mode 100644 index 0000000..b87ab12 --- /dev/null +++ b/infra/cilium/Makefile @@ -0,0 +1,5 @@ +install: + helm repo add cilium https://helm.cilium.io/ + helm repo update cilium + helm upgrade --install -n kube-system cilium cilium/cilium --version 1.16.6 -f values.yml + kubectl apply -f loadbalancer-ippool.yml -f bgp-cluster-config.yml diff --git a/infra/cilium/bgp-cluster-config.yml b/infra/cilium/bgp-cluster-config.yml new file mode 100644 index 0000000..355512d --- /dev/null +++ b/infra/cilium/bgp-cluster-config.yml @@ -0,0 +1,62 @@ +apiVersion: cilium.io/v2alpha1 
+kind: CiliumBGPClusterConfig +metadata: + name: router +spec: + bgpInstances: + - name: "main" + localASN: 65000 + peers: + - name: barracuda + peerASN: 65000 + peerAddress: 2001:470:61a3:100:ffff:ffff:ffff:ffff + peerConfigRef: + name: cilium-peer +--- +apiVersion: cilium.io/v2alpha1 +kind: CiliumBGPPeerConfig +metadata: + name: cilium-peer +spec: + timers: + keepAliveTimeSeconds: 3 + holdTimeSeconds: 9 + connectRetryTimeSeconds: 12 + gracefulRestart: + enabled: true + restartTimeSeconds: 60 + families: + - afi: ipv4 + safi: unicast + advertisements: + matchLabels: + advertise: bgp + - afi: ipv6 + safi: unicast + advertisements: + matchLabels: + advertise: bgp +--- +apiVersion: cilium.io/v2alpha1 +kind: CiliumBGPAdvertisement +metadata: + name: bgp-advertisements + labels: + advertise: bgp +spec: + advertisements: + - advertisementType: "CiliumPodIPPool" + # Wanting to advertise all pools + # https://docs.cilium.io/en/v1.16/network/bgp-control-plane/bgp-control-plane-v2/#multipool-ipam + selector: + matchExpressions: + - { key: somekey, operator: NotIn, values: ["never-used-value"] } + - advertisementType: Service + service: + addresses: + # Not advertising ClusterIP - kubeproxyreplacement should? 
translate them directly to pod ips + # Not advertising ExternalIP - they should be reachable via static config + - LoadBalancerIP + selector: + matchExpressions: + - { key: somekey, operator: NotIn, values: ["never-used-value"] } diff --git a/infra/cilium/loadbalancer-ippool.yml b/infra/cilium/loadbalancer-ippool.yml new file mode 100644 index 0000000..b06b8c0 --- /dev/null +++ b/infra/cilium/loadbalancer-ippool.yml @@ -0,0 +1,8 @@ +apiVersion: "cilium.io/v2alpha1" +kind: CiliumLoadBalancerIPPool +metadata: + name: "blue-pool" +spec: + blocks: + - cidr: 10.44.0.0/16 + - cidr: 2001:470:61a3:400::/112 diff --git a/infra/cilium/values.yml b/infra/cilium/values.yml new file mode 100644 index 0000000..78e35dc --- /dev/null +++ b/infra/cilium/values.yml @@ -0,0 +1,59 @@ +# Talos specific +# https://www.talos.dev/v1.9/kubernetes-guides/network/deploying-cilium/ +kubeProxyReplacement: true + +securityContext: + capabilities: + ciliumAgent: + - CHOWN + - KILL + - NET_ADMIN + - NET_RAW + - IPC_LOCK + - SYS_ADMIN + - SYS_RESOURCE + - DAC_OVERRIDE + - FOWNER + - SETGID + - SETUID + cleanCiliumState: + - NET_ADMIN + - SYS_ADMIN + - SYS_RESOURCE + +cgroup: + autoMount: + enabled: false + hostRoot: /sys/fs/cgroup + +k8sServiceHost: localhost +k8sServicePort: 7445 + +# Homelab +routingMode: native +ipv6: + enabled: true + +# Enable managing of IP Pools via CRD +ipam: + mode: multi-pool + operator: + autoCreateCiliumPodIPPools: + default: + # Keep in sync with podSubnets in talos/patches/controlplane.patch. NOTE(review): the IPv6 pool below is a /104 inside that /64 - confirm this is intentional + ipv4: + cidrs: + - 10.42.0.0/16 + maskSize: 24 + ipv6: + cidrs: + - 2001:470:61a3:200::/104 + maskSize: 120 + +# Disable masquerading in Cilium: IPv4 NAT is done at router level, IPv6 is not NATed at all +enableIPv4Masquerade: false +enableIPv6Masquerade: false + +# Enable the BGP control plane so routes are announced to the router via BGP +bgpControlPlane: + enabled: true diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..fccd2dc --- /dev/null +++ b/shell.nix @@ -0,0 +1,15 @@ +# Needed for Nix Environment Selector +# https://github.com/edolstra/flake-compat/ 
+(import + ( + let + lock = builtins.fromJSON (builtins.readFile ./flake.lock); + nodeName = lock.nodes.root.inputs.flake-compat; + in + fetchTarball { + url = lock.nodes.${nodeName}.locked.url; + sha256 = lock.nodes.${nodeName}.locked.narHash; + } + ) + { src = ./.; } +).shellNix \ No newline at end of file diff --git a/talos/patches/controlplane.patch b/talos/patches/controlplane.patch new file mode 100644 index 0000000..0904458 --- /dev/null +++ b/talos/patches/controlplane.patch @@ -0,0 +1,34 @@ +machine: + certSANs: + - kube-api.homelab.lumpiasty.xyz + + network: + nameservers: + - 2001:470:61a3:100:ffff:ffff:ffff:ffff + - 192.168.1.1 + + searchDomains: + - homelab-infra.lumpiasty.xyz + + features: + hostDNS: + forwardKubeDNSToHost: false + +cluster: + network: + # Likely redundant: Cilium runs in multi-pool IPAM mode (infra/cilium/values.yml) and manages pod pools via its own CRDs + podSubnets: + - 10.42.0.0/16 + - 2001:470:61a3:200::/64 + # Less likely redundant + serviceSubnets: + - 10.43.0.0/16 + - 2001:470:61a3:300::/112 + # No built-in CNI: Cilium is installed manually via infra/cilium/Makefile + cni: + name: none + # kube-proxy is disabled: Cilium is deployed with kubeProxyReplacement: true + proxy: + disabled: true + # No budget for dedicated worker nodes, so control plane nodes also run regular workloads + allowSchedulingOnControlPlanes: true diff --git a/talos/patches/zoma-dibaiyin.patch b/talos/patches/zoma-dibaiyin.patch new file mode 100644 index 0000000..73b3620 --- /dev/null +++ b/talos/patches/zoma-dibaiyin.patch @@ -0,0 +1,17 @@ +machine: + network: + interfaces: + - interface: enp0s31f6 + addresses: + - 2001:470:61a3:100::2/64 + - 192.168.1.34/24 + routes: + - network: ::/0 + gateway: 2001:470:61a3:100:ffff:ffff:ffff:ffff + - network: 0.0.0.0/0 + gateway: 192.168.1.1 + mtu: 1500 + + install: + diskSelector: + wwid: t10.ATA Patriot Blast C26D07641DCF00006955